Merge branch 'int' into feat/chatbot

commit 478e139730
95 changed files with 12300 additions and 7373 deletions

.gitignore (vendored, 1 changed line)
@@ -168,3 +168,4 @@ cython_debug/
 gwserver/_database*
 gwserver/results/*
 *.log.*
+test-chat
env_dev.env (12 changed lines)

@@ -64,12 +64,12 @@ Connector_AiAnthropic_MODEL_NAME = claude-3-5-sonnet-20241022
 Connector_AiAnthropic_TEMPERATURE = 0.2
 Connector_AiAnthropic_MAX_TOKENS = 2000
 
-# LangDoc configuration
-Connector_AiLangdoc_API_URL = https://api.langdock.com/v1/chat/completions
-Connector_AiLangdoc_API_SECRET = sk-9KaNH1FfEx7SkTijsFpXeTIc9_xOmoo7e0hW6SqrYavFq_bgjcULa7PXp3kWQpp4gfk8-U0B4L91CP6YpAJxZg
-Connector_AiLangdoc_MODEL_NAME = gpt-4o
-Connector_AiLangdoc_TEMPERATURE = 0.2
-Connector_AiLangdoc_MAX_TOKENS = 2000
+# Perplexity AI configuration
+Connector_AiPerplexity_API_URL = https://api.perplexity.ai/chat/completions
+Connector_AiPerplexity_API_SECRET = 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
+Connector_AiPerplexity_MODEL_NAME = sonar
+Connector_AiPerplexity_TEMPERATURE = 0.2
+Connector_AiPerplexity_MAX_TOKENS = 2000
 
 # Agent Mail configuration
 Service_MSFT_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
env_int.env (12 changed lines)

@@ -64,12 +64,12 @@ Connector_AiAnthropic_MODEL_NAME = claude-3-5-sonnet-20241022
 Connector_AiAnthropic_TEMPERATURE = 0.2
 Connector_AiAnthropic_MAX_TOKENS = 2000
 
-# LangDoc configuration
-Connector_AiLangdoc_API_URL = https://api.langdock.com/v1/chat/completions
-Connector_AiLangdoc_API_SECRET = sk-9KaNH1FfEx7SkTijsFpXeTIc9_xOmoo7e0hW6SqrYavFq_bgjcULa7PXp3kWQpp4gfk8-U0B4L91CP6YpAJxZg
-Connector_AiLangdoc_MODEL_NAME = gpt-4o
-Connector_AiLangdoc_TEMPERATURE = 0.2
-Connector_AiLangdoc_MAX_TOKENS = 2000
+# Perplexity AI configuration
+Connector_AiPerplexity_API_URL = https://api.perplexity.ai/chat/completions
+Connector_AiPerplexity_API_SECRET = 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
+Connector_AiPerplexity_MODEL_NAME = sonar
+Connector_AiPerplexity_TEMPERATURE = 0.2
+Connector_AiPerplexity_MAX_TOKENS = 2000
 
 # Agent Mail configuration
 Service_MSFT_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
env_prod.env (12 changed lines)

@@ -64,12 +64,12 @@ Connector_AiAnthropic_MODEL_NAME = claude-3-5-sonnet-20241022
 Connector_AiAnthropic_TEMPERATURE = 0.2
 Connector_AiAnthropic_MAX_TOKENS = 2000
 
-# LangDoc configuration
-Connector_AiLangdoc_API_URL = https://api.langdock.com/v1/chat/completions
-Connector_AiLangdoc_API_SECRET = sk-9KaNH1FfEx7SkTijsFpXeTIc9_xOmoo7e0hW6SqrYavFq_bgjcULa7PXp3kWQpp4gfk8-U0B4L91CP6YpAJxZg
-Connector_AiLangdoc_MODEL_NAME = gpt-4o
-Connector_AiLangdoc_TEMPERATURE = 0.2
-Connector_AiLangdoc_MAX_TOKENS = 2000
+# Perplexity AI configuration
+Connector_AiPerplexity_API_URL = https://api.perplexity.ai/chat/completions
+Connector_AiPerplexity_API_SECRET = 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
+Connector_AiPerplexity_MODEL_NAME = sonar
+Connector_AiPerplexity_TEMPERATURE = 0.2
+Connector_AiPerplexity_MAX_TOKENS = 2000
 
 # Agent Mail configuration
 Service_MSFT_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
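Note: all three environment files change identically; the LangDoc block is dropped and a Perplexity block takes its place, matching the connector swap in the code below. A minimal sketch of how a connector would read these settings, mirroring the loadConfigData pattern used by the connectors in this commit (reading via os.environ is an assumption; the project actually goes through APP_CONFIG):

    import os

    def load_perplexity_config() -> dict:
        # Numeric values arrive as strings and must be cast explicitly,
        # exactly as the connectors' loadConfigData helpers do.
        return {
            "apiKey": os.environ.get("Connector_AiPerplexity_API_SECRET"),
            "apiUrl": os.environ.get("Connector_AiPerplexity_API_URL"),
            "modelName": os.environ.get("Connector_AiPerplexity_MODEL_NAME"),
            "temperature": float(os.environ.get("Connector_AiPerplexity_TEMPERATURE", "0.2")),
            "maxTokens": int(os.environ.get("Connector_AiPerplexity_MAX_TOKENS", "2000")),
        }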
@@ -62,13 +62,52 @@ class AiAnthropic:
             if maxTokens is None:
                 maxTokens = self.config.get("maxTokens", 2000)
 
+            # Transform OpenAI-style messages to Anthropic format:
+            # - Move any 'system' role content to top-level 'system'
+            # - Keep only 'user'/'assistant' messages in the list
+            system_contents: List[str] = []
+            converted_messages: List[Dict[str, Any]] = []
+            for m in messages:
+                role = m.get("role")
+                content = m.get("content", "")
+                if role == "system":
+                    # Collect system content; Anthropic expects top-level 'system'
+                    if isinstance(content, list):
+                        # Join text parts if provided as blocks
+                        joined = "\n\n".join(
+                            [
+                                (part.get("text") if isinstance(part, dict) else str(part))
+                                for part in content
+                            ]
+                        )
+                        system_contents.append(joined)
+                    else:
+                        system_contents.append(str(content))
+                    continue
+                # For Anthropic, content can be a string; pass through strings, collapse blocks
+                if isinstance(content, list):
+                    # Collapse to text if blocks are provided
+                    collapsed = "\n\n".join(
+                        [
+                            (part.get("text") if isinstance(part, dict) else str(part))
+                            for part in content
+                        ]
+                    )
+                    converted_messages.append({"role": role, "content": collapsed})
+                else:
+                    converted_messages.append({"role": role, "content": content})
+
+            system_prompt = "\n\n".join([s for s in system_contents if s]) if system_contents else None
+
             # Create Anthropic API payload
-            payload = {
+            payload: Dict[str, Any] = {
                 "model": self.modelName,
-                "messages": messages,
+                "messages": converted_messages,
                 "temperature": temperature,
-                "max_tokens": maxTokens
+                "max_tokens": maxTokens,
             }
+            if system_prompt:
+                payload["system"] = system_prompt
 
             response = await self.httpClient.post(
                 self.apiUrl,

@@ -174,8 +213,8 @@ class AiAnthropic:
                 }
             ]
 
-            # Use the existing callApi function with the Vision model
-            response = await self.callApi(messages)
+            # Use the existing callAiBasic function with the Vision model
+            response = await self.callAiBasic(messages)
 
             # Extract and return content
             return response["choices"][0]["message"]["content"]
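A small self-contained sketch of what the new message transformation does: an OpenAI-style list containing a system message becomes user/assistant messages plus a top-level system prompt, which is the shape Anthropic's Messages API expects. The sample data here is hypothetical:

    # Hypothetical input in OpenAI format
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": [{"type": "text", "text": "Hello"}]},
    ]

    system_contents, converted_messages = [], []
    for m in messages:
        role, content = m.get("role"), m.get("content", "")
        if role == "system":
            system_contents.append(str(content))  # block lists would be joined with "\n\n"
            continue
        if isinstance(content, list):  # collapse content blocks to plain text
            content = "\n\n".join(
                part.get("text") if isinstance(part, dict) else str(part)
                for part in content
            )
        converted_messages.append({"role": role, "content": content})

    payload = {"messages": converted_messages}
    if system_contents:
        payload["system"] = "\n\n".join(system_contents)

    # payload == {"messages": [{"role": "user", "content": "Hello"}],
    #             "system": "You are a helpful assistant."}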
Deleted file (406 lines): the AiLangdoc connector.

@@ -1,406 +0,0 @@
import logging
import httpx
import asyncio
import re
from typing import Dict, Any, List, Union, Optional
from fastapi import HTTPException
from modules.shared.configuration import APP_CONFIG

# Configure logger
logger = logging.getLogger(__name__)

def loadConfigData():
    """Load configuration data for LangDoc connector"""
    return {
        "apiKey": APP_CONFIG.get('Connector_AiLangdoc_API_SECRET'),
        "apiUrl": APP_CONFIG.get('Connector_AiLangdoc_API_URL'),
        "modelName": APP_CONFIG.get('Connector_AiLangdoc_MODEL_NAME'),
        "temperature": float(APP_CONFIG.get('Connector_AiLangdoc_TEMPERATURE')),
        "maxTokens": int(APP_CONFIG.get('Connector_AiLangdoc_MAX_TOKENS'))
    }

class AiLangdoc:
    """Connector for communication with the LangDoc API (OpenAI-compatible)."""

    def __init__(self):
        # Load configuration
        self.config = loadConfigData()
        self.apiKey = self.config["apiKey"]
        self.apiUrl = self.config["apiUrl"]
        self.modelName = self.config["modelName"]

        # HttpClient for API calls
        self.httpClient = httpx.AsyncClient(
            timeout=120.0,  # Longer timeout for complex requests
            headers={
                "Authorization": f"Bearer {self.apiKey}",
                "Content-Type": "application/json"
            }
        )

        logger.info(f"LangDoc Connector initialized with model: {self.modelName}")

    async def callAiBasic(self, messages: List[Dict[str, Any]], temperature: float = None, maxTokens: int = None) -> str:
        """
        Calls the LangDoc API with the given messages.

        Args:
            messages: List of messages in OpenAI format (role, content)
            temperature: Temperature for response generation (0.0-1.0)
            maxTokens: Maximum number of tokens in the response

        Returns:
            The response from the LangDoc API

        Raises:
            HTTPException: For errors in API communication
        """
        try:
            # Use parameters from configuration if none were overridden
            if temperature is None:
                temperature = self.config.get("temperature", 0.2)

            if maxTokens is None:
                maxTokens = self.config.get("maxTokens", 2000)

            payload = {
                "model": self.modelName,
                "messages": messages,
                "temperature": temperature,
                "max_tokens": maxTokens
            }

            response = await self.httpClient.post(
                self.apiUrl,
                json=payload
            )

            if response.status_code != 200:
                error_detail = f"LangDoc API error: {response.status_code} - {response.text}"
                logger.error(error_detail)

                # Provide more specific error messages based on status code
                if response.status_code == 429:
                    error_message = "Rate limit exceeded. Please wait before making another request."
                elif response.status_code == 401:
                    error_message = "Invalid API key. Please check your LangDoc API configuration."
                elif response.status_code == 400:
                    error_message = f"Invalid request to LangDoc API: {response.text}"
                else:
                    error_message = f"LangDoc API error ({response.status_code}): {response.text}"

                raise HTTPException(status_code=500, detail=error_message)

            responseJson = response.json()
            content = responseJson["choices"][0]["message"]["content"]
            return content

        except Exception as e:
            logger.error(f"Error calling LangDoc API: {str(e)}")
            raise HTTPException(status_code=500, detail=f"Error calling LangDoc API: {str(e)}")

    async def callAiImage(self, prompt: str, imageData: Union[str, bytes], mimeType: str = None) -> str:
        """
        Analyzes an image using LangDoc's vision capabilities.

        Args:
            imageData: Either a file path (str) or image data (bytes)
            mimeType: The MIME type of the image (optional, only for binary data)
            prompt: The prompt for analysis

        Returns:
            The analysis response as text
        """
        try:
            # Distinguish between file path and binary data
            if isinstance(imageData, str):
                # It's a file path - import filehandling only when needed
                from modules import agentserviceFilemanager as fileHandler
                base64Data, autoMimeType = fileHandler.encodeFileToBase64(imageData)
                mimeType = mimeType or autoMimeType
            else:
                # It's binary data
                import base64
                base64Data = base64.b64encode(imageData).decode('utf-8')
                # MIME type must be specified for binary data
                if not mimeType:
                    # Fallback to generic image type
                    mimeType = "image/png"

            # Prepare the payload for the Vision API
            messages = [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:{mimeType};base64,{base64Data}"
                            }
                        }
                    ]
                }
            ]

            # Use the existing callAiBasic function
            response = await self.callAiBasic(messages)

            return response

        except Exception as e:
            logger.error(f"Error during image analysis: {str(e)}", exc_info=True)
            return f"[Error during image analysis: {str(e)}]"

    async def listModels(self) -> List[Dict[str, Any]]:
        """
        Lists available models from the LangDoc API.

        Returns:
            List of available models with their details
        """
        try:
            # LangDoc uses OpenAI-compatible endpoints
            modelsUrl = self.apiUrl.replace("/chat/completions", "/models")

            response = await self.httpClient.get(modelsUrl)

            if response.status_code != 200:
                error_detail = f"LangDoc API error listing models: {response.status_code} - {response.text}"
                logger.error(error_detail)
                raise HTTPException(status_code=500, detail=error_detail)

            responseJson = response.json()
            return responseJson.get("data", [])

        except Exception as e:
            logger.error(f"Error listing LangDoc models: {str(e)}")
            raise HTTPException(status_code=500, detail=f"Error listing LangDoc models: {str(e)}")

    async def getModelInfo(self, modelName: str = None) -> Dict[str, Any]:
        """
        Gets information about a specific model.

        Args:
            modelName: Name of the model to get info for (uses default if None)

        Returns:
            Model information dictionary
        """
        try:
            if modelName is None:
                modelName = self.modelName

            models = await self.listModels()

            for model in models:
                if model.get("id") == modelName:
                    return model

            raise HTTPException(status_code=404, detail=f"Model {modelName} not found")

        except Exception as e:
            logger.error(f"Error getting LangDoc model info: {str(e)}")
            raise HTTPException(status_code=500, detail=f"Error getting LangDoc model info: {str(e)}")

    async def generateImage(self, prompt: str, size: str = "1024x1024", quality: str = "standard", style: str = "vivid") -> Dict[str, Any]:
        """
        Generates an image using LangDoc's DALL-E 3 integration.

        Args:
            prompt: Text description of the image to generate
            size: Image size - "1024x1024", "1792x1024", or "1024x1792"
            quality: Image quality - "standard" or "hd"
            style: Image style - "vivid" or "natural"

        Returns:
            Dictionary containing the generated image data and metadata

        Raises:
            HTTPException: For errors in API communication
        """
        try:
            # Use OpenAI-compatible images endpoint
            imagesUrl = self.apiUrl.replace("/chat/completions", "/images/generations")

            payload = {
                "model": "dall-e-3",
                "prompt": prompt,
                "size": size,
                "quality": quality,
                "style": style,
                "n": 1
            }

            response = await self.httpClient.post(
                imagesUrl,
                json=payload
            )

            if response.status_code != 200:
                error_detail = f"LangDoc Image Generation API error: {response.status_code} - {response.text}"
                logger.error(error_detail)

                # Provide more specific error messages
                if response.status_code == 429:
                    error_message = "Rate limit exceeded for image generation. Please wait before making another request."
                elif response.status_code == 401:
                    error_message = "Invalid API key for image generation. Please check your LangDoc API configuration."
                elif response.status_code == 400:
                    error_message = f"Invalid request to LangDoc Image API: {response.text}"
                else:
                    error_message = f"LangDoc Image API error ({response.status_code}): {response.text}"

                raise HTTPException(status_code=500, detail=error_message)

            responseJson = response.json()

            # Extract image data
            imageData = responseJson.get("data", [])
            if not imageData:
                raise HTTPException(status_code=500, detail="No image data returned from LangDoc API")

            imageInfo = imageData[0]

            return {
                "success": True,
                "image_url": imageInfo.get("url"),
                "revised_prompt": imageInfo.get("revised_prompt"),
                "size": size,
                "quality": quality,
                "style": style,
                "model": "dall-e-3",
                "created": responseJson.get("created"),
                "raw_response": responseJson
            }

        except Exception as e:
            logger.error(f"Error generating image with LangDoc: {str(e)}")
            raise HTTPException(status_code=500, detail=f"Error generating image with LangDoc: {str(e)}")

    async def generateImageWithVariations(self, prompt: str, variations: int = 1, size: str = "1024x1024", quality: str = "standard", style: str = "vivid") -> List[Dict[str, Any]]:
        """
        Generates multiple image variations using LangDoc's DALL-E 3 integration.

        Args:
            prompt: Text description of the image to generate
            variations: Number of variations to generate (1-4)
            size: Image size - "1024x1024", "1792x1024", or "1024x1792"
            quality: Image quality - "standard" or "hd"
            style: Image style - "vivid" or "natural"

        Returns:
            List of dictionaries containing generated image data and metadata

        Raises:
            HTTPException: For errors in API communication
        """
        try:
            # Limit variations to reasonable number
            variations = min(max(variations, 1), 4)

            # Use OpenAI-compatible images endpoint
            imagesUrl = self.apiUrl.replace("/chat/completions", "/images/generations")

            results = []

            # Generate multiple variations by making multiple API calls
            for i in range(variations):
                # Add variation to prompt to get different results
                variationPrompt = f"{prompt} (variation {i+1})"

                payload = {
                    "model": "dall-e-3",
                    "prompt": variationPrompt,
                    "size": size,
                    "quality": quality,
                    "style": style,
                    "n": 1
                }

                response = await self.httpClient.post(
                    imagesUrl,
                    json=payload
                )

                if response.status_code != 200:
                    logger.warning(f"Failed to generate variation {i+1}: {response.status_code} - {response.text}")
                    continue

                responseJson = response.json()
                imageData = responseJson.get("data", [])

                if imageData:
                    imageInfo = imageData[0]
                    results.append({
                        "variation": i + 1,
                        "image_url": imageInfo.get("url"),
                        "revised_prompt": imageInfo.get("revised_prompt"),
                        "size": size,
                        "quality": quality,
                        "style": style,
                        "model": "dall-e-3",
                        "created": responseJson.get("created")
                    })

                # Add small delay between requests to avoid rate limiting
                if i < variations - 1:
                    await asyncio.sleep(1)

            return results

        except Exception as e:
            logger.error(f"Error generating image variations with LangDoc: {str(e)}")
            raise HTTPException(status_code=500, detail=f"Error generating image variations with LangDoc: {str(e)}")

    async def generateImageWithChat(self, prompt: str, size: str = "1024x1024", quality: str = "standard", style: str = "vivid") -> str:
        """
        Generates an image using LangDoc's chat interface with image generation tools.
        This method uses the chat completions endpoint with image generation capabilities.

        Args:
            prompt: Text description of the image to generate
            size: Image size - "1024x1024", "1792x1024", or "1024x1792"
            quality: Image quality - "standard" or "hd"
            style: Image style - "vivid" or "natural"

        Returns:
            Response text from the chat model (may include image references)

        Raises:
            HTTPException: For errors in API communication
        """
        try:
            # Create a prompt that requests image generation
            imagePrompt = f"Please generate an image with the following description: {prompt}. Size: {size}, Quality: {quality}, Style: {style}"

            messages = [
                {
                    "role": "user",
                    "content": imagePrompt
                }
            ]

            # Use the chat completions endpoint
            response = await self.callAiBasic(messages)

            return response

        except Exception as e:
            logger.error(f"Error generating image with chat: {str(e)}")
            raise HTTPException(status_code=500, detail=f"Error generating image with chat: {str(e)}")

    async def _testConnection(self) -> bool:
        """
        Tests the connection to the LangDoc API.

        Returns:
            True if connection is successful, False otherwise
        """
        try:
            # Try to list models as a simple connection test
            await self.listModels()
            return True
        except Exception as e:
            logger.error(f"LangDoc connection test failed: {str(e)}")
            return False
modules/connectors/connectorAiPerplexity.py (new file, 255 lines)

@@ -0,0 +1,255 @@
import logging
import httpx
import asyncio
from typing import Dict, Any, List, Union, Optional
from fastapi import HTTPException
from modules.shared.configuration import APP_CONFIG

# Configure logger
logger = logging.getLogger(__name__)

def loadConfigData():
    """Load configuration data for Perplexity connector"""
    return {
        "apiKey": APP_CONFIG.get('Connector_AiPerplexity_API_SECRET'),
        "apiUrl": APP_CONFIG.get('Connector_AiPerplexity_API_URL'),
        "modelName": APP_CONFIG.get('Connector_AiPerplexity_MODEL_NAME'),
        "temperature": float(APP_CONFIG.get('Connector_AiPerplexity_TEMPERATURE')),
        "maxTokens": int(APP_CONFIG.get('Connector_AiPerplexity_MAX_TOKENS'))
    }

class AiPerplexity:
    """Connector for communication with the Perplexity API."""

    def __init__(self):
        # Load configuration
        self.config = loadConfigData()
        self.apiKey = self.config["apiKey"]
        self.apiUrl = self.config["apiUrl"]
        self.modelName = self.config["modelName"]

        # HttpClient for API calls
        self.httpClient = httpx.AsyncClient(
            timeout=120.0,  # Longer timeout for complex requests
            headers={
                "Authorization": f"Bearer {self.apiKey}",
                "Content-Type": "application/json",
                "Accept": "application/json"
            }
        )

        logger.info(f"Perplexity Connector initialized with model: {self.modelName}")

    async def callAiBasic(self, messages: List[Dict[str, Any]], temperature: float = None, maxTokens: int = None) -> str:
        """
        Calls the Perplexity API with the given messages.

        Args:
            messages: List of messages in OpenAI format (role, content)
            temperature: Temperature for response generation (0.0-1.0)
            maxTokens: Maximum number of tokens in the response

        Returns:
            The response from the Perplexity API

        Raises:
            HTTPException: For errors in API communication
        """
        try:
            # Use parameters from configuration if none were overridden
            if temperature is None:
                temperature = self.config.get("temperature", 0.2)

            if maxTokens is None:
                maxTokens = self.config.get("maxTokens", 2000)

            payload = {
                "model": self.modelName,
                "messages": messages,
                "temperature": temperature,
                "max_tokens": maxTokens
            }

            response = await self.httpClient.post(
                self.apiUrl,
                json=payload
            )

            if response.status_code != 200:
                error_detail = f"Perplexity API error: {response.status_code} - {response.text}"
                logger.error(error_detail)

                # Provide more specific error messages based on status code
                if response.status_code == 429:
                    error_message = "Rate limit exceeded. Please wait before making another request."
                elif response.status_code == 401:
                    error_message = "Invalid API key. Please check your Perplexity API configuration."
                elif response.status_code == 400:
                    error_message = f"Invalid request to Perplexity API: {response.text}"
                else:
                    error_message = f"Perplexity API error ({response.status_code}): {response.text}"

                raise HTTPException(status_code=500, detail=error_message)

            responseJson = response.json()
            content = responseJson["choices"][0]["message"]["content"]
            return content

        except Exception as e:
            logger.error(f"Error calling Perplexity API: {str(e)}")
            raise HTTPException(status_code=500, detail=f"Error calling Perplexity API: {str(e)}")

    async def callAiWithWebSearch(self, query: str, temperature: float = None, maxTokens: int = None) -> str:
        """
        Calls Perplexity API with web search capabilities for research.

        Args:
            query: The research query or question
            temperature: Temperature for response generation (0.0-1.0)
            maxTokens: Maximum number of tokens in the response

        Returns:
            The response from Perplexity with web search context
        """
        try:
            # Use parameters from configuration if none were overridden
            if temperature is None:
                temperature = self.config.get("temperature", 0.2)

            if maxTokens is None:
                maxTokens = self.config.get("maxTokens", 2000)

            # For web search, we use the configured model name
            webSearchModel = self.modelName

            payload = {
                "model": webSearchModel,
                "messages": [
                    {
                        "role": "user",
                        "content": query
                    }
                ],
                "temperature": temperature,
                "max_tokens": maxTokens
            }

            response = await self.httpClient.post(
                self.apiUrl,
                json=payload
            )

            if response.status_code != 200:
                error_detail = f"Perplexity Web Search API error: {response.status_code} - {response.text}"
                logger.error(error_detail)

                if response.status_code == 429:
                    error_message = "Rate limit exceeded for web search. Please wait before making another request."
                elif response.status_code == 401:
                    error_message = "Invalid API key for web search. Please check your Perplexity API configuration."
                elif response.status_code == 400:
                    error_message = f"Invalid request to Perplexity Web Search API: {response.text}"
                else:
                    error_message = f"Perplexity Web Search API error ({response.status_code}): {response.text}"

                raise HTTPException(status_code=500, detail=error_message)

            responseJson = response.json()
            content = responseJson["choices"][0]["message"]["content"]
            return content

        except Exception as e:
            logger.error(f"Error calling Perplexity Web Search API: {str(e)}")
            raise HTTPException(status_code=500, detail=f"Error calling Perplexity Web Search API: {str(e)}")

    async def researchTopic(self, topic: str, depth: str = "basic") -> str:
        """
        Research a topic using Perplexity's web search capabilities.

        Args:
            topic: The topic to research
            depth: Research depth - "basic", "detailed", or "comprehensive"

        Returns:
            Comprehensive research results on the topic
        """
        try:
            # Create research prompts based on depth
            if depth == "basic":
                prompt = f"Provide a basic overview of: {topic}"
            elif depth == "detailed":
                prompt = f"Provide a detailed analysis of: {topic}. Include recent developments, key facts, and important information."
            else:  # comprehensive
                prompt = f"Provide a comprehensive research report on: {topic}. Include recent developments, key facts, statistics, expert opinions, and current trends."

            return await self.callAiWithWebSearch(prompt)

        except Exception as e:
            logger.error(f"Error researching topic: {str(e)}")
            raise HTTPException(status_code=500, detail=f"Error researching topic: {str(e)}")

    async def answerQuestion(self, question: str, context: str = None) -> str:
        """
        Answer a question using web search for current information.

        Args:
            question: The question to answer
            context: Optional context to provide

        Returns:
            Answer with web search context
        """
        try:
            if context:
                prompt = f"Context: {context}\n\nQuestion: {question}\n\nPlease provide a comprehensive answer using current information from the web."
            else:
                prompt = f"Question: {question}\n\nPlease provide a comprehensive answer using current information from the web."

            return await self.callAiWithWebSearch(prompt)

        except Exception as e:
            logger.error(f"Error answering question: {str(e)}")
            raise HTTPException(status_code=500, detail=f"Error answering question: {str(e)}")

    async def getCurrentNews(self, topic: str = None, limit: int = 5) -> str:
        """
        Get current news on a specific topic.

        Args:
            topic: The topic to get news about (optional)
            limit: Number of news items to retrieve

        Returns:
            Current news information
        """
        try:
            if topic:
                prompt = f"Get the latest news about {topic}. Provide {limit} recent news items with sources and dates."
            else:
                prompt = f"Get the latest news. Provide {limit} recent news items with sources and dates."

            return await self.callAiWithWebSearch(prompt)

        except Exception as e:
            logger.error(f"Error getting current news: {str(e)}")
            raise HTTPException(status_code=500, detail=f"Error getting current news: {str(e)}")

    async def _testConnection(self) -> bool:
        """
        Tests the connection to the Perplexity API.

        Returns:
            True if connection is successful, False otherwise
        """
        try:
            # Try a simple test message
            testMessages = [
                {"role": "user", "content": "Hello, please respond with just 'OK' to confirm the connection works."}
            ]

            response = await self.callAiBasic(testMessages)
            return response and len(response.strip()) > 0

        except Exception as e:
            logger.error(f"Perplexity connection test failed: {str(e)}")
            return False
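A hedged usage sketch for the new connector; it assumes the Connector_AiPerplexity_* settings above are present in APP_CONFIG before the class is instantiated:

    import asyncio

    from modules.connectors.connectorAiPerplexity import AiPerplexity

    async def main():
        connector = AiPerplexity()
        # Plain chat completion
        answer = await connector.callAiBasic(
            [{"role": "user", "content": "Summarize the Tavily extract API in one sentence."}]
        )
        print(answer)
        # Web-search-backed research helper
        report = await connector.researchTopic("retrieval-augmented generation", depth="detailed")
        print(report)

    asyncio.run(main())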
@@ -4,6 +4,7 @@
import logging
import asyncio
from dataclasses import dataclass
from typing import Optional
from tavily import AsyncTavilyClient
from modules.shared.configuration import APP_CONFIG
from modules.shared.timezoneUtils import get_utc_timestamp
@@ -29,6 +30,7 @@ logger = logging.getLogger(__name__)
 class WebSearchResult:
     title: str
     url: str
+    raw_content: Optional[str] = None
 
 @dataclass
 class WebCrawlResult:
@@ -83,7 +85,11 @@ class ConnectorWeb:
             return WebSearchActionResult(success=False, error=str(e))
 
         result_items = [
-            WebSearchResultItem(title=result.title, url=result.url)
+            WebSearchResultItem(
+                title=result.title,
+                url=result.url,
+                raw_content=getattr(result, 'raw_content', None)
+            )
             for result in raw_results
         ]
 
@@ -246,6 +252,15 @@ class ConnectorWeb:
         urls = [result.url for result in search_results]
         return await self._crawl(urls, extract_depth=extract_depth, format=format)
 
+    def _clean_url(self, url: str) -> str:
+        """Clean URL by removing extra text that might be appended."""
+        import re
+        # Extract just the URL part, removing any extra text after it
+        url_match = re.match(r'(https?://[^\s,]+)', url)
+        if url_match:
+            return url_match.group(1)
+        return url
+
     async def _search(
         self,
         query: str,
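The regex in _clean_url keeps everything from the scheme up to the first whitespace or comma, which is enough to strip trailing prose that a model or upstream tool may have appended to a URL. A standalone check of the same pattern:

    import re

    def clean_url(url: str) -> str:
        # Same pattern as ConnectorWeb._clean_url: keep the URL up to whitespace/comma
        match = re.match(r'(https?://[^\s,]+)', url)
        return match.group(1) if match else url

    assert clean_url("https://example.com/a, plus trailing notes") == "https://example.com/a"
    assert clean_url("not a url") == "not a url"  # non-matching input is returned unchanged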
@@ -289,7 +304,11 @@ class ConnectorWeb:
         response = await self.client.search(**kwargs)
 
         return [
-            WebSearchResult(title=result["title"], url=result["url"])
+            WebSearchResult(
+                title=result["title"],
+                url=self._clean_url(result["url"]),
+                raw_content=result.get("raw_content")
+            )
             for result in response["results"]
         ]
 
@@ -304,26 +323,53 @@
         retry_delay = self.crawl_retry_delay
         timeout = self.crawl_timeout
 
+        logger.debug(f"Starting crawl of {len(urls)} URLs: {urls}")
+        logger.debug(f"Crawl settings: extract_depth={extract_depth}, format={format}, timeout={timeout}s")
+
         for attempt in range(max_retries + 1):
             try:
                 logger.debug(f"Crawl attempt {attempt + 1}/{max_retries + 1}")
 
-                # Use asyncio.wait_for for timeout
+                # Build kwargs for extract
                 kwargs_extract: dict = {"urls": urls}
                 kwargs_extract["extract_depth"] = extract_depth or "advanced"
-                kwargs_extract["format"] = format or "text"
+                kwargs_extract["format"] = format or "markdown"  # Use markdown to get HTML structure
+
+                logger.debug(f"Sending request to Tavily with kwargs: {kwargs_extract}")
 
                 response = await asyncio.wait_for(
                     self.client.extract(**kwargs_extract),
                     timeout=timeout
                 )
 
-                return [
-                    WebCrawlResult(url=result["url"], content=result["raw_content"])
+                logger.debug(f"Tavily response received: {list(response.keys())}")
+
+                # Debug: Log what Tavily actually returns
+                if "results" in response and response["results"]:
+                    logger.debug(f"Tavily returned {len(response['results'])} results")
+                    logger.debug(f"First result keys: {list(response['results'][0].keys())}")
+                    logger.debug(f"First result has raw_content: {'raw_content' in response['results'][0]}")
+
+                    # Log each result
+                    for i, result in enumerate(response["results"]):
+                        logger.debug(f"Result {i+1}: URL={result.get('url', 'N/A')}, content_length={len(result.get('raw_content', result.get('content', '')))}")
+                else:
+                    logger.warning(f"Tavily returned no results in response: {response}")
+
+                results = [
+                    WebCrawlResult(
+                        url=result["url"],
+                        content=result.get("raw_content", result.get("content", ""))  # Try raw_content first, fallback to content
+                    )
                     for result in response["results"]
                 ]
 
+                logger.debug(f"Crawl successful: extracted {len(results)} results")
+                return results
+
             except asyncio.TimeoutError:
-                logger.warning(f"Crawl attempt {attempt + 1} timed out after {timeout} seconds")
+                logger.warning(f"Crawl attempt {attempt + 1} timed out after {timeout} seconds for URLs: {urls}")
                 if attempt < max_retries:
                     logger.info(f"Retrying in {retry_delay} seconds...")
                     await asyncio.sleep(retry_delay)
@@ -331,7 +377,22 @@
                     raise Exception(f"Crawl failed after {max_retries + 1} attempts due to timeout")
 
             except Exception as e:
-                logger.warning(f"Crawl attempt {attempt + 1} failed: {str(e)}")
+                logger.warning(f"Crawl attempt {attempt + 1} failed for URLs {urls}: {str(e)}")
+                logger.debug(f"Full error details: {type(e).__name__}: {str(e)}")
+
+                # Check if it's a validation error and log more details
+                if "validation" in str(e).lower():
+                    logger.debug(f"URL validation failed. Checking URL format:")
+                    for i, url in enumerate(urls):
+                        logger.debug(f"  URL {i+1}: '{url}' (length: {len(url)})")
+                        # Check for common URL issues
+                        if ' ' in url:
+                            logger.debug(f"  WARNING: URL contains spaces!")
+                        if not url.startswith(('http://', 'https://')):
+                            logger.debug(f"  WARNING: URL doesn't start with http/https!")
+                        if len(url) > 2000:
+                            logger.debug(f"  WARNING: URL is very long ({len(url)} chars)")
+
                 if attempt < max_retries:
                     logger.info(f"Retrying in {retry_delay} seconds...")
                     await asyncio.sleep(retry_delay)
@@ -384,6 +384,57 @@ class DatabaseConnector:
                     logger.info(
                         f"Created table '{table}' with columns from Pydantic model"
                     )
+                else:
+                    # Table exists: ensure all columns from model are present (simple additive migration)
+                    try:
+                        cursor.execute(
+                            """
+                            SELECT column_name FROM information_schema.columns
+                            WHERE LOWER(table_name) = LOWER(%s) AND table_schema = 'public'
+                            """,
+                            (table,),
+                        )
+                        existing_columns = {
+                            row["column_name"] for row in cursor.fetchall()
+                        }
+
+                        # Desired columns based on model
+                        model_fields = _get_model_fields(model_class)
+                        desired_columns = (
+                            set(["id"])
+                            | set(model_fields.keys())
+                            | {"_createdAt", "_modifiedAt", "_createdBy", "_modifiedBy"}
+                        )
+
+                        # Add missing columns
+                        for col in sorted(desired_columns - existing_columns):
+                            # Determine SQL type
+                            if col in ["id"]:
+                                continue  # primary key exists already
+                            sql_type = model_fields.get(col)
+                            if col in ["_createdAt"]:
+                                sql_type = "DOUBLE PRECISION"
+                            elif col in ["_modifiedAt"]:
+                                sql_type = "DOUBLE PRECISION"
+                            elif col in ["_createdBy", "_modifiedBy"]:
+                                sql_type = "VARCHAR(255)"
+                            if not sql_type:
+                                sql_type = "TEXT"
+                            try:
+                                cursor.execute(
+                                    f'ALTER TABLE "{table}" ADD COLUMN "{col}" {sql_type}'
+                                )
+                                logger.info(
+                                    f"Added missing column '{col}' ({sql_type}) to '{table}'"
+                                )
+                            except Exception as add_err:
+                                logger.warning(
+                                    f"Could not add column '{col}' to '{table}': {add_err}"
+                                )
+                    except Exception as ensure_err:
+                        logger.warning(
+                            f"Could not ensure columns for existing table '{table}': {ensure_err}"
+                        )
 
                 self.connection.commit()
                 return True
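For a model that gains one field, this additive migration boils down to a set difference and one ALTER TABLE per missing column. A minimal sketch of that arithmetic (the table and column names are hypothetical):

    # Hypothetical state: the table lags one column behind the Pydantic model
    existing_columns = {"id", "title", "_createdAt", "_modifiedAt", "_createdBy", "_modifiedBy"}
    model_fields = {"title": "TEXT", "summary": "TEXT"}

    desired_columns = (
        {"id"}
        | set(model_fields.keys())
        | {"_createdAt", "_modifiedAt", "_createdBy", "_modifiedBy"}
    )

    for col in sorted(desired_columns - existing_columns):
        sql_type = model_fields.get(col, "TEXT")
        print(f'ALTER TABLE "ChatMessage" ADD COLUMN "{col}" {sql_type}')
    # -> ALTER TABLE "ChatMessage" ADD COLUMN "summary" TEXT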
@@ -10,7 +10,6 @@ from . import datamodelWeb as web
 from . import datamodelUam as uam
 from . import datamodelSecurity as security
 from . import datamodelNeutralizer as neutralizer
-from . import datamodelWorkflow as workflow
 from . import datamodelChat as chat
 from . import datamodelFiles as files
 from . import datamodelVoice as voice
@@ -112,6 +112,11 @@ class AiCallOptions(BaseModel):
     safetyMargin: float = Field(default=0.1, ge=0.0, le=0.5, description="Safety margin for token limits (0.0-0.5)")
     modelCapabilities: Optional[List[str]] = Field(default=None, description="Required model capabilities for filtering")
 
+    # Model generation parameters
+    temperature: Optional[float] = Field(default=None, ge=0.0, le=2.0, description="Temperature for response generation (0.0-2.0, lower = more consistent)")
+    maxTokens: Optional[int] = Field(default=None, ge=1, le=32000, description="Maximum tokens in response")
+    maxParts: Optional[int] = Field(default=1000, ge=1, le=1000, description="Maximum number of continuation parts to fetch")
+
 
 class AiCallRequest(BaseModel):
     """Centralized AI call request payload for interface use."""
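A sketch of passing the new generation parameters; it assumes the AiCallOptions fields outside this hunk all carry defaults:

    # Hypothetical call options: lower temperature for deterministic output,
    # a tighter token budget, and the defaulted safety margin left as-is.
    options = AiCallOptions(temperature=0.2, maxTokens=1500)
    assert options.safetyMargin == 0.1
    assert options.maxParts == 1000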
@@ -169,7 +169,7 @@ register_model_labels(
 )
 
 
-class ExtractedContent(BaseModel, ModelMixin):
+class ChatContentExtracted(BaseModel, ModelMixin):
     id: str = Field(description="Reference to source ChatDocument")
     contents: List[ContentItem] = Field(
         default_factory=list, description="List of content items"
@@ -177,7 +177,7 @@ class ExtractedContent(BaseModel, ModelMixin):
 
 
 register_model_labels(
-    "ExtractedContent",
+    "ChatContentExtracted",
     {"en": "Extracted Content", "fr": "Contenu extrait"},
     {
         "id": {"en": "Object ID", "fr": "ID de l'objet"},
@@ -201,6 +201,9 @@ class ChatMessage(BaseModel, ModelMixin):
         None, description="Label for the set of documents"
     )
     message: Optional[str] = Field(None, description="Message content")
+    summary: Optional[str] = Field(
+        None, description="Short summary of this message for planning/history"
+    )
     role: str = Field(description="Role of the message sender")
     status: str = Field(description="Status of the message (first, step, last)")
     sequenceNr: int = Field(
@@ -244,6 +247,7 @@ register_model_labels(
         "documents": {"en": "Documents", "fr": "Documents"},
         "documentsLabel": {"en": "Documents Label", "fr": "Label des documents"},
         "message": {"en": "Message", "fr": "Message"},
+        "summary": {"en": "Summary", "fr": "Résumé"},
         "role": {"en": "Role", "fr": "Rôle"},
         "status": {"en": "Status", "fr": "Statut"},
         "sequenceNr": {"en": "Sequence Number", "fr": "Numéro de séquence"},
@@ -419,34 +423,6 @@ register_model_labels(
 )
 
 
-class WorkflowResult(BaseModel, ModelMixin):
-    status: str
-    completed_tasks: int
-    total_tasks: int
-    execution_time: float
-    final_results_count: int
-    error: Optional[str] = None
-    phase: Optional[str] = None
-
-
-register_model_labels(
-    "WorkflowResult",
-    {"en": "Workflow Result", "fr": "Résultat du workflow"},
-    {
-        "status": {"en": "Status", "fr": "Statut"},
-        "completed_tasks": {"en": "Completed Tasks", "fr": "Tâches terminées"},
-        "total_tasks": {"en": "Total Tasks", "fr": "Total des tâches"},
-        "execution_time": {"en": "Execution Time", "fr": "Temps d'exécution"},
-        "final_results_count": {
-            "en": "Final Results Count",
-            "fr": "Nombre de résultats finaux",
-        },
-        "error": {"en": "Error", "fr": "Erreur"},
-        "phase": {"en": "Phase", "fr": "Phase"},
-    },
-)
-
-
 class UserInputRequest(BaseModel, ModelMixin):
     prompt: str = Field(description="Prompt for the user")
     listFileId: List[str] = Field(default_factory=list, description="List of file IDs")
@@ -462,3 +438,519 @@ register_model_labels(
        "userLanguage": {"en": "User Language", "fr": "Langue de l'utilisateur"},
    },
)


class ActionDocument(BaseModel, ModelMixin):
    """Clear document structure for action results"""

    documentName: str = Field(description="Name of the document")
    documentData: Any = Field(description="Content/data of the document")
    mimeType: str = Field(description="MIME type of the document")


register_model_labels(
    "ActionDocument",
    {"en": "Action Document", "fr": "Document d'action"},
    {
        "documentName": {"en": "Document Name", "fr": "Nom du document"},
        "documentData": {"en": "Document Data", "fr": "Données du document"},
        "mimeType": {"en": "MIME Type", "fr": "Type MIME"},
    },
)


class ActionResult(BaseModel, ModelMixin):
    """Clean action result with documents as primary output

    IMPORTANT: Action methods should NOT set resultLabel in their return value.
    The resultLabel is managed by the action handler using the action's execResultLabel
    from the action plan. This ensures consistent document routing throughout the workflow.
    """

    success: bool = Field(description="Whether execution succeeded")
    error: Optional[str] = Field(None, description="Error message if failed")
    documents: List[ActionDocument] = Field(
        default_factory=list, description="Document outputs"
    )
    resultLabel: Optional[str] = Field(
        None,
        description="Label for document routing (set by action handler, not by action methods)",
    )

    @classmethod
    def isSuccess(cls, documents: List[ActionDocument] = None) -> "ActionResult":
        return cls(success=True, documents=documents or [])

    @classmethod
    def isFailure(
        cls, error: str, documents: List[ActionDocument] = None
    ) -> "ActionResult":
        return cls(success=False, documents=documents or [], error=error)


register_model_labels(
    "ActionResult",
    {"en": "Action Result", "fr": "Résultat de l'action"},
    {
        "success": {"en": "Success", "fr": "Succès"},
        "error": {"en": "Error", "fr": "Erreur"},
        "documents": {"en": "Documents", "fr": "Documents"},
        "resultLabel": {"en": "Result Label", "fr": "Étiquette du résultat"},
    },
)
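A short sketch of the usage the ActionResult docstring implies: action methods return documents only, and the handler assigns resultLabel afterwards (the names here are illustrative):

    doc = ActionDocument(
        documentName="search-results.md",
        documentData="# Results\n...",
        mimeType="text/markdown",
    )

    result = ActionResult.isSuccess([doc])      # action method: documents only
    result.resultLabel = "web_search_results"   # handler: applies execResultLabel

    failed = ActionResult.isFailure("upstream API returned 429")
    assert failed.success is False and failed.documents == []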
class ActionSelection(BaseModel, ModelMixin):
    method: str = Field(description="Method to execute (e.g., web, document, ai)")
    name: str = Field(
        description="Action name within the method (e.g., search, extract)"
    )


register_model_labels(
    "ActionSelection",
    {"en": "Action Selection", "fr": "Sélection d'action"},
    {
        "method": {"en": "Method", "fr": "Méthode"},
        "name": {"en": "Action Name", "fr": "Nom de l'action"},
    },
)


class ActionParameters(BaseModel, ModelMixin):
    parameters: Dict[str, Any] = Field(
        default_factory=dict, description="Parameters to execute the selected action"
    )


register_model_labels(
    "ActionParameters",
    {"en": "Action Parameters", "fr": "Paramètres d'action"},
    {
        "parameters": {"en": "Parameters", "fr": "Paramètres"},
    },
)


class ObservationPreview(BaseModel, ModelMixin):
    name: str = Field(description="Document name or URL label")
    mime: str = Field(description="MIME type or kind")
    snippet: str = Field(description="Short snippet or summary")


register_model_labels(
    "ObservationPreview",
    {"en": "Observation Preview", "fr": "Aperçu d'observation"},
    {
        "name": {"en": "Name", "fr": "Nom"},
        "mime": {"en": "MIME", "fr": "MIME"},
        "snippet": {"en": "Snippet", "fr": "Extrait"},
    },
)


class Observation(BaseModel, ModelMixin):
    success: bool = Field(description="Action execution success flag")
    resultLabel: str = Field(description="Deterministic label for produced documents")
    documentsCount: int = Field(description="Number of produced documents")
    previews: List[ObservationPreview] = Field(
        default_factory=list, description="Compact previews of outputs"
    )
    notes: List[str] = Field(
        default_factory=list, description="Short notes or key facts"
    )


register_model_labels(
    "Observation",
    {"en": "Observation", "fr": "Observation"},
    {
        "success": {"en": "Success", "fr": "Succès"},
        "resultLabel": {"en": "Result Label", "fr": "Étiquette du résultat"},
        "documentsCount": {"en": "Documents Count", "fr": "Nombre de documents"},
        "previews": {"en": "Previews", "fr": "Aperçus"},
        "notes": {"en": "Notes", "fr": "Notes"},
    },
)


class TaskStatus(str):
    PENDING = "pending"
    RUNNING = "running"
    COMPLETED = "completed"
    FAILED = "failed"
    CANCELLED = "cancelled"


register_model_labels(
    "TaskStatus",
    {"en": "Task Status", "fr": "Statut de la tâche"},
    {
        "PENDING": {"en": "Pending", "fr": "En attente"},
        "RUNNING": {"en": "Running", "fr": "En cours"},
        "COMPLETED": {"en": "Completed", "fr": "Terminé"},
        "FAILED": {"en": "Failed", "fr": "Échec"},
        "CANCELLED": {"en": "Cancelled", "fr": "Annulé"},
    },
)


class DocumentExchange(BaseModel, ModelMixin):
    documentsLabel: str = Field(description="Label for the set of documents")
    documents: List[str] = Field(
        default_factory=list, description="List of document references"
    )


register_model_labels(
    "DocumentExchange",
    {"en": "Document Exchange", "fr": "Échange de documents"},
    {
        "documentsLabel": {"en": "Documents Label", "fr": "Label des documents"},
        "documents": {"en": "Documents", "fr": "Documents"},
    },
)


class ActionItem(BaseModel, ModelMixin):
    id: str = Field(..., description="Action ID")
    execMethod: str = Field(..., description="Method to execute")
    execAction: str = Field(..., description="Action to perform")
    execParameters: Dict[str, Any] = Field(
        default_factory=dict, description="Action parameters"
    )
    execResultLabel: Optional[str] = Field(
        None, description="Label for the set of result documents"
    )
    expectedDocumentFormats: Optional[List[Dict[str, str]]] = Field(
        None, description="Expected document formats (optional)"
    )
    userMessage: Optional[str] = Field(
        None, description="User-friendly message in user's language"
    )
    status: TaskStatus = Field(default=TaskStatus.PENDING, description="Action status")
    error: Optional[str] = Field(None, description="Error message if action failed")
    retryCount: int = Field(default=0, description="Number of retries attempted")
    retryMax: int = Field(default=3, description="Maximum number of retries")
    processingTime: Optional[float] = Field(
        None, description="Processing time in seconds"
    )
    timestamp: float = Field(
        ..., description="When the action was executed (UTC timestamp in seconds)"
    )
    result: Optional[str] = Field(None, description="Result of the action")

    def setSuccess(self, result: str = None) -> None:
        """Set the action as successful with optional result"""
        self.status = TaskStatus.COMPLETED
        self.error = None
        if result is not None:
            self.result = result

    def setError(self, error_message: str) -> None:
        """Set the action as failed with error message"""
        self.status = TaskStatus.FAILED
        self.error = error_message


register_model_labels(
    "ActionItem",
    {"en": "Task Action", "fr": "Action de tâche"},
    {
        "id": {"en": "Action ID", "fr": "ID de l'action"},
        "execMethod": {"en": "Method", "fr": "Méthode"},
        "execAction": {"en": "Action", "fr": "Action"},
        "execParameters": {"en": "Parameters", "fr": "Paramètres"},
        "execResultLabel": {"en": "Result Label", "fr": "Label du résultat"},
        "expectedDocumentFormats": {
            "en": "Expected Document Formats",
            "fr": "Formats de documents attendus",
        },
        "userMessage": {"en": "User Message", "fr": "Message utilisateur"},
        "status": {"en": "Status", "fr": "Statut"},
        "error": {"en": "Error", "fr": "Erreur"},
        "retryCount": {"en": "Retry Count", "fr": "Nombre de tentatives"},
        "retryMax": {"en": "Max Retries", "fr": "Tentatives max"},
        "processingTime": {"en": "Processing Time", "fr": "Temps de traitement"},
        "timestamp": {"en": "Timestamp", "fr": "Horodatage"},
        "result": {"en": "Result", "fr": "Résultat"},
    },
)
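A sketch of an ActionItem lifecycle as the fields above suggest (the timestamp source here is an assumption; the codebase imports a get_utc_timestamp helper for this):

    import time

    action = ActionItem(
        id="act-001",
        execMethod="web",
        execAction="search",
        execParameters={"query": "site reliability"},
        execResultLabel="web_search_results",
        timestamp=time.time(),  # assumption; get_utc_timestamp() is the project's helper
    )

    action.setSuccess("3 documents produced")
    assert action.status == TaskStatus.COMPLETED and action.error is None

    action.setError("rate limited")
    assert action.status == TaskStatus.FAILED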
class TaskResult(BaseModel, ModelMixin):
|
||||
taskId: str = Field(..., description="Task ID")
|
||||
status: TaskStatus = Field(default=TaskStatus.PENDING, description="Task status")
|
||||
success: bool = Field(..., description="Whether the task was successful")
|
||||
feedback: Optional[str] = Field(None, description="Task feedback message")
|
||||
error: Optional[str] = Field(None, description="Error message if task failed")
|
||||
|
||||
|
||||
register_model_labels(
|
||||
"TaskResult",
|
||||
{"en": "Task Result", "fr": "Résultat de tâche"},
|
||||
{
|
||||
"taskId": {"en": "Task ID", "fr": "ID de la tâche"},
|
||||
"status": {"en": "Status", "fr": "Statut"},
|
||||
"success": {"en": "Success", "fr": "Succès"},
|
||||
"feedback": {"en": "Feedback", "fr": "Retour"},
|
||||
"error": {"en": "Error", "fr": "Erreur"},
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
class TaskItem(BaseModel, ModelMixin):
|
||||
id: str = Field(..., description="Task ID")
|
||||
workflowId: str = Field(..., description="Workflow ID")
|
||||
userInput: str = Field(..., description="User input that triggered the task")
|
||||
status: TaskStatus = Field(default=TaskStatus.PENDING, description="Task status")
|
||||
error: Optional[str] = Field(None, description="Error message if task failed")
|
||||
startedAt: Optional[float] = Field(
|
||||
None, description="When the task started (UTC timestamp in seconds)"
|
||||
)
|
||||
finishedAt: Optional[float] = Field(
|
||||
None, description="When the task finished (UTC timestamp in seconds)"
|
||||
)
|
||||
actionList: List[ActionItem] = Field(
|
||||
default_factory=list, description="List of actions to execute"
|
||||
)
|
||||
retryCount: int = Field(default=0, description="Number of retries attempted")
|
||||
retryMax: int = Field(default=3, description="Maximum number of retries")
|
||||
rollbackOnFailure: bool = Field(
|
||||
default=True, description="Whether to rollback on failure"
|
||||
)
|
||||
dependencies: List[str] = Field(
|
||||
default_factory=list, description="List of task IDs this task depends on"
|
||||
)
|
||||
feedback: Optional[str] = Field(None, description="Task feedback message")
|
||||
processingTime: Optional[float] = Field(
|
||||
None, description="Total processing time in seconds"
|
||||
)
|
||||
resultLabels: Optional[Dict[str, Any]] = Field(
|
||||
default_factory=dict, description="Map of result labels to their values"
|
||||
)
|
||||
|
||||
|
||||
register_model_labels(
|
||||
"TaskItem",
|
||||
{"en": "Task", "fr": "Tâche"},
|
||||
{
|
||||
"id": {"en": "Task ID", "fr": "ID de la tâche"},
|
||||
"workflowId": {"en": "Workflow ID", "fr": "ID du workflow"},
|
||||
"userInput": {"en": "User Input", "fr": "Entrée utilisateur"},
|
||||
"status": {"en": "Status", "fr": "Statut"},
|
||||
"error": {"en": "Error", "fr": "Erreur"},
|
||||
"startedAt": {"en": "Started At", "fr": "Démarré à"},
|
||||
"finishedAt": {"en": "Finished At", "fr": "Terminé à"},
|
||||
"actionList": {"en": "Actions", "fr": "Actions"},
|
||||
"retryCount": {"en": "Retry Count", "fr": "Nombre de tentatives"},
|
||||
"retryMax": {"en": "Max Retries", "fr": "Tentatives max"},
|
||||
"processingTime": {"en": "Processing Time", "fr": "Temps de traitement"},
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
class TaskStep(BaseModel, ModelMixin):
|
||||
id: str
|
||||
objective: str
|
||||
dependencies: Optional[list[str]] = Field(default_factory=list)
|
||||
success_criteria: Optional[list[str]] = Field(default_factory=list)
|
||||
estimated_complexity: Optional[str] = None
|
||||
userMessage: Optional[str] = Field(
|
||||
None, description="User-friendly message in user's language"
|
||||
)
|
||||
|
||||
|
||||
register_model_labels(
|
||||
"TaskStep",
|
||||
{"en": "Task Step", "fr": "Étape de tâche"},
|
||||
{
|
||||
"id": {"en": "ID", "fr": "ID"},
|
||||
"objective": {"en": "Objective", "fr": "Objectif"},
|
||||
"dependencies": {"en": "Dependencies", "fr": "Dépendances"},
|
||||
"success_criteria": {"en": "Success Criteria", "fr": "Critères de succès"},
|
||||
"estimated_complexity": {
|
||||
"en": "Estimated Complexity",
|
||||
"fr": "Complexité estimée",
|
||||
},
|
||||
"userMessage": {"en": "User Message", "fr": "Message utilisateur"},
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
class TaskHandover(BaseModel, ModelMixin):
|
||||
taskId: str = Field(description="Target task ID")
|
||||
sourceTask: Optional[str] = Field(None, description="Source task ID")
|
||||
inputDocuments: List[DocumentExchange] = Field(
|
||||
default_factory=list, description="Available input documents"
|
||||
)
|
||||
outputDocuments: List[DocumentExchange] = Field(
|
||||
default_factory=list, description="Produced output documents"
|
||||
)
|
||||
context: Dict[str, Any] = Field(default_factory=dict, description="Task context")
|
||||
previousResults: List[str] = Field(
|
||||
default_factory=list, description="Previous result summaries"
|
||||
)
|
||||
improvements: List[str] = Field(
|
||||
default_factory=list, description="Improvement suggestions"
|
||||
)
|
||||
workflowSummary: Optional[str] = Field(
|
||||
None, description="Summarized workflow context"
|
||||
)
|
||||
messageHistory: List[str] = Field(
|
||||
default_factory=list, description="Key message summaries"
|
||||
)
|
||||
timestamp: float = Field(
|
||||
..., description="When the handover was created (UTC timestamp in seconds)"
|
||||
)
|
||||
handoverType: str = Field(
|
||||
default="task", description="Type of handover: task, phase, or workflow"
|
||||
)
|
||||
|
||||
|
||||
register_model_labels(
|
||||
"TaskHandover",
|
||||
{"en": "Task Handover", "fr": "Transfert de tâche"},
|
||||
{
|
||||
"taskId": {"en": "Task ID", "fr": "ID de la tâche"},
|
||||
"sourceTask": {"en": "Source Task", "fr": "Tâche source"},
|
||||
"inputDocuments": {"en": "Input Documents", "fr": "Documents d'entrée"},
|
||||
"outputDocuments": {"en": "Output Documents", "fr": "Documents de sortie"},
|
||||
"context": {"en": "Context", "fr": "Contexte"},
|
||||
"previousResults": {"en": "Previous Results", "fr": "Résultats précédents"},
|
||||
"improvements": {"en": "Improvements", "fr": "Améliorations"},
|
||||
"workflowSummary": {"en": "Workflow Summary", "fr": "Résumé du workflow"},
|
||||
"messageHistory": {"en": "Message History", "fr": "Historique des messages"},
|
||||
"timestamp": {"en": "Timestamp", "fr": "Horodatage"},
|
||||
"handoverType": {"en": "Handover Type", "fr": "Type de transfert"},
|
||||
},
|
||||
)
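
A minimal construction sketch for the handover model above (illustrative values only; `time` is stdlib, and only the field names come from the model):

import time

handover = TaskHandover(
    taskId="task-2",
    sourceTask="task-1",
    inputDocuments=[
        DocumentExchange(documentsLabel="research", documents=["doc-1", "doc-2"])
    ],
    previousResults=["task-1 produced a market summary"],
    timestamp=time.time(),  # UTC seconds, as the field description requires
    handoverType="task",
)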


class TaskContext(BaseModel, ModelMixin):
    task_step: TaskStep
    workflow: Optional["ChatWorkflow"] = None
    workflow_id: Optional[str] = None
    available_documents: Optional[str] = "No documents available"
    available_connections: Optional[list[str]] = Field(default_factory=list)
    previous_results: Optional[list[str]] = Field(default_factory=list)
    previous_handover: Optional[TaskHandover] = None
    improvements: Optional[list[str]] = Field(default_factory=list)
    retry_count: Optional[int] = 0
    previous_action_results: Optional[list] = Field(default_factory=list)
    previous_review_result: Optional[dict] = None
    is_regeneration: Optional[bool] = False
    failure_patterns: Optional[list[str]] = Field(default_factory=list)
    failed_actions: Optional[list] = Field(default_factory=list)
    successful_actions: Optional[list] = Field(default_factory=list)
    criteria_progress: Optional[dict] = None

    def getDocumentReferences(self) -> List[str]:
        docs = []
        if self.previous_handover:
            for doc_exchange in self.previous_handover.inputDocuments:
                docs.extend(doc_exchange.documents)
        return list(set(docs))

    def addImprovement(self, improvement: str) -> None:
        if improvement not in (self.improvements or []):
            if self.improvements is None:
                self.improvements = []
            self.improvements.append(improvement)
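
A usage sketch for the context model above, continuing the hypothetical handover from the previous example (note that getDocumentReferences deduplicates via set(), so ordering is not guaranteed, and addImprovement is idempotent):

ctx = TaskContext(task_step=TaskStep(id="t2", objective="Summarize findings"))
ctx.previous_handover = handover
print(ctx.getDocumentReferences())  # e.g. ['doc-1', 'doc-2'] in some order

ctx.addImprovement("Cite sources inline")
ctx.addImprovement("Cite sources inline")  # second call is a no-op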


class ReviewContext(BaseModel, ModelMixin):
    task_step: TaskStep
    task_actions: Optional[list] = Field(default_factory=list)
    action_results: Optional[list] = Field(default_factory=list)
    step_result: Optional[dict] = Field(default_factory=dict)
    workflow_id: Optional[str] = None
    previous_results: Optional[list[str]] = Field(default_factory=list)


class ReviewResult(BaseModel, ModelMixin):
    status: str
    reason: Optional[str] = None
    improvements: Optional[list[str]] = Field(default_factory=list)
    quality_score: Optional[int] = 5
    missing_outputs: Optional[list[str]] = Field(default_factory=list)
    met_criteria: Optional[list[str]] = Field(default_factory=list)
    unmet_criteria: Optional[list[str]] = Field(default_factory=list)
    confidence: Optional[float] = 0.5
    userMessage: Optional[str] = Field(
        None, description="User-friendly message in user's language"
    )


register_model_labels(
    "ReviewResult",
    {"en": "Review Result", "fr": "Résultat de l'évaluation"},
    {
        "status": {"en": "Status", "fr": "Statut"},
        "reason": {"en": "Reason", "fr": "Raison"},
        "improvements": {"en": "Improvements", "fr": "Améliorations"},
        "quality_score": {"en": "Quality Score", "fr": "Score de qualité"},
        "missing_outputs": {"en": "Missing Outputs", "fr": "Sorties manquantes"},
        "met_criteria": {"en": "Met Criteria", "fr": "Critères respectés"},
        "unmet_criteria": {"en": "Unmet Criteria", "fr": "Critères non respectés"},
        "confidence": {"en": "Confidence", "fr": "Confiance"},
        "userMessage": {"en": "User Message", "fr": "Message utilisateur"},
    },
)


class TaskPlan(BaseModel, ModelMixin):
    overview: str
    tasks: list[TaskStep]
    userMessage: Optional[str] = Field(
        None, description="Overall user-friendly message for the task plan"
    )


register_model_labels(
    "TaskPlan",
    {"en": "Task Plan", "fr": "Plan de tâches"},
    {
        "overview": {"en": "Overview", "fr": "Aperçu"},
        "tasks": {"en": "Tasks", "fr": "Tâches"},
        "userMessage": {"en": "User Message", "fr": "Message utilisateur"},
    },
)

# Resolve forward references
TaskContext.update_forward_refs()
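
TaskContext.workflow annotates "ChatWorkflow" as a string, so the reference has to be resolved once the class exists. update_forward_refs() is the pydantic v1 spelling; under pydantic v2 the equivalent is model_rebuild() (an assumption about the project's pydantic version, not something this diff states). A standalone illustration of the mechanism, assuming pydantic v2:

from typing import Optional
from pydantic import BaseModel

class Node(BaseModel):
    child: Optional["Node"] = None  # forward reference as a string

Node.model_rebuild()  # v2 spelling; pydantic v1 used Node.update_forward_refs()
print(Node(child=Node()))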


class PromptPlaceholder(BaseModel, ModelMixin):
    label: str
    content: str
    summaryAllowed: bool = Field(
        default=False,
        description="Whether host may summarize content before sending to AI",
    )


register_model_labels(
    "PromptPlaceholder",
    {"en": "Prompt Placeholder", "fr": "Espace réservé d'invite"},
    {
        "label": {"en": "Label", "fr": "Libellé"},
        "content": {"en": "Content", "fr": "Contenu"},
        "summaryAllowed": {"en": "Summary Allowed", "fr": "Résumé autorisé"},
    },
)


class PromptBundle(BaseModel, ModelMixin):
    prompt: str
    placeholders: List[PromptPlaceholder] = Field(default_factory=list)


register_model_labels(
    "PromptBundle",
    {"en": "Prompt Bundle", "fr": "Lot d'invite"},
    {
        "prompt": {"en": "Prompt", "fr": "Invite"},
        "placeholders": {"en": "Placeholders", "fr": "Espaces réservés"},
    },
)
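
A sketch of bundling a prompt with a summarizable placeholder (illustrative values; only the field names come from the models above):

bundle = PromptBundle(
    prompt="Answer using {CONTEXT}.",
    placeholders=[
        PromptPlaceholder(
            label="CONTEXT",
            content="...long document text...",
            summaryAllowed=True,  # the host may shorten this before the AI call
        )
    ],
)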

@@ -1,4 +1,4 @@
from typing import Any, Dict, List, Optional
from typing import Any, Dict, List, Optional, Literal
from pydantic import BaseModel, Field


@@ -12,8 +12,114 @@ class ContentPart(BaseModel):
    metadata: Dict[str, Any] = Field(default_factory=dict, description="Arbitrary metadata for the part")


class ExtractedContent(BaseModel):
class ContentExtracted(BaseModel):
    id: str = Field(description="Extraction id or source document id")
    parts: List[ContentPart] = Field(default_factory=list, description="List of extracted parts")
    summary: Optional[Dict[str, Any]] = Field(default=None, description="Optional extraction summary")


class MergeStrategy(BaseModel):
    """Strategy configuration for merging content parts and AI results."""

    # Grouping configuration
    groupBy: str = Field(
        default="typeGroup",
        description="Field to group parts by (typeGroup, parentId, label, etc.)"
    )

    # Ordering configuration
    orderBy: str = Field(
        default="id",
        description="Field to order parts within groups (id, order, pageIndex, etc.)"
    )

    # Merge behavior
    mergeType: Literal["concatenate", "hierarchical", "intelligent"] = Field(
        default="concatenate",
        description="How to merge content within groups"
    )

    # Size limits
    maxSize: Optional[int] = Field(
        default=None,
        description="Maximum size for merged content in bytes"
    )

    # Type-specific merge settings
    textMerge: Optional[Dict[str, Any]] = Field(
        default=None,
        description="Text-specific merge settings (separator, formatting, etc.)"
    )

    tableMerge: Optional[Dict[str, Any]] = Field(
        default=None,
        description="Table-specific merge settings (header handling, etc.)"
    )

    structureMerge: Optional[Dict[str, Any]] = Field(
        default=None,
        description="Structure-specific merge settings (hierarchy, etc.)"
    )

    # AI result merging
    aiResultMerge: Optional[Dict[str, Any]] = Field(
        default=None,
        description="AI result merging settings (prompt, context, etc.)"
    )

    # Chunk handling
    preserveChunks: bool = Field(
        default=False,
        description="Whether to preserve individual chunks or merge them"
    )

    chunkSeparator: str = Field(
        default="\n\n---\n\n",
        description="Separator between chunks when merging"
    )

    # Metadata handling
    preserveMetadata: bool = Field(
        default=True,
        description="Whether to preserve metadata from original parts"
    )

    metadataFields: Optional[List[str]] = Field(
        default=None,
        description="Specific metadata fields to preserve (None = all)"
    )

    # Error handling
    onError: Literal["skip", "include", "fail"] = Field(
        default="skip",
        description="How to handle errors during merging"
    )

    # Validation
    validateContent: bool = Field(
        default=True,
        description="Whether to validate content before merging"
    )

    def getTypeSpecificSettings(self, typeGroup: str) -> Dict[str, Any]:
        """Get type-specific merge settings for a content type."""
        if typeGroup == "text" and self.textMerge:
            return self.textMerge
        elif typeGroup == "table" and self.tableMerge:
            return self.tableMerge
        elif typeGroup == "structure" and self.structureMerge:
            return self.structureMerge
        else:
            return {}

    def shouldPreserveChunk(self, chunk: Dict[str, Any]) -> bool:
        """Determine if a chunk should be preserved based on strategy."""
        if not self.preserveChunks:
            return False

        # Check if chunk has error metadata
        if self.onError == "skip" and chunk.get("metadata", {}).get("error"):
            return False

        return True
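
A hypothetical configuration exercising the two helper methods above (field names and defaults come from the model; the settings dict contents are made up):

strategy = MergeStrategy(
    groupBy="typeGroup",
    mergeType="concatenate",
    textMerge={"separator": "\n"},  # illustrative settings dict
    preserveChunks=True,
    onError="skip",
)

# Type-specific settings fall back to {} for unconfigured groups:
assert strategy.getTypeSpecificSettings("text") == {"separator": "\n"}
assert strategy.getTypeSpecificSettings("table") == {}

# Chunks flagged with error metadata are dropped when onError == "skip":
assert not strategy.shouldPreserveChunk({"metadata": {"error": "timeout"}})
assert strategy.shouldPreserveChunk({"metadata": {}})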

@@ -19,8 +19,6 @@ class FileItem(BaseModel, ModelMixin):

    def to_dict(self) -> Dict[str, Any]:
        return super().to_dict()


register_model_labels(
    "FileItem",
    {"en": "File Item", "fr": "Élément de fichier"},

@@ -35,7 +33,6 @@ register_model_labels(
    },
)


class FilePreview(BaseModel, ModelMixin):
    content: Union[str, bytes] = Field(description="File content (text or binary)")
    mimeType: str = Field(description="MIME type of the file")

@@ -49,8 +46,6 @@ class FilePreview(BaseModel, ModelMixin):
        if isinstance(data.get("content"), bytes):
            data["content"] = base64.b64encode(data["content"]).decode("utf-8")
        return data


register_model_labels(
    "FilePreview",
    {"en": "File Preview", "fr": "Aperçu du fichier"},

@@ -64,13 +59,10 @@ register_model_labels(
    },
)


class FileData(BaseModel, ModelMixin):
    id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Primary key")
    data: str = Field(description="File data content")
    base64Encoded: bool = Field(description="Whether the data is base64 encoded")


register_model_labels(
    "FileData",
    {"en": "File Data", "fr": "Données de fichier"},

@@ -80,5 +72,3 @@ register_model_labels(
        "base64Encoded": {"en": "Base64 Encoded", "fr": "Encodé en Base64"},
    },
)


@@ -14,8 +14,6 @@ class DataNeutraliserConfig(BaseModel, ModelMixin):
    namesToParse: str = Field(default="", description="Multiline list of names to parse for neutralization", frontend_type="textarea", frontend_readonly=False, frontend_required=False)
    sharepointSourcePath: str = Field(default="", description="SharePoint path to read files for neutralization", frontend_type="text", frontend_readonly=False, frontend_required=False)
    sharepointTargetPath: str = Field(default="", description="SharePoint path to store neutralized files", frontend_type="text", frontend_readonly=False, frontend_required=False)


register_model_labels(
    "DataNeutraliserConfig",
    {"en": "Data Neutralization Config", "fr": "Configuration de neutralisation des données"},

@@ -30,7 +28,6 @@ register_model_labels(
    },
)


class DataNeutralizerAttributes(BaseModel, ModelMixin):
    id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Unique ID of the attribute mapping (used as UID in neutralized files)", frontend_type="text", frontend_readonly=True, frontend_required=False)
    mandateId: str = Field(description="ID of the mandate this attribute belongs to", frontend_type="text", frontend_readonly=True, frontend_required=True)

@@ -38,8 +35,6 @@ class DataNeutralizerAttributes(BaseModel, ModelMixin):
    originalText: str = Field(description="Original text that was neutralized", frontend_type="text", frontend_readonly=True, frontend_required=True)
    fileId: Optional[str] = Field(default=None, description="ID of the file this attribute belongs to", frontend_type="text", frontend_readonly=True, frontend_required=False)
    patternType: str = Field(description="Type of pattern that matched (email, phone, name, etc.)", frontend_type="text", frontend_readonly=True, frontend_required=True)


register_model_labels(
    "DataNeutralizerAttributes",
    {"en": "Neutralized Data Attribute", "fr": "Attribut de données neutralisées"},


@@ -47,7 +47,8 @@ class Token(BaseModel, ModelMixin):
        None, description="Mandate ID for tenant scoping of the token"
    )

    model_config = ConfigDict(use_enum_values=True)
    class Config:
        use_enum_values = True
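
The two adjacent spellings above configure the same pydantic option: model_config = ConfigDict(use_enum_values=True) is the v2 form, the inner class Config the v1 form (judging by the hunk counts, -47,7 +47,8, the merge swaps the one-line v2 form for the two-line v1 form). What the option does, as a standalone sketch with a hypothetical model:

from enum import Enum
from pydantic import BaseModel, ConfigDict

class DemoStatus(str, Enum):
    ACTIVE = "active"

class DemoToken(BaseModel):
    model_config = ConfigDict(use_enum_values=True)
    status: DemoStatus

print(DemoToken(status=DemoStatus.ACTIVE).status)  # "active", not DemoStatus.ACTIVE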


register_model_labels(

@@ -9,7 +9,6 @@ class TicketFieldAttribute(BaseModel):
    fieldName: str = Field(description="Human-readable field name")
    field: str = Field(description="Ticket field ID/key")


class TicketBase(ABC):
    @abstractmethod
    async def read_attributes(self) -> list[TicketFieldAttribute]: ...

@@ -13,20 +13,17 @@ class AuthAuthority(str, Enum):
    GOOGLE = "google"
    MSFT = "msft"


class UserPrivilege(str, Enum):
    SYSADMIN = "sysadmin"
    ADMIN = "admin"
    USER = "user"


class ConnectionStatus(str, Enum):
    ACTIVE = "active"
    EXPIRED = "expired"
    REVOKED = "revoked"
    PENDING = "pending"


class Mandate(BaseModel, ModelMixin):
    id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Unique ID of the mandate", frontend_type="text", frontend_readonly=True, frontend_required=False)
    name: str = Field(description="Name of the mandate", frontend_type="text", frontend_readonly=False, frontend_required=True)

@@ -37,8 +34,6 @@ class Mandate(BaseModel, ModelMixin):
        {"value": "it", "label": {"en": "Italiano", "fr": "Italien"}},
    ])
    enabled: bool = Field(default=True, description="Indicates whether the mandate is enabled", frontend_type="checkbox", frontend_readonly=False, frontend_required=False)


register_model_labels(
    "Mandate",
    {"en": "Mandate", "fr": "Mandat"},

@@ -50,7 +45,6 @@ register_model_labels(
    },
)


class UserConnection(BaseModel, ModelMixin):
    id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Unique ID of the connection", frontend_type="text", frontend_readonly=True, frontend_required=False)
    userId: str = Field(description="ID of the user this connection belongs to", frontend_type="text", frontend_readonly=True, frontend_required=False)

@@ -77,8 +71,6 @@ class UserConnection(BaseModel, ModelMixin):
        {"value": "none", "label": {"en": "None", "fr": "Aucun"}},
    ])
    tokenExpiresAt: Optional[float] = Field(None, description="When the current token expires (UTC timestamp in seconds)", frontend_type="timestamp", frontend_readonly=True, frontend_required=False)


register_model_labels(
    "UserConnection",
    {"en": "User Connection", "fr": "Connexion utilisateur"},

@@ -98,7 +90,6 @@ register_model_labels(
    },
)


class User(BaseModel, ModelMixin):
    id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Unique ID of the user", frontend_type="text", frontend_readonly=True, frontend_required=False)
    username: str = Field(description="Username for login", frontend_type="text", frontend_readonly=False, frontend_required=True)

@@ -122,8 +113,6 @@ class User(BaseModel, ModelMixin):
        {"value": "msft", "label": {"en": "Microsoft", "fr": "Microsoft"}},
    ])
    mandateId: Optional[str] = Field(None, description="ID of the mandate this user belongs to", frontend_type="text", frontend_readonly=True, frontend_required=False)


register_model_labels(
    "User",
    {"en": "User", "fr": "Utilisateur"},

@@ -140,15 +129,10 @@ register_model_labels(
    },
)


class UserInDB(User):
    hashedPassword: Optional[str] = Field(None, description="Hash of the user password")


register_model_labels(
    "UserInDB",
    {"en": "User Access", "fr": "Accès de l'utilisateur"},
    {"hashedPassword": {"en": "Password hash", "fr": "Hachage de mot de passe"}},
)


@@ -10,8 +10,6 @@ class Prompt(BaseModel, ModelMixin):
    mandateId: str = Field(description="ID of the mandate this prompt belongs to", frontend_type="text", frontend_readonly=True, frontend_required=False)
    content: str = Field(description="Content of the prompt", frontend_type="textarea", frontend_readonly=False, frontend_required=True)
    name: str = Field(description="Name of the prompt", frontend_type="text", frontend_readonly=False, frontend_required=True)


register_model_labels(
    "Prompt",
    {"en": "Prompt", "fr": "Invite"},

@@ -22,7 +22,6 @@ class VoiceSettings(BaseModel, ModelMixin):
    def to_dict(self) -> Dict[str, Any]:
        return super().to_dict()


register_model_labels(
    "VoiceSettings",
    {"en": "Voice Settings", "fr": "Paramètres vocaux"},

@@ -1,10 +1,8 @@
"""Web-related modules"""

from abc import ABC, abstractmethod
from pydantic import BaseModel, Field, HttpUrl
from typing import List, Optional, Literal
from typing import List, Optional, Literal, Dict, Any
from modules.shared.configuration import APP_CONFIG
from modules.datamodels.datamodelWorkflow import ActionDocument, ActionResult
from modules.datamodels.datamodelChat import ActionDocument, ActionResult


WEB_SEARCH_MAX_QUERY_LENGTH: int = int(APP_CONFIG.get("Web_Search_MAX_QUERY_LENGTH", "400"))

@@ -12,130 +10,133 @@ WEB_SEARCH_MAX_RESULTS: int = int(APP_CONFIG.get("Web_Search_MAX_RESULTS", "20")
WEB_SEARCH_MIN_RESULTS: int = int(APP_CONFIG.get("Web_Search_MIN_RESULTS", "1"))


class WebSearchRequest(BaseModel):
    query: str = Field(min_length=1, max_length=WEB_SEARCH_MAX_QUERY_LENGTH)
    max_results: int = Field(ge=WEB_SEARCH_MIN_RESULTS, le=WEB_SEARCH_MAX_RESULTS)
    # Tavily tuning options
    search_depth: Optional[Literal["basic", "advanced"]] = Field(default=None)
    time_range: Optional[Literal["d", "w", "m", "y"]] = Field(
        default=None, description="Limit results to last day/week/month/year"
    )
    topic: Optional[Literal["general", "news", "academic"]] = Field(default=None)
    include_domains: Optional[List[str]] = Field(default=None)
    exclude_domains: Optional[List[str]] = Field(default=None)
    language: Optional[str] = Field(default=None, description="ISO language code like 'en', 'de'")
    include_answer: Optional[bool] = Field(default=None)
    include_raw_content: Optional[bool] = Field(default=None)
class WebResearchOptions(BaseModel):
    """Advanced options for web research workflow"""
    max_pages: int = Field(default=10, ge=1, le=50, description="Maximum pages to crawl")
    search_depth: Literal["basic", "advanced"] = Field(default="basic", description="Tavily search depth")
    extract_depth: Literal["basic", "advanced"] = Field(default="advanced", description="Tavily extract depth")
    format: Literal["text", "markdown"] = Field(default="markdown", description="Content format")
    return_report: bool = Field(default=True, description="Return formatted report or raw data")
    pages_search_depth: int = Field(default=1, ge=1, le=5, description="How deep to crawl: 1=main pages only, 2=main+sub-pages, 3=main+sub+sub-sub, etc.")
    country: Optional[str] = Field(default=None, description="Country code for search bias")
    time_range: Optional[Literal["d", "w", "m", "y"]] = Field(default=None, description="Time range for search")
    topic: Optional[Literal["general", "news", "academic"]] = Field(default=None, description="Search topic")
    language: Optional[str] = Field(default=None, description="Language code")
    include_answer: Optional[bool] = Field(default=None, description="Include AI answer")
    include_raw_content: Optional[bool] = Field(default=None, description="Include raw content")


class WebResearchRequest(BaseModel):
    """Main web research request"""
    user_prompt: str = Field(min_length=1, max_length=WEB_SEARCH_MAX_QUERY_LENGTH, description="User's research question or prompt")
    urls: Optional[List[str]] = Field(default=None, description="Specific URLs to crawl (optional)")
    max_results: int = Field(default=5, ge=1, le=WEB_SEARCH_MAX_RESULTS, description="Max search results")
    options: WebResearchOptions = Field(default_factory=WebResearchOptions, description="Advanced options")
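
A sketch of a research request built from the models above (illustrative values; constraints such as the 400-character prompt limit and max_results <= WEB_SEARCH_MAX_RESULTS come from the field definitions):

req = WebResearchRequest(
    user_prompt="What changed in the EU AI Act in 2024?",
    urls=["https://example.com/ai-act"],  # optional seed URLs, hypothetical
    max_results=5,
    options=WebResearchOptions(
        search_depth="advanced",
        pages_search_depth=2,  # main pages plus their direct sub-pages
        time_range="m",        # restrict the search to the last month
        format="markdown",
    ),
)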

class WebSearchResultItem(BaseModel):
    """Individual search result"""

    title: str
    url: HttpUrl
    raw_content: Optional[str] = Field(default=None, description="Raw HTML content")

class WebCrawlResultItem(BaseModel):
    """Individual crawl result"""
    url: HttpUrl
    content: str

class WebResearchDocumentData(BaseModel):
    """Complete web research results"""
    user_prompt: str
    websites_analyzed: int
    additional_links_found: int
    analysis_result: str
    sources: List[WebSearchResultItem]
    additional_links: List[str]
    individual_content: Optional[Dict[str, str]] = None  # URL -> content mapping
    debug_info: Optional[Dict[str, Any]] = None

class WebResearchActionDocument(ActionDocument):
    documentData: WebResearchDocumentData

class WebResearchActionResult(ActionResult):
    documents: List[WebResearchActionDocument] = Field(default_factory=list)

# Legacy models for connector compatibility

class WebSearchDocumentData(BaseModel):
    """Complete search (and scrape) results document"""

    query: str = Field(min_length=1, max_length=WEB_SEARCH_MAX_QUERY_LENGTH)
    # Allow both WebSearchResultItem and WebScrapeResultItem to be stored here
    results: List[object]
    """Search results document data"""
    query: str
    results: List[WebSearchResultItem]
    total_count: int


class WebSearchActionDocument(ActionDocument):
    documentData: WebSearchDocumentData


class WebSearchActionResult(ActionResult):
    documents: List[WebSearchActionDocument] = Field(default_factory=list)


class WebSearchBase(ABC):
    @abstractmethod
    async def search_urls(self, request: WebSearchRequest) -> WebSearchActionResult: ...


# --- Web crawl ---


class WebCrawlRequest(BaseModel):
    urls: List[HttpUrl]
    # Tavily extract options
    extract_depth: Optional[Literal["basic", "advanced"]] = Field(default=None)
    format: Optional[Literal["text", "markdown"]] = Field(default=None)


class WebCrawlResultItem(BaseModel):
    """Individual crawl result"""

    url: HttpUrl
    content: str


class WebCrawlDocumentData(BaseModel):
    """Complete crawl results document"""

    """Crawl results document data"""
    urls: List[HttpUrl]
    results: List[WebCrawlResultItem]
    total_count: int


class WebCrawlActionDocument(ActionDocument):
    documentData: WebCrawlDocumentData = Field(
        description="The data extracted from crawled URLs"
    )

    documentData: WebCrawlDocumentData

class WebCrawlActionResult(ActionResult):
    documents: List[WebCrawlActionDocument] = Field(default_factory=list)


class WebCrawlBase(ABC):
    @abstractmethod
    async def crawl_urls(self, request: WebCrawlRequest) -> WebCrawlActionResult: ...


# --- Web scrape ---


class WebScrapeRequest(BaseModel):
    query: str = Field(min_length=1, max_length=WEB_SEARCH_MAX_QUERY_LENGTH)
    max_results: int = Field(ge=WEB_SEARCH_MIN_RESULTS, le=WEB_SEARCH_MAX_RESULTS)
    # Pass-through search options
    search_depth: Optional[Literal["basic", "advanced"]] = Field(default=None)
    time_range: Optional[Literal["d", "w", "m", "y"]] = Field(default=None)
    topic: Optional[Literal["general", "news", "academic"]] = Field(default=None)
    include_domains: Optional[List[str]] = Field(default=None)
    exclude_domains: Optional[List[str]] = Field(default=None)
    language: Optional[str] = Field(default=None)
    include_answer: Optional[bool] = Field(default=None)
    include_raw_content: Optional[bool] = Field(default=None)
    # Extract options
    extract_depth: Optional[Literal["basic", "advanced"]] = Field(default=None)
    format: Optional[Literal["text", "markdown"]] = Field(default=None)


class WebScrapeResultItem(BaseModel):
    """Individual scrape result"""

    url: HttpUrl
    content: str

class WebScrapeDocumentData(BaseModel):
    """Scrape results document data"""
    query: str
    results: List[WebSearchResultItem]
    total_count: int

class WebScrapeActionDocument(ActionDocument):
    documentData: WebSearchDocumentData = Field(
        description="The data extracted from scraped URLs"
    )

    documentData: WebScrapeDocumentData

class WebScrapeActionResult(ActionResult):
    documents: List[WebScrapeActionDocument] = Field(default_factory=list)

class WebSearchRequest(BaseModel):
    """Search request for Tavily"""
    query: str
    max_results: int = 5
    search_depth: Optional[Literal["basic", "advanced"]] = None
    time_range: Optional[Literal["d", "w", "m", "y"]] = None
    topic: Optional[Literal["general", "news", "academic"]] = None
    include_domains: Optional[List[str]] = None
    exclude_domains: Optional[List[str]] = None
    language: Optional[str] = None
    include_answer: Optional[bool] = None
    include_raw_content: Optional[bool] = None
    auto_parameters: Optional[bool] = None
    country: Optional[str] = None

class WebScrapeBase(ABC):
    @abstractmethod
    async def scrape(self, request: WebScrapeRequest) -> WebScrapeActionResult: ...
class WebCrawlRequest(BaseModel):
    """Crawl request for Tavily"""
    urls: List[HttpUrl]
    extract_depth: Optional[Literal["basic", "advanced"]] = None
    format: Optional[Literal["text", "markdown"]] = None

class WebScrapeRequest(BaseModel):
    """Scrape request for Tavily"""
    query: str
    max_results: int = 5
    search_depth: Optional[Literal["basic", "advanced"]] = None
    time_range: Optional[Literal["d", "w", "m", "y"]] = None
    topic: Optional[Literal["general", "news", "academic"]] = None
    include_domains: Optional[List[str]] = None
    exclude_domains: Optional[List[str]] = None
    language: Optional[str] = None
    include_answer: Optional[bool] = None
    include_raw_content: Optional[bool] = None
    auto_parameters: Optional[bool] = None
    country: Optional[str] = None
    extract_depth: Optional[Literal["basic", "advanced"]] = None
    format: Optional[Literal["text", "markdown"]] = None

class WebScrapeResultItem(BaseModel):
    """Individual scrape result"""
    url: HttpUrl
    content: str

@@ -1,474 +0,0 @@
"""Workflow-related base datamodels and step/task structures."""
|
||||
|
||||
from enum import Enum
|
||||
from typing import List, Dict, Any, Optional
|
||||
from pydantic import BaseModel, Field
|
||||
from modules.shared.attributeUtils import register_model_labels, ModelMixin
|
||||
|
||||
|
||||
class ActionDocument(BaseModel, ModelMixin):
|
||||
"""Clear document structure for action results"""
|
||||
|
||||
documentName: str = Field(description="Name of the document")
|
||||
documentData: Any = Field(description="Content/data of the document")
|
||||
mimeType: str = Field(description="MIME type of the document")
|
||||
|
||||
|
||||
register_model_labels(
|
||||
"ActionDocument",
|
||||
{"en": "Action Document", "fr": "Document d'action"},
|
||||
{
|
||||
"documentName": {"en": "Document Name", "fr": "Nom du document"},
|
||||
"documentData": {"en": "Document Data", "fr": "Données du document"},
|
||||
"mimeType": {"en": "MIME Type", "fr": "Type MIME"},
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
class ActionResult(BaseModel, ModelMixin):
|
||||
"""Clean action result with documents as primary output
|
||||
|
||||
IMPORTANT: Action methods should NOT set resultLabel in their return value.
|
||||
The resultLabel is managed by the action handler using the action's execResultLabel
|
||||
from the action plan. This ensures consistent document routing throughout the workflow.
|
||||
"""
|
||||
|
||||
success: bool = Field(description="Whether execution succeeded")
|
||||
error: Optional[str] = Field(None, description="Error message if failed")
|
||||
documents: List[ActionDocument] = Field(
|
||||
default_factory=list, description="Document outputs"
|
||||
)
|
||||
resultLabel: Optional[str] = Field(
|
||||
None,
|
||||
description="Label for document routing (set by action handler, not by action methods)",
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def isSuccess(cls, documents: List[ActionDocument] = None) -> "ActionResult":
|
||||
return cls(success=True, documents=documents or [])
|
||||
|
||||
@classmethod
|
||||
def isFailure(
|
||||
cls, error: str, documents: List[ActionDocument] = None
|
||||
) -> "ActionResult":
|
||||
return cls(success=False, documents=documents or [], error=error)
|
||||
|
||||
|
||||
register_model_labels(
|
||||
"ActionResult",
|
||||
{"en": "Action Result", "fr": "Résultat de l'action"},
|
||||
{
|
||||
"success": {"en": "Success", "fr": "Succès"},
|
||||
"error": {"en": "Error", "fr": "Erreur"},
|
||||
"documents": {"en": "Documents", "fr": "Documents"},
|
||||
"resultLabel": {"en": "Result Label", "fr": "Étiquette du résultat"},
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
class ActionSelection(BaseModel, ModelMixin):
|
||||
method: str = Field(description="Method to execute (e.g., web, document, ai)")
|
||||
name: str = Field(
|
||||
description="Action name within the method (e.g., search, extract)"
|
||||
)
|
||||
|
||||
|
||||
register_model_labels(
|
||||
"ActionSelection",
|
||||
{"en": "Action Selection", "fr": "Sélection d'action"},
|
||||
{
|
||||
"method": {"en": "Method", "fr": "Méthode"},
|
||||
"name": {"en": "Action Name", "fr": "Nom de l'action"},
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
class ActionParameters(BaseModel, ModelMixin):
|
||||
parameters: Dict[str, Any] = Field(
|
||||
default_factory=dict, description="Parameters to execute the selected action"
|
||||
)
|
||||
|
||||
|
||||
register_model_labels(
|
||||
"ActionParameters",
|
||||
{"en": "Action Parameters", "fr": "Paramètres d'action"},
|
||||
{
|
||||
"parameters": {"en": "Parameters", "fr": "Paramètres"},
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
class ObservationPreview(BaseModel, ModelMixin):
|
||||
name: str = Field(description="Document name or URL label")
|
||||
mime: str = Field(description="MIME type or kind")
|
||||
snippet: str = Field(description="Short snippet or summary")
|
||||
|
||||
|
||||
register_model_labels(
|
||||
"ObservationPreview",
|
||||
{"en": "Observation Preview", "fr": "Aperçu d'observation"},
|
||||
{
|
||||
"name": {"en": "Name", "fr": "Nom"},
|
||||
"mime": {"en": "MIME", "fr": "MIME"},
|
||||
"snippet": {"en": "Snippet", "fr": "Extrait"},
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
class Observation(BaseModel, ModelMixin):
|
||||
success: bool = Field(description="Action execution success flag")
|
||||
resultLabel: str = Field(description="Deterministic label for produced documents")
|
||||
documentsCount: int = Field(description="Number of produced documents")
|
||||
previews: List[ObservationPreview] = Field(
|
||||
default_factory=list, description="Compact previews of outputs"
|
||||
)
|
||||
notes: List[str] = Field(
|
||||
default_factory=list, description="Short notes or key facts"
|
||||
)
|
||||
|
||||
|
||||
register_model_labels(
|
||||
"Observation",
|
||||
{"en": "Observation", "fr": "Observation"},
|
||||
{
|
||||
"success": {"en": "Success", "fr": "Succès"},
|
||||
"resultLabel": {"en": "Result Label", "fr": "Étiquette du résultat"},
|
||||
"documentsCount": {"en": "Documents Count", "fr": "Nombre de documents"},
|
||||
"previews": {"en": "Previews", "fr": "Aperçus"},
|
||||
"notes": {"en": "Notes", "fr": "Notes"},
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
class TaskStatus(str, Enum):
|
||||
"""Task status enumeration."""
|
||||
|
||||
PENDING = "pending"
|
||||
RUNNING = "running"
|
||||
COMPLETED = "completed"
|
||||
FAILED = "failed"
|
||||
CANCELLED = "cancelled"
|
||||
|
||||
|
||||
register_model_labels(
|
||||
"TaskStatus",
|
||||
{"en": "Task Status", "fr": "Statut de la tâche"},
|
||||
{
|
||||
"PENDING": {"en": "Pending", "fr": "En attente"},
|
||||
"RUNNING": {"en": "Running", "fr": "En cours"},
|
||||
"COMPLETED": {"en": "Completed", "fr": "Terminé"},
|
||||
"FAILED": {"en": "Failed", "fr": "Échec"},
|
||||
"CANCELLED": {"en": "Cancelled", "fr": "Annulé"},
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
class DocumentExchange(BaseModel, ModelMixin):
|
||||
documentsLabel: str = Field(description="Label for the set of documents")
|
||||
documents: List[str] = Field(
|
||||
default_factory=list, description="List of document references"
|
||||
)
|
||||
|
||||
|
||||
register_model_labels(
|
||||
"DocumentExchange",
|
||||
{"en": "Document Exchange", "fr": "Échange de documents"},
|
||||
{
|
||||
"documentsLabel": {"en": "Documents Label", "fr": "Label des documents"},
|
||||
"documents": {"en": "Documents", "fr": "Documents"},
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
class TaskAction(BaseModel, ModelMixin):
|
||||
id: str = Field(..., description="Action ID")
|
||||
execMethod: str = Field(..., description="Method to execute")
|
||||
execAction: str = Field(..., description="Action to perform")
|
||||
execParameters: Dict[str, Any] = Field(
|
||||
default_factory=dict, description="Action parameters"
|
||||
)
|
||||
execResultLabel: Optional[str] = Field(
|
||||
None, description="Label for the set of result documents"
|
||||
)
|
||||
expectedDocumentFormats: Optional[List[Dict[str, str]]] = Field(
|
||||
None, description="Expected document formats (optional)"
|
||||
)
|
||||
userMessage: Optional[str] = Field(
|
||||
None, description="User-friendly message in user's language"
|
||||
)
|
||||
status: TaskStatus = Field(default=TaskStatus.PENDING, description="Action status")
|
||||
error: Optional[str] = Field(None, description="Error message if action failed")
|
||||
retryCount: int = Field(default=0, description="Number of retries attempted")
|
||||
retryMax: int = Field(default=3, description="Maximum number of retries")
|
||||
processingTime: Optional[float] = Field(
|
||||
None, description="Processing time in seconds"
|
||||
)
|
||||
timestamp: float = Field(
|
||||
..., description="When the action was executed (UTC timestamp in seconds)"
|
||||
)
|
||||
result: Optional[str] = Field(None, description="Result of the action")
|
||||
|
||||
|
||||
register_model_labels(
|
||||
"TaskAction",
|
||||
{"en": "Task Action", "fr": "Action de tâche"},
|
||||
{
|
||||
"id": {"en": "Action ID", "fr": "ID de l'action"},
|
||||
"execMethod": {"en": "Method", "fr": "Méthode"},
|
||||
"execAction": {"en": "Action", "fr": "Action"},
|
||||
"execParameters": {"en": "Parameters", "fr": "Paramètres"},
|
||||
"execResultLabel": {"en": "Result Label", "fr": "Label du résultat"},
|
||||
"expectedDocumentFormats": {
|
||||
"en": "Expected Document Formats",
|
||||
"fr": "Formats de documents attendus",
|
||||
},
|
||||
"userMessage": {"en": "User Message", "fr": "Message utilisateur"},
|
||||
"status": {"en": "Status", "fr": "Statut"},
|
||||
"error": {"en": "Error", "fr": "Erreur"},
|
||||
"retryCount": {"en": "Retry Count", "fr": "Nombre de tentatives"},
|
||||
"retryMax": {"en": "Max Retries", "fr": "Tentatives max"},
|
||||
"processingTime": {"en": "Processing Time", "fr": "Temps de traitement"},
|
||||
"timestamp": {"en": "Timestamp", "fr": "Horodatage"},
|
||||
"result": {"en": "Result", "fr": "Résultat"},
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
class TaskResult(BaseModel, ModelMixin):
|
||||
taskId: str = Field(..., description="Task ID")
|
||||
status: TaskStatus = Field(default=TaskStatus.PENDING, description="Task status")
|
||||
success: bool = Field(..., description="Whether the task was successful")
|
||||
feedback: Optional[str] = Field(None, description="Task feedback message")
|
||||
error: Optional[str] = Field(None, description="Error message if task failed")
|
||||
|
||||
|
||||
register_model_labels(
|
||||
"TaskResult",
|
||||
{"en": "Task Result", "fr": "Résultat de tâche"},
|
||||
{
|
||||
"taskId": {"en": "Task ID", "fr": "ID de la tâche"},
|
||||
"status": {"en": "Status", "fr": "Statut"},
|
||||
"success": {"en": "Success", "fr": "Succès"},
|
||||
"feedback": {"en": "Feedback", "fr": "Retour"},
|
||||
"error": {"en": "Error", "fr": "Erreur"},
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
class TaskItem(BaseModel, ModelMixin):
|
||||
id: str = Field(..., description="Task ID")
|
||||
workflowId: str = Field(..., description="Workflow ID")
|
||||
userInput: str = Field(..., description="User input that triggered the task")
|
||||
status: TaskStatus = Field(default=TaskStatus.PENDING, description="Task status")
|
||||
error: Optional[str] = Field(None, description="Error message if task failed")
|
||||
startedAt: Optional[float] = Field(
|
||||
None, description="When the task started (UTC timestamp in seconds)"
|
||||
)
|
||||
finishedAt: Optional[float] = Field(
|
||||
None, description="When the task finished (UTC timestamp in seconds)"
|
||||
)
|
||||
actionList: List[TaskAction] = Field(
|
||||
default_factory=list, description="List of actions to execute"
|
||||
)
|
||||
retryCount: int = Field(default=0, description="Number of retries attempted")
|
||||
retryMax: int = Field(default=3, description="Maximum number of retries")
|
||||
rollbackOnFailure: bool = Field(
|
||||
default=True, description="Whether to rollback on failure"
|
||||
)
|
||||
dependencies: List[str] = Field(
|
||||
default_factory=list, description="List of task IDs this task depends on"
|
||||
)
|
||||
feedback: Optional[str] = Field(None, description="Task feedback message")
|
||||
processingTime: Optional[float] = Field(
|
||||
None, description="Total processing time in seconds"
|
||||
)
|
||||
resultLabels: Optional[Dict[str, Any]] = Field(
|
||||
default_factory=dict, description="Map of result labels to their values"
|
||||
)
|
||||
|
||||
|
||||
register_model_labels(
|
||||
"TaskItem",
|
||||
{"en": "Task", "fr": "Tâche"},
|
||||
{
|
||||
"id": {"en": "Task ID", "fr": "ID de la tâche"},
|
||||
"workflowId": {"en": "Workflow ID", "fr": "ID du workflow"},
|
||||
"userInput": {"en": "User Input", "fr": "Entrée utilisateur"},
|
||||
"status": {"en": "Status", "fr": "Statut"},
|
||||
"error": {"en": "Error", "fr": "Erreur"},
|
||||
"startedAt": {"en": "Started At", "fr": "Démarré à"},
|
||||
"finishedAt": {"en": "Finished At", "fr": "Terminé à"},
|
||||
"actionList": {"en": "Actions", "fr": "Actions"},
|
||||
"retryCount": {"en": "Retry Count", "fr": "Nombre de tentatives"},
|
||||
"retryMax": {"en": "Max Retries", "fr": "Tentatives max"},
|
||||
"processingTime": {"en": "Processing Time", "fr": "Temps de traitement"},
|
||||
},
|
||||
)

[... remainder of the deleted file: TaskStep, TaskHandover, TaskContext, ReviewContext, ReviewResult, and TaskPlan, with their register_model_labels calls, identical to the definitions shown earlier in this diff ...]

@@ -1,10 +1,12 @@
import logging
from typing import Dict, Any, List, Union
from typing import Dict, Any, List, Union, Tuple, Optional
from dataclasses import dataclass

logger = logging.getLogger(__name__)

from modules.connectors.connectorAiOpenai import AiOpenai
from modules.connectors.connectorAiAnthropic import AiAnthropic
from modules.connectors.connectorAiLangdoc import AiLangdoc
from modules.connectors.connectorAiPerplexity import AiPerplexity
from modules.connectors.connectorAiTavily import ConnectorWeb
from modules.datamodels.datamodelAi import (
    AiCallOptions,

@@ -18,26 +20,14 @@ from modules.datamodels.datamodelAi import (
    PROCESSING_MODE_PRIORITY_MAPPING
)
from modules.datamodels.datamodelWeb import (
    WebCrawlActionResult,
    WebCrawlActionDocument,
    WebCrawlDocumentData,
    WebCrawlRequest,
    WebCrawlResultItem,
    WebScrapeActionResult,
    WebScrapeActionDocument,
    WebSearchDocumentData as WebScrapeDocumentData,
    WebScrapeRequest,
    WebScrapeResultItem,
    WebSearchActionResult,
    WebSearchActionDocument,
    WebSearchDocumentData,
    WebSearchRequest,
    WebResearchRequest,
    WebResearchActionResult,
    WebSearchResultItem,
    WebCrawlResultItem,
    WebSearchRequest,
    WebCrawlRequest,
)
from modules.datamodels.datamodelWorkflow import ActionDocument


logger = logging.getLogger(__name__)
from modules.datamodels.datamodelChat import ActionDocument


# Comprehensive model registry with capability tags and function mapping

@@ -52,8 +42,8 @@ aiModels: Dict[str, Dict[str, Any]] = {
        "costPer1kTokensOutput": 0.06,
        "speedRating": 8,
        "qualityRating": 9,
        "capabilities": ["text_generation", "chat", "reasoning"],
        "tags": ["text", "chat", "reasoning", "general"]
        "capabilities": ["text_generation", "chat", "reasoning", "analysis"],
        "tags": ["text", "chat", "reasoning", "analysis", "general"]
    },
    "openai_callAiBasic_gpt35": {
        "connector": "openai",

@@ -118,90 +108,66 @@ aiModels: Dict[str, Dict[str, Any]] = {
        "tags": ["image", "vision", "multimodal", "high_quality"]
    },

    # LangDoc Models
    "langdoc_callAiBasic": {
        "connector": "langdoc",
    # Perplexity Models
    "perplexity_callAiBasic": {
        "connector": "perplexity",
        "function": "callAiBasic",
        "llmName": "gpt-4o",
        "llmName": "llama-3.1-sonar-large-128k-online",
        "contextLength": 128000,
        "costPer1kTokens": 0.02,
        "costPer1kTokensOutput": 0.04,
        "costPer1kTokens": 0.005,
        "costPer1kTokensOutput": 0.005,
        "speedRating": 8,
        "qualityRating": 9,
        "capabilities": ["text_generation", "chat", "reasoning"],
        "tags": ["text", "chat", "reasoning", "general", "cost_effective"]
        "qualityRating": 8,
        "capabilities": ["text_generation", "chat", "reasoning", "web_search"],
        "tags": ["text", "chat", "reasoning", "web_search", "cost_effective"]
    },
    "langdoc_callAiImage": {
        "connector": "langdoc",
        "function": "callAiImage",
        "llmName": "gpt-4o",
    "perplexity_callAiWithWebSearch": {
        "connector": "perplexity",
        "function": "callAiWithWebSearch",
        "llmName": "sonar-pro",
        "contextLength": 128000,
        "costPer1kTokens": 0.02,
        "costPer1kTokensOutput": 0.04,
        "costPer1kTokens": 0.01,
        "costPer1kTokensOutput": 0.01,
        "speedRating": 7,
        "qualityRating": 9,
        "capabilities": ["image_analysis", "vision", "multimodal"],
        "tags": ["image", "vision", "multimodal", "cost_effective"]
        "capabilities": ["text_generation", "web_search", "research"],
        "tags": ["text", "web_search", "research", "high_quality"]
    },
    "langdoc_generateImage": {
        "connector": "langdoc",
        "function": "generateImage",
        "llmName": "dall-e-3",
        "contextLength": 0,
        "costPer1kTokens": 0.04,
        "costPer1kTokensOutput": 0.0,
        "speedRating": 6,
        "qualityRating": 9,
        "capabilities": ["image_generation", "art", "visual_creation"],
        "tags": ["image_generation", "art", "visual", "cost_effective"]
    },
    "langdoc_generateImageWithVariations": {
        "connector": "langdoc",
        "function": "generateImageWithVariations",
        "llmName": "dall-e-3",
        "contextLength": 0,
        "costPer1kTokens": 0.04,
        "costPer1kTokensOutput": 0.0,
        "speedRating": 5,
        "qualityRating": 9,
        "capabilities": ["image_generation", "art", "visual_creation", "variations"],
        "tags": ["image_generation", "art", "visual", "variations", "cost_effective"]
    },
    "langdoc_generateImageWithChat": {
        "connector": "langdoc",
        "function": "generateImageWithChat",
        "llmName": "gpt-4o",
        "contextLength": 128000,
        "costPer1kTokens": 0.02,
        "costPer1kTokensOutput": 0.04,
        "speedRating": 6,
    "perplexity_researchTopic": {
        "connector": "perplexity",
        "function": "researchTopic",
        "llmName": "mistral-7b-instruct",
        "contextLength": 32000,
        "costPer1kTokens": 0.002,
        "costPer1kTokensOutput": 0.002,
        "speedRating": 8,
        "qualityRating": 8,
        "capabilities": ["image_generation", "chat", "visual_creation"],
        "tags": ["image_generation", "chat", "visual", "cost_effective"]
        "capabilities": ["web_search", "research", "information_gathering"],
        "tags": ["web_search", "research", "information", "cost_effective"]
    },
    "langdoc_listModels": {
        "connector": "langdoc",
        "function": "listModels",
        "llmName": "api",
        "contextLength": 0,
        "costPer1kTokens": 0.0,
        "costPer1kTokensOutput": 0.0,
        "speedRating": 9,
        "qualityRating": 5,
        "capabilities": ["model_listing", "api_info"],
        "tags": ["api", "info", "models"]
    "perplexity_answerQuestion": {
        "connector": "perplexity",
        "function": "answerQuestion",
        "llmName": "mistral-7b-instruct",
        "contextLength": 32000,
        "costPer1kTokens": 0.002,
        "costPer1kTokensOutput": 0.002,
        "speedRating": 8,
        "qualityRating": 8,
        "capabilities": ["web_search", "question_answering", "research"],
        "tags": ["web_search", "qa", "research", "cost_effective"]
    },
    "langdoc_getModelInfo": {
        "connector": "langdoc",
        "function": "getModelInfo",
        "llmName": "api",
        "contextLength": 0,
        "costPer1kTokens": 0.0,
        "costPer1kTokensOutput": 0.0,
        "speedRating": 9,
        "qualityRating": 5,
        "capabilities": ["model_info", "api_info"],
        "tags": ["api", "info", "models"]
    "perplexity_getCurrentNews": {
        "connector": "perplexity",
        "function": "getCurrentNews",
        "llmName": "mistral-7b-instruct",
        "contextLength": 32000,
        "costPer1kTokens": 0.002,
        "costPer1kTokensOutput": 0.002,
        "speedRating": 8,
        "qualityRating": 8,
        "capabilities": ["web_search", "news", "current_events"],
        "tags": ["web_search", "news", "current_events", "cost_effective"]
    },

    # Tavily Web Models
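
A sketch of how a registry shaped like aiModels can be filtered by capability and ranked by a rating, mirroring the max(candidates, key=...) pattern used further down (the helper name is hypothetical, not part of this diff):

from typing import Any, Dict

def pickModel(models: Dict[str, Dict[str, Any]], capability: str, rating: str = "qualityRating") -> str:
    """Return the registry key with the best rating among models offering the capability."""
    candidates = {k: v for k, v in models.items() if capability in v.get("capabilities", [])}
    if not candidates:
        raise LookupError(f"no model offers {capability!r}")
    return max(candidates, key=lambda k: candidates[k][rating])

# e.g. pickModel(aiModels, "web_search") would favour the highest-quality Perplexity entry.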
|
||||
|
|
@ -250,7 +216,7 @@ class AiObjects:
|
|||
|
||||
openaiService: AiOpenai
|
||||
anthropicService: AiAnthropic
|
||||
langdocService: AiLangdoc
|
||||
perplexityService: AiPerplexity
|
||||
tavilyService: ConnectorWeb
|
||||
|
||||
def __post_init__(self) -> None:
|
||||
|
|
@ -258,8 +224,8 @@ class AiObjects:
|
|||
raise TypeError("openaiService must be provided")
|
||||
if self.anthropicService is None:
|
||||
raise TypeError("anthropicService must be provided")
|
||||
if self.langdocService is None:
|
||||
raise TypeError("langdocService must be provided")
|
||||
if self.perplexityService is None:
|
||||
raise TypeError("perplexityService must be provided")
|
||||
if self.tavilyService is None:
|
||||
raise TypeError("tavilyService must be provided")
|
||||
|
||||
|
|
@ -268,13 +234,13 @@ class AiObjects:
|
|||
"""Create AiObjects instance with all connectors initialized."""
|
||||
openaiService = AiOpenai()
|
||||
anthropicService = AiAnthropic()
|
||||
langdocService = AiLangdoc()
|
||||
perplexityService = AiPerplexity()
|
||||
tavilyService = await ConnectorWeb.create()
|
||||
|
||||
return cls(
|
||||
openaiService=openaiService,
|
||||
anthropicService=anthropicService,
|
||||
langdocService=langdocService,
|
||||
perplexityService=perplexityService,
|
||||
tavilyService=tavilyService
|
||||
)
|
||||
|
||||
|
|
@ -330,11 +296,22 @@ class AiObjects:
            elif options.operationType == OperationType.IMAGE_GENERATION:
                return "openai_generateImage"
            elif options.operationType == OperationType.WEB_RESEARCH:
                return "langdoc_callAiBasic"
                return "perplexity_callAiWithWebSearch"
            else:
                return "openai_callAiBasic_gpt35"

        # Select based on priority
        # Special handling for planning operations - use Claude for consistency
        if options.operationType in [OperationType.GENERATE_PLAN, OperationType.ANALYSE_CONTENT]:
            if "anthropic_callAiBasic" in candidates:
                logger.info("Planning operation: Selected Claude (anthropic_callAiBasic) for highest quality")
                return "anthropic_callAiBasic"

            # Fallback to GPT-4o if Claude not available
            if "openai_callAiBasic" in candidates:
                logger.info("Planning operation: Selected GPT-4o (openai_callAiBasic) as fallback")
                return "openai_callAiBasic"

        # Select based on priority for other operations
        if effectivePriority == Priority.SPEED:
            return max(candidates, key=lambda k: candidates[k]["speedRating"])
        elif effectivePriority == Priority.QUALITY:
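Note: the priority branch above reduces to an argmax over the candidate rating fields; a self-contained sketch of that selection rule with made-up candidates:

# Same selection rule as above, with illustrative candidate data.
candidates = {
    "openai_callAiBasic": {"speedRating": 7, "qualityRating": 9},
    "anthropic_callAiBasic": {"speedRating": 6, "qualityRating": 10},
    "perplexity_callAiWithWebSearch": {"speedRating": 8, "qualityRating": 8},
}

def selectBy(rating: str) -> str:
    return max(candidates, key=lambda k: candidates[k][rating])

assert selectBy("speedRating") == "perplexity_callAiWithWebSearch"
assert selectBy("qualityRating") == "anthropic_callAiBasic"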
@ -355,8 +332,8 @@ class AiObjects:
            return self.openaiService
        elif connectorType == "anthropic":
            return self.anthropicService
        elif connectorType == "langdoc":
            return self.langdocService
        elif connectorType == "perplexity":
            return self.perplexityService
        elif connectorType == "tavily":
            return self.tavilyService
        else:
@ -383,6 +360,17 @@ class AiObjects:
        # Select model for text generation
        modelName = self._selectModel(prompt, context, options)

        # Derive generation parameters
        temperature = getattr(options, "temperature", None)
        if temperature is None:
            temperature = 0.2
        maxTokens = getattr(options, "maxTokens", None)
        # Provide a generous default to avoid truncation for long outputs
        if maxTokens is None:
            # If resultFormat suggests large outputs (e.g., html, json), allow more tokens
            wants_large = str(getattr(options, "resultFormat", "")).lower() in ["html", "json", "md", "markdown"]
            maxTokens = 8000 if wants_large else 2000

        messages: List[Dict[str, Any]] = []
        if context:
            messages.append({"role": "system", "content": f"Context from documents:\n{context}"})
@ -394,10 +382,27 @@ class AiObjects:
        # Call the appropriate function
        if functionName == "callAiBasic":
            if aiModels[modelName]["connector"] == "openai":
                content = await connector.callAiBasic(messages)
                content = await connector.callAiBasic(messages, temperature=temperature, maxTokens=maxTokens)
            elif aiModels[modelName]["connector"] == "perplexity":
                content = await connector.callAiBasic(messages, temperature=temperature, maxTokens=maxTokens)
            else:
                response = await connector.callAiBasic(messages)
                response = await connector.callAiBasic(messages, temperature=temperature, maxTokens=maxTokens)
                content = response["choices"][0]["message"]["content"]
        elif functionName == "callAiWithWebSearch":
            # Perplexity web search function
            query = prompt
            if context:
                query = f"Context: {context}\n\nQuery: {prompt}"
            content = await connector.callAiWithWebSearch(query)
        elif functionName == "researchTopic":
            # Perplexity research function
            content = await connector.researchTopic(prompt)
        elif functionName == "answerQuestion":
            # Perplexity question answering function
            content = await connector.answerQuestion(prompt, context)
        elif functionName == "getCurrentNews":
            # Perplexity news function
            content = await connector.getCurrentNews(prompt)
        else:
            raise ValueError(f"Function {functionName} not supported for text generation")
@ -446,21 +451,331 @@ class AiObjects:
        else:
            raise ValueError(f"Function {functionName} not supported for image generation")

    # Web functionality methods
    async def webSearch(self, web_search_request: WebSearchRequest) -> WebSearchActionResult:
        """Perform web search using Tavily."""
        return await self.tavilyService.search(web_search_request)
    # Web functionality methods - Simple interface to Tavily connector
    async def search_websites(self, query: str, max_results: int = 5, **kwargs) -> List[WebSearchResultItem]:
        """Search for websites using Tavily."""
        request = WebSearchRequest(
            query=query,
            max_results=max_results,
            **kwargs
        )
        result = await self.tavilyService.search(request)

    async def webCrawl(self, web_crawl_request: WebCrawlRequest) -> WebCrawlActionResult:
        """Crawl web pages using Tavily."""
        return await self.tavilyService.crawl(web_crawl_request)
        if result.success and result.documents:
            return result.documents[0].documentData.results
        return []

    async def webScrape(self, web_scrape_request: WebScrapeRequest) -> WebScrapeActionResult:
        """Scrape web content using Tavily."""
        return await self.tavilyService.scrape(web_scrape_request)
    async def crawl_websites(self, urls: List[str], extract_depth: str = "advanced", format: str = "markdown") -> List[WebCrawlResultItem]:
        """Crawl websites using Tavily."""
        from pydantic import HttpUrl
        from urllib.parse import urlparse

        # Safely create HttpUrl objects with proper scheme handling
        http_urls = []
        for url in urls:
            try:
                # Ensure URL has a scheme
                parsed = urlparse(url)
                if not parsed.scheme:
                    url = f"https://{url}"

                # Use HttpUrl with scheme parameter (this works for all URLs)
                http_urls.append(HttpUrl(url, scheme="https"))

            except Exception as e:
                logger.warning(f"Skipping invalid URL {url}: {e}")
                continue

        if not http_urls:
            return []

        request = WebCrawlRequest(
            urls=http_urls,
            extract_depth=extract_depth,
            format=format
        )
        result = await self.tavilyService.crawl(request)

        if result.success and result.documents:
            return result.documents[0].documentData.results
        return []

    async def extract_content(self, urls: List[str], extract_depth: str = "advanced", format: str = "markdown") -> Dict[str, str]:
        """Extract content from URLs and return as dictionary."""
        crawl_results = await self.crawl_websites(urls, extract_depth, format)
        return {str(result.url): result.content for result in crawl_results}
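Note: search_websites and extract_content compose naturally into a one-shot research helper; a sketch assuming the search result items expose a url attribute (the wrapper name searchAndExtract is hypothetical):

from typing import Dict

async def searchAndExtract(ai: "AiObjects", query: str) -> Dict[str, str]:
    # Hypothetical composition of the two helpers above.
    items = await ai.search_websites(query, max_results=3)
    urls = [str(item.url) for item in items]  # assumes items carry .url
    return await ai.extract_content(urls)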
    # Core Web Tools - Clean interface for web operations
    async def readPage(self, url: str, extract_depth: str = "advanced") -> Optional[str]:
        """Read a single web page and return its content (HTML/Markdown)."""
        logger.debug(f"Reading page: {url}")
        try:
            # URL encode the URL to handle spaces and special characters
            from urllib.parse import quote, urlparse, urlunparse
            parsed = urlparse(url)
            encoded_url = urlunparse((
                parsed.scheme,
                parsed.netloc,
                parsed.path,
                parsed.params,
                parsed.query,
                parsed.fragment
            ))

            # Manually encode query parameters to handle spaces
            if parsed.query:
                encoded_query = quote(parsed.query, safe='=&')
                encoded_url = urlunparse((
                    parsed.scheme,
                    parsed.netloc,
                    parsed.path,
                    parsed.params,
                    encoded_query,
                    parsed.fragment
                ))

            logger.debug(f"URL encoded: {url} -> {encoded_url}")

            content = await self.extract_content([encoded_url], extract_depth, "markdown")
            result = content.get(encoded_url)
            if result:
                logger.debug(f"Successfully read page {encoded_url}: {len(result)} chars")
            else:
                logger.warning(f"No content returned for page {encoded_url}")
            return result
        except Exception as e:
            logger.warning(f"Failed to read page {url}: {e}")
            return None
    async def getUrlsFromPage(self, url: str, extract_depth: str = "advanced") -> List[str]:
        """Get all URLs from a web page, with redundancies removed."""
        try:
            content = await self.readPage(url, extract_depth)
            if not content:
                return []

            links = self._extractLinksFromContent(content, url)
            # Remove duplicates while preserving order
            seen = set()
            unique_links = []
            for link in links:
                if link not in seen:
                    seen.add(link)
                    unique_links.append(link)

            logger.debug(f"Extracted {len(unique_links)} unique URLs from {url}")
            return unique_links

        except Exception as e:
            logger.warning(f"Failed to get URLs from page {url}: {e}")
            return []
    def filterUrlsOnlyPages(self, urls: List[str], max_per_domain: int = 10) -> List[str]:
        """Filter URLs to get only links for pages to follow (no images, etc.)."""
        from urllib.parse import urlparse

        def _isHtmlCandidate(url: str) -> bool:
            lower = url.lower()
            blocked = ('.jpg', '.jpeg', '.png', '.gif', '.svg', '.webp', '.ico', '.bmp',
                       '.mp4', '.mp3', '.avi', '.mov', '.mkv',
                       '.pdf', '.zip', '.rar', '.7z', '.tar', '.gz',
                       '.css', '.js', '.woff', '.woff2', '.ttf', '.eot')
            return not lower.endswith(blocked)

        # Group by domain
        domain_links = {}
        for link in urls:
            domain = urlparse(link).netloc
            if domain not in domain_links:
                domain_links[domain] = []
            domain_links[domain].append(link)

        # Filter and cap per domain
        filtered_links = []
        for domain, domain_link_list in domain_links.items():
            seen = set()
            domain_filtered = []

            for link in domain_link_list:
                if link in seen:
                    continue
                if not _isHtmlCandidate(link):
                    continue
                seen.add(link)
                domain_filtered.append(link)
                if len(domain_filtered) >= max_per_domain:
                    break

            filtered_links.extend(domain_filtered)
            logger.debug(f"Domain {domain}: {len(domain_link_list)} -> {len(domain_filtered)} links")

        return filtered_links
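Note: the filter above combines extension blocking, de-duplication, and a per-domain cap; a condensed, runnable restatement of that logic for illustration (the blocked list is abridged):

from urllib.parse import urlparse

BLOCKED = ('.jpg', '.png', '.pdf', '.css', '.js')  # abridged from the list above

def filter_pages(urls, max_per_domain=1):
    per_domain = {}
    for url in urls:
        bucket = per_domain.setdefault(urlparse(url).netloc, [])
        if url.lower().endswith(BLOCKED) or url in bucket:
            continue
        if len(bucket) < max_per_domain:
            bucket.append(url)
    return [u for bucket in per_domain.values() for u in bucket]

print(filter_pages([
    "https://example.com/docs/intro",
    "https://example.com/logo.png",    # blocked extension
    "https://example.com/docs/intro",  # duplicate
    "https://other.org/a",
]))  # -> ['https://example.com/docs/intro', 'https://other.org/a']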
    def _extractLinksFromContent(self, content: str, base_url: str) -> List[str]:
        """Extract links from HTML/Markdown content."""
        try:
            import re
            from urllib.parse import urljoin, urlparse, quote, urlunparse

            def _cleanUrl(url: str) -> str:
                """Clean and encode URL to remove spaces and invalid characters."""
                # Remove quotes and extra spaces
                url = url.strip().strip('"\'')

                # If it's a relative URL, make it absolute first
                if not url.startswith(('http://', 'https://')):
                    url = urljoin(base_url, url)

                # Parse and re-encode the URL properly
                parsed = urlparse(url)
                if parsed.query:
                    # Encode query parameters properly
                    encoded_query = quote(parsed.query, safe='=&')
                    url = urlunparse((
                        parsed.scheme,
                        parsed.netloc,
                        parsed.path,
                        parsed.params,
                        encoded_query,
                        parsed.fragment
                    ))

                return url

            links = []

            # Extract HTML links: <a href="url"> format
            html_link_pattern = r'<a[^>]+href=["\']([^"\']+)["\'][^>]*>'
            html_links = re.findall(html_link_pattern, content, re.IGNORECASE)

            for url in html_links:
                if url and not url.startswith('#') and not url.startswith('javascript:'):
                    try:
                        cleaned_url = _cleanUrl(url)
                        links.append(cleaned_url)
                        logger.debug(f"Extracted HTML link: {url} -> {cleaned_url}")
                    except Exception as e:
                        logger.debug(f"Failed to clean HTML link {url}: {e}")

            # Extract markdown links: [text](url) format
            markdown_link_pattern = r'\[([^\]]+)\]\(([^)]+)\)'
            markdown_links = re.findall(markdown_link_pattern, content)

            for text, url in markdown_links:
                if url and not url.startswith('#'):
                    try:
                        cleaned_url = _cleanUrl(url)
                        # Only keep URLs from the same domain
                        if urlparse(cleaned_url).netloc == urlparse(base_url).netloc:
                            links.append(cleaned_url)
                            logger.debug(f"Extracted markdown link: {url} -> {cleaned_url}")
                    except Exception as e:
                        logger.debug(f"Failed to clean markdown link {url}: {e}")

            # Extract plain URLs in the text
            url_pattern = r'https?://[^\s\)]+'
            plain_urls = re.findall(url_pattern, content)

            for url in plain_urls:
                try:
                    clean_url = url.rstrip('.,;!?')
                    cleaned_url = _cleanUrl(clean_url)
                    if urlparse(cleaned_url).netloc == urlparse(base_url).netloc:
                        if cleaned_url not in links:  # Avoid duplicates
                            links.append(cleaned_url)
                            logger.debug(f"Extracted plain URL: {url} -> {cleaned_url}")
                except Exception as e:
                    logger.debug(f"Failed to clean plain URL {url}: {e}")

            logger.debug(f"Total links extracted and cleaned: {len(links)}")
            return links

        except Exception as e:
            logger.warning(f"Failed to extract links from content: {e}")
            return []
    async def crawlRecursively(self, urls: List[str], max_depth: int, extract_depth: str = "advanced", max_per_domain: int = 10) -> Dict[str, str]:
        """
        Recursively crawl URLs up to specified depth.

        Args:
            urls: List of starting URLs to crawl
            max_depth: Maximum depth to crawl (1=main pages only, 2=main+sub-pages, etc.)
            extract_depth: Tavily extract depth setting
            max_per_domain: Maximum URLs per domain per level

        Returns:
            Dictionary mapping URL -> content for all crawled pages
        """
        logger.info(f"Starting recursive crawl: {len(urls)} starting URLs, max_depth={max_depth}")

        # URL index to track all processed URLs
        processed_urls = set()
        all_content = {}

        # Current level URLs to process
        current_level_urls = urls.copy()

        for depth in range(1, max_depth + 1):
            logger.info(f"=== DEPTH LEVEL {depth}/{max_depth} ===")
            logger.info(f"Processing {len(current_level_urls)} URLs at depth {depth}")

            # URLs found at this level (for next iteration)
            next_level_urls = []

            for url in current_level_urls:
                if url in processed_urls:
                    logger.debug(f"URL {url} already processed, skipping")
                    continue

                try:
                    logger.info(f"Processing URL at depth {depth}: {url}")

                    # Read page content
                    content = await self.readPage(url, extract_depth)
                    if content:
                        all_content[url] = content
                        processed_urls.add(url)
                        logger.info(f"✓ Successfully processed {url}: {len(content)} chars")

                        # Get URLs from this page for next level
                        page_urls = await self.getUrlsFromPage(url, extract_depth)
                        logger.info(f"Found {len(page_urls)} URLs on {url}")

                        # Filter URLs and add to next level
                        filtered_urls = self.filterUrlsOnlyPages(page_urls, max_per_domain)
                        logger.info(f"Filtered to {len(filtered_urls)} valid URLs")

                        # Add new URLs to next level (avoiding already processed ones)
                        new_urls_count = 0
                        for new_url in filtered_urls:
                            if new_url not in processed_urls:
                                next_level_urls.append(new_url)
                                new_urls_count += 1

                        logger.info(f"Added {new_urls_count} new URLs to next level from {url}")
                    else:
                        logger.warning(f"✗ No content extracted from {url}")
                        processed_urls.add(url)  # Mark as processed to avoid retry

                except Exception as e:
                    logger.warning(f"✗ Failed to process URL {url} at depth {depth}: {e}")
                    processed_urls.add(url)  # Mark as processed to avoid retry

            # Prepare for next iteration
            current_level_urls = next_level_urls
            logger.info(f"Depth {depth} completed. Found {len(next_level_urls)} URLs for next level")

            # Stop if no more URLs to process
            if not current_level_urls:
                logger.info(f"No more URLs found at depth {depth}, stopping recursion")
                break

        logger.info(f"Recursive crawl completed: {len(all_content)} total pages crawled")
        return all_content
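Note: a sketch of driving the breadth-first crawl above (seed URL illustrative; depth 1 reads only the seeds, depth 2 also follows their links):

async def demoCrawl(ai: "AiObjects") -> None:
    # Hypothetical call site for crawlRecursively() as defined above.
    pages = await ai.crawlRecursively(["https://example.com"], max_depth=2)
    for url, content in pages.items():
        print(url, len(content))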
    async def webQuery(self, query: str, context: str = "", options: AiCallOptions = None) -> str:
        """Use LangDoc AI to provide the best answers for web-related queries."""
        """Use Perplexity AI to provide the best answers for web-related queries."""
        if options is None:
            options = AiCallOptions(operationType=OperationType.WEB_RESEARCH)

@ -480,14 +795,12 @@ Please provide:

Format your response in a clear, professional manner that would be helpful for someone researching this topic."""

        messages = [{"role": "user", "content": webPrompt}]

        try:
            # Use LangDoc for the best answers
            response = await self.langdocService.callAiBasic(messages)
            # Use Perplexity for web research with search capabilities
            response = await self.perplexityService.callAiWithWebSearch(webPrompt)
            return response
        except Exception as e:
            logger.error(f"LangDoc web query failed: {str(e)}")
            logger.error(f"Perplexity web query failed: {str(e)}")
            raise Exception(f"Failed to process web query: {str(e)}")

    # Utility methods
@ -511,3 +824,157 @@ Format your response in a clear, professional manner that would be helpful for s
        """Get model names that have a specific tag."""
        return [name for name, info in aiModels.items() if tag in info.get("tags", [])]

    async def selectRelevantWebsites(self, websites: List[str], userQuestion: str) -> Tuple[List[str], str]:
        """Select most relevant websites using AI analysis. Returns (selected_websites, ai_response)."""
        if len(websites) <= 1:
            return websites, "Only one website available, no selection needed"

        try:
            # Create website summaries for AI analysis
            websiteSummaries = []
            for i, url in enumerate(websites, 1):
                from urllib.parse import urlparse
                domain = urlparse(url).netloc
                summary = f"{i}. {url} (Domain: {domain})"
                websiteSummaries.append(summary)

            selectionPrompt = f"""
Based on this user request: "{userQuestion}"

I have {len(websites)} websites found. Please select the most relevant website(s) for this request.

Available websites:
{chr(10).join(websiteSummaries)}

Please respond with the website number(s) (1, 2, 3, etc.) that are most relevant.
Format: 1,3,5 (or just 1 for single selection)
"""

            # Use Perplexity to select the best websites
            response = await self.webQuery(selectionPrompt)

            # Parse the selection
            import re
            numbers = re.findall(r'\d+', response)
            if numbers:
                selectedWebsites = []
                for num in numbers:
                    index = int(num) - 1
                    if 0 <= index < len(websites):
                        selectedWebsites.append(websites[index])

                if selectedWebsites:
                    logger.info(f"AI selected {len(selectedWebsites)} websites")
                    return selectedWebsites, response

            # Fallback to first website
            logger.warning("AI selection failed, using first website")
            return websites[:1], f"AI selection failed, fallback to first website. AI response: {response}"

        except Exception as e:
            logger.error(f"Error in website selection: {str(e)}")
            return websites[:1], f"Error in website selection: {str(e)}"
    async def analyzeContentWithChunking(self, allContent: Dict[str, str], userQuestion: str) -> str:
        """Analyze content using AI with chunking for large content."""
        logger.info(f"Analyzing {len(allContent)} websites with AI")

        # Process content in chunks to avoid token limits
        chunkSize = 50000  # 50k chars per chunk
        allChunks = []

        for url, content in allContent.items():
            filteredContent = self._filterContent(content)
            if len(filteredContent) <= chunkSize:
                allChunks.append((url, filteredContent))
                logger.info(f"Content from {url}: {len(filteredContent)} chars (single chunk)")
            else:
                # Split large content into chunks
                chunkCount = (len(filteredContent) + chunkSize - 1) // chunkSize
                logger.info(f"Content from {url}: {len(filteredContent)} chars (split into {chunkCount} chunks)")
                for i in range(0, len(filteredContent), chunkSize):
                    chunk = filteredContent[i:i+chunkSize]
                    chunkNum = i//chunkSize + 1
                    allChunks.append((f"{url} (part {chunkNum})", chunk))

        logger.info(f"Processing {len(allChunks)} content chunks")

        # Analyze each chunk
        chunkAnalyses = []
        for i, (url, chunk) in enumerate(allChunks, 1):
            logger.info(f"Analyzing chunk {i}/{len(allChunks)}: {url}")

            try:
                analysisPrompt = f"""
Analyze this web content and extract relevant information for: {userQuestion}

Source: {url}
Content: {chunk}

Please extract key information relevant to the query.
"""

                analysis = await self.webQuery(analysisPrompt)
                chunkAnalyses.append(analysis)
                logger.info(f"Chunk {i}/{len(allChunks)} analyzed successfully")

            except Exception as e:
                logger.error(f"Chunk {i}/{len(allChunks)} error: {e}")

        # Combine all chunk analyses
        if chunkAnalyses:
            logger.info(f"Combining {len(chunkAnalyses)} chunk analyses")
            combinedAnalysis = "\n\n".join(chunkAnalyses)

            # Final synthesis
            try:
                logger.info("Performing final synthesis of all analyses")
                synthesisPrompt = f"""
Based on these partial analyses, provide a comprehensive answer to: {userQuestion}

Partial analyses:
{combinedAnalysis}

Please provide a clear, well-structured answer to the query.
"""

                finalAnalysis = await self.webQuery(synthesisPrompt)
                logger.info("Final synthesis completed successfully")
                return finalAnalysis

            except Exception as e:
                logger.error(f"Synthesis error: {e}")
                return combinedAnalysis
        else:
            logger.error("No content could be analyzed")
            return "No content could be analyzed"
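Note: the chunk count above is ceiling division; worked through once as a standalone check:

# (len + chunkSize - 1) // chunkSize rounds up, matching the slicing loop above.
chunkSize = 50000
content_len = 120000
assert (content_len + chunkSize - 1) // chunkSize == 3  # 50k + 50k + 20k
pieces = [min(chunkSize, content_len - i) for i in range(0, content_len, chunkSize)]
assert pieces == [50000, 50000, 20000]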
    def _filterContent(self, content: str) -> str:
        """Filter out navigation, ads, and other nonsense content."""
        lines = content.split('\n')
        filteredLines = []

        for line in lines:
            line = line.strip()
            # Skip empty lines
            if not line:
                continue
            # Skip navigation elements
            if any(skip in line.lower() for skip in [
                'toggle navigation', 'log in', 'sign up', 'cookies', 'privacy policy',
                'terms of service', 'subscribe', 'newsletter', 'follow us', 'share this',
                'advertisement', 'sponsored', 'banner', 'popup', 'modal'
            ]):
                continue
            # Skip image references without context
            if line.startswith('![') and line.endswith(')') and '---' in line:
                continue
            # Keep meaningful content
            if len(line) > 10:  # Skip very short lines
                filteredLines.append(line)

        return '\n'.join(filteredLines)
@ -12,8 +12,8 @@ from typing import Dict, Any, List, Optional, Union, get_origin, get_args
import asyncio

from modules.interfaces.interfaceDbChatAccess import ChatAccess
from modules.datamodels.datamodelWorkflow import (
    TaskAction,
from modules.datamodels.datamodelChat import (
    ActionItem,
    TaskResult,
    TaskItem,
    TaskStatus,
@ -549,7 +549,7 @@ class ChatObjects:
                created_documents.append(created_doc)

            # Convert to ChatMessage model
            return ChatMessage(
            chat_message = ChatMessage(
                id=createdMessage["id"],
                workflowId=createdMessage["workflowId"],
                parentMessageId=createdMessage.get("parentMessageId"),
@ -571,6 +571,11 @@ class ChatObjects:
                actionName=createdMessage.get("actionName")
            )

            # Debug: Store message and documents for debugging TODO REMOVE
            self._storeDebugMessageAndDocuments(chat_message)

            return chat_message

        except Exception as e:
            logger.error(f"Error creating workflow message: {str(e)}")
            return None
@ -1045,6 +1050,120 @@ class ChatObjects:

        return {"items": items}

    def _storeDebugMessageAndDocuments(self, message: ChatMessage) -> None:
        """
        Store message and documents for debugging purposes in fileshare.
        Structure: gateway/test-chat/messages/m_round_task_action_timestamp/documentlist_label/documents

        Args:
            message: ChatMessage object to store
        """
        try:
            import os
            import json
            from datetime import datetime, UTC

            # Create base debug directory
            debug_root = "./test-chat/messages"
            os.makedirs(debug_root, exist_ok=True)

            # Generate timestamp
            timestamp = datetime.now(UTC).strftime('%Y%m%d-%H%M%S-%f')[:-3]

            # Create message folder name: m_round_task_action_timestamp
            # Use actual values from message, not defaults
            round_str = str(message.roundNumber) if message.roundNumber is not None else "0"
            task_str = str(message.taskNumber) if message.taskNumber is not None else "0"
            action_str = str(message.actionNumber) if message.actionNumber is not None else "0"
            message_folder = f"{timestamp}_m_{round_str}_{task_str}_{action_str}"

            message_path = os.path.join(debug_root, message_folder)
            os.makedirs(message_path, exist_ok=True)

            # Store message data - use dict() instead of model_dump() for compatibility
            message_file = os.path.join(message_path, "message.json")
            with open(message_file, "w", encoding="utf-8") as f:
                # Convert message to dict manually to avoid model_dump() issues
                message_dict = {
                    "id": message.id,
                    "workflowId": message.workflowId,
                    "parentMessageId": message.parentMessageId,
                    "message": message.message,
                    "role": message.role,
                    "status": message.status,
                    "sequenceNr": message.sequenceNr,
                    "publishedAt": message.publishedAt,
                    "roundNumber": message.roundNumber,
                    "taskNumber": message.taskNumber,
                    "actionNumber": message.actionNumber,
                    "documentsLabel": message.documentsLabel,
                    "actionId": message.actionId,
                    "actionMethod": message.actionMethod,
                    "actionName": message.actionName,
                    "success": message.success,
                    "documents": []
                }
                json.dump(message_dict, f, indent=2, ensure_ascii=False, default=str)

            # Store message content as text
            if message.message:
                message_text_file = os.path.join(message_path, "message_text.txt")
                with open(message_text_file, "w", encoding="utf-8") as f:
                    f.write(str(message.message))

            # Store documents if provided
            if message.documents and len(message.documents) > 0:
                logger.info(f"Debug: Processing {len(message.documents)} documents")

                # Group documents by documentsLabel
                documents_by_label = {}
                for doc in message.documents:
                    label = message.documentsLabel or 'default'
                    if label not in documents_by_label:
                        documents_by_label[label] = []
                    documents_by_label[label].append(doc)

                # Create subfolder for each document label
                for label, docs in documents_by_label.items():
                    # Sanitize label for filesystem
                    safe_label = "".join(c for c in str(label) if c.isalnum() or c in (' ', '-', '_')).rstrip()
                    safe_label = safe_label.replace(' ', '_')
                    if not safe_label:
                        safe_label = "default"

                    label_folder = os.path.join(message_path, safe_label)
                    os.makedirs(label_folder, exist_ok=True)
                    logger.info(f"Debug: Created document folder: {label_folder}")

                    # Store each document
                    for i, doc in enumerate(docs):
                        # Create document metadata file
                        doc_meta = {
                            "id": doc.id,
                            "messageId": doc.messageId,
                            "fileId": doc.fileId,
                            "fileName": doc.fileName,
                            "fileSize": doc.fileSize,
                            "mimeType": doc.mimeType,
                            "roundNumber": doc.roundNumber,
                            "taskNumber": doc.taskNumber,
                            "actionNumber": doc.actionNumber,
                            "actionId": doc.actionId
                        }

                        doc_meta_file = os.path.join(label_folder, f"document_{i+1:03d}_metadata.json")
                        with open(doc_meta_file, "w", encoding="utf-8") as f:
                            json.dump(doc_meta, f, indent=2, ensure_ascii=False, default=str)

                        logger.info(f"Debug: Stored document metadata for {doc.fileName}")

            logger.info(f"Debug: Stored message and documents in {message_path}")

        except Exception as e:
            logger.error(f"Debug: Failed to store message and documents: {e}")
            import traceback
            logger.error(f"Debug: Traceback: {traceback.format_exc()}")
def getInterface(currentUser: Optional[User] = None) -> 'ChatObjects':
    """
@ -41,6 +41,8 @@ class Services:
    def __init__(self, user: User, workflow: ChatWorkflow = None):
        self.user: User = user
        self.workflow: ChatWorkflow = workflow
        self.currentUserPrompt: str = ""  # Cleaned/normalized user intent for the current round
        self.rawUserPrompt: str = ""  # Original raw user message for the current round

        # Initialize interfaces
File diff suppressed because it is too large
182 modules/services/serviceExtraction/chunking/image_chunker.py Normal file
@ -0,0 +1,182 @@
from typing import Any, Dict, List
import base64
import io

from modules.datamodels.datamodelExtraction import ContentPart
from ..subRegistry import Chunker


class ImageChunker(Chunker):
    """Chunker for reducing image size through resizing, compression, and tiling."""

    def chunk(self, part: ContentPart, options: Dict[str, Any]) -> list[Dict[str, Any]]:
        """
        Chunk an image by reducing its size through various strategies.

        Args:
            part: ContentPart containing image data (base64 encoded)
            options: Chunking options including:
                - imageChunkSize: Maximum size in bytes for each chunk
                - imageMaxPixels: Maximum pixels (width*height) for the image
                - imageQuality: JPEG quality (0-100, default 85)
                - imageTileSize: Size for tiling if image is still too large

        Returns:
            List of image chunks with reduced size
        """
        maxBytes = int(options.get("imageChunkSize", 1000000))  # 1MB default
        maxPixels = int(options.get("imageMaxPixels", 1024 * 1024))  # 1MP default
        quality = int(options.get("imageQuality", 85))
        tileSize = int(options.get("imageTileSize", 512))  # 512x512 tiles

        chunks: List[Dict[str, Any]] = []

        try:
            # Lazy import PIL to avoid hanging during module import
            from PIL import Image

            # Decode base64 image data
            imageData = base64.b64decode(part.data)
            image = Image.open(io.BytesIO(imageData))

            # Get original dimensions
            originalWidth, originalHeight = image.size
            originalPixels = originalWidth * originalHeight

            # Strategy 1: If image is small enough, return as-is
            if len(part.data) <= maxBytes and originalPixels <= maxPixels:
                chunks.append({
                    "data": part.data,
                    "size": len(part.data),
                    "order": 0,
                    "metadata": {
                        "originalSize": len(part.data),
                        "originalPixels": originalPixels,
                        "strategy": "original"
                    }
                })
                return chunks

            # Strategy 2: Resize to fit within pixel limit
            if originalPixels > maxPixels:
                # Calculate new dimensions maintaining aspect ratio
                scale = (maxPixels / originalPixels) ** 0.5
                newWidth = int(originalWidth * scale)
                newHeight = int(originalHeight * scale)

                # Ensure minimum size
                newWidth = max(newWidth, 64)
                newHeight = max(newHeight, 64)

                image = image.resize((newWidth, newHeight), Image.Resampling.LANCZOS)

            # Strategy 3: Compress with quality reduction
            currentSize = len(part.data)
            currentQuality = quality

            while currentSize > maxBytes and currentQuality > 10:
                # Compress image
                output = io.BytesIO()
                image.save(output, format='JPEG', quality=currentQuality, optimize=True)
                compressedData = output.getvalue()
                compressedB64 = base64.b64encode(compressedData).decode('utf-8')
                currentSize = len(compressedB64)

                if currentSize <= maxBytes:
                    chunks.append({
                        "data": compressedB64,
                        "size": currentSize,
                        "order": 0,
                        "metadata": {
                            "originalSize": len(part.data),
                            "originalPixels": originalPixels,
                            "compressedSize": currentSize,
                            "quality": currentQuality,
                            "strategy": "compressed"
                        }
                    })
                    return chunks

                currentQuality -= 10

            # Strategy 4: Tile the image if still too large
            if currentSize > maxBytes:
                chunks = self._tileImage(image, maxBytes, tileSize, quality, originalPixels)
                return chunks

            # Fallback: Return compressed version even if over limit
            output = io.BytesIO()
            image.save(output, format='JPEG', quality=10, optimize=True)
            compressedData = output.getvalue()
            compressedB64 = base64.b64encode(compressedData).decode('utf-8')

            chunks.append({
                "data": compressedB64,
                "size": len(compressedB64),
                "order": 0,
                "metadata": {
                    "originalSize": len(part.data),
                    "originalPixels": originalPixels,
                    "compressedSize": len(compressedB64),
                    "quality": 10,
                    "strategy": "fallback_compressed"
                }
            })

        except Exception as e:
            # Fallback: Return original data with error metadata
            chunks.append({
                "data": part.data,
                "size": len(part.data),
                "order": 0,
                "metadata": {
                    "originalSize": len(part.data),
                    "strategy": "error_fallback",
                    "error": str(e)
                }
            })

        return chunks
    def _tileImage(self, image: "Image.Image", maxBytes: int, tileSize: int, quality: int, originalPixels: int) -> List[Dict[str, Any]]:
        """Split image into tiles if it's still too large after compression."""
        chunks = []
        width, height = image.size

        # Calculate tile grid
        tilesX = (width + tileSize - 1) // tileSize
        tilesY = (height + tileSize - 1) // tileSize

        for y in range(tilesY):
            for x in range(tilesX):
                # Calculate tile boundaries
                left = x * tileSize
                top = y * tileSize
                right = min(left + tileSize, width)
                bottom = min(top + tileSize, height)

                # Extract tile
                tile = image.crop((left, top, right, bottom))

                # Compress tile
                output = io.BytesIO()
                tile.save(output, format='JPEG', quality=quality, optimize=True)
                tileData = output.getvalue()
                tileB64 = base64.b64encode(tileData).decode('utf-8')

                chunks.append({
                    "data": tileB64,
                    "size": len(tileB64),
                    "order": y * tilesX + x,
                    "metadata": {
                        "originalSize": len(image.tobytes()),
                        "originalPixels": originalPixels,
                        "tileSize": tileSize,
                        "tilePosition": f"{x},{y}",
                        "tileBounds": f"{left},{top},{right},{bottom}",
                        "quality": quality,
                        "strategy": "tiled"
                    }
                })

        return chunks
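Note: to exercise the new chunker end to end — a sketch assuming Pillow is installed and the ImageChunker/ContentPart definitions above; the sample image is generated in memory and the ContentPart construction is left hypothetical since its full field set is not shown in this diff:

import base64
import io

from PIL import Image

# Build a test image and base64-encode it, matching the encoding
# ImageChunker.chunk() expects in part.data.
img = Image.new("RGB", (2000, 2000), color="white")
buf = io.BytesIO()
img.save(buf, format="PNG")
data_b64 = base64.b64encode(buf.getvalue()).decode("utf-8")

options = {"imageChunkSize": 200_000, "imageMaxPixels": 1024 * 1024, "imageQuality": 85}
# part = ContentPart(data=data_b64, ...)  # remaining fields depend on the datamodel
# chunks = ImageChunker().chunk(part, options)
# Each chunk carries data/size/order plus metadata["strategy"]:
# "original", "compressed", "tiled", or "fallback_compressed" / "error_fallback".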
@ -1,12 +1,17 @@
from typing import Any, Dict, List
import logging

from modules.datamodels.datamodelExtraction import ContentPart
from ..subRegistry import Chunker

logger = logging.getLogger(__name__)


class TextChunker(Chunker):
    def chunk(self, part: ContentPart, options: Dict[str, Any]) -> list[Dict[str, Any]]:
        maxBytes = int(options.get("textChunkSize", 40000))
        logger.debug(f"TextChunker: textChunkSize from options: {options.get('textChunkSize', 'NOT_FOUND')}")
        logger.debug(f"TextChunker: using maxBytes: {maxBytes}")
        chunks: List[Dict[str, Any]] = []
        current: List[str] = []
        size = 0
@ -1,7 +1,7 @@
from typing import Any, Dict, List
import base64

from ..utils import makeId
from ..subUtils import makeId
from modules.datamodels.datamodelExtraction import ContentPart
from ..subRegistry import Extractor
@ -1,7 +1,7 @@
from typing import Any, Dict, List

from modules.datamodels.datamodelExtraction import ContentPart
from ..utils import makeId
from ..subUtils import makeId
from ..subRegistry import Extractor
@ -1,7 +1,7 @@
from typing import Any, Dict, List
import io

from ..utils import makeId
from ..subUtils import makeId
from modules.datamodels.datamodelExtraction import ContentPart
from ..subRegistry import Extractor
@ -2,7 +2,7 @@ from typing import Any, Dict, List
from bs4 import BeautifulSoup

from modules.datamodels.datamodelExtraction import ContentPart
from ..utils import makeId
from ..subUtils import makeId
from ..subRegistry import Extractor
@ -1,7 +1,7 @@
from typing import Any, Dict, List
import base64

from ..utils import makeId
from ..subUtils import makeId
from modules.datamodels.datamodelExtraction import ContentPart
from ..subRegistry import Extractor
@ -2,7 +2,7 @@ from typing import Any, Dict, List
import json

from modules.datamodels.datamodelExtraction import ContentPart
from ..utils import makeId
from ..subUtils import makeId
from ..subRegistry import Extractor
@ -2,7 +2,7 @@ from typing import Any, Dict, List
import base64
import io

from ..utils import makeId
from ..subUtils import makeId
from modules.datamodels.datamodelExtraction import ContentPart
from ..subRegistry import Extractor
@ -1,7 +1,7 @@
from typing import Any, Dict, List

from modules.datamodels.datamodelExtraction import ContentPart
from ..utils import makeId
from ..subUtils import makeId
from ..subRegistry import Extractor
@ -2,7 +2,7 @@ from typing import Any, Dict, List
import io
from datetime import datetime

from ..utils import makeId
from ..subUtils import makeId
from modules.datamodels.datamodelExtraction import ContentPart
from ..subRegistry import Extractor
@ -75,7 +75,8 @@ class XlsxExtractor:
                elif isinstance(v, datetime):
                    cells.append(v.strftime("%Y-%m-%d %H:%M:%S"))
                else:
                    cells.append(f'"{str(v).replace("\"", "\"\"")}"')
                    escaped_value = str(v).replace('"', '""')
                    cells.append(f'"{escaped_value}"')
            lines.append(",".join(cells))
        csvData = "\n".join(lines)
        parts.append(ContentPart(
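Note on why the XlsxExtractor change above matters: before Python 3.12, backslash escapes were not allowed inside f-string expressions, so the old one-liner with "\"" inside the braces failed to parse on older interpreters; pulling the CSV quote-doubling into a named variable works everywhere. A standalone check:

# CSV-style quote doubling, as in the replacement code above.
v = 'He said "hi"'
escaped_value = str(v).replace('"', '""')
cell = f'"{escaped_value}"'
assert cell == '"He said ""hi"""'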
@ -2,7 +2,7 @@ from typing import Any, Dict, List
import xml.etree.ElementTree as ET

from modules.datamodels.datamodelExtraction import ContentPart
from ..utils import makeId
from ..subUtils import makeId
from ..subRegistry import Extractor
@ -1,9 +1,14 @@
from typing import Any, Dict, List, Optional, Union
import uuid
import logging

from .subRegistry import ExtractorRegistry, ChunkerRegistry
from .subPipeline import runExtraction, poolAndLimit, applyAiIfRequested
from modules.datamodels.datamodelExtraction import ExtractedContent, ContentPart
from modules.datamodels.datamodelExtraction import ContentExtracted, ContentPart, MergeStrategy
from modules.datamodels.datamodelChat import ChatDocument


logger = logging.getLogger(__name__)


class ExtractionService:
@ -12,45 +17,325 @@ class ExtractionService:
        self._extractorRegistry = ExtractorRegistry()
        self._chunkerRegistry = ChunkerRegistry()

    def extractContent(self, documentList: List[Dict[str, Any]], options: Dict[str, Any]) -> List[ExtractedContent]:
        results: List[ExtractedContent] = []
        for doc in documentList:
    def extractContent(self, documents: List[ChatDocument], options: Dict[str, Any]) -> List[ContentExtracted]:
        """
        Extract content from a list of ChatDocument objects.

        Args:
            documents: List of ChatDocument objects to extract content from
            options: Extraction options including maxSize, chunkAllowed, mergeStrategy, etc.

        Returns:
            List of ContentExtracted objects, one per input document
        """
        results: List[ContentExtracted] = []

        # Lazy import to avoid circular deps and heavy init at module import
        from modules.interfaces.interfaceDbComponentObjects import getInterface
        dbInterface = getInterface()

        for i, doc in enumerate(documents):
            logger.info(f"=== DOCUMENT {i}: {doc.fileName} ===")
            logger.info(f"Initial MIME type: {doc.mimeType}")

            # Resolve raw bytes for this document using interface
            documentBytes = dbInterface.getFileData(doc.fileId)
            if not documentBytes:
                raise ValueError(f"No file data found for fileId={doc.fileId}")

            # Convert ChatDocument to the format expected by runExtraction
            documentData = {
                "id": doc.id,
                "bytes": documentBytes,
                "fileName": doc.fileName,
                "mimeType": doc.mimeType
            }

            ec = runExtraction(
                extractorRegistry=self._extractorRegistry,
                chunkerRegistry=self._chunkerRegistry,
                documentBytes=doc.get("bytes"),
                fileName=doc.get("fileName"),
                mimeType=doc.get("mimeType"),
                documentBytes=documentData["bytes"],
                fileName=documentData["fileName"],
                mimeType=documentData["mimeType"],
                options=options
            )

            # Log content parts metadata
            logger.debug(f"Content parts: {len(ec.parts)}")
            for j, part in enumerate(ec.parts):
                logger.debug(f"  Part {j}: {part.typeGroup} ({part.mimeType}) - {len(part.data) if part.data else 0} chars")
                if part.metadata:
                    logger.debug(f"    Metadata: {part.metadata}")

            # Attach document id to parts if missing
            for p in ec.parts:
                if "documentId" not in p.metadata:
                    p.metadata["documentId"] = doc.get("id") or str(uuid.uuid4())
                    p.metadata["documentId"] = documentData["id"] or str(uuid.uuid4())

            # Log chunking information
            chunked_parts = [p for p in ec.parts if p.metadata.get("chunk", False)]
            if chunked_parts:
                logger.debug(f"=== CHUNKING RESULTS ===")
                logger.debug(f"Total parts: {len(ec.parts)}")
                logger.debug(f"Chunked parts: {len(chunked_parts)}")
                for chunk in chunked_parts:
                    logger.debug(f"  Chunk: {chunk.label} - {len(chunk.data)} chars (parent: {chunk.parentId})")
            else:
                logger.debug(f"No chunking needed - {len(ec.parts)} parts fit within size limits")

            ec = applyAiIfRequested(ec, options)
            results.append(ec)

        return results
    async def extractContentFromDocument(self, prompt: str, documents: List[Dict[str, Any]], options: Optional[Dict[str, Any]] = None) -> List[ExtractedContent]:
    def mergeAiResults(
        self,
        extractedContent: List[ContentExtracted],
        aiResults: List[str],
        strategy: MergeStrategy
    ) -> ContentExtracted:
        """
        Batch extract content from multiple documents.
        Merge AI results from chunked content back into a single ContentExtracted.

        Args:
            prompt: Instructional prompt for optional AI post-processing/selection.
            documents: List of dicts with keys: id, bytes, fileName, mimeType.
            options: Optional extraction options. "ai" config may be provided.
            extractedContent: List of ContentExtracted objects that were processed
            aiResults: List of AI response strings, one per chunk
            strategy: Merge strategy configuration (dict or MergeStrategy object)

        Returns:
            List[ExtractedContent]: one per input document in order.
            Single ContentExtracted with merged AI results
        """
        # Build options safely and inject prompt for downstream AI selection if desired
        effectiveOptions: Dict[str, Any] = options.copy() if options else {}
        aiCfg = effectiveOptions.get("ai") or {}
        if prompt:
            aiCfg["prompt"] = prompt
        effectiveOptions["ai"] = aiCfg
        logger.debug(f"=== MERGING AI RESULTS ===")
        logger.debug(f"Extracted content: {len(extractedContent)} documents")
        logger.debug(f"AI results: {len(aiResults)} responses")
        logger.debug(f"Merge strategy: {strategy.mergeType}")

        # Delegate to existing synchronous pipeline
        return self.extractContent(documents, effectiveOptions)
        mergeStrategy = strategy

        # Collect all parts from all extracted content
        allParts: List[ContentPart] = []
        for ec in extractedContent:
            allParts.extend(ec.parts)

        logger.debug(f"Total original parts: {len(allParts)}")

        # Create AI result parts
        aiResultParts: List[ContentPart] = []
        for i, aiResult in enumerate(aiResults):
            aiPart = ContentPart(
                id=f"ai_result_{i}",
                parentId=None,  # Will be set based on strategy
                label="ai_result",
                typeGroup="text",
                mimeType="text/plain",
                data=aiResult,
                metadata={
                    "aiResult": True,
                    "order": i,
                    "size": len(aiResult.encode('utf-8'))
                }
            )
            aiResultParts.append(aiPart)

        logger.debug(f"Created {len(aiResultParts)} AI result parts")

        # Apply merging strategy
        if mergeStrategy.mergeType == "concatenate":
            mergedParts = self._mergeConcatenate(allParts, aiResultParts, mergeStrategy)
        elif mergeStrategy.mergeType == "hierarchical":
            mergedParts = self._mergeHierarchical(allParts, aiResultParts, mergeStrategy)
        elif mergeStrategy.mergeType == "intelligent":
            mergedParts = self._mergeIntelligent(allParts, aiResultParts, mergeStrategy)
        else:
            # Default to concatenate
            mergedParts = self._mergeConcatenate(allParts, aiResultParts, mergeStrategy)

        # Create final ContentExtracted
        mergedContent = ContentExtracted(
            id=f"merged_{uuid.uuid4()}",
            parts=mergedParts
        )

        logger.debug(f"=== MERGE COMPLETED ===")
        logger.debug(f"Final merged parts: {len(mergedParts)}")
        logger.debug(f"Merged content ID: {mergedContent.id}")

        return mergedContent
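Note: a sketch of driving mergeAiResults; the field names mirror the attributes the method reads (mergeType, preserveChunks, chunkSeparator), but the exact MergeStrategy constructor is assumed rather than confirmed by this diff:

# Hypothetical driver for mergeAiResults(); `service` is an ExtractionService
# and `extracted_docs` the List[ContentExtracted] returned by extractContent().
strategy = MergeStrategy(
    mergeType="concatenate",
    preserveChunks=False,
    chunkSeparator="\n\n---\n\n",
)
merged = service.mergeAiResults(
    extractedContent=extracted_docs,
    aiResults=["summary of chunk 1", "summary of chunk 2"],
    strategy=strategy,
)
print(len(merged.parts))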
    def _mergeConcatenate(
        self,
        originalParts: List[ContentPart],
        aiResultParts: List[ContentPart],
        strategy: MergeStrategy
    ) -> List[ContentPart]:
        """Merge parts by simple concatenation."""
        mergedParts = []

        # Add original parts (filtered if needed)
        for part in originalParts:
            if strategy.preserveChunks or not part.metadata.get("chunk", False):
                mergedParts.append(part)

        # Add AI results
        if aiResultParts:
            # Group AI results by parentId if available
            aiResultsByParent = {}
            for aiPart in aiResultParts:
                parentId = aiPart.parentId or "root"
                if parentId not in aiResultsByParent:
                    aiResultsByParent[parentId] = []
                aiResultsByParent[parentId].append(aiPart)

            # Merge AI results for each parent
            for parentId, aiParts in aiResultsByParent.items():
                if len(aiParts) == 1:
                    mergedParts.append(aiParts[0])
                else:
                    # Concatenate multiple AI results for same parent
                    combinedData = strategy.chunkSeparator.join([p.data for p in aiParts])
                    combinedPart = ContentPart(
                        id=f"merged_ai_{parentId}",
                        parentId=parentId if parentId != "root" else None,
                        label="merged_ai_result",
                        typeGroup="text",
                        mimeType="text/plain",
                        data=combinedData,
                        metadata={
                            "aiResult": True,
                            "merged": True,
                            "sourceCount": len(aiParts),
                            "size": len(combinedData.encode('utf-8'))
                        }
                    )
                    mergedParts.append(combinedPart)

        return mergedParts
    def _mergeHierarchical(
        self,
        originalParts: List[ContentPart],
        aiResultParts: List[ContentPart],
        strategy: MergeStrategy
    ) -> List[ContentPart]:
        """Merge parts hierarchically based on parentId relationships."""
        # Group parts by parentId
        partsByParent = {}
        for part in originalParts:
            parentId = part.parentId or "root"
            if parentId not in partsByParent:
                partsByParent[parentId] = []
            partsByParent[parentId].append(part)

        # Group AI results by parentId
        aiResultsByParent = {}
        for aiPart in aiResultParts:
            parentId = aiPart.parentId or "root"
            if parentId not in aiResultsByParent:
                aiResultsByParent[parentId] = []
            aiResultsByParent[parentId].append(aiPart)

        mergedParts = []

        # Process each parent group
        for parentId in set(list(partsByParent.keys()) + list(aiResultsByParent.keys())):
            originalGroup = partsByParent.get(parentId, [])
            aiGroup = aiResultsByParent.get(parentId, [])

            # Add original parts
            mergedParts.extend(originalGroup)

            # Add AI results for this parent
            if aiGroup:
                if len(aiGroup) == 1:
                    mergedParts.append(aiGroup[0])
                else:
                    # Merge multiple AI results
                    combinedData = strategy.chunkSeparator.join([p.data for p in aiGroup])
                    combinedPart = ContentPart(
                        id=f"hierarchical_ai_{parentId}",
                        parentId=parentId if parentId != "root" else None,
                        label="hierarchical_ai_result",
                        typeGroup="text",
                        mimeType="text/plain",
                        data=combinedData,
                        metadata={
                            "aiResult": True,
                            "hierarchical": True,
                            "sourceCount": len(aiGroup),
                            "size": len(combinedData.encode('utf-8'))
                        }
                    )
                    mergedParts.append(combinedPart)

        return mergedParts
    def _mergeIntelligent(
        self,
        originalParts: List[ContentPart],
        aiResultParts: List[ContentPart],
        strategy: MergeStrategy
    ) -> List[ContentPart]:
        """Merge parts using intelligent strategies based on content type."""
        mergedParts = []

        # Group by typeGroup for intelligent merging
        partsByType = {}
        for part in originalParts:
            typeGroup = part.typeGroup
            if typeGroup not in partsByType:
                partsByType[typeGroup] = []
            partsByType[typeGroup].append(part)

        # Process each type group
        for typeGroup, parts in partsByType.items():
            if typeGroup == "text":
                mergedParts.extend(self._mergeTextIntelligent(parts, aiResultParts, strategy))
            elif typeGroup == "table":
                mergedParts.extend(self._mergeTableIntelligent(parts, aiResultParts, strategy))
            elif typeGroup == "structure":
                mergedParts.extend(self._mergeStructureIntelligent(parts, aiResultParts, strategy))
            else:
                # Default handling for other types
                mergedParts.extend(parts)

        # Add any remaining AI results that weren't merged
        for aiPart in aiResultParts:
            if not any(p.id == aiPart.id for p in mergedParts):
                mergedParts.append(aiPart)

        return mergedParts
    def _mergeTextIntelligent(
        self,
        textParts: List[ContentPart],
        aiResultParts: List[ContentPart],
        strategy: MergeStrategy
    ) -> List[ContentPart]:
        """Intelligent merging for text content."""
        # For now, use concatenate strategy
        # This could be enhanced with semantic analysis, summarization, etc.
        return self._mergeConcatenate(textParts, aiResultParts, strategy)

    def _mergeTableIntelligent(
        self,
        tableParts: List[ContentPart],
        aiResultParts: List[ContentPart],
        strategy: MergeStrategy
    ) -> List[ContentPart]:
        """Intelligent merging for table content."""
        # For now, use concatenate strategy
        # This could be enhanced with table merging logic
        return self._mergeConcatenate(tableParts, aiResultParts, strategy)

    def _mergeStructureIntelligent(
        self,
        structureParts: List[ContentPart],
        aiResultParts: List[ContentPart],
        strategy: MergeStrategy
    ) -> List[ContentPart]:
        """Intelligent merging for structured content."""
        # For now, use concatenate strategy
        # This could be enhanced with structure-aware merging
        return self._mergeConcatenate(structureParts, aiResultParts, strategy)
@ -1,6 +1,6 @@
from typing import Any, Dict, List
from modules.datamodels.datamodelExtraction import ContentPart
from ..utils import makeId
from ..subUtils import makeId


class TableMerger:
@ -1,6 +1,6 @@
from typing import Any, Dict, List
from modules.datamodels.datamodelExtraction import ContentPart
from ..utils import makeId
from ..subUtils import makeId


class TextMerger:
@@ -1,14 +1,61 @@
from typing import Any, Dict, List
import logging
import os

from modules.datamodels.datamodelExtraction import ExtractedContent, ContentPart
from .utils import makeId
from modules.datamodels.datamodelExtraction import ContentExtracted, ContentPart
from .subUtils import makeId
from .subRegistry import ExtractorRegistry, ChunkerRegistry
from .merging.text_merger import TextMerger
from .merging.table_merger import TableMerger
from .merging.default_merger import DefaultMerger

logger = logging.getLogger(__name__)

def runExtraction(extractorRegistry: ExtractorRegistry, chunkerRegistry: ChunkerRegistry, documentBytes: bytes, fileName: str, mimeType: str, options: Dict[str, Any]) -> ExtractedContent:

def _mergeParts(parts: List[ContentPart], mergeStrategy: Dict[str, Any]) -> List[ContentPart]:
    """Merge parts based on the provided strategy."""
    if not parts or not mergeStrategy:
        return parts

    groupBy = mergeStrategy.get("groupBy", "typeGroup")
    orderBy = mergeStrategy.get("orderBy", "id")

    # Group parts by the specified field
    groups = {}
    for part in parts:
        key = getattr(part, groupBy, "unknown")
        if key not in groups:
            groups[key] = []
        groups[key].append(part)

    # Merge each group
    merged_parts = []
    for group_key, group_parts in groups.items():
        if len(group_parts) == 1:
            merged_parts.extend(group_parts)
        else:
            # Sort by orderBy field if specified
            if orderBy:
                group_parts.sort(key=lambda p: getattr(p, orderBy, ""))

            # Use appropriate merger based on type
            type_group = group_parts[0].typeGroup if group_parts else "unknown"

            if type_group == "text":
                merger = TextMerger()
            elif type_group == "table":
                merger = TableMerger()
            else:
                merger = DefaultMerger()

            # Merge the group
            merged = merger.merge(group_parts, mergeStrategy)
            merged_parts.extend(merged)

    return merged_parts


def runExtraction(extractorRegistry: ExtractorRegistry, chunkerRegistry: ChunkerRegistry, documentBytes: bytes, fileName: str, mimeType: str, options: Dict[str, Any]) -> ContentExtracted:
    extractor = extractorRegistry.resolve(mimeType, fileName)
    if extractor is None:
        # fallback: single binary part
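For orientation, here is a minimal standalone sketch of the groupBy/orderBy behavior that _mergeParts implements above. The SimpleNamespace objects stand in for ContentPart and the printout stands in for the merger dispatch; neither is project code.

# Hypothetical illustration of the _mergeParts grouping semantics (not project code):
# parts are grouped by the attribute named in "groupBy", sorted by "orderBy",
# and multi-member groups would be handed to a type-specific merger.
from types import SimpleNamespace

parts = [
    SimpleNamespace(id="b", typeGroup="text", data="world"),
    SimpleNamespace(id="a", typeGroup="text", data="hello"),
    SimpleNamespace(id="c", typeGroup="table", data="| x |"),
]

groups = {}
for part in parts:
    key = getattr(part, "typeGroup", "unknown")
    groups.setdefault(key, []).append(part)

for key, group in groups.items():
    group.sort(key=lambda p: getattr(p, "id", ""))
    print(key, [p.data for p in group])
# text ['hello', 'world']   -> would go to TextMerger
# table ['| x |']           -> single part, kept as-is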
@@ -21,14 +68,66 @@ def runExtraction(extractorRegistry: ExtractorRegistry, chunkerRegistry: Chunker
            data="",
            metadata={"warning": "No extractor registered"}
        )
        return ExtractedContent(id=makeId(), parts=[part])
        return ContentExtracted(id=makeId(), parts=[part])

    parts = extractor.extract(documentBytes, {"fileName": fileName, "mimeType": mimeType, "options": options})
    # Optional merge step

    # Apply chunking and size limiting
    parts = poolAndLimit(parts, chunkerRegistry, options)

    # Optional merge step - but preserve chunks
    mergeStrategy = options.get("mergeStrategy", {})
    if mergeStrategy:
        parts = _mergeParts(parts, mergeStrategy)
        return ExtractedContent(id=makeId(), parts=parts)

        # Don't merge chunks - they should stay separate for processing
        non_chunk_parts = [p for p in parts if not p.metadata.get("chunk", False)]
        chunk_parts = [p for p in parts if p.metadata.get("chunk", False)]

        logger.debug(f"runExtraction: Preserving {len(chunk_parts)} chunks from merging")

        if non_chunk_parts:
            non_chunk_parts = _mergeParts(non_chunk_parts, mergeStrategy)

        # Combine non-chunk parts with chunk parts (chunks stay separate)
        parts = non_chunk_parts + chunk_parts

        logger.debug(f"runExtraction: Final parts after merging: {len(parts)} (chunks: {len(chunk_parts)})")

    # DEBUG: dump parts and chunks to files TODO TO REMOVE
    try:
        base_dir = "./test-chat/ai"
        os.makedirs(base_dir, exist_ok=True)

        # Generate timestamp for consistent naming
        from datetime import datetime, UTC
        ts = datetime.now(UTC).strftime('%Y%m%d-%H%M%S-%f')[:-3]

        # Write a summary file
        summary_lines: List[str] = [f"fileName: {fileName}", f"mimeType: {mimeType}", f"totalParts: {len(parts)}"]
        text_index = 0
        for idx, part in enumerate(parts):
            is_texty = part.typeGroup in ("text", "table", "structure")
            size = int(part.metadata.get("size", 0) or 0)
            is_chunk = bool(part.metadata.get("chunk", False))
            summary_lines.append(
                f"part[{idx}]: typeGroup={part.typeGroup}, label={part.label}, size={size}, chunk={is_chunk}"
            )
            if is_texty and getattr(part, "data", None):
                text_index += 1
                fname = f"{ts}_extract_{fileName}_part_{idx:03d}_{'chunk' if is_chunk else 'full'}_{text_index:03d}.txt"
                fpath = os.path.join(base_dir, fname)
                with open(fpath, "w", encoding="utf-8") as f:
                    f.write(f"# typeGroup: {part.typeGroup}\n# label: {part.label}\n# chunk: {is_chunk}\n# size: {size}\n\n")
                    f.write(str(part.data))

        # Write summary file
        summary_fname = f"{ts}_extract_{fileName}_summary.txt"
        summary_fpath = os.path.join(base_dir, summary_fname)
        with open(summary_fpath, "w", encoding="utf-8") as f:
            f.write("\n".join(summary_lines))
    except Exception as _e:
        logger.debug(f"Debug dump skipped: {_e}")

    return ContentExtracted(id=makeId(), parts=parts)


def poolAndLimit(parts: List[ContentPart], chunkerRegistry: ChunkerRegistry, options: Dict[str, Any]) -> List[ContentPart]:
@@ -57,12 +156,21 @@ def poolAndLimit(parts: List[ContentPart], chunkerRegistry: ChunkerRegistry, opt

    # If we have remaining parts and chunking is allowed, try chunking
    if remaining and chunkAllowed:
        logger.debug(f"=== CHUNKING ACTIVATED ===")
        logger.debug(f"Remaining parts to chunk: {len(remaining)}")
        logger.debug(f"Max size limit: {maxSize} bytes")
        logger.debug(f"Current size used: {current} bytes")

        for p in remaining:
            if p.typeGroup in ("text", "table", "structure"):
            if p.typeGroup in ("text", "table", "structure", "image"):
                logger.debug(f"Chunking {p.typeGroup} part: {len(p.data)} chars")
                chunks = chunkerRegistry.resolve(p.typeGroup).chunk(p, options)
                logger.debug(f"Created {len(chunks)} chunks")

                chunks_added = 0
                for ch in chunks:
                    chSize = int(ch.get("size", 0) or 0)
                    if current + chSize <= maxSize:
                    # Add all chunks - don't limit by maxSize since they'll be processed separately
                    kept.append(ContentPart(
                        id=makeId(),
                        parentId=p.id,
@@ -70,15 +178,32 @@ def poolAndLimit(parts: List[ContentPart], chunkerRegistry: ChunkerRegistry, opt
                        typeGroup=p.typeGroup,
                        mimeType=p.mimeType,
                        data=ch.get("data", ""),
                        metadata={"size": chSize, "chunk": True}
                        metadata={
                            "size": chSize,
                            "chunk": True,
                            **ch.get("metadata", {})
                        }
                    ))
                    current += chSize
                    else:
                        break
                    chunks_added += 1
                    logger.debug(f"Added chunk {ch.get('order', 0)}: {chSize} bytes")

    # Apply merging strategy if provided
                logger.debug(f"Added {chunks_added} chunks from {p.typeGroup} part")

    # Apply merging strategy if provided, but preserve chunks
    if mergeStrategy:
        kept = _applyMerging(kept, mergeStrategy)
        # Don't merge chunks - they should stay separate for processing
        non_chunk_parts = [p for p in kept if not p.metadata.get("chunk", False)]
        chunk_parts = [p for p in kept if p.metadata.get("chunk", False)]

        logger.debug(f"Preserving {len(chunk_parts)} chunks from merging")

        if non_chunk_parts:
            non_chunk_parts = _applyMerging(non_chunk_parts, mergeStrategy)

        # Combine non-chunk parts with chunk parts (chunks stay separate)
        kept = non_chunk_parts + chunk_parts

        logger.debug(f"Final parts after merging: {len(kept)} (chunks: {len(chunk_parts)})")

    # Re-check size after merging
    totalSize = sum(int(p.metadata.get("size", 0) or 0) for p in kept)
@@ -151,7 +276,7 @@ def _applySizeLimit(parts: List[ContentPart], maxSize: int) -> List[ContentPart]
    return kept


def applyAiIfRequested(extracted: ExtractedContent, options: Dict[str, Any]) -> ExtractedContent:
def applyAiIfRequested(extracted: ContentExtracted, options: Dict[str, Any]) -> ContentExtracted:
    """
    Apply AI processing if requested in options.
    This is a placeholder for actual AI integration.
@@ -59,8 +59,11 @@ class ExtractorRegistry:
            self.register("xlsm", XlsxExtractor())
            # fallback
            self.setFallback(BinaryExtractor())
        except Exception:
            pass
            print(f"✅ ExtractorRegistry: Successfully registered {len(self._map)} extractors")
        except Exception as e:
            print(f"❌ ExtractorRegistry: Failed to register extractors: {str(e)}")
            import traceback
            traceback.print_exc()

    def register(self, key: str, extractor: Extractor):
        self._map[key] = extractor
@@ -88,11 +91,16 @@ class ChunkerRegistry:
            from .chunking.text_chunker import TextChunker
            from .chunking.table_chunker import TableChunker
            from .chunking.structure_chunker import StructureChunker
            # Skip ImageChunker for now to avoid PIL import hang
            # from .chunking.image_chunker import ImageChunker
            self.register("text", TextChunker())
            self.register("table", TableChunker())
            self.register("structure", StructureChunker())
        except Exception:
            pass
            # self.register("image", ImageChunker())
        except Exception as e:
            print(f"❌ ChunkerRegistry: Failed to register chunkers: {str(e)}")
            import traceback
            traceback.print_exc()

    def register(self, typeGroup: str, chunker: Chunker):
        self._map[typeGroup] = chunker
@@ -3,5 +3,3 @@ import uuid

def makeId() -> str:
    return str(uuid.uuid4())
@@ -105,12 +105,49 @@ class GenerationService:

        logger.info(f"Document {document_name} has content: {len(content)} characters")

        # Normalize file extension based on mime type if missing or incorrect
        try:
            mime_to_ext = {
                "application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx",
                "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",
                "application/vnd.openxmlformats-officedocument.presentationml.presentation": ".pptx",
                "application/pdf": ".pdf",
                "text/html": ".html",
                "text/markdown": ".md",
                "text/plain": ".txt",
                "application/json": ".json",
            }
            expected_ext = mime_to_ext.get(mime_type)
            if expected_ext:
                if not document_name.lower().endswith(expected_ext):
                    # Append/replace extension to match mime type
                    if "." in document_name:
                        document_name = document_name.rsplit(".", 1)[0] + expected_ext
                    else:
                        document_name = document_name + expected_ext
        except Exception:
            pass

        # Decide if content is base64-encoded binary (e.g., docx/pdf) or plain text
        base64encoded = False
        try:
            binary_mime_types = {
                "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
                "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
                "application/vnd.openxmlformats-officedocument.presentationml.presentation",
                "application/pdf",
            }
            if isinstance(document_data, str) and mime_type in binary_mime_types:
                base64encoded = True
        except Exception:
            base64encoded = False

        # Create document with file in one step using interfaces directly
        document = self._createDocument(
            fileName=document_name,
            mimeType=mime_type,
            content=content,
            base64encoded=False,
            base64encoded=base64encoded,
            messageId=message_id
        )
        if document:
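To make the extension-normalization rule in the hunk above concrete, here is a small standalone sketch; the normalize_name helper is illustrative, not part of the service.

# Hypothetical standalone version of the extension-normalization rule above.
def normalize_name(document_name: str, expected_ext: str) -> str:
    if document_name.lower().endswith(expected_ext):
        return document_name
    if "." in document_name:
        return document_name.rsplit(".", 1)[0] + expected_ext
    return document_name + expected_ext

print(normalize_name("report", ".docx"))       # report.docx
print(normalize_name("report.tmp", ".docx"))   # report.docx
print(normalize_name("report.docx", ".docx"))  # report.docx (unchanged)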
@@ -258,3 +295,108 @@ class GenerationService:
            'workflowStatus': 'unknown',
            'workflowId': 'unknown'
        }

    async def renderReport(self, extracted_content: str, output_format: str, title: str) -> tuple[str, str]:
        """
        Render extracted content to the specified output format.

        Args:
            extracted_content: Content extracted by AI using format-specific prompt
            output_format: Target format (html, pdf, docx, txt, md, json, csv, xlsx)
            title: Report title

        Returns:
            tuple: (rendered_content, mime_type)
        """
        try:
            # DEBUG: dump renderer input to diagnose JSON+HTML mixtures TODO REMOVE
            try:
                import os
                ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
                debug_root = "./test-chat/ai"
                debug_dir = os.path.join(debug_root, f"render_input_{ts}")
                os.makedirs(debug_dir, exist_ok=True)
                with open(os.path.join(debug_dir, "meta.txt"), "w", encoding="utf-8") as f:
                    f.write(f"title: {title}\nformat: {output_format}\nlength: {len(extracted_content or '')}\nstarts_with_brace: {str(extracted_content.strip().startswith('{') if extracted_content else False)}\n")
                with open(os.path.join(debug_dir, "extracted_content.txt"), "w", encoding="utf-8") as f:
                    f.write(extracted_content or "")
            except Exception:
                pass

            # Get the appropriate renderer for the format
            renderer = self._getFormatRenderer(output_format)
            if not renderer:
                raise ValueError(f"Unsupported output format: {output_format}")

            # Render the content
            rendered_content, mime_type = await renderer.render(extracted_content, title)

            # DEBUG: dump rendered output
            try:
                import os
                with open(os.path.join(debug_dir, "rendered_output.txt"), "w", encoding="utf-8") as f:
                    f.write(rendered_content or "")
            except Exception:
                pass

            logger.info(f"Successfully rendered report to {output_format} format: {len(rendered_content)} characters")
            return rendered_content, mime_type

        except Exception as e:
            logger.error(f"Error rendering report to {output_format}: {str(e)}")
            raise

    def getExtractionPrompt(self, output_format: str, user_prompt: str, title: str) -> str:
        """
        Get the format-specific extraction prompt for AI content extraction.

        Args:
            output_format: Target format (html, pdf, docx, txt, md, json, csv, xlsx)
            user_prompt: User's original prompt for report generation
            title: Report title

        Returns:
            str: Format-specific prompt for AI extraction
        """
        try:
            # Get the appropriate renderer for the format
            renderer = self._getFormatRenderer(output_format)
            if not renderer:
                raise ValueError(f"Unsupported output format: {output_format}")

            # Build centralized prompt with generic rules + format-specific guidelines
            from .prompt_builder import buildExtractionPrompt
            extraction_prompt = buildExtractionPrompt(
                output_format=output_format,
                renderer=renderer,
                user_prompt=user_prompt,
                title=title
            )

            logger.info(f"Generated {output_format}-specific extraction prompt: {len(extraction_prompt)} characters")
            return extraction_prompt

        except Exception as e:
            logger.error(f"Error getting extraction prompt for {output_format}: {str(e)}")
            raise

    def _getFormatRenderer(self, output_format: str):
        """Get the appropriate renderer for the specified format using auto-discovery."""
        try:
            from .renderers.registry import get_renderer
            renderer = get_renderer(output_format)

            if renderer:
                return renderer

            # Fallback to text renderer if no specific renderer found
            logger.warning(f"No renderer found for format {output_format}, falling back to text")
            fallback_renderer = get_renderer('text')
            if fallback_renderer:
                return fallback_renderer

            logger.error("Even text renderer fallback failed")
            return None

        except Exception as e:
            logger.error(f"Error getting renderer for {output_format}: {str(e)}")
            return None

72
modules/services/serviceGeneration/prompt_builder.py
Normal file
@@ -0,0 +1,72 @@
"""
Centralized prompt builder for document generation across formats.

Builds a robust prompt that:
- Accepts any user intent (no fixed structure assumptions)
- Injects format-specific guidelines from the selected renderer
- Adds a common policy section to always use real data from source docs
- Requires the AI to output a filename header that we can parse and use
"""

from typing import Protocol


class _RendererLike(Protocol):
    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:  # returns only format-specific guidelines
        ...


def buildExtractionPrompt(
    output_format: str,
    renderer: _RendererLike,
    user_prompt: str,
    title: str
) -> str:
    """
    Build the final extraction prompt by combining:
    - The raw user prompt (verbatim)
    - Generic cross-format instructions (filename header + real-data policy)
    - Format-specific guidelines snippet provided by the renderer

    The AI must place a single filename header at the very top:
        FILENAME: <safe-file-name-with-extension>
    followed by a blank line and then ONLY the document content according to the target format.
    """

    format_guidelines = renderer.getExtractionPrompt(user_prompt, title)

    # Generic block appears once for every format
    generic_intro = f"""
{user_prompt}

You are generating a document in {output_format.upper()} format for the title: "{title}".

Rules:
- The user's intent fully defines the structure. Do not assume a fixed template or headings.
- Use only factual information extracted from the supplied source documents.
- Do not invent, hallucinate, or include placeholders (e.g., "lorem ipsum", "TBD").
- The output must strictly follow the target format and be ready for saving without extra wrapping.
- At the VERY TOP output exactly one line with the filename header:
  FILENAME: <safe-file-name-with-extension>
  - The base name should be short, descriptive, and kebab-case or snake-case without spaces.
  - Include the correct extension for the requested format (e.g., .html, .pdf, .docx, .md, .txt, .json, .csv, .xlsx).
  - Avoid special characters beyond [a-zA-Z0-9-_].
- After this header, insert a single blank line and then provide ONLY the document content.

Common policy:
- Use the actual data from the source documents to create the content.
- Do not generate placeholder text or templates.
- Extract and use the real data provided in the source documents to create meaningful content.
""".strip()

    # Final assembly
    final_prompt = (
        generic_intro
        + "\n\nFORMAT-SPECIFIC GUIDELINES:\n"
        + format_guidelines.strip()
        + "\n\nGenerate the complete document content now based on the source documents below:"
    )

    return final_prompt
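Since downstream code has to split the AI response on that FILENAME contract, a minimal sketch of what the consuming side could look like; the parse_filename_header helper is illustrative, not part of this commit.

# Hypothetical consumer of the FILENAME header contract described above.
def parse_filename_header(ai_output: str) -> tuple[str, str]:
    """Split an AI response into (filename, body) per the FILENAME: contract."""
    head, _, body = ai_output.partition("\n")
    if head.startswith("FILENAME: "):
        return head[len("FILENAME: "):].strip(), body.lstrip("\n")
    return "", ai_output  # header missing; caller decides a fallback name

name, body = parse_filename_header("FILENAME: q3-summary.md\n\n# Q3 Summary\n...")
assert name == "q3-summary.md"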
@@ -0,0 +1,86 @@
"""
Base renderer class for all format renderers.
"""

from abc import ABC, abstractmethod
from typing import Dict, Any, Tuple, List
import logging

logger = logging.getLogger(__name__)


class BaseRenderer(ABC):
    """Base class for all format renderers."""

    def __init__(self):
        self.logger = logger

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """
        Return list of supported format names for this renderer.
        Override this method in subclasses to specify supported formats.
        """
        return []

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """
        Return list of format aliases for this renderer.
        Override this method in subclasses to specify format aliases.
        """
        return []

    @classmethod
    def get_priority(cls) -> int:
        """
        Return priority for this renderer (higher number = higher priority).
        Used when multiple renderers support the same format.
        """
        return 0

    @abstractmethod
    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """
        Get the format-specific extraction prompt for AI content extraction.

        Args:
            user_prompt: User's original prompt for report generation
            title: Report title

        Returns:
            str: Format-specific prompt for AI extraction
        """
        pass

    @abstractmethod
    async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
        """
        Render extracted content to the target format.

        Args:
            extracted_content: Raw content extracted by AI using format-specific prompt
            title: Report title

        Returns:
            tuple: (rendered_content, mime_type)
        """
        pass

    def _extract_sections(self, report_data: Dict[str, Any]) -> list:
        """Extract sections from report data."""
        return report_data.get('sections', [])

    def _extract_metadata(self, report_data: Dict[str, Any]) -> Dict[str, Any]:
        """Extract metadata from report data."""
        return report_data.get('metadata', {})

    def _get_title(self, report_data: Dict[str, Any], fallback_title: str) -> str:
        """Get title from report data or use fallback."""
        return report_data.get('title', fallback_title)

    def _format_timestamp(self, timestamp: str = None) -> str:
        """Format timestamp for display."""
        if timestamp:
            return timestamp
        from datetime import datetime, UTC
        return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC")
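As a sketch of the extension point this base class defines, a minimal plain-text renderer might look like the following. The TxtRenderer name, its priority value, and its guideline text are illustrative assumptions, not part of this commit; it assumes BaseRenderer from the listing above is importable.

# Hypothetical minimal subclass showing the BaseRenderer contract.
from typing import List, Tuple

class TxtRenderer(BaseRenderer):
    @classmethod
    def get_supported_formats(cls) -> List[str]:
        return ['txt', 'text']

    @classmethod
    def get_priority(cls) -> int:
        return 50  # lower than the rich-format renderers

    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        return "TXT FORMAT GUIDELINES:\n- Plain text only, no markup.\nOUTPUT: Return ONLY plain text."

    async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
        # No cleanup needed beyond trimming; plain text passes through.
        return f"{title}\n\n{extracted_content.strip()}", "text/plain"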
64
modules/services/serviceGeneration/renderers/csv_renderer.py
Normal file

@@ -0,0 +1,64 @@
"""
CSV renderer for report generation.
"""

from .base_renderer import BaseRenderer
from typing import Dict, Any, Tuple, List
import csv
import io


class CsvRenderer(BaseRenderer):
    """Renders content to CSV format with format-specific extraction."""

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return supported CSV formats."""
        return ['csv']

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return format aliases."""
        return ['spreadsheet', 'table']

    @classmethod
    def get_priority(cls) -> int:
        """Return priority for CSV renderer."""
        return 70

    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """Return only CSV-specific guidelines; global prompt is built centrally."""
        return (
            "CSV FORMAT GUIDELINES:\n"
            "- Emit ONLY CSV text without fences or commentary.\n"
            "- Include a single header row with clear column names.\n"
            "- Quote fields containing commas, quotes, or newlines; escape quotes by doubling them.\n"
            "- Use rows to represent items/records derived from sources.\n"
            "- Keep cells concise; include units in headers when useful.\n"
            "OUTPUT: Return ONLY valid CSV content that can be imported."
        )

    async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
        """Render extracted content to CSV format."""
        try:
            # The extracted content should already be CSV from the AI
            # Just clean it up
            csv_content = self._clean_csv_content(extracted_content, title)

            return csv_content, "text/csv"

        except Exception as e:
            self.logger.error(f"Error rendering CSV: {str(e)}")
            # Return minimal CSV fallback
            return f"Title,Content\n{title},Error rendering report: {str(e)}", "text/csv"

    def _clean_csv_content(self, content: str, title: str) -> str:
        """Clean and validate CSV content from AI."""
        content = content.strip()

        # Remove markdown code blocks if present
        if content.startswith("```") and content.endswith("```"):
            lines = content.split('\n')
            if len(lines) > 2:
                content = '\n'.join(lines[1:-1]).strip()

        return content
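The quoting rule in the guidelines above matches what Python's csv module produces by default; a short illustration (not project code):

# Illustration of the CSV quoting rule the guidelines describe:
# fields with commas/quotes/newlines are quoted, and quotes are doubled.
import csv
import io

buf = io.StringIO()
writer = csv.writer(buf)
writer.writerow(["item", "note"])
writer.writerow(["widget", 'said "ok", then left'])
print(buf.getvalue())
# item,note
# widget,"said ""ok"", then left"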
249
modules/services/serviceGeneration/renderers/docx_renderer.py
Normal file

@@ -0,0 +1,249 @@
"""
DOCX renderer for report generation using python-docx.
"""

from .base_renderer import BaseRenderer
from typing import Dict, Any, Tuple, List
import io
import base64
from datetime import datetime, UTC

try:
    from docx import Document
    from docx.shared import Inches, Pt
    from docx.enum.text import WD_ALIGN_PARAGRAPH
    from docx.enum.table import WD_TABLE_ALIGNMENT
    from docx.oxml.shared import OxmlElement, qn
    from docx.oxml.ns import nsdecls
    from docx.oxml import parse_xml
    DOCX_AVAILABLE = True
except ImportError:
    DOCX_AVAILABLE = False


class DocxRenderer(BaseRenderer):
    """Renders content to DOCX format using python-docx."""

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return supported DOCX formats."""
        return ['docx', 'doc']

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return format aliases."""
        return ['word', 'document']

    @classmethod
    def get_priority(cls) -> int:
        """Return priority for DOCX renderer."""
        return 115

    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """Return only DOCX-specific guidelines; global prompt is built centrally."""
        return (
            "DOCX FORMAT GUIDELINES:\n"
            "- Provide plain text content suitable for Word generation (no markdown/HTML).\n"
            "- Use clear section hierarchy; bullet and numbered lists where needed.\n"
            "- Include tables as simple pipe-delimited lines if tabular data is needed.\n"
            "OUTPUT: Return ONLY the structured plain text to be converted into DOCX."
        )

    async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
        """Render extracted content to DOCX format."""
        try:
            if not DOCX_AVAILABLE:
                # Fallback to HTML if python-docx not available
                from .html_renderer import HtmlRenderer
                html_renderer = HtmlRenderer()
                html_content, _ = await html_renderer.render(extracted_content, title)
                return html_content, "text/html"

            # Generate DOCX using python-docx
            docx_content = self._generate_docx(extracted_content, title)

            return docx_content, "application/vnd.openxmlformats-officedocument.wordprocessingml.document"

        except Exception as e:
            self.logger.error(f"Error rendering DOCX: {str(e)}")
            # Return minimal fallback
            return f"DOCX Generation Error: {str(e)}", "text/plain"

    def _generate_docx(self, content: str, title: str) -> str:
        """Generate DOCX content using python-docx."""
        try:
            # Create new document
            doc = Document()

            # Set up document styles
            self._setup_document_styles(doc)

            # Add title
            title_para = doc.add_heading(title, 0)
            title_para.alignment = WD_ALIGN_PARAGRAPH.CENTER

            # Add generation date
            date_para = doc.add_paragraph(f"Generated: {self._format_timestamp()}")
            date_para.alignment = WD_ALIGN_PARAGRAPH.CENTER

            # Add page break
            doc.add_page_break()

            # Process content
            lines = content.split('\n')
            current_section = []

            for line in lines:
                line = line.strip()
                if not line:
                    continue

                # Check for ALL CAPS headings (major headings)
                if line.isupper() and len(line) > 3 and not line.startswith('-') and not line.startswith('*'):
                    if current_section:
                        self._process_section(doc, current_section)
                        current_section = []
                    doc.add_heading(line, level=1)
                # Check for Title Case headings (subheadings)
                elif line.istitle() and len(line) > 5 and not line.startswith('-') and not line.startswith('*') and not line.startswith(('1.', '2.', '3.', '4.', '5.')):
                    if current_section:
                        self._process_section(doc, current_section)
                        current_section = []
                    doc.add_heading(line, level=2)
                # Check for markdown headings (fallback)
                elif line.startswith('# '):
                    # H1 heading
                    if current_section:
                        self._process_section(doc, current_section)
                        current_section = []
                    doc.add_heading(line[2:], level=1)
                elif line.startswith('## '):
                    # H2 heading
                    if current_section:
                        self._process_section(doc, current_section)
                        current_section = []
                    doc.add_heading(line[3:], level=2)
                elif line.startswith('### '):
                    # H3 heading
                    if current_section:
                        self._process_section(doc, current_section)
                        current_section = []
                    doc.add_heading(line[4:], level=3)
                else:
                    current_section.append(line)

            # Process remaining content
            if current_section:
                self._process_section(doc, current_section)

            # Save to buffer
            buffer = io.BytesIO()
            doc.save(buffer)
            buffer.seek(0)

            # Convert to base64
            docx_bytes = buffer.getvalue()
            docx_base64 = base64.b64encode(docx_bytes).decode('utf-8')

            return docx_base64

        except Exception as e:
            self.logger.error(f"Error generating DOCX: {str(e)}")
            raise

    def _setup_document_styles(self, doc):
        """Set up document styles."""
        try:
            # Set default font
            style = doc.styles['Normal']
            font = style.font
            font.name = 'Calibri'
            font.size = Pt(11)

            # Set heading styles
            for i in range(1, 4):
                heading_style = doc.styles[f'Heading {i}']
                heading_font = heading_style.font
                heading_font.name = 'Calibri'
                heading_font.size = Pt(16 - i * 2)
                heading_font.bold = True
        except Exception as e:
            self.logger.warning(f"Could not set up document styles: {str(e)}")

    def _process_section(self, doc, lines: list):
        """Process a section of content into DOCX elements."""
        for line in lines:
            if not line.strip():
                continue

            # Check for tables (lines with |)
            if '|' in line and not line.startswith('|'):
                # This might be part of a table, process as table
                table_data = self._extract_table_data(lines)
                if table_data:
                    self._add_table(doc, table_data)
                return

            # Check for lists
            if line.startswith('- ') or line.startswith('* '):
                # This is a list item
                doc.add_paragraph(line[2:], style='List Bullet')
            elif line.startswith(('1. ', '2. ', '3. ', '4. ', '5. ')):
                # This is a numbered list item
                doc.add_paragraph(line[3:], style='List Number')
            else:
                # Regular paragraph
                doc.add_paragraph(line)

    def _extract_table_data(self, lines: list) -> list:
        """Extract table data from lines."""
        table_data = []
        in_table = False

        for line in lines:
            if '|' in line:
                if not in_table:
                    in_table = True
                # Split by | and clean up
                cells = [cell.strip() for cell in line.split('|') if cell.strip()]
                if cells:
                    table_data.append(cells)
            elif in_table and not line.strip():
                # Empty line, might be end of table
                break

        return table_data if len(table_data) > 1 else []

    def _add_table(self, doc, table_data: list):
        """Add a table to the document."""
        try:
            if not table_data:
                return

            # Create table
            table = doc.add_table(rows=len(table_data), cols=len(table_data[0]))
            table.alignment = WD_TABLE_ALIGNMENT.CENTER

            # Add data to table
            for row_idx, row_data in enumerate(table_data):
                for col_idx, cell_data in enumerate(row_data):
                    if col_idx < len(table.rows[row_idx].cells):
                        table.rows[row_idx].cells[col_idx].text = cell_data

            # Style the table
            self._style_table(table)

        except Exception as e:
            self.logger.warning(f"Could not add table: {str(e)}")

    def _style_table(self, table):
        """Apply styling to the table."""
        try:
            # Style header row
            if len(table.rows) > 0:
                header_cells = table.rows[0].cells
                for cell in header_cells:
                    for paragraph in cell.paragraphs:
                        for run in paragraph.runs:
                            run.bold = True
        except Exception as e:
            self.logger.warning(f"Could not style table: {str(e)}")
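For reference, the pipe-delimited convention that _extract_table_data parses (here and in the PDF renderer below) looks like this; the input lines are illustrative, not project data:

# Illustration of the pipe-delimited table convention _extract_table_data expects.
lines = [
    "Region | Revenue | Growth",
    "North | 120 | 4%",
    "South | 95 | 2%",
]
table_data = [[cell.strip() for cell in line.split('|') if cell.strip()] for line in lines]
print(table_data)
# [['Region', 'Revenue', 'Growth'], ['North', '120', '4%'], ['South', '95', '2%']]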
210
modules/services/serviceGeneration/renderers/excel_renderer.py
Normal file

@@ -0,0 +1,210 @@
"""
Excel renderer for report generation using openpyxl.
"""

from .base_renderer import BaseRenderer
from typing import Dict, Any, Tuple, List
import io
import base64
from datetime import datetime, UTC

try:
    from openpyxl import Workbook
    from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
    from openpyxl.utils import get_column_letter
    from openpyxl.worksheet.table import Table, TableStyleInfo
    OPENPYXL_AVAILABLE = True
except ImportError:
    OPENPYXL_AVAILABLE = False


class ExcelRenderer(BaseRenderer):
    """Renders content to Excel format using openpyxl."""

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return supported Excel formats."""
        return ['xlsx', 'xls', 'excel']

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return format aliases."""
        return ['spreadsheet', 'workbook']

    @classmethod
    def get_priority(cls) -> int:
        """Return priority for Excel renderer."""
        return 110

    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """Return only Excel-specific guidelines; global prompt is built centrally."""
        return (
            "EXCEL FORMAT GUIDELINES:\n"
            "- Output one or more pipe-delimited tables with a single header row.\n"
            "- Let user intent define columns; use clear names and ISO dates.\n"
            "- Separate multiple tables by a single blank line.\n"
            "- No markdown/HTML/code fences; tables only unless user explicitly asks for notes.\n"
            "OUTPUT: Return ONLY pipe-delimited tables suitable for import."
        )

    async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
        """Render extracted content to Excel format."""
        try:
            if not OPENPYXL_AVAILABLE:
                # Fallback to CSV if openpyxl not available
                from .csv_renderer import CsvRenderer
                csv_renderer = CsvRenderer()
                csv_content, _ = await csv_renderer.render(extracted_content, title)
                return csv_content, "text/csv"

            # Generate Excel using openpyxl
            excel_content = self._generate_excel(extracted_content, title)

            return excel_content, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"

        except Exception as e:
            self.logger.error(f"Error rendering Excel: {str(e)}")
            # Return CSV fallback
            return f"Title,Content\n{title},Error rendering Excel report: {str(e)}", "text/csv"

    def _generate_excel(self, content: str, title: str) -> str:
        """Generate Excel content using openpyxl."""
        try:
            # Create workbook
            wb = Workbook()

            # Remove default sheet
            wb.remove(wb.active)

            # Create sheets
            summary_sheet = wb.create_sheet("Summary", 0)
            data_sheet = wb.create_sheet("Data", 1)
            analysis_sheet = wb.create_sheet("Analysis", 2)

            # Add content to sheets
            self._populate_summary_sheet(summary_sheet, title)
            self._populate_data_sheet(data_sheet, content)
            self._populate_analysis_sheet(analysis_sheet, content)

            # Save to buffer
            buffer = io.BytesIO()
            wb.save(buffer)
            buffer.seek(0)

            # Convert to base64
            excel_bytes = buffer.getvalue()
            excel_base64 = base64.b64encode(excel_bytes).decode('utf-8')

            return excel_base64

        except Exception as e:
            self.logger.error(f"Error generating Excel: {str(e)}")
            raise

    def _populate_summary_sheet(self, sheet, title: str):
        """Populate the summary sheet."""
        try:
            # Title
            sheet['A1'] = title
            sheet['A1'].font = Font(size=16, bold=True)
            sheet['A1'].alignment = Alignment(horizontal='center')

            # Generation info
            sheet['A3'] = "Generated:"
            sheet['B3'] = self._format_timestamp()
            sheet['A4'] = "Status:"
            sheet['B4'] = "Generated Successfully"

            # Key metrics placeholder
            sheet['A6'] = "Key Metrics:"
            sheet['A6'].font = Font(bold=True)
            sheet['A7'] = "Total Items:"
            sheet['B7'] = "=COUNTA(Data!A:A)-1"  # Count non-empty cells in Data sheet

            # Auto-adjust column widths
            sheet.column_dimensions['A'].width = 20
            sheet.column_dimensions['B'].width = 30

        except Exception as e:
            self.logger.warning(f"Could not populate summary sheet: {str(e)}")

    def _populate_data_sheet(self, sheet, content: str):
        """Populate the data sheet."""
        try:
            # Headers
            headers = ["Item/Category", "Value/Amount", "Percentage", "Source Document", "Notes/Comments"]
            for col, header in enumerate(headers, 1):
                cell = sheet.cell(row=1, column=col, value=header)
                cell.font = Font(bold=True)
                cell.fill = PatternFill(start_color="CCCCCC", end_color="CCCCCC", fill_type="solid")

            # Process content
            lines = content.split('\n')
            row = 2

            for line in lines:
                line = line.strip()
                if not line:
                    continue

                # Check for table data (lines with |)
                if '|' in line:
                    cells = [cell.strip() for cell in line.split('|') if cell.strip()]
                    for col, cell_data in enumerate(cells[:5], 1):  # Limit to 5 columns
                        sheet.cell(row=row, column=col, value=cell_data)
                    row += 1
                else:
                    # Regular content
                    sheet.cell(row=row, column=1, value=line)
                    row += 1

            # Auto-adjust column widths
            for col in range(1, 6):
                sheet.column_dimensions[get_column_letter(col)].width = 20

        except Exception as e:
            self.logger.warning(f"Could not populate data sheet: {str(e)}")

    def _populate_analysis_sheet(self, sheet, content: str):
        """Populate the analysis sheet."""
        try:
            # Title
            sheet['A1'] = "Analysis & Insights"
            sheet['A1'].font = Font(size=14, bold=True)

            # Content analysis
            lines = content.split('\n')
            row = 3

            sheet['A3'] = "Content Analysis:"
            sheet['A3'].font = Font(bold=True)
            row += 1

            # Count different types of content
            table_lines = sum(1 for line in lines if '|' in line)
            list_lines = sum(1 for line in lines if line.startswith(('- ', '* ')))
            text_lines = len(lines) - table_lines - list_lines

            sheet[f'A{row}'] = f"Total Lines: {len(lines)}"
            row += 1
            sheet[f'A{row}'] = f"Table Rows: {table_lines}"
            row += 1
            sheet[f'A{row}'] = f"List Items: {list_lines}"
            row += 1
            sheet[f'A{row}'] = f"Text Lines: {text_lines}"
            row += 2

            # Recommendations
            sheet[f'A{row}'] = "Recommendations:"
            sheet[f'A{row}'].font = Font(bold=True)
            row += 1
            sheet[f'A{row}'] = "1. Review data accuracy"
            row += 1
            sheet[f'A{row}'] = "2. Consider additional analysis"
            row += 1
            sheet[f'A{row}'] = "3. Update regularly"

            # Auto-adjust column width
            sheet.column_dimensions['A'].width = 30

        except Exception as e:
            self.logger.warning(f"Could not populate analysis sheet: {str(e)}")
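One detail worth noting in the summary sheet above: openpyxl stores formulas as plain strings, so the COUNTA cell is only evaluated when Excel opens the file. A minimal standalone illustration (the file name demo.xlsx is arbitrary):

# Formulas in openpyxl are written as strings and evaluated by Excel on open.
from openpyxl import Workbook

wb = Workbook()
data = wb.create_sheet("Data")
data.append(["Item"])    # header row
data.append(["widget"])
data.append(["gadget"])

summary = wb.active      # the default sheet created with the workbook
summary["A1"] = "Total Items:"
summary["B1"] = "=COUNTA(Data!A:A)-1"  # subtract the header row
wb.save("demo.xlsx")     # opening in Excel shows B1 = 2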
@@ -0,0 +1,69 @@
"""
HTML renderer for report generation.
"""

from .base_renderer import BaseRenderer
from typing import Dict, Any, Tuple, List


class HtmlRenderer(BaseRenderer):
    """Renders content to HTML format with format-specific extraction."""

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return supported HTML formats."""
        return ['html', 'htm']

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return format aliases."""
        return ['web', 'webpage']

    @classmethod
    def get_priority(cls) -> int:
        """Return priority for HTML renderer."""
        return 100

    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """Return only HTML-specific guidelines; global prompt is built centrally."""
        return (
            "HTML FORMAT GUIDELINES:\n"
            "- Output a complete HTML5 document starting with <!DOCTYPE html>.\n"
            "- Include <html>, <head> with <meta charset=\"UTF-8\"> and <title>, and <body>.\n"
            "- Use semantic elements: <header>, <main>, <section>, <article>, <footer>.\n"
            "- Provide professional CSS in a <style> block; responsive, clean typography.\n"
            "- Use h1/h2/h3 for headings; tables and lists for structure.\n"
            "OUTPUT: Return ONLY valid HTML (no markdown, no code fences)."
        )

    async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
        """Render extracted content to HTML format."""
        try:
            # The extracted content should already be HTML from the AI
            # Just clean it up and ensure it's valid
            html_content = self._clean_html_content(extracted_content, title)

            return html_content, "text/html"

        except Exception as e:
            self.logger.error(f"Error rendering HTML: {str(e)}")
            # Return minimal HTML fallback
            return f"<html><head><title>{title}</title></head><body><h1>{title}</h1><p>Error rendering report: {str(e)}</p></body></html>", "text/html"

    def _clean_html_content(self, content: str, title: str) -> str:
        """Clean and validate HTML content from AI."""
        content = content.strip()

        # Remove markdown code blocks if present
        if content.startswith("```") and content.endswith("```"):
            lines = content.split('\n')
            if len(lines) > 2:
                content = '\n'.join(lines[1:-1]).strip()

        # Ensure it starts with DOCTYPE
        if not content.startswith('<!DOCTYPE'):
            if content.startswith('<html'):
                content = '<!DOCTYPE html>\n' + content
            else:
                content = f'<!DOCTYPE html>\n<html>\n<head><meta charset="UTF-8"><title>{title}</title></head>\n<body>\n{content}\n</body>\n</html>'

        return content
@@ -0,0 +1,74 @@
"""
JSON renderer for report generation.
"""

from .base_renderer import BaseRenderer
from typing import Dict, Any, Tuple, List
import json


class JsonRenderer(BaseRenderer):
    """Renders content to JSON format with format-specific extraction."""

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return supported JSON formats."""
        return ['json']

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return format aliases."""
        return ['data']

    @classmethod
    def get_priority(cls) -> int:
        """Return priority for JSON renderer."""
        return 80

    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """Return only JSON-specific guidelines; global prompt is built centrally."""
        return (
            "JSON FORMAT GUIDELINES:\n"
            "- Output ONLY a single valid JSON object (no fences, no pre/post text).\n"
            "- Choose a structure that best fits the user's intent; include a top-level title and data.\n"
            "- Prefer arrays/objects that map cleanly to the extracted facts.\n"
            "- Include minimal metadata only if useful (e.g., generatedAt, sources).\n"
            "OUTPUT: Return ONLY valid, parseable JSON."
        )

    async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
        """Render extracted content to JSON format."""
        try:
            # The extracted content should already be JSON from the AI
            # Just validate and format it
            json_content = self._clean_json_content(extracted_content, title)

            return json_content, "application/json"

        except Exception as e:
            self.logger.error(f"Error rendering JSON: {str(e)}")
            # Return minimal JSON fallback
            fallback_data = {
                "title": title,
                "sections": [{"type": "text", "content": f"Error rendering report: {str(e)}"}],
                "metadata": {"error": str(e)}
            }
            return json.dumps(fallback_data, indent=2), "application/json"

    def _clean_json_content(self, content: str, title: str) -> str:
        """Clean and validate JSON content from AI."""
        content = content.strip()

        # Remove markdown code blocks if present
        if content.startswith("```") and content.endswith("```"):
            lines = content.split('\n')
            if len(lines) > 2:
                content = '\n'.join(lines[1:-1]).strip()

        # Validate JSON
        try:
            parsed = json.loads(content)
            # Re-format with proper indentation
            return json.dumps(parsed, indent=2, ensure_ascii=False)
        except json.JSONDecodeError:
            # If not valid JSON, return as-is
            return content
@@ -0,0 +1,65 @@
"""
Markdown renderer for report generation.
"""

from .base_renderer import BaseRenderer
from typing import Dict, Any, Tuple, List


class MarkdownRenderer(BaseRenderer):
    """Renders content to Markdown format with format-specific extraction."""

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return supported Markdown formats."""
        return ['md', 'markdown']

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return format aliases."""
        return ['mdown', 'mkd']

    @classmethod
    def get_priority(cls) -> int:
        """Return priority for markdown renderer."""
        return 95

    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """Return only Markdown-specific guidelines; global prompt is built centrally."""
        return (
            "MARKDOWN FORMAT GUIDELINES:\n"
            "- Use proper Markdown syntax only (no HTML wrappers).\n"
            "- # for main title, ## for sections, ### for subsections.\n"
            "- Tables with | separators and a header row.\n"
            "- Bullet lists with - or *.\n"
            "- Emphasis with **bold** and *italic*.\n"
            "- Code blocks with ```language.\n"
            "- Horizontal rules (---) to separate major sections when helpful.\n"
            "- Include links [text](url) and images ![alt](url) when referenced by sources.\n"
            "OUTPUT: Return ONLY raw Markdown content without code fences."
        )

    async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
        """Render extracted content to Markdown format."""
        try:
            # The extracted content should already be Markdown from the AI
            # Just clean it up
            markdown_content = self._clean_markdown_content(extracted_content, title)

            return markdown_content, "text/markdown"

        except Exception as e:
            self.logger.error(f"Error rendering markdown: {str(e)}")
            # Return minimal markdown fallback
            return f"# {title}\n\nError rendering report: {str(e)}", "text/markdown"

    def _clean_markdown_content(self, content: str, title: str) -> str:
        """Clean and validate Markdown content from AI."""
        content = content.strip()

        # Remove markdown code blocks if present
        if content.startswith("```") and content.endswith("```"):
            lines = content.split('\n')
            if len(lines) > 2:
                content = '\n'.join(lines[1:-1]).strip()

        return content
225
modules/services/serviceGeneration/renderers/pdf_renderer.py
Normal file

@@ -0,0 +1,225 @@
"""
PDF renderer for report generation using reportlab.
"""

from .base_renderer import BaseRenderer
from typing import Dict, Any, Tuple, List
import io
import base64
from datetime import datetime, UTC

try:
    from reportlab.lib.pagesizes import letter, A4
    from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak
    from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
    from reportlab.lib.units import inch
    from reportlab.lib import colors
    from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_JUSTIFY
    REPORTLAB_AVAILABLE = True
except ImportError:
    REPORTLAB_AVAILABLE = False


class PdfRenderer(BaseRenderer):
    """Renders content to PDF format using reportlab."""

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return supported PDF formats."""
        return ['pdf']

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return format aliases."""
        return ['document', 'print']

    @classmethod
    def get_priority(cls) -> int:
        """Return priority for PDF renderer."""
        return 120

    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """Return only PDF-specific guidelines; global prompt is built centrally."""
        return (
            "PDF FORMAT GUIDELINES:\n"
            "- Provide structured content suitable for pagination and headings (H1/H2/H3-like).\n"
            "- Use bullet lists and tables where useful; separate major sections clearly.\n"
            "- Avoid markdown/HTML; produce clean, plain content that can be laid out as PDF.\n"
            "OUTPUT: Return ONLY the PDF-ready textual content (no fences)."
        )

    async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
        """Render extracted content to PDF format."""
        try:
            if not REPORTLAB_AVAILABLE:
                # Fallback to HTML if reportlab not available
                from .html_renderer import HtmlRenderer
                html_renderer = HtmlRenderer()
                html_content, _ = await html_renderer.render(extracted_content, title)
                return html_content, "text/html"

            # Generate PDF using reportlab
            pdf_content = self._generate_pdf(extracted_content, title)

            return pdf_content, "application/pdf"

        except Exception as e:
            self.logger.error(f"Error rendering PDF: {str(e)}")
            # Return minimal fallback
            return f"PDF Generation Error: {str(e)}", "text/plain"

    def _generate_pdf(self, content: str, title: str) -> str:
        """Generate PDF content using reportlab."""
        try:
            # Create a buffer to hold the PDF
            buffer = io.BytesIO()

            # Create PDF document
            doc = SimpleDocTemplate(
                buffer,
                pagesize=A4,
                rightMargin=72,
                leftMargin=72,
                topMargin=72,
                bottomMargin=18
            )

            # Get styles
            styles = getSampleStyleSheet()

            # Create custom styles
            title_style = ParagraphStyle(
                'CustomTitle',
                parent=styles['Heading1'],
                fontSize=24,
                spaceAfter=30,
                alignment=TA_CENTER,
                textColor=colors.darkblue
            )

            heading_style = ParagraphStyle(
                'CustomHeading',
                parent=styles['Heading2'],
                fontSize=16,
                spaceAfter=12,
                spaceBefore=12,
                textColor=colors.darkblue
            )

            # Build PDF content
            story = []

            # Title page
            story.append(Paragraph(title, title_style))
            story.append(Spacer(1, 20))
            story.append(Paragraph(f"Generated: {self._format_timestamp()}", styles['Normal']))
            story.append(PageBreak())

            # Process content
            lines = content.split('\n')
            current_section = []

            for line in lines:
                line = line.strip()
                if not line:
                    continue

                # Check for headings
                if line.startswith('# '):
                    # H1 heading
                    if current_section:
                        story.extend(self._process_section(current_section, styles))
                        current_section = []
                    story.append(Paragraph(line[2:], title_style))
                    story.append(Spacer(1, 12))
                elif line.startswith('## '):
                    # H2 heading
                    if current_section:
                        story.extend(self._process_section(current_section, styles))
                        current_section = []
                    story.append(Paragraph(line[3:], heading_style))
                    story.append(Spacer(1, 8))
                elif line.startswith('### '):
                    # H3 heading
                    if current_section:
                        story.extend(self._process_section(current_section, styles))
                        current_section = []
                    story.append(Paragraph(line[4:], styles['Heading3']))
                    story.append(Spacer(1, 6))
                else:
                    current_section.append(line)

            # Process remaining content
            if current_section:
                story.extend(self._process_section(current_section, styles))

            # Build PDF
            doc.build(story)

            # Get PDF content as base64
            buffer.seek(0)
            pdf_bytes = buffer.getvalue()
            pdf_base64 = base64.b64encode(pdf_bytes).decode('utf-8')

            return pdf_base64

        except Exception as e:
            self.logger.error(f"Error generating PDF: {str(e)}")
            raise

    def _process_section(self, lines: list, styles) -> list:
        """Process a section of content into PDF elements."""
        elements = []

        for line in lines:
            if not line.strip():
                continue

            # Check for tables (lines with |)
            if '|' in line and not line.startswith('|'):
                # This might be part of a table, process as table
                table_data = self._extract_table_data(lines)
                if table_data:
                    table = Table(table_data)
                    table.setStyle(TableStyle([
                        ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
                        ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
                        ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
                        ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
                        ('FONTSIZE', (0, 0), (-1, 0), 14),
                        ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
                        ('BACKGROUND', (0, 1), (-1, -1), colors.beige),
                        ('GRID', (0, 0), (-1, -1), 1, colors.black)
                    ]))
                    elements.append(table)
                    elements.append(Spacer(1, 12))
                return elements

            # Check for lists
            if line.startswith('- ') or line.startswith('* '):
                # This is a list item
                elements.append(Paragraph(f"• {line[2:]}", styles['Normal']))
            else:
                # Regular paragraph
                elements.append(Paragraph(line, styles['Normal']))

            elements.append(Spacer(1, 6))

        return elements

    def _extract_table_data(self, lines: list) -> list:
        """Extract table data from lines."""
        table_data = []
        in_table = False

        for line in lines:
            if '|' in line:
                if not in_table:
                    in_table = True
                # Split by | and clean up
                cells = [cell.strip() for cell in line.split('|') if cell.strip()]
                if cells:
                    table_data.append(cells)
            elif in_table and not line.strip():
                # Empty line, might be end of table
                break

        return table_data if len(table_data) > 1 else []
157
modules/services/serviceGeneration/renderers/registry.py
Normal file

@@ -0,0 +1,157 @@
"""
Renderer registry for automatic discovery and registration of renderers.
"""

import logging
import importlib
import pkgutil
from typing import Dict, Type, List, Optional
from .base_renderer import BaseRenderer

logger = logging.getLogger(__name__)


class RendererRegistry:
    """Registry for automatic renderer discovery and management."""

    def __init__(self):
        self._renderers: Dict[str, Type[BaseRenderer]] = {}
        self._format_mappings: Dict[str, str] = {}
        self._discovered = False

    def discover_renderers(self) -> None:
        """Automatically discover and register all renderers by scanning files."""
        if self._discovered:
            return

        try:
            import os
            import sys
            from pathlib import Path

            # Get the directory containing this registry file
            current_dir = Path(__file__).parent
            renderers_dir = current_dir

            # Get the package name dynamically
            package_name = __name__.rsplit('.', 1)[0]

            # Scan all Python files in the renderers directory
            for file_path in renderers_dir.glob("*.py"):
                if file_path.name in ['registry.py', 'base_renderer.py', '__init__.py']:
                    continue

                # Extract module name from filename
                module_name = file_path.stem

                try:
                    # Import the module dynamically
                    full_module_name = f"{package_name}.{module_name}"
                    module = importlib.import_module(full_module_name)

                    # Look for renderer classes in the module
                    for attr_name in dir(module):
                        attr = getattr(module, attr_name)
                        if (isinstance(attr, type) and
                                issubclass(attr, BaseRenderer) and
                                attr != BaseRenderer and
                                hasattr(attr, 'get_supported_formats')):

                            # Register the renderer
                            self._register_renderer_class(attr)
                            logger.info(f"Discovered renderer: {attr.__name__} from {module_name}")

                except Exception as e:
                    logger.warning(f"Could not load renderer from {module_name}: {str(e)}")
                    continue

            self._discovered = True
            logger.info(f"Renderer discovery completed. Found {len(self._renderers)} renderers.")

        except Exception as e:
            logger.error(f"Error during renderer discovery: {str(e)}")
            self._discovered = True  # Mark as discovered to avoid repeated attempts

    def _register_renderer_class(self, renderer_class: Type[BaseRenderer]) -> None:
        """Register a renderer class with its supported formats."""
        try:
            # Get supported formats from the renderer class
            supported_formats = renderer_class.get_supported_formats()

            for format_name in supported_formats:
                # Register primary format
                self._renderers[format_name.lower()] = renderer_class

                # Register aliases if any
                if hasattr(renderer_class, 'get_format_aliases'):
                    aliases = renderer_class.get_format_aliases()
                    for alias in aliases:
                        self._format_mappings[alias.lower()] = format_name.lower()

            logger.debug(f"Registered {renderer_class.__name__} for formats: {supported_formats}")

        except Exception as e:
            logger.error(f"Error registering renderer {renderer_class.__name__}: {str(e)}")

    def get_renderer(self, output_format: str) -> Optional[BaseRenderer]:
        """Get a renderer instance for the specified format."""
        if not self._discovered:
            self.discover_renderers()

        # Normalize format name
        format_name = output_format.lower().strip()

        # Check for aliases first
        if format_name in self._format_mappings:
            format_name = self._format_mappings[format_name]

        # Get renderer class
        renderer_class = self._renderers.get(format_name)

        if renderer_class:
            try:
                return renderer_class()
            except Exception as e:
                logger.error(f"Error creating renderer instance for {format_name}: {str(e)}")
                return None

        logger.warning(f"No renderer found for format: {output_format}")
        return None
def get_supported_formats(self) -> List[str]:
|
||||
"""Get list of all supported formats."""
|
||||
if not self._discovered:
|
||||
self.discover_renderers()
|
||||
|
||||
formats = list(self._renderers.keys())
|
||||
formats.extend(self._format_mappings.keys())
|
||||
return sorted(set(formats))
|
||||
|
||||
def get_renderer_info(self) -> Dict[str, Dict[str, str]]:
|
||||
"""Get information about all registered renderers."""
|
||||
if not self._discovered:
|
||||
self.discover_renderers()
|
||||
|
||||
info = {}
|
||||
for format_name, renderer_class in self._renderers.items():
|
||||
info[format_name] = {
|
||||
'class_name': renderer_class.__name__,
|
||||
'module': renderer_class.__module__,
|
||||
'description': getattr(renderer_class, '__doc__', 'No description').strip().split('\n')[0] if renderer_class.__doc__ else 'No description'
|
||||
}
|
||||
|
||||
return info
|
||||
|
||||
# Global registry instance
|
||||
_registry = RendererRegistry()
|
||||
|
||||
def get_renderer(output_format: str) -> Optional[BaseRenderer]:
|
||||
"""Get a renderer instance for the specified format."""
|
||||
return _registry.get_renderer(output_format)
|
||||
|
||||
def get_supported_formats() -> List[str]:
|
||||
"""Get list of all supported formats."""
|
||||
return _registry.get_supported_formats()
|
||||
|
||||
def get_renderer_info() -> Dict[str, Dict[str, str]]:
|
||||
"""Get information about all registered renderers."""
|
||||
return _registry.get_renderer_info()
|
||||
|
|
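Reviewer note (illustration, not part of this diff): a minimal usage sketch of the module-level helpers above, assuming at least one renderer module sits next to registry.py. The import path follows the file path shown in this diff.

# Illustration only; the available format names depend on which renderer files exist.
from modules.services.serviceGeneration.renderers import registry

print(registry.get_supported_formats())   # triggers discovery on first call
renderer = registry.get_renderer(" TXT ") # normalized to 'txt'; alias lookup applies
if renderer is None:
    print("no renderer registered for this format")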
@@ -0,0 +1,94 @@
"""
Text renderer for report generation.
"""

from .base_renderer import BaseRenderer
from typing import Dict, Any, Tuple, List


class TextRenderer(BaseRenderer):
    """Renders content to plain text format with format-specific extraction."""

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return supported text formats (excluding formats with dedicated renderers)."""
        return [
            'txt', 'text', 'plain',
            # Programming languages
            'js', 'javascript', 'ts', 'typescript', 'jsx', 'tsx',
            'py', 'python', 'java', 'cpp', 'c', 'h', 'hpp',
            'cs', 'csharp', 'php', 'rb', 'ruby', 'go', 'rs', 'rust',
            'swift', 'kt', 'kotlin', 'scala', 'r', 'm', 'objc',
            'sh', 'bash', 'zsh', 'fish', 'ps1', 'bat', 'cmd',
            # Web technologies (excluding html/htm which have dedicated renderer)
            'css', 'scss', 'sass', 'less', 'xml', 'yaml', 'yml', 'toml', 'ini', 'cfg',
            # Data formats (excluding csv, md/markdown which have dedicated renderers)
            'tsv', 'log', 'rst', 'sql', 'dockerfile', 'dockerignore', 'gitignore',
            # Configuration files
            'env', 'properties', 'conf', 'config', 'rc',
            'gitattributes', 'editorconfig', 'eslintrc',
            # Documentation
            'readme', 'changelog', 'license', 'authors',
            'contributing', 'todo', 'notes', 'docs'
        ]

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return format aliases."""
        return [
            'ascii', 'utf8', 'utf-8', 'code', 'source',
            'script', 'program', 'file', 'document',
            'raw', 'unformatted', 'plaintext'
        ]

    @classmethod
    def get_priority(cls) -> int:
        """Return priority for text renderer."""
        return 90

    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """Return only plain-text guidelines; global prompt is built centrally."""
        return (
            "TEXT FORMAT GUIDELINES:\n"
            "- Output ONLY plain text (no markdown or HTML).\n"
            "- Use clear headings (you may underline with === or --- when helpful).\n"
            "- Use simple bullet lists with '-' and tables with '|' when needed.\n"
            "- Preserve indentation for code-like content if present.\n"
            "OUTPUT: Return ONLY the raw text content."
        )

    async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
        """Render extracted content to plain text format."""
        try:
            # The extracted content should already be formatted text from the AI;
            # just clean it up
            text_content = self._clean_text_content(extracted_content, title)

            return text_content, "text/plain"

        except Exception as e:
            self.logger.error(f"Error rendering text: {str(e)}")
            # Return minimal text fallback
            return f"{title}\n\nError rendering report: {str(e)}", "text/plain"

    def _clean_text_content(self, content: str, title: str) -> str:
        """Clean and validate text content from AI."""
        content = content.strip()

        # Remove markdown code blocks if present
        if content.startswith("```") and content.endswith("```"):
            lines = content.split('\n')
            if len(lines) > 2:
                content = '\n'.join(lines[1:-1]).strip()

        # Remove any remaining markdown formatting
        content = content.replace('**', '').replace('*', '')
        content = content.replace('__', '').replace('_', '')

        # Clean up any HTML-like tags that might have slipped through
        import re
        content = re.sub(r'<[^>]+>', '', content)

        # Ensure proper line endings
        content = content.replace('\r\n', '\n').replace('\r', '\n')

        return content
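Reviewer note (illustration, not part of this diff): a quick sketch of the cleanup steps in _clean_text_content, using a hypothetical AI response wrapped in a code fence. The statements mirror the method body above.

# Illustration only.
raw = "```\n**Report**\nAll _checks_ passed.\n```"
content = raw.strip()
if content.startswith("```") and content.endswith("```"):
    lines = content.split('\n')
    if len(lines) > 2:
        content = '\n'.join(lines[1:-1]).strip()
content = content.replace('**', '').replace('*', '').replace('__', '').replace('_', '')
# content == "Report\nAll checks passed."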
@@ -3,7 +3,7 @@ import uuid
from typing import Dict, Any, List, Optional
from modules.datamodels.datamodelUam import User, UserConnection
from modules.datamodels.datamodelChat import ChatDocument, ChatMessage
-from modules.datamodels.datamodelChat import ExtractedContent
+from modules.datamodels.datamodelChat import ChatContentExtracted
from modules.services.serviceExtraction.mainServiceExtraction import ExtractionService
from modules.services.serviceGeneration.subDocumentUtility import getFileExtension, getMimeTypeFromExtension, detectContentTypeFromData
from modules.shared.timezoneUtils import get_utc_timestamp
@@ -78,6 +78,12 @@ class WorkflowService:
    def getChatDocumentsFromDocumentList(self, documentList: List[str]) -> List[ChatDocument]:
        """Get ChatDocuments from a list of document references using all three formats."""
        try:
+            # Get the current workflow from services (same pattern as setWorkflowContext)
+            workflow = getattr(self.serviceCenter, 'currentWorkflow', None) or self.workflow
+            if not workflow:
+                logger.error("No workflow available for document list resolution")
+                return []
+
            all_documents = []
            for doc_ref in documentList:
                if doc_ref.startswith("docItem:"):
@@ -86,7 +92,7 @@
                    if len(parts) >= 2:
                        doc_id = parts[1]
                        # Find the document by ID
-                        for message in self.workflow.messages:
+                        for message in workflow.messages:
                            if message.documents:
                                for doc in message.documents:
                                    if doc.id == doc_id:
@@ -101,9 +107,15 @@
                        # Format: docList:<messageId>:<label>
                        message_id = parts[1]
                        label = parts[2]
+                        logger.debug(f"Looking for message with ID: {message_id} and label: {label}")

                        # Find the message by ID and get all its documents
-                        for message in self.workflow.messages:
+                        message_found = False
+                        for message in workflow.messages:
+                            logger.debug(f"Checking message ID: {message.id} (looking for: {message_id})")
                            if str(message.id) == message_id:
+                                message_found = True
+                                logger.debug(f"Found message {message.id} with documentsLabel: {getattr(message, 'documentsLabel', 'None')}")
                                if message.documents:
+                                    doc_names = [doc.fileName for doc in message.documents if hasattr(doc, 'fileName')]
                                    logger.debug(f"Found docList reference {doc_ref}: {len(message.documents)} documents - {doc_names}")
@@ -111,13 +123,16 @@
                                else:
                                    logger.debug(f"Found docList reference {doc_ref} but message has no documents")
                                break

+                        if not message_found:
+                            logger.warning(f"Message with ID {message_id} not found in workflow. Available message IDs: {[str(msg.id) for msg in workflow.messages]}")
                    elif len(parts) >= 2:
                        # Format: docList:<label> - find message by documentsLabel
                        label = parts[1]
+                        logger.debug(f"Looking for message with documentsLabel: {label}")
                        # Find messages with matching documentsLabel
                        matching_messages = []
-                        for message in self.workflow.messages:
+                        for message in workflow.messages:
                            # Check both attribute and raw data for documentsLabel
                            msg_label = getattr(message, 'documentsLabel', None)
                            if msg_label == label:
@@ -158,7 +173,7 @@ class WorkflowService:
            # Find messages with matching documentsLabel (this is the correct way!)
            # In case of retries, we want the NEWEST message (most recent publishedAt)
            matching_messages = []
-            for message in self.workflow.messages:
+            for message in workflow.messages:
                msg_documents_label = getattr(message, 'documentsLabel', '')

                # Check if this message's documentsLabel matches our reference
@@ -187,7 +202,7 @@ class WorkflowService:
            # Fallback: also check if any message has this documentsLabel as a prefix
            logger.debug(f"Trying fallback search for messages with documentsLabel containing: {doc_ref}")
            fallback_messages = []
-            for message in self.workflow.messages:
+            for message in workflow.messages:
                msg_documents_label = getattr(message, 'documentsLabel', '')
                if msg_documents_label and msg_documents_label.startswith(doc_ref):
                    fallback_messages.append(message)
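Reviewer note (illustration, not part of this diff): the three reference formats resolved by getChatDocumentsFromDocumentList, sketched with hypothetical IDs and labels.

# Illustration only; the actual resolution logic is in the hunks above.
refs = [
    "docItem:42:report.pdf",          # one document, by document ID (plus filename)
    "docList:msg_7:round1_results",   # all documents of message msg_7 carrying that label
    "docList:round1_results",         # all documents of any message carrying that label
]
for ref in refs:
    parts = ref.split(':')
    if ref.startswith("docItem:"):
        doc_id = parts[1]
    elif len(parts) >= 3:
        message_id, label = parts[1], parts[2]
    else:
        label = parts[1]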
@@ -422,24 +437,30 @@ class WorkflowService:
    def setWorkflowContext(self, round_number: int = None, task_number: int = None, action_number: int = None):
        """Set current workflow context for document generation and routing"""
        try:
+            # Get the current workflow from services
+            workflow = getattr(self.serviceCenter, 'currentWorkflow', None) or self.workflow
+            if not workflow:
+                logger.error("No workflow available for context setting")
+                return
+
            # Prepare update data
            update_data = {}

            if round_number is not None:
-                self.workflow.currentRound = round_number
+                workflow.currentRound = round_number
                update_data["currentRound"] = round_number
            if task_number is not None:
-                self.workflow.currentTask = task_number
+                workflow.currentTask = task_number
                update_data["currentTask"] = task_number
            if action_number is not None:
-                self.workflow.currentAction = action_number
+                workflow.currentAction = action_number
                update_data["currentAction"] = action_number

            # Persist changes to database if any updates were made
            if update_data:
-                self.interfaceDbChat.updateWorkflow(self.workflow.id, update_data)
+                self.interfaceDbChat.updateWorkflow(workflow.id, update_data)

-            logger.debug(f"Updated workflow context: Round {self.workflow.currentRound if hasattr(self.workflow, 'currentRound') else 'N/A'}, Task {self.workflow.currentTask if hasattr(self.workflow, 'currentTask') else 'N/A'}, Action {self.workflow.currentAction if hasattr(self.workflow, 'currentAction') else 'N/A'}")
+            logger.debug(f"Updated workflow context: Round {workflow.currentRound if hasattr(workflow, 'currentRound') else 'N/A'}, Task {workflow.currentTask if hasattr(workflow, 'currentTask') else 'N/A'}, Action {workflow.currentAction if hasattr(workflow, 'currentAction') else 'N/A'}")
        except Exception as e:
            logger.error(f"Error setting workflow context: {str(e)}")
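Reviewer note (illustration, not part of this diff): a minimal call sketch for the reworked method; only the fields actually passed are updated and persisted. The service instance name is hypothetical.

# Illustration only.
workflow_service.setWorkflowContext(round_number=2, task_number=1)
# In memory:  workflow.currentRound = 2; workflow.currentTask = 1
# Persisted:  interfaceDbChat.updateWorkflow(workflow.id, {"currentRound": 2, "currentTask": 1})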
@@ -467,3 +488,376 @@ class WorkflowService:
            'workflowStatus': 'unknown',
            'workflowId': 'unknown'
        }

    def createWorkflow(self, workflowData: Dict[str, Any]):
        """Create a new workflow by delegating to the chat interface"""
        try:
            return self.interfaceDbChat.createWorkflow(workflowData)
        except Exception as e:
            logger.error(f"Error creating workflow: {str(e)}")
            raise

    def updateWorkflow(self, workflowId: str, updateData: Dict[str, Any]):
        """Update workflow by delegating to the chat interface"""
        try:
            return self.interfaceDbChat.updateWorkflow(workflowId, updateData)
        except Exception as e:
            logger.error(f"Error updating workflow: {str(e)}")
            raise

    def updateWorkflowStats(self, workflowId: str, **kwargs):
        """Update workflow statistics by delegating to the chat interface"""
        try:
            return self.interfaceDbChat.updateWorkflowStats(workflowId, **kwargs)
        except Exception as e:
            logger.error(f"Error updating workflow stats: {str(e)}")
            raise

    def getWorkflow(self, workflowId: str):
        """Get workflow by ID by delegating to the chat interface"""
        try:
            return self.interfaceDbChat.getWorkflow(workflowId)
        except Exception as e:
            logger.error(f"Error getting workflow: {str(e)}")
            raise

    def createMessage(self, messageData: Dict[str, Any]):
        """Create a new message by delegating to the chat interface"""
        try:
            return self.interfaceDbChat.createMessage(messageData)
        except Exception as e:
            logger.error(f"Error creating message: {str(e)}")
            raise

    def updateMessage(self, messageId: str, messageData: Dict[str, Any]):
        """Update message by delegating to the chat interface"""
        try:
            return self.interfaceDbChat.updateMessage(messageId, messageData)
        except Exception as e:
            logger.error(f"Error updating message: {str(e)}")
            raise

    def createLog(self, logData: Dict[str, Any]):
        """Create a new log entry by delegating to the chat interface"""
        try:
            return self.interfaceDbChat.createLog(logData)
        except Exception as e:
            logger.error(f"Error creating log: {str(e)}")
            raise

    def getDocumentCount(self) -> str:
        """Get document count for task planning (matching old handlingTasks.py logic)"""
        try:
            # Get the current workflow from services
            workflow = getattr(self.serviceCenter, 'currentWorkflow', None) or self.workflow
            if not workflow:
                return "No documents available"

            # Count documents from all messages in the workflow (like old system)
            total_docs = 0
            for message in workflow.messages:
                if hasattr(message, 'documents') and message.documents:
                    total_docs += len(message.documents)

            if total_docs == 0:
                return "No documents available"

            return f"{total_docs} document(s) available"
        except Exception as e:
            logger.error(f"Error getting document count: {str(e)}")
            return "No documents available"
    def getWorkflowHistoryContext(self) -> str:
        """Get workflow history context for task planning (matching old handlingTasks.py logic)"""
        try:
            # Get the current workflow from services
            workflow = getattr(self.serviceCenter, 'currentWorkflow', None) or self.workflow
            if not workflow:
                return "No previous round context available"

            # Check if there are any previous rounds by looking for "first" messages
            has_previous_rounds = False
            for message in workflow.messages:
                if hasattr(message, 'status') and message.status == "first":
                    has_previous_rounds = True
                    break

            if not has_previous_rounds:
                return "No previous round context available"

            # Get document reference list to show what documents are available from previous rounds
            document_list = self._getDocumentReferenceList(workflow)

            # Build context string showing previous rounds
            context = "Previous workflow rounds contain documents:\n"

            # Show history exchanges (previous rounds)
            if document_list["history"]:
                for exchange in document_list["history"]:
                    # Find the message that corresponds to this exchange
                    message_id = None
                    for message in workflow.messages:
                        if hasattr(message, 'documentsLabel') and message.documentsLabel == exchange['documentsLabel']:
                            message_id = message.id
                            break

                    if message_id:
                        doc_list_ref = f"docList:{message_id}:{exchange['documentsLabel']}"
                    else:
                        doc_list_ref = f"docList:{exchange['documentsLabel']}"

                    context += f"- {doc_list_ref} ({len(exchange['documents'])} documents)\n"
            else:
                context = "No previous round context available"

            return context

        except Exception as e:
            logger.error(f"Error getting workflow history context: {str(e)}")
            return "No previous round context available"
    def getAvailableDocuments(self, workflow) -> str:
        """Get available documents formatted for AI prompts (exact copy of old ServiceCenter.getEnhancedDocumentContext)"""
        try:
            if not workflow or not hasattr(workflow, 'messages'):
                return "No documents available"

            # Get document reference list using the exact same logic as the old system
            document_list = self._getDocumentReferenceList(workflow)

            # Build technical context string for AI action planning (exact copy of old system)
            context = "AVAILABLE DOCUMENTS:\n\n"

            # Process chat exchanges (current round) - exact copy of old system
            if document_list["chat"]:
                context += "CURRENT ROUND DOCUMENTS:\n"
                for exchange in document_list["chat"]:
                    # Generate docList reference for the exchange (using message ID and label)
                    # Find the message that corresponds to this exchange
                    message_id = None
                    for message in workflow.messages:
                        if hasattr(message, 'documentsLabel') and message.documentsLabel == exchange['documentsLabel']:
                            message_id = message.id
                            break

                    if message_id:
                        doc_list_ref = f"docList:{message_id}:{exchange['documentsLabel']}"
                    else:
                        # Fallback to label-only format if message ID not found
                        doc_list_ref = f"docList:{exchange['documentsLabel']}"

                    context += f"- {doc_list_ref} contains:\n"
                    # Generate docItem references for each document in the list
                    for doc_ref in exchange['documents']:
                        if doc_ref.startswith("docItem:"):
                            context += f"  - {doc_ref}\n"
                        else:
                            # Convert to proper docItem format if needed
                            context += f"  - docItem:{doc_ref}\n"
                    context += "\n"

            # Process history exchanges (previous rounds) - exact copy of old system
            if document_list["history"]:
                context += "WORKFLOW HISTORY DOCUMENTS:\n"
                for exchange in document_list["history"]:
                    # Generate docList reference for the exchange (using message ID and label)
                    # Find the message that corresponds to this exchange
                    message_id = None
                    for message in workflow.messages:
                        if hasattr(message, 'documentsLabel') and message.documentsLabel == exchange['documentsLabel']:
                            message_id = message.id
                            break

                    if message_id:
                        doc_list_ref = f"docList:{message_id}:{exchange['documentsLabel']}"
                    else:
                        # Fallback to label-only format if message ID not found
                        doc_list_ref = f"docList:{exchange['documentsLabel']}"

                    context += f"- {doc_list_ref} contains:\n"
                    # Generate docItem references for each document in the list
                    for doc_ref in exchange['documents']:
                        if doc_ref.startswith("docItem:"):
                            context += f"  - {doc_ref}\n"
                        else:
                            # Convert to proper docItem format if needed
                            context += f"  - docItem:{doc_ref}\n"
                    context += "\n"

            if not document_list["chat"] and not document_list["history"]:
                context += "NO DOCUMENTS AVAILABLE - This workflow has no documents to process.\n"

            return context

        except Exception as e:
            logger.error(f"Error getting available documents: {str(e)}")
            return "NO DOCUMENTS AVAILABLE - Error generating document context."
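Reviewer note (illustration, not part of this diff): with one current-round exchange, the context string built above would look roughly like this (hypothetical IDs, labels, and filenames):

AVAILABLE DOCUMENTS:

CURRENT ROUND DOCUMENTS:
- docList:msg_7:round1_task1_action1_results contains:
  - docItem:42:report.pdf
  - docItem:43:summary.txt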
    def _getDocumentReferenceList(self, workflow) -> Dict[str, List]:
        """Get list of document exchanges with new labeling format, sorted by recency (exact copy of old system)"""
        # Collect all documents first and refresh their attributes
        all_documents = []
        for message in workflow.messages:
            if message.documents:
                all_documents.extend(message.documents)

        # Refresh file attributes for all documents
        if all_documents:
            self._refreshDocumentFileAttributes(all_documents)

        chat_exchanges = []
        history_exchanges = []

        # Process messages in reverse order; "first" marks boundary
        in_current_round = True
        for message in reversed(workflow.messages):
            is_first = message.status == "first" if hasattr(message, 'status') else False

            # Build a DocumentExchange if message has documents
            doc_exchange = None
            if message.documents:
                if message.actionId and message.documentsLabel:
                    # Validate that we use the same label as in the message
                    validated_label = self._validateDocumentLabelConsistency(message)

                    # Use the message's actual documentsLabel
                    doc_refs = []
                    for doc in message.documents:
                        doc_ref = self._getDocumentReferenceFromChatDocument(doc, message)
                        doc_refs.append(doc_ref)

                    doc_exchange = {
                        'documentsLabel': validated_label,
                        'documents': doc_refs
                    }
                else:
                    # Generate new labels for documents without explicit labels
                    doc_refs = []
                    for doc in message.documents:
                        doc_ref = self._getDocumentReferenceFromChatDocument(doc, message)
                        doc_refs.append(doc_ref)

                    if doc_refs:
                        # Create a label based on message context
                        context_prefix = self._generateWorkflowContextPrefix(message)
                        context_label = f"{context_prefix}_context"

                        doc_exchange = {
                            'documentsLabel': context_label,
                            'documents': doc_refs
                        }

            # Append to appropriate container based on boundary
            if doc_exchange:
                if in_current_round:
                    chat_exchanges.append(doc_exchange)
                else:
                    history_exchanges.append(doc_exchange)

            # Flip boundary after including the "first" message in chat
            if in_current_round and is_first:
                in_current_round = False

        # Sort by recency: most recent first, then current round, then earlier rounds
        # Sort chat exchanges by message sequence number (most recent first)
        chat_exchanges.sort(key=lambda x: self._getMessageSequenceForExchange(x, workflow), reverse=True)
        # Sort history exchanges by message sequence number (most recent first)
        history_exchanges.sort(key=lambda x: self._getMessageSequenceForExchange(x, workflow), reverse=True)

        return {
            "chat": chat_exchanges,
            "history": history_exchanges
        }
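Reviewer note (illustration, not part of this diff): the shape returned by _getDocumentReferenceList, with hypothetical labels and IDs:

# Illustration only.
{
    "chat": [
        {"documentsLabel": "round2_task1_action1_results", "documents": ["docItem:44:analysis.md"]}
    ],
    "history": [
        {"documentsLabel": "round1_task1_action1_results", "documents": ["docItem:42:report.pdf"]}
    ]
}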
    def _refreshDocumentFileAttributes(self, documents) -> None:
        """Update file attributes (fileName, fileSize, mimeType) for documents"""
        for doc in documents:
            try:
                # Use the proper WorkflowService method to get file info
                file_info = self.getFileInfo(doc.fileId)
                if file_info:
                    doc.fileName = file_info.get("fileName", doc.fileName)
                    doc.fileSize = file_info.get("size", doc.fileSize)
                    doc.mimeType = file_info.get("mimeType", doc.mimeType)
                else:
                    logger.warning(f"File not found for document {doc.id}, fileId: {doc.fileId}")
            except Exception as e:
                logger.error(f"Error refreshing file attributes for document {doc.id}: {e}")

    def _generateWorkflowContextPrefix(self, message) -> str:
        """Generate workflow context prefix: round{num}_task{num}_action{num}"""
        round_num = message.roundNumber if hasattr(message, 'roundNumber') else 1
        task_num = message.taskNumber if hasattr(message, 'taskNumber') else 0
        action_num = message.actionNumber if hasattr(message, 'actionNumber') else 0
        return f"round{round_num}_task{task_num}_action{action_num}"
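Reviewer note (illustration, not part of this diff): the fallback label produced above, assuming hypothetical message attributes:

# message.roundNumber=2, message.taskNumber=1, message.actionNumber=3
# _generateWorkflowContextPrefix(message) -> "round2_task1_action3"
# exchange label in _getDocumentReferenceList -> "round2_task1_action3_context"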
    def _getDocumentReferenceFromChatDocument(self, document, message) -> str:
        """Get document reference using document ID and filename."""
        try:
            # Use document ID and filename for simple reference
            return f"docItem:{document.id}:{document.fileName}"
        except Exception as e:
            logger.error(f"Critical error creating document reference for document {document.id}: {str(e)}")
            # Re-raise the error to prevent workflow from continuing with invalid data
            raise

    def _getMessageSequenceForExchange(self, exchange, workflow) -> int:
        """Get message sequence number for sorting exchanges by recency"""
        try:
            # Extract message ID from the first document reference
            if exchange['documents'] and len(exchange['documents']) > 0:
                first_doc_ref = exchange['documents'][0]
                if first_doc_ref.startswith("docItem:"):
                    # docItem:<id>:<label> - extract ID
                    parts = first_doc_ref.split(':')
                    if len(parts) >= 2:
                        doc_id = parts[1]
                        # Find the message containing this document
                        for message in workflow.messages:
                            if message.documents:
                                for doc in message.documents:
                                    if doc.id == doc_id:
                                        return message.sequenceNr if hasattr(message, 'sequenceNr') else 0
                elif first_doc_ref.startswith("docList:"):
                    # docList:<message_id>:<label> - extract message ID
                    parts = first_doc_ref.split(':')
                    if len(parts) >= 2:
                        message_id = parts[1]
                        # Find the message by ID
                        for message in workflow.messages:
                            if str(message.id) == message_id:
                                return message.sequenceNr if hasattr(message, 'sequenceNr') else 0
            return 0
        except Exception as e:
            logger.error(f"Error getting message sequence for exchange: {str(e)}")
            return 0

    def _validateDocumentLabelConsistency(self, message) -> str:
        """Validate that the document label used for references matches the message's actual label"""
        if not hasattr(message, 'documentsLabel') or not message.documentsLabel:
            return None

        # Simply return the message's actual documentsLabel - no correction, just validation
        return message.documentsLabel

    def getConnectionReferenceList(self) -> List[str]:
        """Get connection reference list (matching old handlingTasks.py logic)"""
        try:
            # Get connections from the database using the same logic as the old system
            if hasattr(self.serviceCenter, 'interfaceDbApp') and hasattr(self.serviceCenter, 'user'):
                userId = self.serviceCenter.user.id
                connections = self.serviceCenter.interfaceDbApp.getUserConnections(userId)
                if connections:
                    # Format connections as reference strings using the same pattern as the old system
                    connectionRefs = []
                    for conn in connections:
                        # Create reference string in format: connection:{authority}:{username}:{id} [status:..., token:...]
                        # This matches the format expected by getUserConnectionFromConnectionReference()
                        ref = self.getConnectionReferenceFromUserConnection(conn)
                        connectionRefs.append(ref)
                    return connectionRefs

            return []
        except Exception as e:
            logger.error(f"Error getting connection reference list: {str(e)}")
            return []
@@ -1,226 +0,0 @@
import asyncio
import os
import sys
from typing import List, Dict, Any

# Ensure relative imports work when running directly
CURRENT_DIR = os.path.dirname(__file__)
GATEWAY_DIR = os.path.dirname(os.path.dirname(CURRENT_DIR))
if GATEWAY_DIR not in sys.path:
    sys.path.append(GATEWAY_DIR)

from modules.services.serviceExtraction.mainServiceExtraction import ExtractionService
from modules.services.serviceGeneration.mainServiceGeneration import DocumentGenerationService
from modules.datamodels.datamodelWorkflow import ActionResult, ActionDocument
from modules.datamodels.datamodelAi import AiCallOptions, OperationType, ProcessingMode, Priority
from modules.services.serviceAi.mainServiceAi import AiService


TESTDATA_DIR = os.path.join(GATEWAY_DIR, "testdata")


def _read_test_files() -> List[Dict[str, Any]]:
    files = []
    for name in os.listdir(TESTDATA_DIR):
        path = os.path.join(TESTDATA_DIR, name)
        if not os.path.isfile(path):
            continue
        try:
            with open(path, "rb") as f:
                data = f.read()
            mime = _guess_mime(name)
            files.append({
                "id": name,
                "bytes": data,
                "fileName": name,
                "mimeType": mime,
            })
        except Exception:
            continue
    return files


def _guess_mime(name: str) -> str:
    lower = name.lower()
    if lower.endswith(".pdf"):
        return "application/pdf"
    if lower.endswith(".xlsx"):
        return "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
    if lower.endswith(".jpg") or lower.endswith(".jpeg"):
        return "image/jpeg"
    if lower.endswith(".png"):
        return "image/png"
    return "application/octet-stream"


def run_extraction_1000_bytes() -> None:
    svc = ExtractionService()
    docs = _read_test_files()
    options = {
        # cap total pooled size per document set
        "maxSize": 1000,
        # allow chunking to respect the cap across parts
        "chunkAllowed": True,
        # chunk sizes for different content types to help fit under the cap
        "textChunkSize": 500,
        "tableChunkSize": 500,
        "structureChunkSize": 500,
        # simple merge strategy if supported
        "mergeStrategy": {},
    }
    results = svc.extractContent(docs, options)
    print("[extraction] documents:", len(docs), "results:", len(results))
    for i, ec in enumerate(results):
        total = sum(int(p.metadata.get("size", 0) or 0) for p in ec.parts)
        print(f"  - doc[{i}] parts={len(ec.parts)} pooledBytes={total}")


async def main():
    print("=== serviceExtraction: compress to 1000 bytes ===")
    run_extraction_1000_bytes()
    print("\n=== serviceGeneration: create ActionResult and write output to testdata ===")
    await run_generation_write_file()
    print("\n=== serviceAi: planning call + image + pdf extraction ===")
    await run_ai_tests()


if __name__ == "__main__":
    asyncio.run(main())


async def run_generation_write_file() -> None:
    # Minimal stubs for interfaces expected by DocumentGenerationService
    class _FileItem:
        def __init__(self, file_id: str, file_name: str, mime_type: str, content: bytes):
            self.id = file_id
            self.fileName = file_name
            self.mimeType = mime_type
            self.fileSize = len(content)

    class _ComponentInterface:
        def __init__(self):
            self._files = {}

        def createFile(self, name: str, mimeType: str, content: bytes):
            fid = f"test_{len(self._files) + 1}"
            item = _FileItem(fid, name, mimeType, content)
            self._files[fid] = item
            return item

        def createFileData(self, fileId: str, content: bytes):
            # Persist into testdata directory as requested
            item = self._files[fileId]
            out_path = os.path.join(TESTDATA_DIR, f"output_{fileId}_{item.fileName}")
            with open(out_path, "wb") as f:
                f.write(content)

        def getFile(self, fileId: str):
            return self._files.get(fileId)

    class _ServiceCenter:
        def __init__(self, comp):
            self.interfaceDbComponent = comp
            self.interfaceDbChat = None
            self.workflow = type("_Wf", (), {"id": "wf_test", "currentRound": 1, "currentTask": 1, "currentAction": 1, "status": "running", "totalTasks": 1, "totalActions": 1})()

    component = _ComponentInterface()
    center = _ServiceCenter(component)
    gen = DocumentGenerationService(center)

    # Build a fake action and ActionResult with a small text document
    class _Action:
        def __init__(self):
            self.id = "action_test"
            self.execMethod = "document"
            self.execAction = "generate"
            self.execParameters = {}
            self.execResultLabel = "round1_task1_action1_results"

    action = _Action()

    content = "This is a generated test file from serviceGeneration test."
    action_doc = ActionDocument(documentName="test_generated.txt", documentData=content, mimeType="text/plain")
    action_result = ActionResult(success=True, documents=[action_doc])

    docs = gen.createDocumentsFromActionResult(action_result, action, center.workflow, message_id="msg_test")
    print("[generation] created documents:", len(docs))


async def run_ai_tests() -> None:
    # Create AiService instance (uses internal default model registry; no external creds required for this test)
    ai = await AiService.create()

    # Planning AI call (like in handlingTasks.generateTaskPlan)
    plan_options = AiCallOptions(
        operationType=OperationType.GENERATE_PLAN,
        priority=Priority.QUALITY,
        compressPrompt=False,
        compressContext=False,
        processingMode=ProcessingMode.DETAILED,
        maxCost=0.05,
        maxProcessingTime=10,
    )
    plan_prompt = """
You are a planning assistant. Return a compact JSON with fields: tasks:[{id, objective, success_criteria:[]}], languageUserDetected:"en".
Create exactly one simple task id:"task_1" objective:"Test planning" success_criteria:["done"].
""".strip()
    plan_resp = await ai.callAi(prompt=plan_prompt, placeholders=None, options=plan_options)
    print("[ai] planning response length:", len(plan_resp) if plan_resp else 0)

    # Image content extraction prompt using test JPEG
    img_path = os.path.join(TESTDATA_DIR, "00Untitled.jpg")
    img_resp = None
    if os.path.exists(img_path):
        try:
            with open(img_path, "rb") as f:
                img_bytes = f.read()
            img_options = AiCallOptions(
                operationType=OperationType.ANALYSE_CONTENT,
                priority=Priority.BALANCED,
                compressPrompt=True,
                compressContext=False,
                processingMode=ProcessingMode.ADVANCED,
                maxCost=0.02,
                maxProcessingTime=10,
            )
            img_resp = await ai.callAiImage(
                prompt="Describe the content of this image succinctly.",
                imageData=img_bytes,
                mimeType="image/jpeg",
                options=img_options,
            )
            print("[ai] image analysis response length:", len(img_resp) if img_resp else 0)
        except Exception as e:
            print("[ai] image analysis error:", str(e))
    else:
        print("[ai] image test file not found; skipping")

    # PDF extraction prompt: emulate text call with document context built via ExtractionService
    pdf_path = os.path.join(TESTDATA_DIR, "diagramm_komponenten.pdf")
    if os.path.exists(pdf_path):
        try:
            # Build a minimal ChatDocument-like shim that AiService._callAiText expects via extraction
            class _Doc:
                def __init__(self, file_path: str, mime: str):
                    self.id = "doc_pdf"
                    self.fileName = os.path.basename(file_path)
                    self.mimeType = mime
                    with open(file_path, "rb") as f:
                        self.fileData = f.read()

            pdf_doc = _Doc(pdf_path, "application/pdf")

            pdf_options = AiCallOptions(
                operationType=OperationType.ANALYSE_CONTENT,
                priority=Priority.BALANCED,
                compressPrompt=True,
                compressContext=True,
                processingMode=ProcessingMode.ADVANCED,
                maxContextBytes=1000,
                chunkAllowed=True,
                maxCost=0.02,
                maxProcessingTime=10,
            )
            pdf_prompt = "Extract key information from the attached PDF."
            pdf_resp = await ai.callAi(prompt=pdf_prompt, documents=[pdf_doc], options=pdf_options)
            print("[ai] pdf extraction response length:", len(pdf_resp) if pdf_resp else 0)
        except Exception as e:
            print("[ai] pdf extraction error:", str(e))
    else:
        print("[ai] pdf test file not found; skipping")
@@ -324,7 +324,7 @@ def getModelClasses() -> Dict[str, Type[BaseModel]]:
        os.path.dirname(os.path.dirname(__file__)), "interfaces"
    )

-    # Find all model files
+    # Find all model files in interfaces directory
    for fileName in os.listdir(interfaces_dir):
        if fileName.endswith("Model.py"):
            # Convert fileName to module name (e.g., gatewayModel.py -> gatewayModel)
@@ -342,6 +342,29 @@ def getModelClasses() -> Dict[str, Type[BaseModel]]:
                ):
                    modelClasses[name] = obj

+    # Also get models from datamodels directory
+    datamodels_dir = os.path.join(
+        os.path.dirname(os.path.dirname(__file__)), "datamodels"
+    )
+
+    # Find all model files in datamodels directory
+    for fileName in os.listdir(datamodels_dir):
+        if fileName.startswith("datamodel") and fileName.endswith(".py"):
+            # Convert fileName to module name (e.g., datamodelUtils.py -> datamodelUtils)
+            module_name = fileName[:-3]
+
+            # Import the module dynamically
+            module = importlib.import_module(f"modules.datamodels.{module_name}")
+
+            # Get all classes from the module
+            for name, obj in inspect.getmembers(module):
+                if (
+                    inspect.isclass(obj)
+                    and issubclass(obj, BaseModel)
+                    and obj != BaseModel
+                ):
+                    modelClasses[name] = obj

    return modelClasses
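Reviewer note (illustration, not part of this diff): after this change, getModelClasses() aggregates Pydantic models from both scans. Roughly, with hypothetical file names:

# interfaces/gatewayModel.py   -> picked up by the existing "*Model.py" scan
# datamodels/datamodelChat.py  -> picked up by the new "datamodel*.py" scan
# Any BaseModel subclass defined in either module lands in modelClasses[<class name>].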
@@ -8,8 +8,10 @@ from typing import Dict, Any, List, Optional
from datetime import datetime, UTC

from modules.workflows.methods.methodBase import MethodBase, action
-from modules.datamodels.datamodelWorkflow import ActionResult
+from modules.datamodels.datamodelChat import ActionResult
from modules.datamodels.datamodelAi import AiCallOptions, OperationType, Priority
+from modules.datamodels.datamodelChat import ChatDocument
+from modules.datamodels.datamodelWeb import WebResearchRequest, WebResearchOptions

logger = logging.getLogger(__name__)
@@ -28,26 +30,36 @@ class MethodAi(MethodBase):
    @action
    async def process(self, parameters: Dict[str, Any]) -> ActionResult:
        """
-        Perform an AI call for any type of task with optional document references
+        GENERAL:
+        - Purpose: AI-based analysis and content generation with optional document context.
+        - Input requirements: aiPrompt (required); optional documentList, resultType, processingMode, includeMetadata, operationType, priority, maxCost, maxProcessingTime, requiredTags.
+        - Output format: Single or multiple documents in requested format.

        Parameters:
-            aiPrompt (str): The AI prompt for processing
-            documentList (list, optional): List of document references to include in context
-            expectedDocumentFormat (str, optional): Expected document output format with extension, mimeType, description
-            processingMode (str, optional): Processing mode - use 'basic', 'advanced', or 'detailed' (defaults to 'basic')
-            includeMetadata (bool, optional): Whether to include metadata (default: True)
-            operationType (str, optional): Operation type - use 'general', 'generate_plan', 'analyse_content', 'generate_content', 'web_research', 'image_analysis', or 'image_generation'
-            priority (str, optional): Priority level - use 'speed', 'quality', 'cost', or 'balanced'
-            maxCost (float, optional): Maximum cost budget for the AI call
-            maxProcessingTime (int, optional): Maximum processing time in seconds
-            requiredTags (list, optional): Required model tags - use 'text', 'chat', 'reasoning', 'analysis', 'image', 'vision', 'web', 'search', etc.
+        - aiPrompt (str, required): Instruction for the AI.
+        - documentList (list, optional): Document reference(s) for context.
+        - resultType (str, optional): Output extension (txt, json, md, csv, xml, html, pdf, docx, xlsx, png). Default: txt.
+        - processingMode (str, optional): basic | advanced | detailed. Default: basic.
+        - includeMetadata (bool, optional): Include metadata when available. Default: True.
+        - operationType (str, optional): general | generate_plan | analyse_content | generate_content | web_research | image_analysis | image_generation. Default: general.
+        - priority (str, optional): speed | quality | cost | balanced. Default: balanced.
+        - maxCost (float, optional): Cost limit.
+        - maxProcessingTime (int, optional): Time limit in seconds.
+        - requiredTags (list, optional): Capability tags (e.g., text, chat, reasoning, analysis, image, vision, web, search).
        """
        try:
+            # Debug logging to see what parameters are received
+            logger.info(f"MethodAi.process received parameters: {parameters}")
+            logger.info(f"Parameters type: {type(parameters)}")
+            logger.info(f"Parameters keys: {list(parameters.keys()) if isinstance(parameters, dict) else 'Not a dict'}")
+
            aiPrompt = parameters.get("aiPrompt")
+            logger.info(f"aiPrompt extracted: '{aiPrompt}' (type: {type(aiPrompt)})")
+
            documentList = parameters.get("documentList", [])
            if isinstance(documentList, str):
                documentList = [documentList]
-            expectedDocumentFormat = parameters.get("expectedDocumentFormat", "")
+            resultType = parameters.get("resultType", "txt")
            processingMode = parameters.get("processingMode", "basic")
            includeMetadata = parameters.get("includeMetadata", True)
            operationType = parameters.get("operationType", "general")
@@ -57,102 +69,23 @@ class MethodAi(MethodBase):
            requiredTags = parameters.get("requiredTags")

            if not aiPrompt:
                logger.error(f"aiPrompt is missing or empty. Parameters: {parameters}")
                return ActionResult.isFailure(
                    error="AI prompt is required"
                )

-            # Determine output format first (needed for context building)
-            output_extension = ".txt"  # Default
-            output_mime_type = "text/plain"  # Default
+            # Determine output extension and default MIME type without duplicating service logic
+            normalized_result_type = (str(resultType).strip().lstrip('.').lower() or "txt")
+            output_extension = f".{normalized_result_type}"
+            output_mime_type = "application/octet-stream"  # Prefer service-provided mimeType when available
+            logger.info(f"Using result type: {resultType} -> {output_extension}")

-            if expectedDocumentFormat:
-                output_extension = expected_format.get("extension", ".txt")
-                output_mime_type = expected_format.get("mimeType", "text/plain")
-                logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
-
-            # Build context from documents if provided
-            context = ""
+            # Get ChatDocuments for AI service - let AI service handle all document processing
+            chatDocuments = []
            if documentList:
                chatDocuments = self.services.workflow.getChatDocumentsFromDocumentList(documentList)
                if chatDocuments:
-                    context_parts = []
-                    # Build batch payload for extraction
-                    batch_docs = []
-                    for doc in chatDocuments:
-                        try:
-                            fileBytes = self.services.workflow.getFileData(doc.fileId) if hasattr(doc, 'fileId') else None
-                        except Exception:
-                            fileBytes = None
-                        batch_docs.append({
-                            "id": getattr(doc, 'id', None),
-                            "bytes": fileBytes or b"",
-                            "fileName": getattr(doc, 'fileName', 'unknown'),
-                            "mimeType": getattr(doc, 'mimeType', None) or "application/octet-stream"
-                        })
-
-                    extraction_prompt = (
-                        f"Extract content for AI task context. Task: {aiPrompt}. Mode: {processingMode}."
-                    )
-                    try:
-                        extracted_list = await self.services.extraction.extractContentFromDocuments(
-                            prompt=extraction_prompt,
-                            documents=batch_docs,
-                            options={"ai": {"enabled": False}, "mergeStrategy": {}}
-                        )
-                    except Exception:
-                        extracted_list = []
-
-                    # Helper to aggregate readable text from parts
-                    def _partsToText(parts) -> str:
-                        lines: List[str] = []
-                        for p in (parts or []):
-                            try:
-                                if getattr(p, 'typeGroup', '') in ("text", "table", "structure") and getattr(p, 'data', None):
-                                    lines.append(p.data)
-                            except Exception:
-                                continue
-                        return "\n\n".join(lines)
-
-                    for i, doc in enumerate(chatDocuments):
-                        file_info = self.services.workflow.getFileInfo(doc.fileId)
-                        content = ""
-                        try:
-                            ec = extracted_list[i] if i < len(extracted_list) else None
-                            if ec:
-                                content = _partsToText(getattr(ec, 'parts', []))
-                        except Exception:
-                            content = ""
-
-                        if content.strip():
-                            metadata_info = ""
-                            if file_info and includeMetadata:
-                                metadata_info = f" (Size: {file_info.get('fileSize', 'unknown')}, Type: {file_info.get('mimeType', 'unknown')})"
-                            base_length = 5000 if processingMode == "detailed" else 3000 if processingMode == "advanced" else 2000
-                            if processingMode == "detailed":
-                                context_parts.append(
-                                    f"Document: {doc.fileName}{metadata_info}\nRelevance to AI Task: This document contains content directly related to '{aiPrompt[:100]}...'\nContent:\n{content[:base_length]}..."
-                                )
-                            else:
-                                context_parts.append(
-                                    f"Document: {doc.fileName}{metadata_info}\nContent:\n{content[:base_length]}..."
-                                )
-                        else:
-                            context_parts.append(f"Document: {doc.fileName} [No readable text content - binary file]")
-
-                    if context_parts:
-                        context_header = f"""
-=== DOCUMENT CONTEXT FOR AI PROCESSING ===
-AI Task: {aiPrompt[:100]}...
-Processing Mode: {processingMode}
-Expected Output Format: {output_extension.upper()}
-Total Documents: {len(chatDocuments)}
-
-The following documents contain content relevant to your task.
-Use this information to provide the most accurate and helpful response.
-================================================
-"""
-                        context = context_header + "\n\n" + "\n\n".join(context_parts)
-                        logger.info(f"Included {len(chatDocuments)} documents in AI context with task-specific extraction")
+                    logger.info(f"Prepared {len(chatDocuments)} documents for AI processing")

            # Build enhanced prompt
            enhanced_prompt = aiPrompt
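Reviewer note (illustration, not part of this diff): the normalization introduced above accepts dotted and mixed-case values; a quick sketch:

# Illustration only.
for resultType in (".JSON", " md ", ""):
    normalized = (str(resultType).strip().lstrip('.').lower() or "txt")
    print(f".{normalized}")
# -> .json  .md  .txt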
@@ -163,52 +96,14 @@ class MethodAi(MethodBase):
            elif processingMode == "advanced":
                enhanced_prompt += "\n\nPlease provide an advanced response with deep insights."

-            # Add custom instructions if provided
-            if customInstructions:
-                enhanced_prompt += f"\n\nAdditional Instructions: {customInstructions}"
+            # Note: customInstructions parameter was removed as it's not defined in the method signature

-            # Add format-specific instructions only if non-text format is requested
-            if output_extension != ".txt":
-                if output_extension == ".csv":
-                    enhanced_prompt += f"\n\nCRITICAL: Deliver the result as pure CSV data without any markdown formatting, code blocks, or additional text. Output only the CSV content with proper headers and data rows."
-                elif output_extension == ".json":
-                    enhanced_prompt += f"\n\nCRITICAL: Deliver the result as pure JSON data without any markdown formatting, code blocks, or additional text. Output only the JSON content."
-                elif output_extension == ".xml":
-                    enhanced_prompt += f"\n\nCRITICAL: Deliver the result as pure XML data without any markdown formatting, code blocks, or additional text. Output only the XML content."
-                else:
-                    enhanced_prompt += f"\n\nCRITICAL: Deliver the result as pure {output_extension.upper()} data without any markdown formatting, code blocks, or additional text."
-
-            # Call appropriate AI service based on processing mode
-            logger.info(f"Executing AI call with mode: {processingMode}, prompt length: {len(enhanced_prompt)}")
-            if context:
-                logger.info(f"Including context from {len(documentList)} documents")
-
-            # Encourage longer, structured outputs with a min-length hint
-            min_tokens_hint = "\n\nPlease ensure the response is substantial and complete."
-            call_prompt = enhanced_prompt + min_tokens_hint
-
-            # Centralized AI call with optional document context
-            documents = []
-            try:
-                if documentList:
-                    for d in (chatDocuments or []):
-                        try:
-                            file_data = self.services.workflow.getFileData(d.fileId)
-                            documents.append(
-                                ChatDocument(
-                                    fileData=file_data,
-                                    fileName=d.fileName,
-                                    mimeType=d.mimeType
-                                )
-                            )
-                        except Exception:
-                            continue
-            except Exception:
-                documents = None
+            # Add format guidance to prompt
+            if normalized_result_type != "txt":
+                enhanced_prompt += f"\n\nPlease deliver the result in {normalized_result_type.upper()} format. Ensure the output follows the proper {normalized_result_type.upper()} syntax and structure."

+            # Build options and delegate document handling to AI/Extraction/Generation services
+            output_format = output_extension.replace('.', '') or 'txt'
+
            # Build options using new AiCallOptions format
            options = AiCallOptions(
                operationType=operationType,
                priority=priority,
@ -222,75 +117,239 @@ class MethodAi(MethodBase):
|
|||
requiredTags=requiredTags
|
||||
)
|
||||
|
||||
supported_generation_formats = {"html", "pdf", "docx", "txt", "md", "json", "csv", "xlsx"}
|
||||
output_format_arg = output_format if output_format in supported_generation_formats else None
|
||||
|
||||
result = await self.services.ai.callAi(
|
||||
prompt=call_prompt,
|
||||
documents=documents or None,
|
||||
options=options
|
||||
prompt=enhanced_prompt,
|
||||
documents=chatDocuments if chatDocuments else None,
|
||||
options=options,
|
||||
outputFormat=output_format_arg
|
||||
)
|
||||
|
||||
# If expected JSON and too short/not JSON, retry with stricter JSON guardrails
|
||||
if output_extension == ".json":
|
||||
import json
|
||||
cleaned = (result or "").strip()
|
||||
if cleaned.startswith('```json'):
|
||||
cleaned = cleaned[7:]
|
||||
if cleaned.endswith('```'):
|
||||
cleaned = cleaned[:-3]
|
||||
cleaned = cleaned.strip()
|
||||
needs_retry = False
|
||||
try:
|
||||
parsed = json.loads(cleaned)
|
||||
# Heuristic: small dict -> possibly underfilled
|
||||
if isinstance(parsed, dict) and len(parsed.keys()) <= 2:
|
||||
needs_retry = True
|
||||
except Exception:
|
||||
needs_retry = True
|
||||
from modules.datamodels.datamodelChat import ActionDocument
|
||||
|
||||
if needs_retry:
|
||||
guardrail_prompt = (
|
||||
enhanced_prompt
|
||||
+ "\n\nCRITICAL: Return ONLY valid JSON, no markdown, no code fences. "
|
||||
"Include all requested fields with detailed content."
|
||||
if isinstance(result, dict) and isinstance(result.get("documents"), list):
|
||||
action_documents = []
|
||||
for d in result["documents"]:
|
||||
action_documents.append(ActionDocument(
|
||||
documentName=d.get("documentName"),
|
||||
documentData=d.get("documentData"),
|
||||
mimeType=d.get("mimeType") or output_mime_type
|
||||
))
|
||||
return ActionResult.isSuccess(documents=action_documents)
|
||||
|
||||
extension = output_extension.lstrip('.')
|
||||
meaningful_name = self._generateMeaningfulFileName(
|
||||
base_name="ai",
|
||||
extension=extension,
|
||||
action_name="result"
|
||||
)
|
||||
try:
|
||||
result = await self.services.ai.callAi(
|
||||
prompt=guardrail_prompt,
|
||||
documents=context or None,
|
||||
options=AiCallOptions(
|
||||
operationType=OperationType.GENERATE_CONTENT,
|
||||
priority=Priority.QUALITY,
|
||||
compressPrompt=False,
|
||||
compressContext=True,
|
||||
processDocumentsIndividually=True,
|
||||
processingMode="detailed",
|
||||
resultFormat="json",
|
||||
maxCost=0.03,
|
||||
maxProcessingTime=30
|
||||
)
|
||||
)
|
||||
except Exception:
|
||||
result = cleaned # fallback to first attempt
|
||||
|
||||
# Create result document
|
||||
fileName = f"ai_{processingMode}_{self._format_timestamp_for_filename()}{output_extension}"
|
||||
|
||||
|
||||
|
||||
# Return result in the standard ActionResult format
|
||||
return ActionResult.isSuccess(
|
||||
documents=[{
|
||||
"documentName": fileName,
|
||||
"documentData": {
|
||||
"result": result,
|
||||
"fileName": fileName,
|
||||
"processedDocuments": len(documentList) if documentList else 0
|
||||
},
|
||||
"mimeType": output_mime_type
|
||||
}]
|
||||
action_document = ActionDocument(
|
||||
documentName=meaningful_name,
|
||||
documentData=result,
|
||||
mimeType=output_mime_type
|
||||
)
|
||||
return ActionResult.isSuccess(documents=[action_document])
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error in AI processing: {str(e)}")
|
||||
return ActionResult.isFailure(
|
||||
error=str(e)
|
||||
)
|
||||
|
||||
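The fence-stripping and retry heuristic above can be hard to follow inline across the diff; below is a minimal standalone sketch of the same guardrail decision, lifted from the added lines. The function name is illustrative, not part of the codebase.

    import json

    def needs_json_retry(result: str) -> tuple[str, bool]:
        """Strip an optional ```json fence pair and decide whether a stricter retry is needed."""
        cleaned = (result or "").strip()
        if cleaned.startswith("```json"):
            cleaned = cleaned[7:]
        if cleaned.endswith("```"):
            cleaned = cleaned[:-3]
        cleaned = cleaned.strip()
        try:
            parsed = json.loads(cleaned)
            # Heuristic from the diff: a dict with <= 2 keys is treated as underfilled
            return cleaned, isinstance(parsed, dict) and len(parsed) <= 2
        except Exception:
            return cleaned, True  # not valid JSON -> retry

    # Example: needs_json_retry('```json\n{"a": 1}\n```') -> ('{"a": 1}', True)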
    @action
    async def webResearch(self, parameters: Dict[str, Any]) -> ActionResult:
        """
        GENERAL:
        - Purpose: Web research and information gathering with basic analysis and sources.
        - Input requirements: user_prompt (required); optional urls, max_results, max_pages, search_depth, extract_depth, pages_search_depth, country, time_range, topic, language.
        - Output format: JSON with results and sources.

        Parameters:
        - user_prompt (str, required): Research question or topic.
        - urls (list, optional): Specific URLs to crawl.
        - max_results (int, optional): Max search results. Default: 10.
        - max_pages (int, optional): Max pages to crawl per site. Default: 10.
        - search_depth (str, optional): basic | advanced. Default: basic.
        - extract_depth (str, optional): basic | advanced. Default: advanced.
        - pages_search_depth (int, optional): Crawl depth level. Default: 2.
        - country (str, optional): Country code for bias.
        - time_range (str, optional): d | w | m | y.
        - topic (str, optional): general | news | academic.
        - language (str, optional): Language code (e.g., de, en, fr).
        """
        try:
            user_prompt = parameters.get("user_prompt")
            urls = parameters.get("urls")
            max_results = parameters.get("max_results", 10)
            max_pages = parameters.get("max_pages", 10)
            search_depth = parameters.get("search_depth", "basic")
            extract_depth = parameters.get("extract_depth", "advanced")
            pages_search_depth = parameters.get("pages_search_depth", 2)
            country = parameters.get("country")
            time_range = parameters.get("time_range")
            topic = parameters.get("topic")
            language = parameters.get("language")

            if not user_prompt:
                return ActionResult.isFailure(
                    error="Search query is required"
                )

            # Build WebResearchOptions
            options = WebResearchOptions(
                max_pages=max_pages,
                search_depth=search_depth,
                extract_depth=extract_depth,
                pages_search_depth=pages_search_depth,
                country=country,
                time_range=time_range,
                topic=topic,
                language=language
            )

            # Build WebResearchRequest
            request = WebResearchRequest(
                user_prompt=user_prompt,
                urls=urls,
                max_results=max_results,
                options=options
            )

            # Call web research service
            logger.info(f"Performing comprehensive web research for: {user_prompt}")
            logger.info(f"Max results: {max_results}, Max pages: {max_pages}")
            if urls:
                logger.info(f"Using provided URLs: {len(urls)}")

            result = await self.services.ai.webResearch(request)

            if not result.success:
                return ActionResult.isFailure(error=result.error)

            # Convert WebResearchActionResult to ActionResult format
            documents = []
            for doc in result.documents:
                documents.append({
                    "documentName": doc.documentName,
                    "documentData": {
                        "user_prompt": doc.documentData.user_prompt,
                        "websites_analyzed": doc.documentData.websites_analyzed,
                        "additional_links_found": doc.documentData.additional_links_found,
                        "analysis_result": doc.documentData.analysis_result,
                        "sources": [{"title": s.title, "url": str(s.url)} for s in doc.documentData.sources],
                        "additional_links": doc.documentData.additional_links,
                        "debug_info": doc.documentData.debug_info
                    },
                    "mimeType": doc.mimeType
                })

            # Return result in the standard ActionResult format
            return ActionResult.isSuccess(
                documents=documents
            )

        except Exception as e:
            logger.error(f"Error in web research: {str(e)}")
            return ActionResult.isFailure(
                error=str(e)
            )
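For orientation, a minimal call of the action above might look like the following; the `method_ai` instance name and the exact result shape are assumptions based on the docstring, not verified API.

    # Hypothetical invocation sketch; parameter names follow the docstring above.
    parameters = {
        "user_prompt": "Current EU AI Act implementation deadlines",
        "max_results": 5,
        "search_depth": "advanced",
        "time_range": "m",   # last month
        "topic": "news",
        "language": "en",
    }
    # result = await method_ai.webResearch(parameters)
    # On success, result.documents[0]["documentData"]["sources"] lists {title, url} pairs.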
    def _mergeDataChunks(self, chunks: List[str], resultType: str, mimeType: str) -> str:
        """Intelligently merge data chunks using strategies based on content type"""
        try:
            if resultType == "json":
                return self._mergeJsonChunks(chunks)
            elif resultType in ["csv", "table"]:
                return self._mergeTableChunks(chunks)
            elif resultType in ["txt", "md", "text"]:
                return self._mergeTextChunks(chunks)
            else:
                # Default: simple concatenation
                return "\n".join(str(chunk) for chunk in chunks)
        except Exception as e:
            logger.warning(f"Failed to merge chunks intelligently: {str(e)}, using simple concatenation")
            return "\n".join(str(chunk) for chunk in chunks)

    def _mergeJsonChunks(self, chunks: List[str]) -> str:
        """Merge JSON chunks intelligently"""
        import json

        merged_data = []
        for i, chunk in enumerate(chunks):
            try:
                if isinstance(chunk, str):
                    chunk_data = json.loads(chunk)
                else:
                    chunk_data = chunk

                if isinstance(chunk_data, list):
                    merged_data.extend(chunk_data)
                elif isinstance(chunk_data, dict):
                    # For objects, merge by combining keys
                    if not merged_data:
                        merged_data = chunk_data
                    else:
                        if isinstance(merged_data, dict):
                            merged_data.update(chunk_data)
                        else:
                            merged_data.append(chunk_data)
                else:
                    merged_data.append(chunk_data)
            except Exception as e:
                logger.warning(f"Failed to parse chunk {i}: {str(e)}")
                # Add as string if JSON parsing fails
                merged_data.append(str(chunk))

        return json.dumps(merged_data, indent=2)
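As a quick illustration of the merge semantics above (list chunks extend the accumulator, dict chunks update it or get appended), here is a sketch with made-up chunk values:

    # Two JSON chunks: a list and a dict. With the logic above, the list seeds
    # merged_data; the dict is then appended because merged_data is a list.
    chunks = ['[{"id": 1}, {"id": 2}]', '{"id": 3}']
    # self._mergeJsonChunks(chunks) would yield:
    # [
    #   {"id": 1},
    #   {"id": 2},
    #   {"id": 3}
    # ]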
    def _mergeTableChunks(self, chunks: List[str]) -> str:
        """Merge table chunks (CSV) intelligently"""
        import csv
        import io

        merged_rows = []
        headers = None

        for i, chunk in enumerate(chunks):
            try:
                # Parse CSV chunk
                reader = csv.reader(io.StringIO(str(chunk)))
                rows = list(reader)

                if not rows:
                    continue

                # First chunk: capture headers
                if i == 0:
                    headers = rows[0] if rows else []
                    merged_rows.extend(rows)
                else:
                    # Subsequent chunks: skip header if it matches
                    if rows and rows[0] == headers:
                        merged_rows.extend(rows[1:])  # Skip duplicate header
                    else:
                        merged_rows.extend(rows)

            except Exception as e:
                logger.warning(f"Failed to parse table chunk {i}: {str(e)}")
                # Add as raw text if CSV parsing fails
                merged_rows.append([f"Raw chunk {i}: {str(chunk)[:100]}..."])

        # Convert back to CSV
        output = io.StringIO()
        writer = csv.writer(output)
        writer.writerows(merged_rows)
        return output.getvalue()
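Correspondingly, the table merge de-duplicates a repeated header across chunks; a small sketch with illustrative data:

    chunks = [
        "name,score\nalice,1",
        "name,score\nbob,2",   # header matches the first chunk and is skipped
    ]
    # self._mergeTableChunks(chunks) produces (modulo csv module line endings):
    # name,score
    # alice,1
    # bob,2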
    def _mergeTextChunks(self, chunks: List[str]) -> str:
        """Merge text chunks intelligently"""
        # Simple concatenation with proper spacing
        merged = []
        for chunk in chunks:
            chunk_str = str(chunk).strip()
            if chunk_str:
                merged.append(chunk_str)

        return "\n\n".join(merged)  # Double newline between chunks for readability
@@ -130,6 +130,9 @@ class MethodBase:
            # Extract parameter name and type
            if '(' in paramPart:
                paramName = paramPart.split('(')[0].strip()
                # Normalize bullet-prefixed parameter names like "- aiPrompt" or "* aiPrompt"
                if paramName.startswith('-') or paramName.startswith('*'):
                    paramName = paramName[1:].strip()
                paramType = paramPart[paramPart.find('(')+1:paramPart.find(')')].strip()
                descriptions[paramName] = descPart
                types[paramName] = paramType
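A minimal sketch of the normalization added above; the sample docstring lines are illustrative, not taken from a real method:

    for paramPart in ["- aiPrompt (str)", "* title (str)", "maxResults (int)"]:
        paramName = paramPart.split('(')[0].strip()
        if paramName.startswith('-') or paramName.startswith('*'):
            paramName = paramName[1:].strip()
        paramType = paramPart[paramPart.find('(') + 1:paramPart.find(')')].strip()
        print(paramName, paramType)  # -> "aiPrompt str", "title str", "maxResults int"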
@@ -166,3 +169,53 @@ class MethodBase:
            return type_annotation._name
        else:
            return str(type_annotation)

    def _generateMeaningfulFileName(self, base_name: str, extension: str, workflow_context: Dict[str, Any] = None, action_name: str = None) -> str:
        """
        Generate a meaningful file name with round/task/action information.

        Format: {base_name}_r{round}t{task}a{action}.{extension}
        Example: report_r1t3a4.json

        Args:
            base_name: Base name for the file (e.g., "report", "analysis", "summary")
            extension: File extension without dot (e.g., "json", "html", "txt")
            workflow_context: Dictionary with currentRound, currentTask, currentAction
            action_name: Name of the action being performed (optional, for additional context)

        Returns:
            Formatted file name string
        """
        try:
            # Get workflow context from services if not provided
            if workflow_context is None and hasattr(self.services, 'workflow'):
                workflow_context = self.services.workflow.getWorkflowContext()

            # Extract round, task, action numbers
            round_num = workflow_context.get('currentRound', 0) if workflow_context else 0
            task_num = workflow_context.get('currentTask', 0) if workflow_context else 0
            action_num = workflow_context.get('currentAction', 0) if workflow_context else 0

            # Clean base name (remove special characters, spaces)
            clean_base = base_name.lower().replace(' ', '_').replace('-', '_')
            # Remove any non-alphanumeric characters except underscores
            import re
            clean_base = re.sub(r'[^a-z0-9_]', '', clean_base)

            # Add action name if provided
            if action_name:
                clean_action = action_name.lower().replace(' ', '_').replace('-', '_')
                clean_action = re.sub(r'[^a-z0-9_]', '', clean_action)
                clean_base = f"{clean_base}_{clean_action}"

            # Generate the meaningful file name
            meaningful_name = f"{clean_base}_r{round_num}t{task_num}a{action_num}.{extension}"

            self.logger.debug(f"Generated meaningful file name: {meaningful_name} (Round: {round_num}, Task: {task_num}, Action: {action_num})")
            return meaningful_name

        except Exception as e:
            self.logger.warning(f"Error generating meaningful file name, using fallback: {str(e)}")
            # Fallback to timestamp-based naming
            timestamp = datetime.now(UTC).strftime("%Y%m%d_%H%M%S")
            return f"{base_name}_{timestamp}.{extension}"
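A quick sanity check of the naming scheme above, under the assumption that the workflow context reports round 1, task 3, action 4:

    # Assumed context: {"currentRound": 1, "currentTask": 3, "currentAction": 4}
    # self._generateMeaningfulFileName(base_name="Q3 Report", extension="json", action_name="extract")
    # cleans "Q3 Report" -> "q3_report", appends "extract", and yields:
    # "q3_report_extract_r1t3a4.json"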
@@ -9,7 +9,8 @@ from typing import Dict, Any, List, Optional
from datetime import datetime, UTC

from modules.workflows.methods.methodBase import MethodBase, action
from modules.datamodels.datamodelWorkflow import ActionResult, ChatDocument
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
from modules.datamodels.datamodelChat import ChatDocument
from modules.datamodels.datamodelAi import AiCallOptions, OperationType, Priority

logger = logging.getLogger(__name__)
@@ -30,19 +31,34 @@ class MethodDocument(MethodBase):
    @action
    async def extract(self, parameters: Dict[str, Any]) -> ActionResult:
        """
        Extract content from any document using an AI prompt.
        GENERAL:
        - Purpose: Extract and analyze content from existing documents using AI.
        - Input requirements: documentList (required); prompt (required).
        - Output format: Plain text per source document (.txt by default).

        Parameters:
        documentList (list): Document list reference(s)
        aiPrompt (str): AI prompt for extraction
        expectedDocumentFormats (list, optional): Output formats
        includeMetadata (bool, optional): Include metadata (default: True)
        - documentList (list, required): Document reference(s) to extract from.
        - prompt (str, required): Instruction describing what to extract.
        - operationType (str, optional): extract_content | analyze_document | summarize_content. Default: extract_content.
        - processDocumentsIndividually (bool, optional): Process each document separately. Default: True.
        - chunkAllowed (bool, optional): Allow chunking for large inputs. Default: True.
        - mergeStrategy (dict, optional): Merge strategy for chunked content.
        - expectedDocumentFormats (list, optional): Desired output format specs.
        - includeMetadata (bool, optional): Include file metadata. Default: True.
        """
        try:
            documentList = parameters.get("documentList")
            if isinstance(documentList, str):
                documentList = [documentList]
            aiPrompt = parameters.get("aiPrompt")
            prompt = parameters.get("prompt")
            operationType = parameters.get("operationType", "extract_content")
            processDocumentsIndividually = parameters.get("processDocumentsIndividually", True)
            chunkAllowed = parameters.get("chunkAllowed", True)
            mergeStrategy = parameters.get("mergeStrategy", {
                "groupBy": "typeGroup",
                "orderBy": "id",
                "mergeType": "concatenate"
            })
            expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
            includeMetadata = parameters.get("includeMetadata", True)

@@ -51,9 +67,9 @@ class MethodDocument(MethodBase):
                error="Document list reference is required"
            )

            if not aiPrompt:
            if not prompt:
                return ActionResult.isFailure(
                    error="AI prompt is required"
                    error="Prompt is required"
                )

            chatDocuments = self.services.workflow.getChatDocumentsFromDocumentList(documentList)

@@ -62,120 +78,77 @@ class MethodDocument(MethodBase):
                error="No documents found for the provided reference"
            )

            # Batch extract content from all documents at once
            all_extracted_content = []
            file_infos = []
            batch_docs = []
            for chatDocument in chatDocuments:
                file_info = self.services.workflow.getFileInfo(chatDocument.fileId)
                if includeMetadata:
                    file_infos.append(file_info)
            # Use enhanced AI service with integrated extraction
                try:
                    data = self.services.workflow.getFileData(chatDocument.fileId) if hasattr(chatDocument, 'fileId') else None
                except Exception:
                    data = None
                batch_docs.append({
                    "id": getattr(chatDocument, 'id', None),
                    "bytes": data or b"",
                    "fileName": getattr(chatDocument, 'fileName', 'unknown'),
                    "mimeType": getattr(chatDocument, 'mimeType', None) or "application/octet-stream"
                })

            try:
                extracted_list = await self.services.extraction.extractContentFromDocuments(
                    prompt=aiPrompt,
                    documents=batch_docs,
                    options={"ai": {"enabled": False}}
                # Build AI call options
                ai_options = AiCallOptions(
                    operationType=operationType,
                    processDocumentsIndividually=processDocumentsIndividually,
                    compressContext=not chunkAllowed
                )

                # Add format instructions to prompt if expected formats are provided
                enhanced_prompt = prompt
                if expectedDocumentFormats:
                    format_instructions = []
                    for fmt in expectedDocumentFormats:
                        extension = fmt.get("extension", ".txt")
                        mime_type = fmt.get("mimeType", "text/plain")
                        description = fmt.get("description", "")
                        format_instructions.append(f"- {extension} ({mime_type}): {description}")

                    if format_instructions:
                        enhanced_prompt += f"\n\nPlease format the output as: {', '.join([fmt.get('extension', '.txt') for fmt in expectedDocumentFormats])}"
                        enhanced_prompt += f"\nExpected formats:\n" + "\n".join(format_instructions)

                # Use enhanced AI service for extraction
                ai_response = await self.services.ai.callAi(
                    prompt=enhanced_prompt,
                    documents=chatDocuments,
                    options=ai_options
                )

                logger.info(f"AI extraction completed: {len(ai_response)} characters")

            except Exception as e:
                logger.error(f"Batch extraction failed: {str(e)}")
                extracted_list = []
                logger.error(f"AI extraction failed: {str(e)}")
                ai_response = ""

            all_extracted_content = extracted_list or []

            if not all_extracted_content:
            if not ai_response or ai_response.strip() == "":
                return ActionResult.isFailure(
                    error="No content could be extracted from any documents"
                )

            # Process each document individually with its own format conversion
            output_documents = []
            # Process each document individually with extracted content
            action_documents = []

            for i, chatDocument in enumerate(chatDocuments):
                # Extract text content from this document
                text_content = ""
                try:
                    ec = all_extracted_content[i] if i < len(all_extracted_content) else None
                    if ec and hasattr(ec, 'parts'):
                        text_parts = []
                        for part in getattr(ec, 'parts', []):
                            try:
                                if getattr(part, 'typeGroup', '') in ("text", "table", "structure") and getattr(part, 'data', None):
                                    text_parts.append(part.data)
                            except Exception:
                                continue
                        text_content = "\n".join(text_parts)
                    else:
                        text_content = ""
                except Exception:
                    text_content = ""

                # Get the expected format for this document (or use default)
                target_format = None
                if expectedDocumentFormats and i < len(expectedDocumentFormats):
                    target_format = expectedDocumentFormats[i]
                elif expectedDocumentFormats and len(expectedDocumentFormats) > 0:
                    # If fewer formats than documents, use the last format for remaining documents
                    target_format = expectedDocumentFormats[-1]

                # Determine output format and fileName
                if target_format:
                    target_extension = target_format.get("extension", ".txt")
                    target_mime_type = target_format.get("mimeType", "text/plain")

                    # Check if format conversion is needed
                    if target_extension not in [".txt", ".text"] or target_mime_type != "text/plain":
                        logger.info(f"Converting document {i+1} to format: {target_extension} ({target_mime_type})")
                        # Use AI to convert format
                        formatted_content = await self._convertContentToFormat(text_content, target_format)
                        final_content = formatted_content
                        final_mime_type = target_mime_type
                        final_extension = target_extension
                    else:
                        logger.info(f"Document {i+1}: No format conversion needed, using plain text")
                        final_content = text_content
                        final_mime_type = "text/plain"
                        final_extension = ".txt"
                else:
                    logger.info(f"Document {i+1}: No expected format specified, using plain text")
                    final_content = text_content
                # Use the AI response directly - it already contains processed content
                final_content = ai_response
                    final_mime_type = "text/plain"
                    final_extension = ".txt"

                # Create output fileName based on original fileName and target format
                # Create meaningful output fileName with workflow context
                original_fileName = chatDocument.fileName
                base_name = original_fileName.rsplit('.', 1)[0] if '.' in original_fileName else original_fileName
                output_fileName = f"{base_name}_extracted_{self._format_timestamp_for_filename()}{final_extension}"

                # Create result data for this document
                result_data = {
                    "documentCount": 1,
                    "content": final_content,
                    "originalfileName": original_fileName,
                    "fileInfos": [file_infos[i]] if includeMetadata and i < len(file_infos) else None,
                    "timestamp": self.services.utils.getUtcTimestamp()
                }
                extension = final_extension.lstrip('.')  # Remove leading dot for meaningful naming
                output_fileName = self._generateMeaningfulFileName(
                    base_name=f"{base_name}_extracted",
                    extension=extension,
                    action_name="extract"
                )

                logger.info(f"Created output document: {output_fileName} with {len(final_content)} characters")

                output_documents.append({
                    "documentName": output_fileName,
                    "documentData": result_data,
                    "mimeType": final_mime_type
                })
                # Create proper ActionDocument object
                action_documents.append(ActionDocument(
                    documentName=output_fileName,
                    documentData=final_content,
                    mimeType=final_mime_type
                ))

            return ActionResult.isSuccess(
                documents=output_documents
                documents=action_documents
            )
        except Exception as e:
            logger.error(f"Error extracting content: {str(e)}")

@@ -183,454 +156,27 @@ class MethodDocument(MethodBase):
                error=str(e)
            )
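A hedged invocation sketch for the reworked extract action; the parameter names follow the new docstring, while the reference string and result handling are illustrative:

    # Hypothetical call sketch for document.extract.
    parameters = {
        "documentList": ["<documentList reference>"],
        "prompt": "List all contractual deadlines with their dates.",
        "operationType": "extract_content",
        "expectedDocumentFormats": [
            {"extension": ".md", "mimeType": "text/markdown", "description": "Markdown summary"}
        ],
    }
    # result = await method_document.extract(parameters)
    # result.documents -> list[ActionDocument], documentData holding the extracted text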
    @action
    async def generate(self, parameters: Dict[str, Any]) -> ActionResult:
        """
        Convert TEXT-ONLY documents to target formats (content is read as-is; AI is used only for format conversion).
        GENERAL:
        - Purpose: Generate formatted documents and reports from source documents.
        - Input requirements: documentList (required); expectedDocumentFormats (required); optional originalDocuments, includeMetadata, mergeDocuments.
        - Any output format, e.g.: html | pdf | docx | txt | md | json | csv | xlsx

        Parameters:
        documentList (list): TEXT-ONLY documents only
        expectedDocumentFormats (list): Target formats
        originalDocuments (list, optional): Original names
        includeMetadata (bool, optional): Include metadata (default: True)
        mergeDocuments (bool, optional): Merge all documents into single output (default: False)
        """
        try:
            document_list = parameters.get("documentList", [])
            if isinstance(document_list, str):
                document_list = [document_list]
            expected_document_formats = parameters.get("expectedDocumentFormats", [])
            original_documents = parameters.get("originalDocuments", [])
            include_metadata = parameters.get("includeMetadata", True)
            merge_documents = parameters.get("mergeDocuments", False)

            if not document_list:
                return ActionResult.isFailure(
                    error="Document list is required for generation"
                )

            if not expected_document_formats or len(expected_document_formats) == 0:
                return ActionResult.isFailure(
                    error="Expected document formats specification is required"
                )

            # Get chat documents for original documents list
            chat_documents = self.services.workflow.getChatDocumentsFromDocumentList(document_list)
            logger.info(f"Found {len(chat_documents)} chat documents")

            if not chat_documents:
                return ActionResult.isFailure(
                    error="No documents found for the provided documentList reference"
                )

            # Update original documents list if not provided
            if not original_documents:
                original_documents = [doc.fileName if hasattr(doc, 'fileName') else str(doc.id) for doc in chat_documents]

            # Extract content from all documents first
            document_contents = []
            for i, chat_document in enumerate(chat_documents):
                # Extract content from this document directly - NO AI, just read the data as-is
                # This ensures we get the original text content for format conversion
                content = ""
                if hasattr(chat_document, 'fileId') and chat_document.fileId:
                    try:
                        # Get file data directly without AI processing
                        file_data = self.services.workflow.getFileData(chat_document.fileId)
                        if file_data:
                            # Check if it's text data and convert to string
                            if isinstance(file_data, bytes):
                                try:
                                    # Try to decode as UTF-8 to check if it's text
                                    content = file_data.decode('utf-8')
                                    logger.info(f"Document {i+1} ({chat_document.fileName}): Successfully decoded as UTF-8 text")
                                except UnicodeDecodeError:
                                    logger.info(f"Document {i+1} ({chat_document.fileName}): Binary data, not text - skipping")
                                    continue
                            else:
                                # Already a string
                                content = str(file_data)
                                logger.info(f"Document {i+1} ({chat_document.fileName}): Already text data")
                        else:
                            logger.warning(f"Document {i+1} ({chat_document.fileName}): No file data found")
                            continue

                        if not content.strip():
                            logger.info(f"Document {i+1} ({chat_document.fileName}): Empty text content, skipping")
                            continue

                    except Exception as e:
                        logger.warning(f"Error reading document {i+1} ({chat_document.fileName}): {str(e)}")
                        continue
                else:
                    logger.warning(f"Document {i+1} has no fileId, skipping")
                    continue

                logger.info(f"Extracted content from document {i+1}: {len(content)} characters")

                document_contents.append({
                    "document": chat_document,
                    "content": content,
                    "index": i,
                    "original_name": original_documents[i] if i < len(original_documents) else f"document_{i+1}"
                })

            if not document_contents:
                return ActionResult.isFailure(
                    error="No valid text content could be extracted from any documents"
                )

            if merge_documents and len(document_contents) > 1:
                # Merge all documents into single output
                logger.info("Merging all documents into single output")
                return await self._mergeDocuments(document_contents, expected_document_formats, include_metadata)
            else:
                # Process each document individually with its own format conversion
                logger.info("Processing documents individually")
                output_documents = []

                for item in document_contents:
                    chat_document = item["document"]
                    content = item["content"]
                    i = item["index"]
                    original_name = item["original_name"]

                    # Get the expected format for this document (or use default)
                    target_format = None
                    if i < len(expected_document_formats):
                        target_format = expected_document_formats[i]
                    elif len(expected_document_formats) > 0:
                        # If fewer formats than documents, use the last format for remaining documents
                        target_format = expected_document_formats[-1]

                    if not target_format:
                        logger.warning(f"No expected format for document {i+1}, skipping")
                        continue

                    # Use AI to convert format
                    formatted_content = await self._convertContentToFormat(content, target_format)
                    if not formatted_content:
                        logger.warning(f"Failed to format document {i+1}, skipping")
                        continue

                    target_extension = target_format.get("extension", ".txt")
                    target_mime_type = target_format.get("mimeType", "text/plain")

                    # Create output fileName
                    base_name = original_name.rsplit('.', 1)[0] if '.' in original_name else original_name
                    output_fileName = f"{base_name}_generated_{self._format_timestamp_for_filename()}{target_extension}"

                    # Create result data
                    result_data = {
                        "documentCount": 1,
                        "content": formatted_content,
                        "outputFormat": target_format,
                        "originalDocument": original_name,
                        "timestamp": self.services.utils.getUtcTimestamp()
                    }

                    logger.info(f"Generated document: {output_fileName} with {len(formatted_content)} characters")

                    output_documents.append({
                        "documentName": output_fileName,
                        "documentData": result_data,
                        "mimeType": target_mime_type
                    })

                if not output_documents:
                    return ActionResult.isFailure(
                        error="No documents could be generated"
                    )

                return ActionResult.isSuccess(
                    documents=output_documents
                )
        except Exception as e:
            logger.error(f"Error generating document: {str(e)}")
            return ActionResult.isFailure(
                error=str(e)
            )
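A minimal parameters sketch for the generate action, assuming text-only sources; values are illustrative:

    # Hypothetical call sketch for document.generate, per the parameter list above.
    parameters = {
        "documentList": ["<documentList reference>"],   # TEXT-ONLY sources
        "expectedDocumentFormats": [
            {"extension": ".html", "mimeType": "text/html"},
            {"extension": ".csv", "mimeType": "text/csv"},
        ],
        "mergeDocuments": False,   # True would route through _mergeDocuments below
    }
    # result = await method_document.generate(parameters)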
    async def _mergeDocuments(self, document_contents: List[Dict[str, Any]],
                              expected_document_formats: List[Dict[str, Any]],
                              include_metadata: bool) -> ActionResult:
        """
        Merge all documents into a single output document.
        """
        try:
            # Combine all document content
            combined_content_parts = []
            original_file_names = []

            for item in document_contents:
                chat_document = item["document"]
                content = item["content"]
                original_name = item["original_name"]

                if content.strip():
                    combined_content_parts.append(f"=== Document: {original_name} ===\n{content}\n")
                    original_file_names.append(original_name)

            if not combined_content_parts:
                return ActionResult.isFailure(
                    error="No content could be extracted from any documents for merging"
                )

            # Combine all content
            combined_content = "\n".join(combined_content_parts)
            logger.info(f"Combined content from {len(original_file_names)} documents: {len(combined_content)} characters")

            # Get the expected format for the merged output
            target_format = None
            if expected_document_formats and len(expected_document_formats) > 0:
                target_format = expected_document_formats[0]  # Use first format for merged output

            if not target_format:
                logger.warning("No expected format specified for merged output, using plain text")
                target_format = {"extension": ".txt", "mimeType": "text/plain"}

            # Use AI to convert format
            formatted_content = await self._convertContentToFormat(combined_content, target_format)
            if not formatted_content:
                logger.warning("Failed to format merged content, using raw content")
                formatted_content = combined_content

            target_extension = target_format.get("extension", ".txt")
            target_mime_type = target_format.get("mimeType", "text/plain")

            # Create output fileName for merged document
            timestamp = self._format_timestamp_for_filename()
            output_fileName = f"merged_documents_{timestamp}{target_extension}"

            # Create result data for merged document
            result_data = {
                "documentCount": len(document_contents),
                "content": formatted_content,
                "outputFormat": target_format,
                "originalDocuments": original_file_names,
                "timestamp": self.services.utils.getUtcTimestamp(),
                "merged": True
            }

            logger.info(f"Created merged document: {output_fileName} with {len(formatted_content)} characters")

            return ActionResult.isSuccess(
                documents=[{
                    "documentName": output_fileName,
                    "documentData": result_data,
                    "mimeType": target_mime_type
                }]
            )

        except Exception as e:
            logger.error(f"Error merging documents: {str(e)}")
            return ActionResult.isFailure(
                error=f"Failed to merge documents: {str(e)}"
            )
    async def _convertContentToFormat(self, content: str, target_format: Dict[str, Any]) -> str:
        """
        Helper function to convert content to the specified format using AI.
        """
        try:
            extension = target_format.get("extension", ".txt")
            mime_type = target_format.get("mimeType", "text/plain")

            logger.info(f"Converting content to format: {extension} ({mime_type})")

            # Create AI prompt for format conversion
            format_prompts = {
                ".csv": f"""
Convert the following content into a proper CSV format.

Requirements:
1. Output ONLY the CSV data without any markdown, code blocks, or additional text
2. Use appropriate headers based on the content
3. Ensure proper CSV formatting with commas and quotes where needed
4. Make the data easily readable and importable into spreadsheet applications

Content to convert:
{content}

Generate ONLY the CSV data:
""",
                ".json": f"""
Convert the following content into a proper JSON format.

Requirements:
1. Output ONLY the JSON data without any markdown, code blocks, or additional text
2. Structure the data logically with appropriate keys and values
3. Ensure valid JSON syntax
4. Make the data easily parseable and readable

Content to convert:
{content}

Generate ONLY the JSON data:
""",
                ".xml": f"""
Convert the following content into a proper XML format.

Requirements:
1. Output ONLY the XML data without any markdown, code blocks, or additional text
2. Use appropriate XML tags and structure
3. Ensure valid XML syntax
4. Make the data easily parseable and readable

Content to convert:
{content}

Generate ONLY the XML data:
""",
                ".html": f"""
Convert the following content into a proper HTML format.

Requirements:
1. Output ONLY the HTML data without any markdown, code blocks, or additional text
2. Use appropriate HTML tags and structure
3. Ensure valid HTML syntax
4. Make the data easily readable in web browsers

Content to convert:
{content}

Generate ONLY the HTML data:
""",
                ".md": f"""
Convert the following content into a proper Markdown format.

Requirements:
1. Output ONLY the Markdown data without any code blocks or additional text
2. Use appropriate Markdown syntax for headers, lists, emphasis, etc.
3. Structure the content logically
4. Make the data easily readable and convertible to other formats

Content to convert:
{content}

Generate ONLY the Markdown data:
"""
            }

            # Get the appropriate prompt for the target format
            if extension in format_prompts:
                ai_prompt = format_prompts[extension]
            else:
                # Generic format conversion
                ai_prompt = f"""
Convert the following content into {extension.upper()} format.

Requirements:
1. Output ONLY the {extension.upper()} data without any markdown, code blocks, or additional text
2. Use appropriate formatting for {extension.upper()} files
3. Ensure the output is valid and usable
4. Make the data easily readable and importable

Content to convert:
{content}

Generate ONLY the {extension.upper()} data:
"""

            # Call AI to generate the formatted content
            logger.info(f"Calling AI for {extension} format conversion")
            formatted_content = await self.services.ai.callAi(
                prompt=ai_prompt,
                documents=None,
                options=AiCallOptions(
                    operationType=OperationType.GENERATE_CONTENT,
                    priority=Priority.SPEED,
                    compressPrompt=True,
                    compressContext=False,
                    maxCost=0.02
                )
            )

            if not formatted_content or formatted_content.strip() == "":
                logger.warning("AI format conversion failed, using fallback")
                return self._generateFallbackFormattedContent(content, extension, mime_type)

            # Clean up the AI response
            formatted_content = formatted_content.strip()

            # Remove markdown code blocks if present
            if formatted_content.startswith("```") and formatted_content.endswith("```"):
                lines = formatted_content.split('\n')
                if len(lines) > 2:
                    formatted_content = '\n'.join(lines[1:-1])

            # For HTML format, check if AI returned a complete HTML document
            if extension == ".html" and (formatted_content.startswith('<!DOCTYPE') or formatted_content.startswith('<html')):
                return formatted_content

            return formatted_content

        except Exception as e:
            logger.error(f"Error in AI format conversion: {str(e)}")
            return self._generateFallbackFormattedContent(content, extension, mime_type)
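For context, a hedged sketch of how an action might use this helper; the format dict and input string are illustrative:

    # target_format = {"extension": ".json", "mimeType": "application/json"}
    # formatted = await self._convertContentToFormat("name: Alice\nscore: 7", target_format)
    # On an empty AI response or an exception, the fallback below is returned
    # instead of raising, so callers always get a string back.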
    def _generateFallbackFormattedContent(self, content: str, extension: str, mime_type: str) -> str:
        """
        Generate fallback formatted content when AI conversion fails.
        """
        try:
            if extension == ".csv":
                # Simple CSV fallback - split by lines and create basic CSV
                lines = content.strip().split('\n')
                if lines:
                    # Create a simple CSV with line numbers and content
                    csv_lines = ["Line,Content"]
                    for i, line in enumerate(lines, 1):
                        # Escape quotes and wrap in quotes if comma present
                        if ',' in line:
                            line = f'"{line.replace(chr(34), chr(34) + chr(34))}"'
                        csv_lines.append(f"{i},{line}")
                    return '\n'.join(csv_lines)
                return "Line,Content\n1,No content available"

            elif extension == ".json":
                # Simple JSON fallback
                content_escaped = content.replace('"', '\\"')
                timestamp = self.services.utils.getUtcTimestamp()
                return f'{{"content": "{content_escaped}", "format": "json", "timestamp": {timestamp}}}'

            elif extension == ".xml":
                # Simple XML fallback
                timestamp = self.services.utils.getUtcTimestamp()
                return f'<?xml version="1.0" encoding="UTF-8"?>\n<document>\n<content>{content}</content>\n<format>xml</format>\n<timestamp>{timestamp}</timestamp>\n</document>'

            elif extension == ".html":
                # Simple HTML fallback
                timestamp = int(self.services.utils.getUtcTimestamp())
                return f'<!DOCTYPE html>\n<html>\n<head><meta charset="UTF-8"><title>Generated Document</title></head>\n<body>\n<pre>{content}</pre>\n<p><em>Generated on {timestamp}</em></p>\n</body>\n</html>'

            elif extension == ".md":
                # Simple Markdown fallback
                timestamp = int(self.services.utils.getUtcTimestamp())
                return f"# Generated Document\n\n{content}\n\n---\n*Generated on {timestamp}*"

            else:
                # Generic fallback - return content as-is
                return content

        except Exception as e:
            logger.error(f"Error in fallback format conversion: {str(e)}")
            return content
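A quick trace of the CSV fallback branch above, with an illustrative input:

    # _generateFallbackFormattedContent("alpha\nbeta, gamma", ".csv", "text/csv") returns:
    # Line,Content
    # 1,alpha
    # 2,"beta, gamma"
    #
    # Lines containing commas are quoted (with embedded quotes doubled) so the
    # output stays importable into spreadsheet applications.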
    @action
    async def generateReport(self, parameters: Dict[str, Any]) -> ActionResult:
        """
        Generate HTML report from multiple documents using AI.

        Parameters:
        documentList (list): Document list reference(s)
        prompt (str): AI prompt for report generation
        title (str, optional): Report title (default: "Summary Report")
        includeMetadata (bool, optional): Include metadata (default: True)
        - documentList (list, required): Document reference(s) to include as context.
        - prompt (str, required): Instruction describing the desired document/report.
        - title (str, optional): Title for the generated document. Default: "Summary Report".
        - outputFormat (str, optional): html | pdf | docx | txt | md | json | csv | xlsx. Default: html.
        - operationType (str, optional): generate_report | analyze_documents. Default: generate_report.
        - processDocumentsIndividually (bool, optional): Process per document. Default: True.
        - chunkAllowed (bool, optional): Allow chunking for large inputs. Default: True.
        - mergeStrategy (dict, optional): Merging rules for multi-part generation.
        - includeMetadata (bool, optional): Include file metadata. Default: True.
        """
        try:
            documentList = parameters.get("documentList")

@@ -638,6 +184,15 @@ class MethodDocument(MethodBase):
                documentList = [documentList]
            prompt = parameters.get("prompt")
            title = parameters.get("title", "Summary Report")
            outputFormat = parameters.get("outputFormat", "html")
            operationType = parameters.get("operationType", "generate_report")
            processDocumentsIndividually = parameters.get("processDocumentsIndividually", True)
            chunkAllowed = parameters.get("chunkAllowed", True)
            mergeStrategy = parameters.get("mergeStrategy", {
                "groupBy": "typeGroup",
                "orderBy": "id",
                "mergeType": "concatenate"
            })
            includeMetadata = parameters.get("includeMetadata", True)

            if not documentList:

@@ -658,179 +213,54 @@ class MethodDocument(MethodBase):
                error="No documents found for the provided reference"
            )

            # Generate HTML report
            html_content = await self._generateHtmlReport(chatDocuments, title, includeMetadata, prompt)

            # Create output fileName
            timestamp = int(self.services.utils.getUtcTimestamp())
            output_fileName = f"report_{self._format_timestamp_for_filename()}.html"

            result_data = {
                "documentCount": len(chatDocuments),
                "content": html_content,
                "title": title,
                "timestamp": self.services.utils.getUtcTimestamp()
            }

            logger.info(f"Generated HTML report: {output_fileName} with {len(html_content)} characters")

            return ActionResult.isSuccess(
                documents=[{
                    "documentName": output_fileName,
                    "documentData": result_data,
                    "mimeType": "text/html"
                }]
            # Use enhanced AI service with document generation
            try:
                # Build AI call options
                ai_options = AiCallOptions(
                    operationType=operationType,
                    processDocumentsIndividually=processDocumentsIndividually,
                    compressContext=not chunkAllowed
                )

                # Use enhanced AI service with document generation
                result = await self.services.ai.callAi(
                    prompt=prompt,
                    documents=chatDocuments,
                    options=ai_options,
                    outputFormat=outputFormat,
                    title=title
                )

                if isinstance(result, dict) and result.get("success"):
                    # Extract document information from result
                    documents = result.get("documents", [])
                    if documents:
                        # Convert to ActionDocument format
                        action_documents = []
                        for doc in documents:
                            action_documents.append(ActionDocument(
                                documentName=doc["documentName"],
                                documentData=doc["documentData"],
                                mimeType=doc["mimeType"]
                            ))

                        logger.info(f"Generated {outputFormat.upper()} report: {len(action_documents)} documents")
                        return ActionResult.isSuccess(documents=action_documents)
                    else:
                        return ActionResult.isFailure(error="No documents generated")
                else:
                    error_msg = result.get("error", "Unknown error") if isinstance(result, dict) else "AI generation failed"
                    return ActionResult.isFailure(error=error_msg)

            except Exception as e:
                logger.error(f"AI generation failed: {str(e)}")
                return ActionResult.isFailure(error=str(e))

        except Exception as e:
            logger.error(f"Error generating report: {str(e)}")
            return ActionResult.isFailure(
                error=str(e)
            )
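A hedged invocation sketch for the reworked generateReport; parameter names follow the docstring above, and the document reference is illustrative:

    # Hypothetical call sketch for document.generateReport.
    parameters = {
        "documentList": ["<documentList reference>"],
        "prompt": "Summarize the quarterly figures and highlight risks.",
        "title": "Q3 Summary",
        "outputFormat": "html",
    }
    # result = await method_document.generateReport(parameters)
    # result.documents -> list[ActionDocument] carrying the generated report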
    async def _generateHtmlReport(self, chatDocuments: List[Any], title: str, includeMetadata: bool, prompt: str) -> str:
        """
        Generate a comprehensive HTML report using AI from all input documents.
        """
        try:
            # Filter out empty documents and collect content
            validDocuments = []
            allContent = []

            for doc in chatDocuments:
                content = ""
                logger.info(f"Processing document: type={type(doc)}")

                # Batch extraction approach: prepare one doc payload and call extractor
                try:
                    try:
                        data = self.services.workflow.getFileData(doc.fileId) if hasattr(doc, 'fileId') else None
                    except Exception:
                        data = None
                    extracted_list = await self.services.extraction.extractContentFromDocuments(
                        prompt="Extract readable text content for HTML report generation",
                        documents=[{
                            "id": getattr(doc, 'id', None),
                            "bytes": data or b"",
                            "fileName": getattr(doc, 'fileName', 'unknown'),
                            "mimeType": getattr(doc, 'mimeType', None) or "application/octet-stream"
                        }],
                        options={"ai": {"enabled": False}}
                    )
                    ec = extracted_list[0] if extracted_list else None
                    if ec and hasattr(ec, 'parts'):
                        for part in getattr(ec, 'parts', []):
                            try:
                                if getattr(part, 'typeGroup', '') in ("text", "table", "structure") and getattr(part, 'data', None):
                                    content += part.data + " "
                            except Exception:
                                continue
                        if content.strip():
                            logger.info(f"  Retrieved content from file: {len(content)} characters")
                        else:
                            logger.info(f"  No readable text content found (binary file)")
                    else:
                        logger.info(f"  No content extracted (binary file)")
                except Exception as e:
                    logger.info(f"  Could not extract content (binary file): {str(e)}")

                # Skip empty documents
                if content and content.strip():
                    validDocuments.append(doc)
                    allContent.append(f"Document: {doc.fileName}\n{content}\n")
                    logger.info(f"  Added document to valid documents list")
                else:
                    logger.info(f"  Skipping document with no readable text content")

            if not validDocuments:
                # No readable content; return a minimal valid HTML document
                timestamp = int(self.services.utils.getUtcTimestamp())
                return f"<!DOCTYPE html><html><head><meta charset=\"UTF-8\"><title>{title}</title></head><body><h1>{title}</h1><p>No usable content found.</p><p>Generated: {timestamp}</p></body></html>"

            # Create AI prompt for comprehensive report generation using user's prompt
            combinedContent = "\n\n".join(allContent)
            aiPrompt = f"""
{prompt}

Report Title: {title}

OUTPUT POLICY:
- Return ONLY a complete, raw HTML document.
- Start with: <!DOCTYPE html>
- Must include: <html>, <head> (with <meta charset="UTF-8"> and <title>), and <body>.
- The response must be valid, self-contained HTML suitable for saving as .html.

Structure:
- Title and short subtitle
- Executive summary
- Sections with clear headings
- Use tables for structured data when helpful
- Key findings and recommendations
- Generation date and number of documents

Quality and design requirements:
- Use clear, professional, and accessible styling in a <style> block
- Apply clean layout, spacing, and visual hierarchy for headings
- Keep HTML and CSS standards-compliant and lightweight

SOURCE DOCUMENT CONTENT:
---START---
{combinedContent}
---END---
"""

            # Call AI to generate the report
            logger.info(f"Generating AI report for {len(validDocuments)} documents")
            # Build ChatDocument list from chatDocuments
            documents = []
            try:
                for d in validDocuments:
                    try:
                        data = self.services.workflow.getFileData(d.fileId) if hasattr(d, 'fileId') else None
                        if data:
                            documents.append(ChatDocument(fileData=data, fileName=d.fileName, mimeType=d.mimeType))
                    except Exception:
                        continue
            except Exception:
                documents = None
            aiReport = await self.services.ai.callAi(
                prompt=aiPrompt,
                documents=documents or None,
                options=AiCallOptions(
                    operationType=OperationType.GENERATE_CONTENT,  # Using GENERATE_CONTENT for report generation
                    priority=Priority.QUALITY,
                    compressPrompt=False,
                    compressContext=True,
                    processDocumentsIndividually=True,
                    resultFormat="html",
                    processingMode="detailed",
                    maxCost=0.08,
                    maxProcessingTime=90
                )
            )

            # If AI call fails, return error - AI is crucial for report generation
            if not aiReport or aiReport.strip() == "":
                logger.error("AI report generation failed - AI is crucial for this action")
                raise Exception("AI report generation failed - AI is required for report generation")

            # Clean up the AI response and ensure it's valid HTML
            aiReport = aiReport.strip()

            # Normalize: strip code fences if present
            if aiReport.startswith("```") and aiReport.endswith("```"):
                lines = aiReport.split('\n')
                if len(lines) >= 2:
                    aiReport = '\n'.join(lines[1:-1]).strip()

            cleaned = aiReport.strip()

            # Return exactly what we have (no wrapping)
            return cleaned

        except Exception as e:
            logger.error(f"Error generating AI report: {str(e)}")
            # Re-raise the error - AI is crucial for report generation
            raise
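The fence-stripping at the end of `_generateHtmlReport` mirrors the cleanup in `_convertContentToFormat`; as a standalone, testable sketch (the function name is illustrative):

    def strip_code_fences(text: str) -> str:
        """Remove a single wrapping ``` fence pair, keeping the inner lines."""
        text = text.strip()
        if text.startswith("```") and text.endswith("```"):
            lines = text.split("\n")
            if len(lines) >= 2:
                text = "\n".join(lines[1:-1]).strip()
        return text

    # strip_code_fences("```html\n<!DOCTYPE html>...\n```") -> "<!DOCTYPE html>..."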
File diff suppressed because it is too large
@@ -14,7 +14,7 @@ import aiohttp
import asyncio

from modules.workflows.methods.methodBase import MethodBase, action
from modules.datamodels.datamodelWorkflow import ActionResult
from modules.datamodels.datamodelChat import ActionResult, ActionDocument

logger = logging.getLogger(__name__)
@@ -443,13 +443,16 @@ class MethodSharepoint(MethodBase):
    @action
    async def findDocumentPath(self, parameters: Dict[str, Any]) -> ActionResult:
        """
        Find documents/folders by searching their NAMES across SharePoint sites.
        GENERAL:
        - Purpose: Find documents and folders by name/path across sites.
        - Input requirements: connectionReference (required); searchQuery (required); optional site, maxResults.
        - Output format: JSON with found items and paths.

        Parameters:
        connectionReference (str): Microsoft connection reference
        site (str, optional): Site hint (e.g., "SSS", "KM XYZ")
        searchQuery (str): Search query - "budget", "folders:alpha", "files:budget", "/Documents/Project1", "namepart1 namepart2 namepart3". Use "folders:" prefix when user wants to store files or find folders
        maxResults (int, optional): Max results (default: 100)
        - connectionReference (str, required): Microsoft connection label.
        - site (str, optional): Site hint.
        - searchQuery (str, required): Search terms or path.
        - maxResults (int, optional): Maximum items to return. Default: 100.
        """
        try:
            connectionReference = parameters.get("connectionReference")
@@ -799,11 +802,11 @@ class MethodSharepoint(MethodBase):
            return ActionResult(
                success=True,
                documents=[
                    {
                        "documentName": f"sharepoint_find_path_{self._format_timestamp_for_filename()}{output_extension}",
                        "documentData": result_data,
                        "mimeType": output_mime_type
                    }
                    ActionDocument(
                        documentName=f"sharepoint_find_path_{self._format_timestamp_for_filename()}{output_extension}",
                        documentData=json.dumps(result_data, indent=2),
                        mimeType=output_mime_type
                    )
                ]
            )
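The same dict-to-ActionDocument migration repeats in the three SharePoint hunks below; the pattern in isolation, with an illustrative name and payload (ActionDocument comes from modules.datamodels.datamodelChat per the import hunk above):

    import json

    # Before: plain dicts inside ActionResult.documents.
    # After: typed ActionDocument entries whose payload is JSON-serialized text.
    doc = ActionDocument(
        documentName="sharepoint_find_path_20240101_120000.json",  # illustrative name
        documentData=json.dumps({"items": []}, indent=2),
        mimeType="application/json",
    )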
@@ -814,14 +817,17 @@ class MethodSharepoint(MethodBase):
    @action
    async def readDocuments(self, parameters: Dict[str, Any]) -> ActionResult:
        """
        Read documents from SharePoint across all accessible sites
        GENERAL:
        - Purpose: Read documents from SharePoint and extract content/metadata.
        - Input requirements: connectionReference (required); documentList (required); optional pathObject or pathQuery; includeMetadata.
        - Output format: JSON with read results per document.

        Parameters:
        documentList (list): Reference(s) to the document list to read
        connectionReference (str): Reference to the Microsoft connection
        pathObject (str, optional): Path object to locate documents. This can ONLY be a reference to a result from sharepoint.findDocumentPath action
        pathQuery (str, optional): Path query to locate documents, only if no pathObject is provided (e.g., "/Documents/Project1", "*" for all sites)
        includeMetadata (bool, optional): Whether to include metadata (default: True)
        - documentList (list, required): Document list reference(s) to read.
        - connectionReference (str, required): Microsoft connection label.
        - pathObject (str, optional): Reference to a previous path result.
        - pathQuery (str, optional): Path query if no pathObject.
        - includeMetadata (bool, optional): Include metadata. Default: True.
        """
        try:
            documentList = parameters.get("documentList")

@@ -1073,11 +1079,11 @@ class MethodSharepoint(MethodBase):
            return ActionResult(
                success=True,
                documents=[
                    {
                        "documentName": f"sharepoint_documents_{self._format_timestamp_for_filename()}{output_extension}",
                        "documentData": result_data,
                        "mimeType": output_mime_type
                    }
                    ActionDocument(
                        documentName=f"sharepoint_documents_{self._format_timestamp_for_filename()}{output_extension}",
                        documentData=json.dumps(result_data, indent=2),
                        mimeType=output_mime_type
                    )
                ]
            )
        except Exception as e:

@@ -1090,14 +1096,17 @@ class MethodSharepoint(MethodBase):
    @action
    async def uploadDocument(self, parameters: Dict[str, Any]) -> ActionResult:
        """
        Upload documents to SharePoint across accessible sites
        GENERAL:
        - Purpose: Upload documents to SharePoint.
        - Input requirements: connectionReference (required); documentList (required); fileNames (required); optional pathObject or pathQuery.
        - Output format: JSON with upload status and file info.

        Parameters:
        connectionReference (str): Reference to the Microsoft connection
        pathObject (str, optional): Path object to locate documents. This can ONLY be a reference to a result from sharepoint.findDocumentPath action
        pathQuery (str, optional): Path query to locate documents, only if no pathObject is provided (e.g., "/Documents/Project1", "*" for all sites)
        documentList (list): Reference(s) to the document list to upload
        fileNames (List[str]): List of names for the uploaded files
        - connectionReference (str, required): Microsoft connection label.
        - pathObject (str, optional): Reference to a previous path result.
        - pathQuery (str, optional): Upload target path if no pathObject.
        - documentList (list, required): Document reference(s) to upload.
        - fileNames (list, required): Output file names.
        """
        try:
            connectionReference = parameters.get("connectionReference")

@@ -1424,11 +1433,11 @@ class MethodSharepoint(MethodBase):
            return ActionResult(
                success=True,
                documents=[
                    {
                        "documentName": f"sharepoint_upload_{self._format_timestamp_for_filename()}{output_extension}",
                        "documentData": result_data,
                        "mimeType": output_mime_type
                    }
                    ActionDocument(
                        documentName=f"sharepoint_upload_{self._format_timestamp_for_filename()}{output_extension}",
                        documentData=json.dumps(result_data, indent=2),
                        mimeType=output_mime_type
                    )
                ]
            )

@@ -1442,13 +1451,16 @@ class MethodSharepoint(MethodBase):
    @action
    async def listDocuments(self, parameters: Dict[str, Any]) -> ActionResult:
        """
        List documents in SharePoint folders across accessible sites
        GENERAL:
        - Purpose: List documents and folders in SharePoint paths across sites.
        - Input requirements: connectionReference (required); optional pathObject or pathQuery; includeSubfolders.
        - Output format: JSON with folder items and metadata.

        Parameters:
        connectionReference (str): Reference to the Microsoft connection
        pathObject (str, optional): Path object to locate documents. This can ONLY be a reference to a result from sharepoint.findDocumentPath action
        pathQuery (str, optional): Path query to locate documents, only if no pathObject is provided (e.g., "/Documents/Project1", "*" for all sites)
        includeSubfolders (bool, optional): Whether to include subfolders (default: False)
        - connectionReference (str, required): Microsoft connection label.
        - pathObject (str, optional): Reference to a previous path result.
        - pathQuery (str, optional): Path query if no pathObject.
        - includeSubfolders (bool, optional): Include one level of subfolders. Default: False.
        """
        try:
            connectionReference = parameters.get("connectionReference")

@@ -1817,11 +1829,11 @@ class MethodSharepoint(MethodBase):
            return ActionResult(
                success=True,
                documents=[
                    {
                        "documentName": f"sharepoint_document_list_{self._format_timestamp_for_filename()}{output_extension}",
                        "documentData": result_data,
                        "mimeType": output_mime_type
                    }
                    ActionDocument(
                        documentName=f"sharepoint_document_list_{self._format_timestamp_for_filename()}{output_extension}",
                        documentData=json.dumps(result_data, indent=2),
                        mimeType=output_mime_type
                    )
                ]
            )
@@ -1,437 +0,0 @@
import logging
import csv
import io
import json as _json
from typing import Any, Dict
from modules.workflows.methods.methodBase import MethodBase, action
from modules.datamodels.datamodelWorkflow import ActionResult, ActionDocument
from modules.datamodels.datamodelAi import AiCallOptions, OperationType, Priority
from modules.datamodels.datamodelWeb import (
    WebSearchRequest,
    WebCrawlRequest,
    WebScrapeRequest,
)


logger = logging.getLogger(__name__)


class MethodWeb(MethodBase):
    """Web method implementation for web operations."""

    def __init__(self, services):
        super().__init__(services)
        self.name = "web"
        self.description = "Web search, crawling, and scraping operations using Tavily"

    @action
    async def search(self, parameters: Dict[str, Any]) -> ActionResult:
        """Perform a web search and output a CSV with the found URLs. Each result row contains columns "url" and "title".

        Parameters:
            query (str, required): Search query.
            maxResults (int, optional): Max number of results. Default: 10.
            searchDepth ("basic"|"advanced", optional): Search depth. Default: provider default.
            timeRange ("d"|"w"|"m"|"y", optional): Limit to last day/week/month/year.
            topic ("general"|"news"|"academic", optional): Result domain preference.
            includeDomains (list[str], optional): Only include these domains.
            excludeDomains (list[str], optional): Exclude these domains.
            language (str, optional): ISO code like "de", "en" to bias results.
            includeAnswer (bool, optional): Ask provider to generate a short answer.
            includeRawContent (bool, optional): Include raw content where possible.
        """

        try:
            # Prepare request data (generic, no region/language bias)
            raw_query = parameters.get("query")
            max_results = parameters.get("maxResults", 10)

            if not raw_query or not isinstance(raw_query, str):
                return ActionResult(success=False, error="Search query is required")

            web_search_request = WebSearchRequest(
                query=raw_query.strip(),
                max_results=max_results,
                search_depth=parameters.get("searchDepth"),
                time_range=parameters.get("timeRange"),
                topic=parameters.get("topic"),
                include_domains=parameters.get("includeDomains"),
                exclude_domains=parameters.get("excludeDomains"),
                language=parameters.get("language"),
                include_answer=parameters.get("includeAnswer"),
                include_raw_content=parameters.get("includeRawContent"),
            )

            # Perform request via centralized service wrappers
            web_search_result = await self.services.web.webSearch(web_search_request)

            # Convert search results to CSV format (generic)
            if web_search_result.success and web_search_result.documents:
                csv_content = self._convert_web_result_to_csv(web_search_result)
                csv_document = ActionDocument(
                    documentName=f"web_search_results.csv",
                    documentData=csv_content,
                    mimeType="text/csv"
                )
                return ActionResult(success=True, documents=[csv_document])
            else:
                return web_search_result

        except Exception as e:
            return ActionResult(success=False, error=str(e))


    def _read_csv_with_urls(self, csv_content: str) -> list:
        """Read CSV content and extract URLs from url,title or title,url format (both ; and , delimiters)"""
        urls = []

        # Try both semicolon and comma delimiters
        for delimiter in [';', ',']:
            try:
                reader = csv.DictReader(io.StringIO(csv_content), delimiter=delimiter)
                for row in reader:
                    # Look for url column (case insensitive)
                    url = None
                    for key in row.keys():
                        if key.lower() == 'url':
                            url = row[key].strip()
                            break

                    if url and (url.startswith('http://') or url.startswith('https://')):
                        urls.append(url)

                # If we found URLs with this delimiter, return them
                if urls:
                    return urls

            except Exception:
                # Try next delimiter
                continue

        # If no valid CSV found, try simple text parsing as fallback
        lines = csv_content.split('\n')
        for line in lines:
            line = line.strip()
            if line and (line.startswith('http://') or line.startswith('https://')):
                urls.append(line)

        return urls

    @action
    async def crawl(self, parameters: Dict[str, Any]) -> ActionResult:
        """Crawl a list of URLs and extract text content.

        Parameters:
            documentList (list[str]|str, required): Reference(s) to documents containing URLs (e.g., CSV from search). Can be a single ref or list.
            expectedDocumentFormats (list, optional): Hint for downstream handling.
            extractDepth ("basic"|"advanced", optional): Extraction depth. Default: "advanced".
            format ("text"|"markdown", optional): Output format. Default: "text".
        """
        try:
            document_list = parameters.get("documentList")

            # Normalize to list if a single string reference is provided
            if isinstance(document_list, str):
                document_list = [document_list]

            if not document_list:
                return ActionResult(
                    success=False, error="No document list reference provided."
                )

            # Resolve document list reference to ChatDocument objects
            chat_documents = self.services.workflow.getChatDocumentsFromDocumentList(document_list)

            if not chat_documents:
                return ActionResult(
                    success=False,
                    error=f"No documents found for reference: {document_list}",
                )

            # Extract URLs from all documents and combine them
            all_urls = []
            import json
            import re

            for i, doc in enumerate(chat_documents):
                logger.info(f"Processing document {i+1}/{len(chat_documents)}: {doc.fileName}")

                # Get file data using the service center
                file_data = self.services.workflow.getFileData(doc.fileId)
                if not file_data:
                    logger.warning(f"Could not retrieve file data for document: {doc.fileName}")
                    continue

                content = file_data.decode("utf-8")

                # Try to parse as CSV first (for new CSV format)
                if doc.fileName.lower().endswith('.csv') or 'csv' in doc.mimeType.lower():
                    logger.info(f"Processing CSV file: {doc.fileName}")
                    doc_urls = self._read_csv_with_urls(content)
                else:
                    # Parse JSON to extract URLs from search results
                    try:
                        # The document structure from WebSearchActionResult
                        search_data = json.loads(content)

                        # Extract URLs from the search results structure
                        doc_urls = []
                        if isinstance(search_data, dict):
                            # Handle the document structure: documentData contains the actual search results
                            doc_data = search_data.get("documentData", search_data)
                            if "results" in doc_data and isinstance(doc_data["results"], list):
                                doc_urls = [
                                    result["url"]
                                    for result in doc_data["results"]
                                    if isinstance(result, dict) and "url" in result
                                ]
                            elif "urls" in doc_data and isinstance(doc_data["urls"], list):
                                # Fallback: if URLs are stored directly in a 'urls' field
                                doc_urls = [url for url in doc_data["urls"] if isinstance(url, str)]

                        # Fallback: try to parse as plain text with regex (for backward compatibility)
                        if not doc_urls:
                            logger.warning(
                                f"Could not extract URLs from JSON structure in {doc.fileName}, trying plain text parsing"
                            )
                            doc_urls = re.split(r"[\n,;]+", content)
                            doc_urls = [
                                u.strip()
                                for u in doc_urls
                                if u.strip()
                                and (
                                    u.strip().startswith("http://")
                                    or u.strip().startswith("https://")
                                )
                            ]

                    except json.JSONDecodeError:
                        # Fallback to plain text parsing if JSON parsing fails
                        logger.warning(f"Document {doc.fileName} is not valid JSON, trying plain text parsing")
                        doc_urls = re.split(r"[\n,;]+", content)
                        doc_urls = [
                            u.strip()
                            for u in doc_urls
                            if u.strip()
                            and (
                                u.strip().startswith("http://")
                                or u.strip().startswith("https://")
                            )
                        ]

                if doc_urls:
                    all_urls.extend(doc_urls)
                    logger.info(f"Extracted {len(doc_urls)} URLs from {doc.fileName}")
                else:
                    logger.warning(f"No valid URLs found in document: {doc.fileName}")

            if not all_urls:
                return ActionResult(
                    success=False, error="No valid URLs found in any of the documents."
                )

            # Remove duplicates while preserving order
            unique_urls = list(dict.fromkeys(all_urls))
            logger.info(f"Extracted {len(unique_urls)} unique URLs from {len(chat_documents)} documents")

            # Prepare request data with normalization
            allowed_extract_depth = {"basic", "advanced"}
            allowed_formats = {"text", "markdown"}
            extract_depth = parameters.get("extractDepth")
            if extract_depth and extract_depth not in allowed_extract_depth:
                logger.warning(f"Invalid extractDepth '{extract_depth}' provided. Falling back to 'advanced'.")
                extract_depth = "advanced"
            fmt = parameters.get("format")
            if fmt and fmt not in allowed_formats:
                logger.warning(f"Invalid format '{fmt}' provided. Falling back to 'text'.")
                fmt = "text"

            web_crawl_request = WebCrawlRequest(
                urls=unique_urls,
                extract_depth=extract_depth,
                format=fmt,
            )

            # Perform request via centralized service wrappers
            web_crawl_result = await self.services.web.webCrawl(web_crawl_request)

            # Convert and enrich with concise summaries per URL for better context
            if web_crawl_result.success:
                try:
                    doc = web_crawl_result.documents[0]
                    results = getattr(doc.documentData, "results", [])
                    enriched = []
                    # Summarize each result briefly using AI for added context
                    for item in results:
                        url = str(getattr(item, "url", ""))
                        content = str(getattr(item, "content", ""))
                        summary = ""
                        try:
                            if content:
                                prompt = (
                                    "Summarize the following webpage content in 3-5 concise bullet points. "
                                    "Focus on key points, figures, named entities (companies/institutions), and location context. "
                                    "Return only bullet points without any preface."
                                )
                                context = content[:4000]
                                # Centralized AI summary (balanced analyse_content)
                                summary = await self.services.ai.callAi(
                                    prompt=prompt,
                                    documents=None,
                                    options=AiCallOptions(
                                        operationType=OperationType.ANALYSE_CONTENT,
                                        priority=Priority.BALANCED,
                                        compressPrompt=True,
                                        compressContext=False,
                                        processingMode="advanced",
                                        maxCost=0.05,
                                        maxProcessingTime=30
                                    )
                                )
                                summary = summary.strip()
                        except Exception:
                            summary = ""
                        enriched.append({
                            "url": url,
                            "summary": summary,
                            "snippet": content[:500]
                        })

                    import json as _json
                    payload = {
                        "success": True,
                        "total_count": len(enriched),
                        "results": enriched,
                    }
                    json_content = _json.dumps(payload, ensure_ascii=False, indent=2)
                except Exception:
                    # Fallback to original conversion
                    json_content = self._convert_web_result_to_json(web_crawl_result)

                json_document = ActionDocument(
                    documentName=f"web_crawl_results.json",
                    documentData=json_content,
                    mimeType="application/json"
                )
                return ActionResult(success=True, documents=[json_document])
            else:
                return web_crawl_result

        except Exception as e:
            logger.error(f"Error in crawl method: {str(e)}")
            return ActionResult(success=False, error=str(e))

    @action
    async def scrape(self, parameters: Dict[str, Any]) -> ActionResult:
        """Search and then crawl the found URLs in one step. To use for market analysis, web research, internet searches

        Parameters:
            query (str, required): Search query.
            maxResults (int, optional): Max number of results. Default: 10.
            searchDepth ("basic"|"advanced", optional): Search depth.
            timeRange ("d"|"w"|"m"|"y", optional): Time window.
            topic ("general"|"news"|"academic", optional): Result domain preference.
            includeDomains (list[str], optional): Only include these domains.
            excludeDomains (list[str], optional): Exclude these domains.
            language (str, optional): ISO language bias.
            includeAnswer (bool, optional): Ask provider to include an answer.
            includeRawContent (bool, optional): Include raw content where possible.
            extractDepth ("basic"|"advanced", optional): Crawl extraction depth. Default: "advanced".
            format ("text"|"markdown", optional): Crawl output format. Default: "text".
        """
        try:
            query = parameters.get("query")
            max_results = parameters.get("maxResults", 10)
            # Normalize optional enums to avoid validation errors
            allowed_search_depth = {"basic", "advanced"}
            allowed_extract_depth = {"basic", "advanced"}
            allowed_formats = {"text", "markdown"}

            search_depth = parameters.get("searchDepth")
            if search_depth and search_depth not in allowed_search_depth:
                logger.warning(f"Invalid searchDepth '{search_depth}' provided. Falling back to None.")
                search_depth = None

            extract_depth = parameters.get("extractDepth")
            if extract_depth and extract_depth not in allowed_extract_depth:
                logger.warning(f"Invalid extractDepth '{extract_depth}' provided. Falling back to 'advanced'.")
                extract_depth = "advanced"

            fmt = parameters.get("format")
            if fmt and fmt not in allowed_formats:
                logger.warning(f"Invalid format '{fmt}' provided. Falling back to 'text'.")
                fmt = "text"

            if not query:
                return ActionResult(success=False, error="Search query is required")

            # Prepare request data
            web_scrape_request = WebScrapeRequest(
                query=query,
                max_results=max_results,
                search_depth=search_depth,
                time_range=parameters.get("timeRange"),
                topic=parameters.get("topic"),
                include_domains=parameters.get("includeDomains"),
                exclude_domains=parameters.get("excludeDomains"),
                language=parameters.get("language"),
                include_answer=parameters.get("includeAnswer"),
                include_raw_content=parameters.get("includeRawContent"),
                extract_depth=extract_depth,
                format=fmt,
            )

            # Perform request via centralized service wrappers
            web_scrape_result = await self.services.web.webScrape(web_scrape_request)

            # Convert to proper JSON format
            if web_scrape_result.success:
                json_content = self._convert_web_result_to_json(web_scrape_result)
                json_document = ActionDocument(
                    documentName=f"web_scrape_results.json",
                    documentData=json_content,
                    mimeType="application/json"
                )
                return ActionResult(
                    success=True,
                    documents=[json_document]
                )
            else:
                return web_scrape_result

        except Exception as e:
            return ActionResult(success=False, error=str(e))

    # Helpers
    def _convert_web_result_to_json(self, web_result):
        if not getattr(web_result, 'success', False) or not getattr(web_result, 'documents', None):
            return _json.dumps({"success": getattr(web_result, 'success', False), "error": getattr(web_result, 'error', None)})
        document_data = web_result.documents[0].documentData
        result_dict = {
            "success": True,
            "results": [
                {
                    "url": str(getattr(result, 'url', "")),
                    "content": getattr(result, 'content', "")
                }
                for result in getattr(document_data, 'results', [])
            ],
            "total_count": getattr(document_data, 'total_count', 0)
        }
        if hasattr(document_data, 'urls'):
            result_dict["urls"] = [str(url) for url in getattr(document_data, 'urls', [])]
        elif hasattr(document_data, 'query'):
            result_dict["query"] = getattr(document_data, 'query', None)
        return _json.dumps(result_dict, indent=2, ensure_ascii=False)

    def _convert_web_result_to_csv(self, web_search_result):
        if not getattr(web_search_result, 'success', False) or not getattr(web_search_result, 'documents', None):
            return ""
        output = io.StringIO()
        writer = csv.writer(output, delimiter=';')
        writer.writerow(['url', 'title'])
        document_data = web_search_result.documents[0].documentData
        for result in getattr(document_data, 'results', []):
            writer.writerow([str(getattr(result, 'url', "")), getattr(result, 'title', "")])
        return output.getvalue()
9
modules/workflows/processing/adaptive/__init__.py
Normal file
@@ -0,0 +1,9 @@
# adaptive module for React mode
# Provides adaptive learning capabilities

from .intentAnalyzer import IntentAnalyzer, DataType, ExpectedFormat
from .contentValidator import ContentValidator
from .learningEngine import LearningEngine
from .progressTracker import ProgressTracker

__all__ = ['IntentAnalyzer', 'ContentValidator', 'LearningEngine', 'ProgressTracker', 'DataType', 'ExpectedFormat']
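
A rough wiring sketch for the four components this package exports, assembled into one analyze-validate-learn iteration. The stub document and the feedback values are assumptions; the real call sites live in the React-mode processor, which is outside this excerpt.

    from modules.workflows.processing.adaptive import (
        IntentAnalyzer, ContentValidator, LearningEngine, ProgressTracker
    )

    class StubDoc:  # minimal stand-in for an action document
        documentName = "primes.txt"
        documentData = {"content": "2, 3, 5, 7, 11, 13, 17, 19, 23, 29"}

    analyzer, validator = IntentAnalyzer(), ContentValidator()
    engine, tracker = LearningEngine(), ProgressTracker()

    intent = analyzer.analyzeUserIntent("List the first 10 prime numbers", context=None)
    validation = validator.validateContent([StubDoc()], intent)
    tracker.updateProgress(None, validation, intent)
    engine.learnFromFeedback({
        "actionAttempted": "ai.process",                  # hypothetical action name
        "qualityScore": validation["qualityScore"],
        "intentMatchScore": validation["qualityScore"],   # assumption: quality reused as proxy
    }, context=None, intent=intent)
    if tracker.shouldContinue(tracker.getCurrentProgress(), validation):
        strategy = engine.getImprovedStrategy(context=None, intent=intent)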
308
modules/workflows/processing/adaptive/contentValidator.py
Normal file
@@ -0,0 +1,308 @@
# contentValidator.py
# Content validation for adaptive React mode

import re
import logging
from typing import List, Dict, Any

logger = logging.getLogger(__name__)

class ContentValidator:
    """Validates delivered content against user intent"""

    def __init__(self):
        pass

    def validateContent(self, documents: List[Any], intent: Dict[str, Any]) -> Dict[str, Any]:
        """Validates delivered content against user intent"""
        try:
            validationDetails = []

            for doc in documents:
                content = self._extractContent(doc)
                detail = self._validateSingleDocument(content, doc, intent)
                validationDetails.append(detail)

            # Calculate overall success: every criterion of every document must be met
            # (the inner all() is required; without it any non-empty criteria list would count as success)
            overallSuccess = all(all(detail.get("successCriteriaMet", [False])) for detail in validationDetails)

            # Calculate quality score
            qualityScore = self._calculateQualityScore(validationDetails)

            # Generate improvement suggestions
            improvementSuggestions = self._generateImprovementSuggestions(validationDetails, intent)

            return {
                "overallSuccess": overallSuccess,
                "qualityScore": qualityScore,
                "validationDetails": validationDetails,
                "improvementSuggestions": improvementSuggestions
            }

        except Exception as e:
            logger.error(f"Error validating content: {str(e)}")
            return self._createFailedValidationResult(str(e))

    def _extractContent(self, doc: Any) -> str:
        """Extracts content from a document"""
        try:
            if hasattr(doc, 'documentData'):
                data = doc.documentData
                if isinstance(data, dict) and 'content' in data:
                    return str(data['content'])
                else:
                    return str(data)
            return ""
        except Exception:
            return ""

    def _validateSingleDocument(self, content: str, doc: Any, intent: Dict[str, Any]) -> Dict[str, Any]:
        """Validates a single document against intent"""
        # Check data type match
        dataTypeMatch = self._checkDataTypeMatch(content, intent.get("dataType", "unknown"))

        # Check format match
        formatMatch = self._checkFormatMatch(content, intent.get("expectedFormat", "unknown"))

        # Calculate quality score
        qualityScore = self._calculateDocumentQualityScore(content, intent)

        # Check success criteria
        successCriteriaMet = self._checkSuccessCriteria(content, intent)

        # Identify specific issues
        specificIssues = self._identifySpecificIssues(content, intent)

        # Generate improvement suggestions
        improvementSuggestions = self._generateDocumentImprovementSuggestions(content, intent)

        return {
            "documentName": getattr(doc, 'documentName', 'Unknown'),
            "dataTypeMatch": dataTypeMatch,
            "formatMatch": formatMatch,
            "qualityScore": qualityScore,
            "successCriteriaMet": successCriteriaMet,
            "specificIssues": specificIssues,
            "improvementSuggestions": improvementSuggestions
        }

    def _checkDataTypeMatch(self, content: str, dataType: str) -> bool:
        """Checks if content matches the expected data type"""
        if dataType == "numbers":
            return self._containsNumbers(content)
        elif dataType == "text":
            return self._containsText(content)
        elif dataType == "documents":
            return self._containsDocumentContent(content)
        elif dataType == "analysis":
            return self._containsAnalysis(content)
        elif dataType == "code":
            return self._containsCode(content)
        else:
            return True  # Unknown type, assume match

    def _containsNumbers(self, content: str) -> bool:
        """Checks if content contains actual numbers (not code)"""
        # Look for actual numbers in the content
        numbers = re.findall(r'\b\d+\b', content)

        # Check if it's code (contains function definitions, etc.)
        isCode = any(keyword in content.lower() for keyword in [
            'def ', 'function', 'import ', 'class ', 'for ', 'while ', 'if ',
            'return', 'print(', 'console.log', 'public ', 'private '
        ])

        # If it's code, it doesn't contain actual numbers
        if isCode:
            return False

        # If it has numbers and it's not code, it contains actual numbers
        return len(numbers) > 0

    def _containsText(self, content: str) -> bool:
        """Checks if content contains readable text"""
        # Remove numbers and special characters
        textContent = re.sub(r'[^\w\s]', '', content)
        words = textContent.split()

        # Check if there are enough words to be considered text
        return len(words) > 5

    def _containsDocumentContent(self, content: str) -> bool:
        """Checks if content is suitable for document creation"""
        # Check for structured content
        hasStructure = any(indicator in content for indicator in [
            '\n', '\t', '|', '-', '*', '1.', '2.', '•', '◦'
        ])

        # Check for meaningful content
        hasMeaningfulContent = len(content.strip()) > 50

        return hasStructure and hasMeaningfulContent

    def _containsAnalysis(self, content: str) -> bool:
        """Checks if content contains analysis"""
        analysisIndicators = [
            'analysis', 'findings', 'conclusion', 'summary', 'insights',
            'trends', 'patterns', 'comparison', 'evaluation', 'assessment'
        ]

        contentLower = content.lower()
        return any(indicator in contentLower for indicator in analysisIndicators)

    def _containsCode(self, content: str) -> bool:
        """Checks if content contains code"""
        codeIndicators = [
            'def ', 'function', 'import ', 'class ', 'for ', 'while ', 'if ',
            'return', 'print(', 'console.log', 'public ', 'private ', 'void ',
            'int ', 'string ', 'var ', 'let ', 'const '
        ]

        contentLower = content.lower()
        return any(indicator in contentLower for indicator in codeIndicators)

    def _checkFormatMatch(self, content: str, expectedFormat: str) -> bool:
        """Checks if content matches expected format"""
        if expectedFormat == "raw_data":
            # Raw data should be simple, not heavily formatted
            return not any(indicator in content for indicator in [
                '<html>', '<div>', '<table>', '## ', '### ', '**', '__'
            ])
        elif expectedFormat == "formatted":
            # Formatted content should have structure
            return any(indicator in content for indicator in [
                '\n', '\t', '|', '-', '*', '1.', '2.', '•'
            ])
        elif expectedFormat == "structured":
            # Structured content should have clear organization
            return any(indicator in content for indicator in [
                '{', '}', '[', ']', '|', '\t', '  '
            ])
        else:
            return True  # Unknown format, assume match

    def _checkSuccessCriteria(self, content: str, intent: Dict[str, Any]) -> List[bool]:
        """Checks if content meets success criteria"""
        criteriaMet = []
        successCriteria = intent.get("successCriteria", [])

        for criterion in successCriteria:
            if 'prime numbers' in criterion.lower():
                # Check if content contains actual prime numbers, not code
                hasNumbers = bool(re.search(r'\b\d+\b', content))
                isNotCode = not any(keyword in content.lower() for keyword in [
                    'def ', 'function', 'import ', 'class '
                ])
                criteriaMet.append(hasNumbers and isNotCode)
            elif 'document' in criterion.lower():
                # Check if content is suitable for document creation
                hasStructure = any(indicator in content for indicator in [
                    '\n', '\t', '|', '-', '*', '1.', '2.'
                ])
                criteriaMet.append(hasStructure)
            elif 'format' in criterion.lower():
                # Check if content is properly formatted
                hasFormatting = any(indicator in content for indicator in [
                    '\n', '\t', '|', '-', '*', '1.', '2.', '•'
                ])
                criteriaMet.append(hasFormatting)
            else:
                # Generic check - content should not be empty
                criteriaMet.append(len(content.strip()) > 0)

        return criteriaMet

    def _calculateDocumentQualityScore(self, content: str, intent: Dict[str, Any]) -> float:
        """Calculates quality score for a single document"""
        score = 0.0

        # Base score for having content
        if len(content.strip()) > 0:
            score += 0.2

        # Score for data type match
        if self._checkDataTypeMatch(content, intent.get("dataType", "unknown")):
            score += 0.3

        # Score for format match
        if self._checkFormatMatch(content, intent.get("expectedFormat", "unknown")):
            score += 0.2

        # Score for success criteria
        successCriteriaMet = self._checkSuccessCriteria(content, intent)
        if successCriteriaMet:
            successRate = sum(successCriteriaMet) / len(successCriteriaMet)
            score += 0.3 * successRate

        return min(score, 1.0)

    def _calculateQualityScore(self, validationDetails: List[Dict[str, Any]]) -> float:
        """Calculates overall quality score from validation details"""
        if not validationDetails:
            return 0.0

        totalScore = sum(detail.get("qualityScore", 0) for detail in validationDetails)
        return totalScore / len(validationDetails)

    def _identifySpecificIssues(self, content: str, intent: Dict[str, Any]) -> List[str]:
        """Identifies specific issues with the content"""
        issues = []

        # Check for common issues
        if intent.get("dataType") == "numbers" and self._containsCode(content):
            issues.append("Content contains code instead of actual numbers")

        if intent.get("expectedFormat") == "raw_data" and any(indicator in content for indicator in ['<html>', '## ', '**']):
            issues.append("Content is formatted when raw data was requested")

        if len(content.strip()) == 0:
            issues.append("Content is empty")

        return issues

    def _generateDocumentImprovementSuggestions(self, content: str, intent: Dict[str, Any]) -> List[str]:
        """Generates improvement suggestions for a single document"""
        suggestions = []

        dataType = intent.get("dataType", "unknown")
        expectedFormat = intent.get("expectedFormat", "unknown")

        if dataType == "numbers" and self._containsCode(content):
            suggestions.append("Deliver actual numbers, not code to generate them")

        if expectedFormat == "raw_data" and any(indicator in content for indicator in ['<html>', '## ']):
            suggestions.append("Provide raw data without formatting")

        if len(content.strip()) == 0:
            suggestions.append("Provide actual content")

        return suggestions

    def _generateImprovementSuggestions(self, validationDetails: List[Dict[str, Any]],
                                        intent: Dict[str, Any]) -> List[str]:
        """Generates improvement suggestions based on validation results"""
        suggestions = []

        # Check for common issues
        if not any(detail.get("dataTypeMatch", False) for detail in validationDetails):
            dataType = intent.get("dataType", "unknown")
            suggestions.append(f"Content should contain {dataType} data, not code or other formats")

        if not any(detail.get("formatMatch", False) for detail in validationDetails):
            expectedFormat = intent.get("expectedFormat", "unknown")
            suggestions.append(f"Content should be in {expectedFormat} format")

        # Add specific suggestions from validation details
        for detail in validationDetails:
            suggestions.extend(detail.get("improvementSuggestions", []))

        return list(set(suggestions))  # Remove duplicates

    def _createFailedValidationResult(self, error: str) -> Dict[str, Any]:
        """Creates a failed validation result"""
        return {
            "overallSuccess": False,
            "qualityScore": 0.0,
            "validationDetails": [],
            "improvementSuggestions": [f"Validation failed: {error}"]
        }
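
As a quick illustration of the code-versus-data distinction the validator draws, a sketch with a hypothetical stub document:

    class StubDoc:
        documentName = "primes.py"
        documentData = {"content": "def primes(n):\n    ..."}

    validator = ContentValidator()
    intent = {
        "dataType": "numbers",
        "expectedFormat": "raw_data",
        "successCriteria": ["Contains actual prime numbers, not code to generate them"],
    }
    report = validator.validateContent([StubDoc()], intent)
    # Expected: report["overallSuccess"] is False, because 'def ' marks the content
    # as code, so _containsNumbers() rejects it and the prime-numbers criterion fails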
239
modules/workflows/processing/adaptive/intentAnalyzer.py
Normal file
@@ -0,0 +1,239 @@
# intentAnalyzer.py
# Intent analysis for adaptive React mode

import re
import logging
from typing import Dict, Any, List
from enum import Enum

logger = logging.getLogger(__name__)

class DataType(Enum):
    NUMBERS = "numbers"
    TEXT = "text"
    DOCUMENTS = "documents"
    ANALYSIS = "analysis"
    CODE = "code"
    UNKNOWN = "unknown"

class ExpectedFormat(Enum):
    RAW_DATA = "raw_data"
    FORMATTED = "formatted"
    STRUCTURED = "structured"
    VISUAL = "visual"
    UNKNOWN = "unknown"

class IntentAnalyzer:
    """Analyzes user intent to understand what they actually want"""

    def __init__(self):
        self.dataTypePatterns = {
            DataType.NUMBERS: [
                r'\b(numbers?|digits?|count|list|sequence)\b',
                r'\b(prime|fibonacci|random|even|odd)\s+(numbers?)\b',
                r'\b(calculate|compute|generate)\s+(numbers?)\b',
                r'\b(first|last)\s+\d+\s+(numbers?)\b'
            ],
            DataType.TEXT: [
                r'\b(text|content|words?|sentences?|paragraphs?)\b',
                r'\b(write|create|generate)\s+(text|content)\b',
                r'\b(summary|description|explanation)\b',
                r'\b(article|essay|report)\b'
            ],
            DataType.DOCUMENTS: [
                r'\b(document|file|report|pdf|word|excel)\b',
                r'\b(create|generate|make)\s+(document|file|report)\b',
                r'\b(format|structure|organize)\s+(document)\b',
                r'\b(presentation|slides?)\b'
            ],
            DataType.ANALYSIS: [
                r'\b(analyze|analysis|examine|study|evaluate)\b',
                r'\b(insights?|findings?|results?)\b',
                r'\b(compare|contrast|evaluate)\b',
                r'\b(trends?|patterns?)\b'
            ],
            DataType.CODE: [
                r'\b(code|program|script|algorithm|function)\b',
                r'\b(write|create|develop)\s+(code|program|script)\b',
                r'\b(implement|build|construct)\b',
                r'\b(debug|fix|optimize)\s+(code)\b'
            ]
        }

        self.formatPatterns = {
            ExpectedFormat.RAW_DATA: [
                r'\b(raw|plain|simple|basic)\b',
                r'\b(numbers?|data|list)\b(?!\s+(in|as|with))',
                r'\b(just|only)\s+(numbers?|data)\b'
            ],
            ExpectedFormat.FORMATTED: [
                r'\b(formatted|structured|organized|presented)\b',
                r'\b(table|chart|graph|visual)\b',
                r'\b(pretty|nice|clean)\s+(format|presentation)\b',
                r'\b(professional|polished)\b'
            ],
            ExpectedFormat.STRUCTURED: [
                r'\b(json|xml|csv|structured)\b',
                r'\b(organized|categorized|grouped)\b',
                r'\b(systematic|methodical)\b',
                r'\b(database|spreadsheet)\b'
            ]
        }

    def analyzeUserIntent(self, userPrompt: str, context: Any) -> Dict[str, Any]:
        """Analyzes user intent from prompt and context"""
        try:
            # Extract primary goal
            primaryGoal = self._extractPrimaryGoal(userPrompt)

            # Classify data type
            dataType = self._classifyDataType(userPrompt)

            # Determine expected format
            expectedFormat = self._determineExpectedFormat(userPrompt)

            # Assess quality requirements
            qualityRequirements = self._assessQualityRequirements(userPrompt, context)

            # Extract success criteria
            successCriteria = self._extractSuccessCriteria(userPrompt, context)

            # Calculate confidence score
            confidenceScore = self._calculateConfidenceScore(dataType, expectedFormat, successCriteria)

            return {
                "primaryGoal": primaryGoal,
                "dataType": dataType.value,
                "expectedFormat": expectedFormat.value,
                "qualityRequirements": qualityRequirements,
                "successCriteria": successCriteria,
                "confidenceScore": confidenceScore
            }

        except Exception as e:
            logger.error(f"Error analyzing user intent: {str(e)}")
            return self._createDefaultIntentAnalysis(userPrompt)

    def _extractPrimaryGoal(self, userPrompt: str) -> str:
        """Extracts the primary goal from user prompt"""
        # Simple extraction - can be enhanced
        return userPrompt.strip()

    def _classifyDataType(self, userPrompt: str) -> DataType:
        """Classifies the type of data the user wants"""
        promptLower = userPrompt.lower()

        for dataType, patterns in self.dataTypePatterns.items():
            for pattern in patterns:
                if re.search(pattern, promptLower):
                    return dataType

        return DataType.UNKNOWN

    def _determineExpectedFormat(self, userPrompt: str) -> ExpectedFormat:
        """Determines the expected format of the output"""
        promptLower = userPrompt.lower()

        for formatType, patterns in self.formatPatterns.items():
            for pattern in patterns:
                if re.search(pattern, promptLower):
                    return formatType

        return ExpectedFormat.UNKNOWN

    def _assessQualityRequirements(self, userPrompt: str, context: Any) -> Dict[str, Any]:
        """Assesses quality requirements from prompt and context"""
        promptLower = userPrompt.lower()

        # Check for accuracy requirements
        accuracyThreshold = 0.8
        if any(word in promptLower for word in ['exact', 'precise', 'accurate', 'correct']):
            accuracyThreshold = 0.95
        elif any(word in promptLower for word in ['approximate', 'rough', 'estimate']):
            accuracyThreshold = 0.7

        # Check for completeness requirements
        completenessThreshold = 0.8
        if any(word in promptLower for word in ['complete', 'full', 'comprehensive', 'all']):
            completenessThreshold = 0.95
        elif any(word in promptLower for word in ['summary', 'brief', 'overview']):
            completenessThreshold = 0.6

        # Check for format requirements
        formatRequirement = "any"
        if any(word in promptLower for word in ['formatted', 'structured', 'organized']):
            formatRequirement = "formatted"
        elif any(word in promptLower for word in ['raw', 'plain', 'simple']):
            formatRequirement = "raw"

        return {
            "accuracyThreshold": accuracyThreshold,
            "completenessThreshold": completenessThreshold,
            "formatRequirement": formatRequirement
        }

    def _extractSuccessCriteria(self, userPrompt: str, context: Any) -> List[str]:
        """Extracts success criteria from prompt and context"""
        criteria = []
        promptLower = userPrompt.lower()

        # Extract explicit criteria
        if 'first' in promptLower and 'numbers' in promptLower:
            criteria.append("Contains the first N numbers as requested")

        if 'prime' in promptLower:
            criteria.append("Contains actual prime numbers, not code to generate them")

        if 'document' in promptLower:
            criteria.append("Creates a properly formatted document")

        if 'format' in promptLower:
            criteria.append("Content is properly formatted as requested")

        # Add context-based criteria
        if hasattr(context, 'task_step') and context.task_step:
            taskObjective = context.task_step.objective.lower()
            if 'word' in taskObjective:
                criteria.append("Creates a Word document")
            if 'excel' in taskObjective:
                criteria.append("Creates an Excel spreadsheet")

        return criteria if criteria else ["Delivers what the user requested"]

    def _calculateConfidenceScore(self, dataType: DataType, expectedFormat: ExpectedFormat,
                                  successCriteria: List[str]) -> float:
        """Calculates confidence score for the intent analysis"""
        score = 0.0

        # Data type confidence
        if dataType != DataType.UNKNOWN:
            score += 0.3

        # Format confidence
        if expectedFormat != ExpectedFormat.UNKNOWN:
            score += 0.2

        # Success criteria confidence
        if len(successCriteria) > 0:
            score += 0.3

        # Additional confidence for specific patterns
        if len(successCriteria) > 1:
            score += 0.2

        return min(score, 1.0)

    def _createDefaultIntentAnalysis(self, userPrompt: str) -> Dict[str, Any]:
        """Creates a default intent analysis when analysis fails"""
        return {
            "primaryGoal": userPrompt,
            "dataType": "unknown",
            "expectedFormat": "unknown",
            "qualityRequirements": {
                "accuracyThreshold": 0.8,
                "completenessThreshold": 0.8,
                "formatRequirement": "any"
            },
            "successCriteria": ["Delivers what the user requested"],
            "confidenceScore": 0.1
        }
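
A short sketch of what the analyzer is expected to produce for a typical prompt, following the patterns above (the values are expected outcomes, not captured output):

    analyzer = IntentAnalyzer()
    intent = analyzer.analyzeUserIntent("Give me just the first 10 prime numbers", context=None)
    # Expected: intent["dataType"] == "numbers"        (the bare "numbers" pattern matches)
    #           intent["expectedFormat"] == "raw_data" (the bare "numbers"/"data" pattern matches)
    #           intent["successCriteria"] contains the first-N and prime-numbers criteria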
166
modules/workflows/processing/adaptive/learningEngine.py
Normal file
@@ -0,0 +1,166 @@
# learningEngine.py
# Learning engine for adaptive React mode

import json
import logging
from typing import Dict, Any, List
from datetime import datetime, timezone

logger = logging.getLogger(__name__)

class LearningEngine:
    """Learns from feedback and adapts future behavior"""

    def __init__(self):
        self.strategies = {}
        self.feedbackHistory = []

    def learnFromFeedback(self, feedback: Dict[str, Any], context: Any, intent: Dict[str, Any]):
        """Learns from feedback and updates strategies"""
        try:
            # Store feedback
            self.feedbackHistory.append({
                "feedback": feedback,
                "context": self._serializeContext(context),
                "intent": intent,
                "timestamp": datetime.now(timezone.utc).timestamp()
            })

            # Update strategies based on feedback
            self._updateStrategies(feedback, intent)

            logger.info(f"Learning from feedback: {feedback.get('actionAttempted', 'unknown')} - "
                        f"Quality: {feedback.get('qualityScore', 0):.2f}, Intent Match: {feedback.get('intentMatchScore', 0):.2f}")

        except Exception as e:
            logger.error(f"Error learning from feedback: {str(e)}")

    def getImprovedStrategy(self, context: Any, intent: Dict[str, Any]) -> Dict[str, Any]:
        """Returns improved strategy based on learning"""
        try:
            # Get strategy key based on intent
            strategyKey = self._getStrategyKey(intent)

            # Get existing strategy or create default
            if strategyKey in self.strategies:
                strategy = self.strategies[strategyKey]
                logger.info(f"Using learned strategy for {strategyKey}: {strategy}")
                return strategy
            else:
                # Create default strategy
                defaultStrategy = self._createDefaultStrategy(intent)
                self.strategies[strategyKey] = defaultStrategy
                logger.info(f"Created default strategy for {strategyKey}")
                return defaultStrategy

        except Exception as e:
            logger.error(f"Error getting improved strategy: {str(e)}")
            return self._createDefaultStrategy(intent)

    def _updateStrategies(self, feedback: Dict[str, Any], intent: Dict[str, Any]):
        """Updates strategies based on feedback"""
        strategyKey = self._getStrategyKey(intent)
        actionAttempted = feedback.get('actionAttempted', 'unknown')
        qualityScore = feedback.get('qualityScore', 0)
        intentMatchScore = feedback.get('intentMatchScore', 0)

        # Get or create strategy
        if strategyKey not in self.strategies:
            self.strategies[strategyKey] = self._createDefaultStrategy(intent)

        strategy = self.strategies[strategyKey]

        # Update based on success/failure
        if qualityScore > 0.7 and intentMatchScore > 0.7:
            # Successful action - reinforce it
            if 'successfulActions' not in strategy:
                strategy['successfulActions'] = []
            if actionAttempted not in strategy['successfulActions']:
                strategy['successfulActions'].append(actionAttempted)
            strategy['successRate'] = min(strategy.get('successRate', 0.5) + 0.1, 1.0)
            logger.info(f"Reinforced successful action: {actionAttempted}")

        elif qualityScore < 0.3 or intentMatchScore < 0.3:
            # Failed action - avoid it
            if 'failedActions' not in strategy:
                strategy['failedActions'] = []
            if actionAttempted not in strategy['failedActions']:
                strategy['failedActions'].append(actionAttempted)
            strategy['successRate'] = max(strategy.get('successRate', 0.5) - 0.1, 0.0)
            logger.info(f"Marked failed action to avoid: {actionAttempted}")

        # Update last modified
        strategy['lastModified'] = datetime.now(timezone.utc).timestamp()

    def _getStrategyKey(self, intent: Dict[str, Any]) -> str:
        """Gets strategy key based on intent"""
        dataType = intent.get('dataType', 'unknown')
        expectedFormat = intent.get('expectedFormat', 'unknown')
        return f"{dataType}_{expectedFormat}"

    def _createDefaultStrategy(self, intent: Dict[str, Any]) -> Dict[str, Any]:
        """Creates a default strategy for the intent"""
        dataType = intent.get('dataType', 'unknown')
        expectedFormat = intent.get('expectedFormat', 'unknown')

        # Create strategy based on intent type
        if dataType == 'numbers':
            return {
                'strategyId': f"numbers_{expectedFormat}",
                'successfulActions': [],
                'failedActions': [],
                'successRate': 0.5,
                'lastModified': datetime.now(timezone.utc).timestamp(),
                'recommendedPrompt': f"Deliver {dataType} data in {expectedFormat} format. Provide actual numbers, not code to generate them.",
                'avoidPrompt': "Do not ask AI to write code when user wants data. Deliver the data directly."
            }
        elif dataType == 'text':
            return {
                'strategyId': f"text_{expectedFormat}",
                'successfulActions': [],
                'failedActions': [],
                'successRate': 0.5,
                'lastModified': datetime.now(timezone.utc).timestamp(),
                'recommendedPrompt': f"Generate {dataType} content in {expectedFormat} format.",
                'avoidPrompt': "Ensure content is readable and well-structured."
            }
        elif dataType == 'documents':
            return {
                'strategyId': f"documents_{expectedFormat}",
                'successfulActions': [],
                'failedActions': [],
                'successRate': 0.5,
                'lastModified': datetime.now(timezone.utc).timestamp(),
                'recommendedPrompt': f"Create {dataType} in {expectedFormat} format with proper structure.",
                'avoidPrompt': "Ensure document is properly formatted and organized."
            }
        else:
            return {
                'strategyId': f"unknown_{expectedFormat}",
                'successfulActions': [],
                'failedActions': [],
                'successRate': 0.5,
                'lastModified': datetime.now(timezone.utc).timestamp(),
                'recommendedPrompt': f"Deliver {dataType} content in {expectedFormat} format.",
                'avoidPrompt': "Ensure content matches user requirements."
            }

    def _serializeContext(self, context: Any) -> Dict[str, Any]:
        """Serializes context for storage"""
        try:
            return {
                # task_step is an object, so read its attribute instead of calling .get() on it
                "taskObjective": getattr(getattr(context, 'task_step', None), 'objective', ''),
                "workflowId": getattr(context, 'workflow_id', ''),
                "availableDocuments": getattr(context, 'available_documents', [])
            }
        except Exception:
            return {}

    def getLearningSummary(self) -> Dict[str, Any]:
        """Gets a summary of what has been learned"""
        return {
            "totalStrategies": len(self.strategies),
            "totalFeedback": len(self.feedbackHistory),
            "strategies": list(self.strategies.keys()),
            "averageSuccessRate": sum(s.get('successRate', 0) for s in self.strategies.values()) / max(len(self.strategies), 1)
        }
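
A feedback-loop sketch for the engine (the action name and scores below are hypothetical):

    engine = LearningEngine()
    intent = {"dataType": "numbers", "expectedFormat": "raw_data"}

    # Scores below 0.3 push the attempted action onto the avoid list for this intent key
    engine.learnFromFeedback(
        {"actionAttempted": "ai.generateCode",   # hypothetical action name
         "qualityScore": 0.2, "intentMatchScore": 0.1},
        context=None, intent=intent)

    strategy = engine.getImprovedStrategy(context=None, intent=intent)
    # Expected: strategy["failedActions"] == ["ai.generateCode"], strategy["successRate"] == 0.4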
142
modules/workflows/processing/adaptive/progressTracker.py
Normal file
@@ -0,0 +1,142 @@
# progressTracker.py
# Progress tracking for adaptive React mode

import logging
from typing import Dict, Any, List
from datetime import datetime, timezone

logger = logging.getLogger(__name__)

class ProgressTracker:
    """Tracks what has been accomplished and what's still needed"""

    def __init__(self):
        self.completedObjectives = []
        self.partialAchievements = []
        self.failedAttempts = []
        self.learningInsights = []
        self.currentPhase = "planning"

    def updateProgress(self, result: Any, validation: Dict[str, Any], intent: Dict[str, Any]):
        """Updates progress tracking based on action result"""
        try:
            overallSuccess = validation.get('overallSuccess', False)
            qualityScore = validation.get('qualityScore', 0)
            improvementSuggestions = validation.get('improvementSuggestions', [])

            if overallSuccess and qualityScore > 0.7:
                # Successful completion
                self.completedObjectives.append({
                    "objective": intent.get('primaryGoal', 'Unknown'),
                    "achievement": f"Quality score: {qualityScore:.2f}",
                    "qualityScore": qualityScore,
                    "timestamp": datetime.now(timezone.utc).timestamp()
                })
                self.currentPhase = "completed"
                logger.info(f"Objective completed: {intent.get('primaryGoal', 'Unknown')}")

            elif qualityScore > 0.3:
                # Partial achievement
                self.partialAchievements.append({
                    "objective": intent.get('primaryGoal', 'Unknown'),
                    "partialAchievement": f"Quality score: {qualityScore:.2f}",
                    "missingParts": improvementSuggestions,
                    "timestamp": datetime.now(timezone.utc).timestamp()
                })
                self.currentPhase = "partial"
                logger.info(f"Partial achievement: {intent.get('primaryGoal', 'Unknown')}")

            else:
                # Failed attempt
                self.failedAttempts.append({
                    "objective": intent.get('primaryGoal', 'Unknown'),
                    "failureReason": f"Quality score: {qualityScore:.2f}",
                    "learningOpportunity": improvementSuggestions,
                    "timestamp": datetime.now(timezone.utc).timestamp()
                })
                self.currentPhase = "failed"
                logger.info(f"Failed attempt: {intent.get('primaryGoal', 'Unknown')}")

            # Extract learning insights
            if improvementSuggestions:
                for suggestion in improvementSuggestions:
                    if suggestion not in self.learningInsights:
                        self.learningInsights.append(suggestion)

        except Exception as e:
            logger.error(f"Error updating progress: {str(e)}")

    def getCurrentProgress(self) -> Dict[str, Any]:
        """Gets current progress state"""
        return {
            "completedObjectives": self.completedObjectives,
            "partialAchievements": self.partialAchievements,
            "failedAttempts": self.failedAttempts,
            "learningInsights": self.learningInsights,
            "currentPhase": self.currentPhase,
            "nextActionsSuggested": self._getNextActionSuggestions()
        }

    def shouldContinue(self, progress: Dict[str, Any], validation: Dict[str, Any]) -> bool:
        """Determines if the task should continue"""
        try:
            # If we have completed objectives, don't continue
            if progress.get('completedObjectives'):
                return False

            # If we have too many failed attempts, don't continue
            if len(progress.get('failedAttempts', [])) >= 3:
                return False

            # If validation shows success, don't continue
            if validation.get('overallSuccess', False):
                return False

            # Otherwise, continue
            return True

        except Exception as e:
            logger.error(f"Error checking if should continue: {str(e)}")
            return True  # Default to continue on error

    def _getNextActionSuggestions(self) -> List[str]:
        """Suggests next actions based on progress"""
        suggestions = []

        # If we have failed attempts, suggest avoiding those actions
        if self.failedAttempts:
            suggestions.append("Avoid actions that have failed before")

        # If we have partial achievements, suggest building on them
        if self.partialAchievements:
            suggestions.append("Build on partial achievements")

        # If we have learning insights, suggest applying them
        if self.learningInsights:
            suggestions.extend(self.learningInsights[:3])  # Top 3 insights

        # Default suggestions
        if not suggestions:
            suggestions.append("Try a different approach")
            suggestions.append("Focus on user intent")

        return suggestions

    def getProgressSummary(self) -> Dict[str, Any]:
        """Gets a summary of progress"""
        return {
            "totalCompleted": len(self.completedObjectives),
            "totalPartial": len(self.partialAchievements),
            "totalFailed": len(self.failedAttempts),
            "totalInsights": len(self.learningInsights),
            "currentPhase": self.currentPhase,
            "successRate": len(self.completedObjectives) / max(len(self.completedObjectives) + len(self.failedAttempts), 1)
        }

    def reset(self):
        """Resets progress tracking"""
        self.completedObjectives = []
        self.partialAchievements = []
        self.failedAttempts = []
        self.learningInsights = []
        self.currentPhase = "planning"
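
And a sketch of the tracker's three bands (completed above 0.7 with overall success, partial above 0.3, failed otherwise), with hypothetical validation input:

    tracker = ProgressTracker()
    intent = {"primaryGoal": "first 10 primes"}

    tracker.updateProgress(None, {
        "overallSuccess": False,
        "qualityScore": 0.5,    # lands in the partial band
        "improvementSuggestions": ["Deliver actual numbers, not code to generate them"],
    }, intent)

    progress = tracker.getCurrentProgress()
    # Expected: progress["currentPhase"] == "partial" and
    # tracker.shouldContinue(progress, {"overallSuccess": False}) is True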
1
modules/workflows/processing/core/__init__.py
Normal file
@@ -0,0 +1 @@
# Core workflow processing modules
302
modules/workflows/processing/core/actionExecutor.py
Normal file
@@ -0,0 +1,302 @@
# actionExecutor.py
# Action execution functionality for workflows

import logging
from typing import Dict, Any, List
from modules.datamodels.datamodelChat import ActionResult, ActionItem, TaskStep
from modules.datamodels.datamodelChat import ChatWorkflow
from modules.workflows.processing.shared.methodDiscovery import methods

logger = logging.getLogger(__name__)

class ActionExecutor:
    """Handles execution of workflow actions"""

    def __init__(self, services):
        self.services = services

    def _checkWorkflowStopped(self, workflow):
        """Check if workflow has been stopped by user and raise exception if so"""
        current_workflow = None
        try:
            # Get the current workflow status from the database to avoid stale data
            current_workflow = self.services.interfaceDbChat.getWorkflow(workflow.id)
        except Exception as e:
            # If we can't read the current status due to database issues, fall back to the in-memory object
            logger.warning(f"Could not check current workflow status from database: {str(e)}")
        # Raise outside the try/except so the stop signal is not swallowed by the fallback handler
        if current_workflow is not None:
            if current_workflow.status == "stopped":
                logger.info("Workflow stopped by user, aborting action execution")
                raise Exception("Workflow was stopped by user")
        elif workflow and workflow.status == "stopped":
            logger.info("Workflow stopped by user (from in-memory object), aborting action execution")
            raise Exception("Workflow was stopped by user")

    async def executeAction(self, methodName: str, actionName: str, parameters: Dict[str, Any]) -> ActionResult:
        """Execute a method action"""
        try:
            if methodName not in methods:
                raise ValueError(f"Unknown method: {methodName}")

            method = methods[methodName]
            if actionName not in method['actions']:
                raise ValueError(f"Unknown action: {actionName} for method {methodName}")

            action = method['actions'][actionName]

            # Execute the action
            return await action['method'](parameters)

        except Exception as e:
            logger.error(f"Error executing method {methodName}.{actionName}: {str(e)}")
            raise

    async def executeCompoundAction(self, compoundActionName: str, parameters: Dict[str, Any]) -> ActionResult:
        """Execute a compound action (method.action format)"""
        try:
            # Parse compound action name (e.g., "ai.process" -> method="ai", action="process")
            if '.' not in compoundActionName:
                raise ValueError(f"Invalid compound action name: {compoundActionName}. Expected format: method.action")

            methodName, actionName = compoundActionName.split('.', 1)

            # Execute using the existing method
            return await self.executeAction(methodName, actionName, parameters)

        except Exception as e:
            logger.error(f"Error executing compound action {compoundActionName}: {str(e)}")
            raise

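The compound form is just the dotted method.action pair split once; a minimal usage sketch, assuming an ActionExecutor instance and a registered ai.process action (the parameters dict is hypothetical):

    # "ai.process" resolves to executeAction("ai", "process", parameters)
    result = await executor.executeCompoundAction("ai.process", {"prompt": "Summarize the report"})
    if not result.success:
        logger.error(result.error)
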

    async def executeSingleAction(self, action: ActionItem, workflow: ChatWorkflow, taskStep: TaskStep,
                                  taskIndex: int = None, actionIndex: int = None, totalActions: int = None) -> ActionResult:
        """Execute a single action and return ActionResult with enhanced document processing"""
        try:
            # Check workflow status before executing action
            self._checkWorkflowStopped(workflow)

            # Use passed indices or fall back to '?'
            taskNum = taskIndex if taskIndex is not None else '?'
            actionNum = actionIndex if actionIndex is not None else '?'

            logger.info(f"=== TASK {taskNum} ACTION {actionNum}: {action.execMethod}.{action.execAction} ===")

            # Log input parameters
            inputDocs = action.execParameters.get('documentList', [])
            inputConnections = action.execParameters.get('connections', [])
            logger.info(f"Input documents: {inputDocs} (type: {type(inputDocs)})")
            if inputConnections:
                logger.info(f"Input connections: {inputConnections}")

            # Log all action parameters for debugging
            logger.info(f"All action parameters: {action.execParameters}")

            enhancedParameters = action.execParameters.copy()
            if action.expectedDocumentFormats:
                enhancedParameters['expectedDocumentFormats'] = action.expectedDocumentFormats
                logger.info(f"Expected formats: {action.expectedDocumentFormats}")

            # Check workflow status again right before executing the action
            self._checkWorkflowStopped(workflow)

            result = await self.executeAction(
                methodName=action.execMethod,
                actionName=action.execAction,
                parameters=enhancedParameters
            )
            resultLabel = action.execResultLabel

            # Trace action result with full document metadata
            actionResultTrace = {
                "method": action.execMethod,
                "action": action.execAction,
                "success": result.success,
                "error": result.error,
                "resultLabel": resultLabel,
                "documentsCount": len(result.documents) if result.documents else 0
            }

            # Add full document metadata if documents exist
            if result.documents:
                actionResultTrace["documents"] = []
                for doc in result.documents:
                    docMetadata = {
                        "name": getattr(doc, 'documentName', 'Unknown'),
                        "mimeType": getattr(doc, 'mimeType', 'Unknown'),
                        "size": getattr(doc, 'size', 'Unknown'),
                        "created": getattr(doc, 'created', 'Unknown'),
                        "modified": getattr(doc, 'modified', 'Unknown'),
                        "typeGroup": getattr(doc, 'typeGroup', 'Unknown'),
                        "documentId": getattr(doc, 'documentId', 'Unknown'),
                        "reference": getattr(doc, 'reference', 'Unknown')
                    }
                    # Remove 'Unknown' values to keep the trace clean
                    docMetadata = {k: v for k, v in docMetadata.items() if v != 'Unknown'}
                    actionResultTrace["documents"].append(docMetadata)

            self._writeTraceLog("Action Result", actionResultTrace)

            # Process action result
            if result.success:
                action.setSuccess()
                # Extract result text from ALL returned documents
                action.result = self._extractResultText(result)
                # Preserve the action's execResultLabel for document routing.
                # Action methods should NOT return resultLabel - this is managed by the action handler.
                if not action.execResultLabel:
                    logger.warning(f"Action {action.execMethod}.{action.execAction} has no execResultLabel set")

                # Log action results
                logger.info("Action completed successfully")

                if result.documents:
                    logger.info(f"Output documents ({len(result.documents)}):")
                    for i, doc in enumerate(result.documents):
                        logger.info(f"  {i+1}. {doc.documentName}")
                else:
                    logger.info("Output: No documents created")
            else:
                action.setError(result.error or "Action execution failed")
                logger.error(f"Action failed: {result.error}")

                # Create database log entry for action failure
                self.services.interfaceDbChat.createLog({
                    "workflowId": workflow.id,
                    "message": f"❌ **Task {taskNum}**\n\n❌ **Action {actionNum}/{totalActions}** failed: {result.error}",
                    "type": "error"
                })

            # Log action summary
            logger.info(f"=== TASK {taskNum} ACTION {actionNum} COMPLETED ===")

            # Create action completion message with documents (generic)
            await self._createActionCompletionMessage(action, result, workflow, taskStep, taskIndex, actionIndex, totalActions)

            return ActionResult(
                success=result.success,
                documents=result.documents,  # Return original ActionDocument objects
                resultLabel=action.execResultLabel,  # Always use action's execResultLabel
                error=result.error or ""
            )
        except Exception as e:
            logger.error(f"Error executing single action: {str(e)}")
            action.setError(str(e))
            return ActionResult(
                success=False,
                documents=[],  # Empty documents for error case
                resultLabel=action.execResultLabel,
                error=str(e)
            )

    def _extractResultText(self, result: ActionResult) -> str:
        """Extract result text from ActionResult documents"""
        if not result.success or not result.documents:
            return ""

        # Extract text directly from ActionDocument objects
        resultParts = []
        for doc in result.documents:
            if hasattr(doc, 'documentData') and doc.documentData:
                resultParts.append(str(doc.documentData))

        # Join all document results with separators
        return "\n\n---\n\n".join(resultParts) if resultParts else ""
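
    # Example (illustrative): two documents with documentData "Summary A" and
    # "Summary B" yield "Summary A\n\n---\n\nSummary B", so downstream consumers
    # can split the combined text on the "---" separator again.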

    async def _createActionCompletionMessage(self, action: ActionItem, result: ActionResult, workflow: ChatWorkflow,
                                             taskStep: TaskStep, taskIndex: int, actionIndex: int, totalActions: int):
        """Create action completion message with documents (generic)"""
        try:
            # Convert ActionDocument objects to ChatDocument objects for message creation
            createdDocuments = []
            if result.documents:
                createdDocuments = self.services.generation.createDocumentsFromActionResult(result, action, workflow, None)

            # Create action message using the message creator
            from modules.workflows.processing.core.messageCreator import MessageCreator
            messageCreator = MessageCreator(self.services)

            await messageCreator.createActionMessage(
                action=action,
                result=result,
                workflow=workflow,
                resultLabel=action.execResultLabel,
                createdDocuments=createdDocuments,
                taskStep=taskStep,
                taskIndex=taskIndex,
                actionIndex=actionIndex,
                totalActions=totalActions
            )
        except Exception as e:
            logger.error(f"Error creating action completion message: {str(e)}")

    def _writeTraceLog(self, contextText: str, data: Any) -> None:
        """Write trace data to the configured trace file if in debug mode, with improved JSON formatting"""
        try:
            import os
            import json
            from datetime import datetime, UTC

            # Only write if logger is in debug mode
            if logger.level > logging.DEBUG:
                return

            # Get log directory from configuration
            logDir = self.services.utils.configGet("APP_LOGGING_LOG_DIR", "./")
            if not os.path.isabs(logDir):
                # If relative, resolve it against the gateway directory
                gatewayDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
                logDir = os.path.join(gatewayDir, logDir)

            # Ensure log directory exists
            os.makedirs(logDir, exist_ok=True)

            # Create trace file path
            traceFile = os.path.join(logDir, "log_trace.log")

            # Format the trace entry with better structure
            timestamp = datetime.fromtimestamp(self.services.utils.getUtcTimestamp(), UTC).strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]

            # Create a structured trace entry
            traceEntry = f"[{timestamp}] {contextText}\n"
            traceEntry += "=" * 80 + "\n"

            # Add data if provided, with improved formatting
            if data is not None:
                try:
                    if isinstance(data, (dict, list)):
                        # Format as pretty JSON with better settings
                        jsonStr = json.dumps(data, indent=2, default=str, ensure_ascii=False, sort_keys=False)
                        traceEntry += f"JSON Data:\n{jsonStr}\n"
                    elif isinstance(data, str):
                        # For string data, try to parse as JSON first, then fall back to plain text
                        try:
                            parsed = json.loads(data)
                            jsonStr = json.dumps(parsed, indent=2, default=str, ensure_ascii=False, sort_keys=False)
                            traceEntry += f"JSON Data (parsed from string):\n{jsonStr}\n"
                        except (json.JSONDecodeError, TypeError):
                            # Not valid JSON, show as plain text with proper line breaks
                            formatted_data = data.replace('\\n', '\n')
                            traceEntry += f"Text Data:\n{formatted_data}\n"
                    else:
                        # For other types, convert to string and try to parse as JSON
                        dataStr = str(data)
                        try:
                            parsed = json.loads(dataStr)
                            jsonStr = json.dumps(parsed, indent=2, default=str, ensure_ascii=False, sort_keys=False)
                            traceEntry += f"JSON Data (parsed from object):\n{jsonStr}\n"
                        except (json.JSONDecodeError, TypeError):
                            # Not valid JSON, show as plain text with proper line breaks
                            formatted_data = dataStr.replace('\\n', '\n')
                            traceEntry += f"Object Data:\n{formatted_data}\n"
                except Exception:
                    # Fall back to a simple string representation
                    traceEntry += f"Data (fallback): {str(data)}\n"
            else:
                traceEntry += "No data provided\n"

            traceEntry += "=" * 80 + "\n\n"

            # Write to trace file
            with open(traceFile, "a", encoding="utf-8") as f:
                f.write(traceEntry)

        except Exception:
            # Don't log trace errors to avoid recursion
            pass
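
# Example (illustrative) trace entry produced by _writeTraceLog:
#   [2025-01-01 12:00:00.000] Action Result
#   ================================================================================
#   JSON Data:
#   { "method": "ai", "action": "process", "success": true, ... }
#   ================================================================================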

368 modules/workflows/processing/core/messageCreator.py Normal file
@@ -0,0 +1,368 @@
# messageCreator.py
# Generic message creation for all workflow phases

import logging
from typing import Dict, Any, Optional, List
from modules.datamodels.datamodelChat import TaskPlan, TaskStep, ActionResult, ReviewResult
from modules.datamodels.datamodelChat import ChatWorkflow

logger = logging.getLogger(__name__)


class MessageCreator:
    """Handles creation of all workflow messages"""

    def __init__(self, services):
        self.services = services

    def _checkWorkflowStopped(self, workflow):
        """Check if workflow has been stopped by user and raise exception if so"""
        try:
            # Get the current workflow status from the database to avoid stale data
            current_workflow = self.services.interfaceDbChat.getWorkflow(workflow.id)
        except Exception as e:
            # If we can't read the current status due to database issues, fall back to the in-memory object
            logger.warning(f"Could not check current workflow status from database: {str(e)}")
            current_workflow = workflow
        if current_workflow and current_workflow.status == "stopped":
            logger.info("Workflow stopped by user, aborting message creation")
            raise Exception("Workflow was stopped by user")

    async def createTaskPlanMessage(self, taskPlan: TaskPlan, workflow: ChatWorkflow):
        """Create a chat message containing the task plan with user-friendly messages"""
        try:
            # Check workflow status before creating message
            self._checkWorkflowStopped(workflow)

            # Build task plan summary
            taskSummary = "📋 **Task Plan**\n\n"

            # Get overall user message from task plan if available
            overallMessage = taskPlan.userMessage
            if overallMessage:
                taskSummary += f"{overallMessage}\n\n"

            # Add each task with its user message
            for task in taskPlan.tasks:
                if task.userMessage:
                    taskSummary += f"💬 {task.userMessage}\n"
                taskSummary += "\n"

            # Create workflow message
            messageData = {
                "workflowId": workflow.id,
                "role": "assistant",
                "message": taskSummary,
                "status": "step",
                "sequenceNr": len(workflow.messages) + 1,
                "publishedAt": self.services.utils.getUtcTimestamp(),
                "documentsLabel": "task_plan",
                "documents": [],
                # Workflow context fields - use the current workflow round instead of a hardcoded 1
                "roundNumber": workflow.currentRound,
                "taskNumber": 1,  # The task plan precedes individual tasks; keep 1 so the UI does not filter this message
                "actionNumber": 0,
                # Task progress status
                "taskProgress": "pending"
            }

            message = self.services.interfaceDbChat.createMessage(messageData)
            if message:
                workflow.messages.append(message)
                logger.info("Task plan message created successfully")
        except Exception as e:
            logger.error(f"Error creating task plan message: {str(e)}")

    async def createTaskStartMessage(self, taskStep: TaskStep, workflow: ChatWorkflow, taskIndex: int, totalTasks: int):
        """Create a task start message for the user"""
        try:
            # Check workflow status before creating message
            self._checkWorkflowStopped(workflow)

            # Create a task start message for the user
            taskProgress = f"{taskIndex}/{totalTasks}" if totalTasks is not None else str(taskIndex)
            taskStartMessage = {
                "workflowId": workflow.id,
                "role": "assistant",
                "message": f"🚀 **Task {taskProgress}**",
                "status": "step",
                "sequenceNr": len(workflow.messages) + 1,
                "publishedAt": self.services.utils.getUtcTimestamp(),
                "documentsLabel": f"task_{taskIndex}_start",
                "documents": [],
                # Workflow context fields
                "roundNumber": workflow.currentRound,  # Use current workflow round
                "taskNumber": taskIndex,
                "actionNumber": 0,
                # Task progress status
                "taskProgress": "running"
            }

            # Add user-friendly message if available
            if taskStep.userMessage:
                taskStartMessage["message"] += f"\n\n💬 {taskStep.userMessage}"

            message = self.services.interfaceDbChat.createMessage(taskStartMessage)
            if message:
                workflow.messages.append(message)
                logger.info(f"Task start message created for task {taskIndex}")
        except Exception as e:
            logger.error(f"Error creating task start message: {str(e)}")

    async def createActionMessage(self, action, result: ActionResult, workflow: ChatWorkflow, resultLabel: str = None,
                                  createdDocuments: List = None, taskStep: TaskStep = None,
                                  taskIndex: int = None, actionIndex: int = None, totalActions: int = None):
        """Create and store a message for the action result in the workflow with enhanced document processing"""
        try:
            # Check workflow status before creating action message
            self._checkWorkflowStopped(workflow)

            if resultLabel is None:
                resultLabel = action.execResultLabel

            # Log delivered documents
            if createdDocuments:
                logger.info(f"Result label: {resultLabel} - {len(createdDocuments)} documents")
            else:
                logger.info(f"Result label: {resultLabel} - No documents")

            # Get current workflow context and stats
            workflowContext = self.services.workflow.getWorkflowContext()
            workflowStats = self.services.workflow.getWorkflowStats()

            # Create a more meaningful message that includes task context
            taskObjective = taskStep.objective if taskStep else 'Unknown task'

            # Extract round, task, and action numbers from resultLabel first, then fall back to the workflow context
            currentRound = self._extractRoundNumberFromLabel(resultLabel) if resultLabel else workflowContext.get('currentRound', 0)
            currentTask = self._extractTaskNumberFromLabel(resultLabel) if resultLabel else (taskIndex if taskIndex is not None else workflowContext.get('currentTask', 0))
            totalTasks = workflowStats.get('totalTasks', 0)
            currentAction = self._extractActionNumberFromLabel(resultLabel) if resultLabel else (actionIndex if actionIndex is not None else workflowContext.get('currentAction', 0))
            totalActions = totalActions if totalActions is not None else workflowStats.get('totalActions', 0)

            # Debug logging for round number extraction
            logger.info(f"Action message round number extraction: resultLabel='{resultLabel}', extractedRound={currentRound}, workflowRound={workflowContext.get('currentRound', 0)}")

            # Build a user-friendly message based on success/failure
            if result.success:
                messageText = f"**Action {currentAction}/{totalActions} ({action.execMethod}.{action.execAction})**\n\n"
                messageText += f"✅ {taskObjective}\n\n"
            else:
                # ⚠️ FAILURE MESSAGE - Show error details to user
                errorDetails = result.error if result.error else "Unknown error occurred"
                messageText = f"**Action {currentAction}/{totalActions} ({action.execMethod}.{action.execAction})**\n\n"
                messageText += f"❌ {taskObjective}\n\n"
                messageText += f"{errorDetails}\n\n"

            # Build a concise summary to persist for history context
            doc_count = len(createdDocuments) if createdDocuments else 0
            trimmed_msg = (messageText or "").strip().replace("\n", " ")
            if len(trimmed_msg) > 160:
                trimmed_msg = trimmed_msg[:157] + "..."
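
            # Example (illustrative): a 300-character message is persisted as its
            # first 157 characters plus "...", capping the stored summary at 160.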

            messageData = {
                "workflowId": workflow.id,
                "role": "assistant",
                "message": messageText,
                "status": "step",
                "sequenceNr": len(workflow.messages) + 1,
                "publishedAt": self.services.utils.getUtcTimestamp(),
                "actionId": action.id,
                "actionMethod": action.execMethod,
                "actionName": action.execAction,
                "documentsLabel": resultLabel,
                "documents": createdDocuments,
                # Workflow context fields - extracted from resultLabel to match the document reference
                "roundNumber": currentRound,
                "taskNumber": currentTask,
                "actionNumber": currentAction,
                "actionProgress": "success" if result.success else "fail",
                "summary": f"{action.execMethod}.{action.execAction}: {doc_count} docs | msg='{trimmed_msg}'"
            }

            # Add debugging for error messages
            if not result.success:
                logger.info(f"Creating ERROR message: {messageText}")
                logger.info(f"Message data: {messageData}")

            message = self.services.interfaceDbChat.createMessage(messageData)
            if message:
                workflow.messages.append(message)
                logger.info(f"Message created: {action.execMethod}.{action.execAction}")
                return message
            else:
                logger.error(f"Failed to create workflow message for action {action.execMethod}.{action.execAction}")
                return None
        except Exception as e:
            logger.error(f"Error creating action message: {str(e)}")
            return None

    async def createTaskCompletionMessage(self, taskStep: TaskStep, workflow: ChatWorkflow, taskIndex: int,
                                          totalTasks: int, reviewResult: ReviewResult):
        """Create a task completion message for the user"""
        try:
            # Check workflow status before creating message
            self._checkWorkflowStopped(workflow)

            # Create a task completion message for the user
            taskProgress = f"{taskIndex}/{totalTasks}" if totalTasks is not None else str(taskIndex)

            # Enhanced completion message with criteria details
            completionMessage = f"🎯 **Task {taskProgress}**\n\n✅ {reviewResult.reason or 'Task completed successfully'}"

            # Add criteria status if available
            if hasattr(reviewResult, 'met_criteria') and reviewResult.met_criteria:
                for criterion in reviewResult.met_criteria:
                    completionMessage += f"\n• {criterion}"

            if hasattr(reviewResult, 'quality_score'):
                completionMessage += f"\n📊 Score {reviewResult.quality_score}/10"

            taskCompletionMessage = {
                "workflowId": workflow.id,
                "role": "assistant",
                "message": completionMessage,
                "status": "step",
                "sequenceNr": len(workflow.messages) + 1,
                "publishedAt": self.services.utils.getUtcTimestamp(),
                "documentsLabel": f"task_{taskIndex}_completion",
                "documents": [],
                # Workflow context fields
                "roundNumber": workflow.currentRound,  # Use current workflow round
                "taskNumber": taskIndex,
                "actionNumber": 0,
                # Task progress status
                "taskProgress": "success"
            }

            message = self.services.interfaceDbChat.createMessage(taskCompletionMessage)
            if message:
                workflow.messages.append(message)
                logger.info(f"Task completion message created for task {taskIndex}")
        except Exception as e:
            logger.error(f"Error creating task completion message: {str(e)}")

    async def createRetryMessage(self, taskStep: TaskStep, workflow: ChatWorkflow, taskIndex: int, reviewResult: ReviewResult):
        """Create a retry message for the user"""
        try:
            # Check workflow status before creating message
            self._checkWorkflowStopped(workflow)

            # Create retry message for user
            retryMessage = {
                "workflowId": workflow.id,
                "role": "assistant",
                "message": f"🔄 **Task {taskIndex}** needs retry: {reviewResult.improvements}",
                "status": "step",
                "sequenceNr": len(workflow.messages) + 1,
                "publishedAt": self.services.utils.getUtcTimestamp(),
                "documentsLabel": f"task_{taskIndex}_retry",
                "documents": [],
                "roundNumber": workflow.currentRound,
                "taskNumber": taskIndex,
                "actionNumber": 0,
                "taskProgress": "retry"
            }

            message = self.services.interfaceDbChat.createMessage(retryMessage)
            if message:
                workflow.messages.append(message)
                logger.info(f"Retry message created for task {taskIndex}")
        except Exception as e:
            logger.error(f"Error creating retry message: {str(e)}")

    async def createErrorMessage(self, taskStep: TaskStep, workflow: ChatWorkflow, taskIndex: int, errorDetails: str):
        """Create an error message for the user"""
        try:
            # Check workflow status before creating message
            self._checkWorkflowStopped(workflow)

            # Create user-facing error message for task failure
            errorMessage = f"**Task {taskIndex}**\n\n❌ '{taskStep.objective}' failed\n\n"

            # Add specific error details if available
            if errorDetails:
                errorMessage += f"{errorDetails}\n\n"

            # Create workflow message for user
            messageData = {
                "workflowId": workflow.id,
                "role": "assistant",
                "message": errorMessage,
                "status": "step",
                "sequenceNr": len(workflow.messages) + 1,
                "publishedAt": self.services.utils.getUtcTimestamp(),
                "actionId": None,
                "actionMethod": "task",
                "actionName": "task_error",
                "documentsLabel": None,
                "documents": [],
                # Workflow context fields
                "roundNumber": workflow.currentRound,  # Use current workflow round
                "taskNumber": taskIndex,
                "actionNumber": 0,
                # Task progress status
                "taskProgress": "fail"
            }

            message = self.services.interfaceDbChat.createMessage(messageData)
            if message:
                workflow.messages.append(message)
                logger.info(f"Error message created for task {taskIndex}")
        except Exception as e:
            logger.error(f"Error creating error message: {str(e)}")

    def _extractRoundNumberFromLabel(self, label: str) -> int:
        """Extract the round number from a document label like 'round1_task1_action1_diagram_analysis'"""
        try:
            if not label or not isinstance(label, str):
                return 0

            # Parse label format: round{round}_task{task}_action{action}_{context}
            if label.startswith('round'):
                roundPart = label.split('_')[0]  # Get the 'round1' part
                roundNumber = roundPart[5:]  # Remove the 'round' prefix
                return int(roundNumber)

            return 0
        except Exception as e:
            logger.warning(f"Could not extract round number from label '{label}': {str(e)}")
            return 0

    def _extractTaskNumberFromLabel(self, label: str) -> int:
        """Extract the task number from a document label like 'round1_task1_action1_diagram_analysis'"""
        try:
            if not label or not isinstance(label, str):
                return 0

            # Parse label format: round{round}_task{task}_action{action}_{context}
            if '_task' in label:
                taskPart = label.split('_task')[1]
                if taskPart and '_' in taskPart:
                    taskNumber = taskPart.split('_')[0]
                    return int(taskNumber)

            return 0
        except Exception as e:
            logger.warning(f"Could not extract task number from label '{label}': {str(e)}")
            return 0

    def _extractActionNumberFromLabel(self, label: str) -> int:
        """Extract the action number from a document label like 'round1_task1_action1_diagram_analysis'"""
        try:
            if not label or not isinstance(label, str):
                return 0

            # Parse label format: round{round}_task{task}_action{action}_{context}
            if '_action' in label:
                actionPart = label.split('_action')[1]
                if actionPart and '_' in actionPart:
                    actionNumber = actionPart.split('_')[0]
                    return int(actionNumber)

            return 0
        except Exception as e:
            logger.warning(f"Could not extract action number from label '{label}': {str(e)}")
            return 0
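
    # Example (illustrative): the label "round2_task3_action1_summary" resolves
    # via the three helpers above to round=2, task=3, action=1; any label that
    # does not match the round{n}_task{n}_action{n}_{context} pattern yields 0.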

333 modules/workflows/processing/core/taskPlanner.py Normal file
@@ -0,0 +1,333 @@
# taskPlanner.py
# Task planning functionality for workflows

import json
import logging
from typing import Dict, Any
from modules.datamodels.datamodelChat import TaskStep, TaskContext, TaskPlan
from modules.datamodels.datamodelAi import AiCallOptions, OperationType, ProcessingMode, Priority
from modules.workflows.processing.shared.promptGenerationTaskplan import (
    generateTaskPlanningPrompt
)

logger = logging.getLogger(__name__)


class TaskPlanner:
    """Handles task planning for workflows"""

    def __init__(self, services):
        self.services = services

    def _checkWorkflowStopped(self, workflow):
        """Check if workflow has been stopped by user and raise exception if so"""
        try:
            # Get the current workflow status from the database to avoid stale data
            current_workflow = self.services.interfaceDbChat.getWorkflow(workflow.id)
        except Exception as e:
            # If we can't read the current status due to database issues, fall back to the in-memory object
            logger.warning(f"Could not check current workflow status from database: {str(e)}")
            current_workflow = workflow
        if current_workflow and current_workflow.status == "stopped":
            logger.info("Workflow stopped by user, aborting task planning")
            raise Exception("Workflow was stopped by user")

    async def generateTaskPlan(self, userInput: str, workflow) -> TaskPlan:
        """Generate a high-level task plan for the workflow"""
        try:
            # Check workflow status before generating task plan
            self._checkWorkflowStopped(workflow)

            logger.info("=== STARTING TASK PLAN GENERATION ===")
            logger.info(f"Workflow ID: {workflow.id}")
            logger.info(f"User Input: {userInput}")

            # Use the stored user prompt if available, otherwise use the input
            actualUserPrompt = self.services.currentUserPrompt if self.services and hasattr(self.services, 'currentUserPrompt') and self.services.currentUserPrompt else userInput
            logger.info(f"Actual User Prompt: {actualUserPrompt}")

            # Check workflow status before calling AI service
            self._checkWorkflowStopped(workflow)

            # Create a proper context object for task planning.
            # A minimal TaskStep is needed here because TaskContext requires one.
            planningTaskStep = TaskStep(
                id="planning",
                objective=actualUserPrompt,
                dependencies=[],
                success_criteria=[],
                estimated_complexity="medium"
            )

            taskPlanningContext = TaskContext(
                task_step=planningTaskStep,
                workflow=workflow,
                workflow_id=workflow.id,
                available_documents=None,
                available_connections=None,
                previous_results=[],
                previous_handover=None,
                improvements=[],
                retry_count=0,
                previous_action_results=[],
                previous_review_result=None,
                is_regeneration=False,
                failure_patterns=[],
                failed_actions=[],
                successful_actions=[],
                criteria_progress={
                    'met_criteria': set(),
                    'unmet_criteria': set(),
                    'attempt_history': []
                }
            )

            # Build prompt bundle (template + placeholders) using the new API
            bundle = generateTaskPlanningPrompt(self.services, taskPlanningContext)
            taskPlanningPromptTemplate = bundle.prompt
            placeholders = bundle.placeholders

            # Log task planning prompt sent to AI
            logger.info("=== TASK PLANNING PROMPT SENT TO AI ===")
            # Trace task planning prompt
            self._writeTraceLog("Task Plan Prompt", taskPlanningPromptTemplate)
            self._writeTraceLog("Task Plan Placeholders", placeholders)

            # Centralized AI call: task planning (quality, detailed) with placeholders
            options = AiCallOptions(
                operationType=OperationType.GENERATE_PLAN,
                priority=Priority.QUALITY,
                compressPrompt=False,
                compressContext=False,
                processingMode=ProcessingMode.DETAILED,
                maxCost=0.10,
                maxProcessingTime=30
            )

            response = await self.services.ai.callAi(
                prompt=taskPlanningPromptTemplate,
                placeholders=placeholders,
                options=options
            )

            # Check if AI response is valid
            if not response:
                raise ValueError("AI service returned no response for task planning")

            # Log task planning response received
            logger.info("=== TASK PLANNING AI RESPONSE RECEIVED ===")
            logger.info(f"Response length: {len(response) if response else 0}")
            # Trace task planning response
            self._writeTraceLog("Task Plan Response", response)

            # Parse task plan response
            try:
                jsonStart = response.find('{')
                jsonEnd = response.rfind('}') + 1
                if jsonStart == -1 or jsonEnd == 0:
                    raise ValueError("No JSON found in response")
                jsonStr = response[jsonStart:jsonEnd]
                taskPlanDict = json.loads(jsonStr)
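
                # Illustrative expected response shape (field names per the checks
                # below; the exact schema comes from the planning prompt, which is
                # not shown in this diff):
                #   {
                #     "overview": "...",
                #     "userMessage": "...",
                #     "languageUserDetected": "en",
                #     "tasks": [{"id": "task_1", "objective": "...",
                #                "success_criteria": [...], "dependencies": []}]
                #   }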
                if 'tasks' not in taskPlanDict:
                    raise ValueError("Task plan missing 'tasks' field")
            except Exception as e:
                logger.error(f"Error parsing task plan response: {str(e)}")
                taskPlanDict = {'tasks': []}

            if not self._validateTaskPlan(taskPlanDict):
                logger.error("Generated task plan failed validation")
                logger.error(f"AI Response: {response}")
                logger.error(f"Parsed Task Plan: {json.dumps(taskPlanDict, indent=2)}")
                raise Exception("AI-generated task plan failed validation - AI is required for task planning")

            if not taskPlanDict.get('tasks'):
                raise ValueError("Task plan contains no tasks")

            # LANGUAGE DETECTION: Determine the user language once for the entire workflow.
            # Priority: 1. languageUserDetected from the AI response, 2. service.user.language, 3. "en"
            detectedLanguage = taskPlanDict.get('languageUserDetected', '').strip()
            serviceUserLanguage = getattr(self.services.user, 'language', '') if self.services and self.services.user else ''

            if detectedLanguage and len(detectedLanguage) == 2:  # Valid language code like "en", "de", "fr"
                userLanguage = detectedLanguage
                logger.info(f"Using detected language from AI response: {userLanguage}")
            elif serviceUserLanguage and len(serviceUserLanguage) == 2:
                userLanguage = serviceUserLanguage
                logger.info(f"Using language from service user object: {userLanguage}")
            else:
                userLanguage = "en"
                logger.info(f"Using default language: {userLanguage}")

            # Set the detected language in the service for use throughout the workflow
            if self.services and self.services.user:
                self.services.user.language = userLanguage
                logger.info(f"Set workflow user language to: {userLanguage}")

            tasks = []
            for i, taskDict in enumerate(taskPlanDict.get('tasks', [])):
                if not isinstance(taskDict, dict):
                    logger.warning(f"Skipping invalid task {i+1}: not a dictionary")
                    continue

                # Map the old 'description' field to the new 'objective' field
                if 'description' in taskDict and 'objective' not in taskDict:
                    taskDict['objective'] = taskDict.pop('description')

                try:
                    task = TaskStep(**taskDict)
                    tasks.append(task)
                except Exception as e:
                    logger.warning(f"Skipping invalid task {i+1}: {str(e)}")
                    continue

            if not tasks:
                raise ValueError("No valid tasks could be created from AI response")

            taskPlan = TaskPlan(
                overview=taskPlanDict.get('overview', ''),
                tasks=tasks,
                userMessage=taskPlanDict.get('userMessage', '')
            )

            logger.info(f"Task plan generated successfully with {len(tasks)} tasks")
            logger.info(f"Workflow user language set to: {userLanguage}")

            return taskPlan
        except Exception as e:
            logger.error(f"Error in generateTaskPlan: {str(e)}")
            raise

    def _validateTaskPlan(self, taskPlan: Dict[str, Any]) -> bool:
        """Validate task plan structure"""
        try:
            if not isinstance(taskPlan, dict):
                logger.error("Task plan is not a dictionary")
                return False

            if 'tasks' not in taskPlan or not isinstance(taskPlan['tasks'], list):
                logger.error(f"Task plan missing 'tasks' field or not a list. Found: {type(taskPlan.get('tasks', 'MISSING'))}")
                return False

            # First pass: collect all task IDs to validate dependencies
            taskIds = set()
            for task in taskPlan['tasks']:
                if not isinstance(task, dict):
                    logger.error(f"Task is not a dictionary: {type(task)}")
                    return False
                if 'id' not in task:
                    logger.error(f"Task missing 'id' field: {task}")
                    return False
                taskIds.add(task['id'])

            # Second pass: validate each task
            allIds = [task.get('id') for task in taskPlan['tasks']]
            for i, task in enumerate(taskPlan['tasks']):
                if not isinstance(task, dict):
                    logger.error(f"Task {i} is not a dictionary: {type(task)}")
                    return False

                requiredFields = ['id', 'objective', 'success_criteria']
                missingFields = [field for field in requiredFields if field not in task]
                if missingFields:
                    logger.error(f"Task {i} missing required fields: {missingFields}")
                    return False

                # Check for duplicate IDs by counting occurrences among all task IDs
                if allIds.count(task['id']) > 1:
                    logger.error(f"Task {i} has duplicate ID: {task['id']}")
                    return False

                dependencies = task.get('dependencies', [])
                if not isinstance(dependencies, list):
                    logger.error(f"Task {i} dependencies is not a list: {type(dependencies)}")
                    return False

                for dep in dependencies:
                    if dep not in taskIds and dep != 'task_0':
                        logger.error(f"Task {i} has invalid dependency: {dep} (available: {list(taskIds) + ['task_0']})")
                        return False

            logger.info(f"Task plan validation successful with {len(taskIds)} tasks")
            return True

        except Exception as e:
            logger.error(f"Error validating task plan: {str(e)}")
            return False
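
    # Example (illustrative) of a plan that passes _validateTaskPlan:
    #   {"tasks": [
    #       {"id": "task_1", "objective": "Collect data",
    #        "success_criteria": ["data stored"], "dependencies": []},
    #       {"id": "task_2", "objective": "Summarize",
    #        "success_criteria": ["summary created"], "dependencies": ["task_1"]}
    #   ]}
    # Per the check above, "task_0" is always accepted as a dependency even
    # though no task carries that ID.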

    def _writeTraceLog(self, contextText: str, data: Any) -> None:
        """Write trace data to the configured trace file if in debug mode, with improved JSON formatting"""
        try:
            import os
            import json
            from datetime import datetime, UTC

            # Only write if logger is in debug mode
            if logger.level > logging.DEBUG:
                return

            # Get log directory from configuration
            logDir = self.services.utils.configGet("APP_LOGGING_LOG_DIR", "./")
            if not os.path.isabs(logDir):
                # If relative, resolve it against the gateway directory
                gatewayDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
                logDir = os.path.join(gatewayDir, logDir)

            # Ensure log directory exists
            os.makedirs(logDir, exist_ok=True)

            # Create trace file path
            traceFile = os.path.join(logDir, "log_trace.log")

            # Format the trace entry with better structure
            timestamp = datetime.fromtimestamp(self.services.utils.getUtcTimestamp(), UTC).strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]

            # Create a structured trace entry
            traceEntry = f"[{timestamp}] {contextText}\n"
            traceEntry += "=" * 80 + "\n"

            # Add data if provided, with improved formatting
            if data is not None:
                try:
                    if isinstance(data, (dict, list)):
                        # Format as pretty JSON with better settings
                        jsonStr = json.dumps(data, indent=2, default=str, ensure_ascii=False, sort_keys=False)
                        traceEntry += f"JSON Data:\n{jsonStr}\n"
                    elif isinstance(data, str):
                        # For string data, try to parse as JSON first, then fall back to plain text
                        try:
                            parsed = json.loads(data)
                            jsonStr = json.dumps(parsed, indent=2, default=str, ensure_ascii=False, sort_keys=False)
                            traceEntry += f"JSON Data (parsed from string):\n{jsonStr}\n"
                        except (json.JSONDecodeError, TypeError):
                            # Not valid JSON, show as plain text with proper line breaks
                            formatted_data = data.replace('\\n', '\n')
                            traceEntry += f"Text Data:\n{formatted_data}\n"
                    else:
                        # For other types, convert to string and try to parse as JSON
                        dataStr = str(data)
                        try:
                            parsed = json.loads(dataStr)
                            jsonStr = json.dumps(parsed, indent=2, default=str, ensure_ascii=False, sort_keys=False)
                            traceEntry += f"JSON Data (parsed from object):\n{jsonStr}\n"
                        except (json.JSONDecodeError, TypeError):
                            # Not valid JSON, show as plain text with proper line breaks
                            formatted_data = dataStr.replace('\\n', '\n')
                            traceEntry += f"Object Data:\n{formatted_data}\n"
                except Exception:
                    # Fall back to a simple string representation
                    traceEntry += f"Data (fallback): {str(data)}\n"
            else:
                traceEntry += "No data provided\n"

            traceEntry += "=" * 80 + "\n\n"

            # Write to trace file
            with open(traceFile, "a", encoding="utf-8") as f:
                f.write(traceEntry)

        except Exception:
            # Don't log trace errors to avoid recursion
            pass

111 modules/workflows/processing/core/validator.py Normal file
@@ -0,0 +1,111 @@
# validator.py
# Validation logic for workflows

import logging
from typing import Dict, Any, List

logger = logging.getLogger(__name__)


class WorkflowValidator:
    """Handles validation of workflow components"""

    def __init__(self, services):
        self.services = services

    def validateTask(self, taskPlan: Dict[str, Any]) -> bool:
        """Validate task plan structure"""
        try:
            if not isinstance(taskPlan, dict):
                logger.error("Task plan is not a dictionary")
                return False

            if 'tasks' not in taskPlan or not isinstance(taskPlan['tasks'], list):
                logger.error(f"Task plan missing 'tasks' field or not a list. Found: {type(taskPlan.get('tasks', 'MISSING'))}")
                return False

            # First pass: collect all task IDs to validate dependencies
            taskIds = set()
            for task in taskPlan['tasks']:
                if not isinstance(task, dict):
                    logger.error(f"Task is not a dictionary: {type(task)}")
                    return False
                if 'id' not in task:
                    logger.error(f"Task missing 'id' field: {task}")
                    return False
                taskIds.add(task['id'])

            # Second pass: validate each task
            allIds = [task.get('id') for task in taskPlan['tasks']]
            for i, task in enumerate(taskPlan['tasks']):
                if not isinstance(task, dict):
                    logger.error(f"Task {i} is not a dictionary: {type(task)}")
                    return False

                requiredFields = ['id', 'objective', 'success_criteria']
                missingFields = [field for field in requiredFields if field not in task]
                if missingFields:
                    logger.error(f"Task {i} missing required fields: {missingFields}")
                    return False

                # Check for duplicate IDs by counting occurrences among all task IDs
                if allIds.count(task['id']) > 1:
                    logger.error(f"Task {i} has duplicate ID: {task['id']}")
                    return False

                dependencies = task.get('dependencies', [])
                if not isinstance(dependencies, list):
                    logger.error(f"Task {i} dependencies is not a list: {type(dependencies)}")
                    return False

                for dep in dependencies:
                    if dep not in taskIds and dep != 'task_0':
                        logger.error(f"Task {i} has invalid dependency: {dep} (available: {list(taskIds) + ['task_0']})")
                        return False

            logger.info(f"Task plan validation successful with {len(taskIds)} tasks")
            return True

        except Exception as e:
            logger.error(f"Error validating task plan: {str(e)}")
            return False

    def validateAction(self, actions: List[Dict[str, Any]], context) -> bool:
        """Validate action structure"""
        try:
            if not isinstance(actions, list):
                logger.error("Actions must be a list")
                return False
            if len(actions) == 0:
                logger.warning("No actions generated")
                return False
            for i, action in enumerate(actions):
                if not isinstance(action, dict):
                    logger.error(f"Action {i} must be a dictionary")
                    return False
                # Check for the compound action format (new) or the separate method/action format (old)
                if 'action' in action and '.' in str(action.get('action', '')):
                    # New compound action format: "method.action"
                    requiredFields = ['action', 'parameters', 'resultLabel']
                else:
                    # Old separate format: method + action fields
                    requiredFields = ['method', 'action', 'parameters', 'resultLabel']

                missingFields = []
                for field in requiredFields:
                    if field not in action or not action[field]:
                        missingFields.append(field)
                if missingFields:
                    logger.error(f"Action {i} missing required fields: {missingFields}")
                    return False
                resultLabel = action.get('resultLabel', '')
                if not resultLabel.startswith('round'):
                    logger.error(f"Action {i} result label must start with 'round': {resultLabel}")
                    return False
                parameters = action.get('parameters', {})
                if not isinstance(parameters, dict):
                    logger.error(f"Action {i} parameters must be a dictionary")
                    return False
            logger.info(f"Successfully validated {len(actions)} actions")
            return True
        except Exception as e:
            logger.error(f"Error validating actions: {str(e)}")
            return False
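
# Example (illustrative) action dict that passes validateAction in the compound format:
#   {"action": "ai.process",
#    "parameters": {"documentList": ["round1_task1_action1_input"]},
#    "resultLabel": "round1_task1_action1_summary"}
# Note that empty values also count as missing here, because the check uses
# `not action[field]`, so an empty parameters dict {} would be rejected.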

File diff suppressed because it is too large

1 modules/workflows/processing/modes/__init__.py Normal file
@@ -0,0 +1 @@
# Workflow mode implementations

833 modules/workflows/processing/modes/modeActionplan.py Normal file
@@ -0,0 +1,833 @@
# modeActionplan.py
# Actionplan mode implementation for workflows

import json
import logging
import uuid
from typing import List, Dict, Any
from modules.datamodels.datamodelChat import (
    TaskStep, TaskContext, TaskResult, ActionItem, TaskStatus,
    ActionResult, ReviewResult, ReviewContext
)
from modules.datamodels.datamodelChat import ChatWorkflow
from modules.datamodels.datamodelAi import AiCallOptions, OperationType, ProcessingMode, Priority
from modules.workflows.processing.modes.modeBase import BaseMode
from modules.workflows.processing.shared.executionState import TaskExecutionState
from modules.workflows.processing.shared.promptGenerationActionsActionplan import (
    generateActionDefinitionPrompt,
    generateResultReviewPrompt
)

logger = logging.getLogger(__name__)


class ActionplanMode(BaseMode):
    """Actionplan mode implementation - batch planning and sequential execution"""

    def __init__(self, services, workflow):
        super().__init__(services, workflow)

    async def generateActionItems(self, taskStep: TaskStep, workflow: ChatWorkflow,
                                  previousResults: List = None, enhancedContext: TaskContext = None) -> List[ActionItem]:
        """Generate actions for a given task step using the batch planning approach"""
        try:
            # Check workflow status before generating actions
            self._checkWorkflowStopped(workflow)

            retryInfo = f" (Retry #{enhancedContext.retry_count})" if enhancedContext and enhancedContext.retry_count > 0 else ""
            logger.info(f"Generating actions for task: {taskStep.objective}{retryInfo}")

            # Log criteria progress if this is a retry
            if enhancedContext and hasattr(enhancedContext, 'criteria_progress') and enhancedContext.criteria_progress is not None:
                progress = enhancedContext.criteria_progress
                logger.info(f"Retry attempt {enhancedContext.retry_count} - Criteria progress:")
                if progress.get('met_criteria'):
                    logger.info(f"  Met criteria: {', '.join(progress['met_criteria'])}")
                if progress.get('unmet_criteria'):
                    logger.warning(f"  Unmet criteria: {', '.join(progress['unmet_criteria'])}")

                # Show improvement trends
                if progress.get('attempt_history'):
                    recentAttempts = progress['attempt_history'][-2:]  # Last 2 attempts
                    if len(recentAttempts) >= 2:
                        prevScore = recentAttempts[0].get('quality_score', 0)
                        currScore = recentAttempts[1].get('quality_score', 0)
                        if currScore > prevScore:
                            logger.info(f"  Quality improving: {prevScore} -> {currScore}")
                        elif currScore < prevScore:
                            logger.warning(f"  Quality declining: {prevScore} -> {currScore}")
                        else:
                            logger.info(f"  Quality stable: {currScore}")
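
            # Example (illustrative): with attempt_history quality scores [4, 7],
            # the trend check above logs "Quality improving: 4 -> 7"; with [7, 4]
            # it warns "Quality declining: 7 -> 4".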

            # Enhanced retry context logging
            if enhancedContext and enhancedContext.retry_count > 0:
                logger.info("=== RETRY CONTEXT FOR ACTION GENERATION ===")
                logger.info(f"Retry Count: {enhancedContext.retry_count}")
                logger.debug(f"Previous Improvements: {enhancedContext.improvements}")
                logger.debug(f"Previous Review Result: {enhancedContext.previous_review_result}")
                logger.debug(f"Failure Patterns: {enhancedContext.failure_patterns}")
                logger.debug(f"Failed Actions: {enhancedContext.failed_actions}")
                logger.debug(f"Successful Actions: {enhancedContext.successful_actions}")
                logger.info("=== END RETRY CONTEXT ===")

            # Log that we're starting action generation
            logger.info("=== STARTING ACTION GENERATION ===")

            # Create a proper context object for action definition
            if enhancedContext and isinstance(enhancedContext, TaskContext):
                # Use the existing TaskContext if provided
                actionContext = TaskContext(
                    task_step=enhancedContext.task_step,
                    workflow=enhancedContext.workflow,
                    workflow_id=enhancedContext.workflow_id,
                    available_documents=enhancedContext.available_documents,
                    available_connections=enhancedContext.available_connections,
                    previous_results=enhancedContext.previous_results or previousResults or [],
                    previous_handover=enhancedContext.previous_handover,
                    improvements=enhancedContext.improvements or [],
                    retry_count=enhancedContext.retry_count or 0,
                    previous_action_results=enhancedContext.previous_action_results or [],
                    previous_review_result=enhancedContext.previous_review_result,
                    is_regeneration=enhancedContext.is_regeneration or False,
                    failure_patterns=enhancedContext.failure_patterns or [],
                    failed_actions=enhancedContext.failed_actions or [],
                    successful_actions=enhancedContext.successful_actions or [],
                    criteria_progress=enhancedContext.criteria_progress
                )
            else:
                # Create a new context from scratch
                actionContext = TaskContext(
                    task_step=taskStep,
                    workflow=workflow,
                    workflow_id=workflow.id,
                    available_documents=None,
                    available_connections=None,
                    previous_results=previousResults or [],
                    previous_handover=None,
                    improvements=[],
                    retry_count=0,
                    previous_action_results=[],
                    previous_review_result=None,
                    is_regeneration=False,
                    failure_patterns=[],
                    failed_actions=[],
                    successful_actions=[],
                    criteria_progress=None
                )

            # Check workflow status before calling AI service
            self._checkWorkflowStopped(workflow)

            # Build prompt bundle (template + placeholders)
            bundle = generateActionDefinitionPrompt(self.services, actionContext)
            actionPromptTemplate = bundle.prompt
            placeholders = bundle.placeholders

            # Trace action planning prompt
            self._writeTraceLog("Action Plan Prompt", actionPromptTemplate)
            self._writeTraceLog("Action Plan Placeholders", placeholders)

            # Centralized AI call: action planning (quality, detailed) with placeholders
            options = AiCallOptions(
                operationType=OperationType.GENERATE_PLAN,
                priority=Priority.QUALITY,
                compressPrompt=False,
                compressContext=False,
                processingMode=ProcessingMode.DETAILED,
                maxCost=0.10,
                maxProcessingTime=30
            )

            response = await self.services.ai.callAi(prompt=actionPromptTemplate, placeholders=placeholders, options=options)

            # Check if AI response is valid
            if not response:
                raise ValueError("AI service returned no response")

            # Log action response received
            logger.info("=== ACTION PLAN AI RESPONSE RECEIVED ===")
            logger.info(f"Response length: {len(response) if response else 0}")
            # Trace action planning response
            self._writeTraceLog("Action Plan Response", response)

            # Parse action response
            jsonStart = response.find('{')
            jsonEnd = response.rfind('}') + 1
            if jsonStart == -1 or jsonEnd == 0:
                raise ValueError("No JSON found in response")
            jsonStr = response[jsonStart:jsonEnd]

            try:
                actionData = json.loads(jsonStr)
            except Exception as e:
                logger.error(f"Error parsing action response JSON: {str(e)}")
                actionData = {}

            if 'actions' not in actionData:
                raise ValueError("Action response missing 'actions' field")

            actions = actionData['actions']
            if not actions:
                raise ValueError("Action response contains empty actions list")

            if not isinstance(actions, list):
                raise ValueError(f"Action response 'actions' field is not a list: {type(actions)}")

            if not self.validator.validateAction(actions, actionContext):
                logger.error("Generated actions failed validation")
                raise Exception("AI-generated actions failed validation - AI is required for action generation")

            # Convert to ActionItem objects
            taskActions = []
            for i, a in enumerate(actions):
                if not isinstance(a, dict):
                    logger.warning(f"Skipping invalid action {i+1}: not a dictionary")
                    continue

                # Handle the compound action format (new) or the separate method/action format (old)
                action_name = a.get('action', 'unknown')
                if '.' in action_name:
                    # New compound action format: "method.action"
                    method_name, action_name = action_name.split('.', 1)
                else:
                    # Old separate format: method + action fields
                    method_name = a.get('method', 'unknown')
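
                # Example (illustrative): {"action": "ai.process", ...} yields
                # method_name="ai", action_name="process", while the legacy form
                # {"method": "ai", "action": "process", ...} is read field by field.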
|
||||
taskAction = self._createActionItem({
|
||||
"execMethod": method_name,
|
||||
"execAction": action_name,
|
||||
"execParameters": a.get('parameters', {}),
|
||||
"execResultLabel": a.get('resultLabel', ''),
|
||||
"expectedDocumentFormats": a.get('expectedDocumentFormats', None),
|
||||
"status": TaskStatus.PENDING,
|
||||
# Extract user-friendly message if available
|
||||
"userMessage": a.get('userMessage', None)
|
||||
})
|
||||
|
||||
if taskAction:
|
||||
taskActions.append(taskAction)
|
||||
else:
|
||||
logger.warning(f"Skipping invalid action {i+1}: failed to create ActionItem")
|
||||
|
||||
validActions = [ta for ta in taskActions if ta]
|
||||
|
||||
if not validActions:
|
||||
raise ValueError("No valid actions could be created from AI response")
|
||||
|
||||
return validActions
|
||||
except Exception as e:
|
||||
logger.error(f"Error in generateActionItems: {str(e)}")
|
||||
return []
|
||||
|
||||
|
||||
    async def executeTask(self, taskStep: TaskStep, workflow: ChatWorkflow, context: TaskContext,
                          taskIndex: int = None, totalTasks: int = None) -> TaskResult:
        """Execute all actions for a task step using Actionplan mode"""
        logger.info(f"=== STARTING TASK {taskIndex or '?'}: {taskStep.objective} ===")

        # Update workflow object before executing task
        if taskIndex is not None:
            self._updateWorkflowBeforeExecutingTask(taskIndex)

        # Update workflow context for this task
        if taskIndex is not None:
            self.services.workflow.setWorkflowContext(task_number=taskIndex)

        # Create task start message
        await self.messageCreator.createTaskStartMessage(taskStep, workflow, taskIndex, totalTasks)

        state = TaskExecutionState(taskStep)
        retryContext = context
        maxRetries = state.max_retries

        for attempt in range(maxRetries):
            logger.info(f"Task execution attempt {attempt+1}/{maxRetries}")

            # Check workflow status before starting task execution
            self._checkWorkflowStopped(workflow)

            # Update retry context with current attempt information
            if retryContext:
                retryContext.retry_count = attempt + 1

            actions = await self.generateActionItems(taskStep, workflow,
                                                     previousResults=retryContext.previous_results,
                                                     enhancedContext=retryContext)

            # Log total actions count for this task
            totalActions = len(actions) if actions else 0
            logger.info(f"Task {taskIndex or '?'} has {totalActions} actions")

            # Update workflow object after action planning
            self._updateWorkflowAfterActionPlanning(totalActions)
            self._setWorkflowTotals(totalActions=totalActions)

            if not actions:
                logger.error("No actions defined for task step, aborting task execution")
                break

            actionResults = []
            for actionIdx, action in enumerate(actions):
                # Check workflow status before each action execution
                self._checkWorkflowStopped(workflow)

                # Update workflow object before executing action
                actionNumber = actionIdx + 1
                self._updateWorkflowBeforeExecutingAction(actionNumber)

                # Update workflow context for this action
                self.services.workflow.setWorkflowContext(action_number=actionNumber)

                # Log action start
                logger.info(f"Task {taskIndex} - Starting action {actionNumber}/{totalActions}")

                # Create action start message
                actionStartMessage = {
                    "workflowId": workflow.id,
                    "role": "assistant",
                    "message": f"⚡ **Action {actionNumber}/{totalActions}** (Method {action.execMethod}.{action.execAction})",
                    "status": "step",
                    "sequenceNr": len(workflow.messages) + 1,
                    "publishedAt": self.services.utils.getUtcTimestamp(),
                    "documentsLabel": f"action_{actionNumber}_start",
                    "documents": [],
                    "actionProgress": "running",
                    "roundNumber": workflow.currentRound,
                    "taskNumber": taskIndex,
                    "actionNumber": actionNumber
                }

                # Add user-friendly message if available
                if action.userMessage:
                    actionStartMessage["message"] += f"\n\n💬 {action.userMessage}"

                message = self.services.interfaceDbChat.createMessage(actionStartMessage)
                if message:
                    workflow.messages.append(message)
                    logger.info(f"Action start message created for action {actionNumber}")

                # Execute single action
                result = await self.actionExecutor.executeSingleAction(action, workflow, taskStep,
                                                                       taskIndex, actionNumber, totalActions)
                actionResults.append(result)

                if result.success:
                    state.addSuccessfulAction(result)
                else:
                    state.addFailedAction(result)

            # Check workflow status before review
            self._checkWorkflowStopped(workflow)

            reviewResult = await self._reviewTaskCompletion(taskStep, actions, actionResults, workflow)
            success = reviewResult.status == 'success'
            feedback = reviewResult.reason
            error = None if success else reviewResult.reason

            if success:
                logger.info(f"=== TASK {taskIndex or '?'} COMPLETED SUCCESSFULLY: {taskStep.objective} ===")

                # Create task completion message
                await self.messageCreator.createTaskCompletionMessage(taskStep, workflow, taskIndex, totalTasks, reviewResult)

                return TaskResult(
                    taskId=taskStep.id,
                    status=TaskStatus.COMPLETED,
                    success=True,
                    feedback=feedback,
                    error=None
                )

            elif reviewResult.status == 'retry' and state.canRetry():
                logger.warning(f"Task step '{taskStep.objective}' requires retry: {reviewResult.improvements}")

                # Enhanced logging of criteria status
                if reviewResult.met_criteria:
                    logger.info(f"Met criteria: {', '.join(reviewResult.met_criteria)}")
                if reviewResult.unmet_criteria:
                    logger.warning(f"Unmet criteria: {', '.join(reviewResult.unmet_criteria)}")

                state.incrementRetryCount()

                # Update retry context with retry information and criteria tracking
                if retryContext:
                    retryContext.retry_count = state.retry_count
                    retryContext.improvements = reviewResult.improvements
                    retryContext.previous_action_results = actionResults
                    retryContext.previous_review_result = reviewResult
                    retryContext.is_regeneration = True
                    retryContext.failure_patterns = state.getFailurePatterns()
                    retryContext.failed_actions = state.failed_actions
                    retryContext.successful_actions = state.successful_actions

                    # Track criteria progress across retries
                    if not hasattr(retryContext, 'criteria_progress'):
                        retryContext.criteria_progress = {
                            'met_criteria': set(),
                            'unmet_criteria': set(),
                            'attempt_history': []
                        }

                    # Update criteria progress
                    if reviewResult.met_criteria:
                        retryContext.criteria_progress['met_criteria'].update(reviewResult.met_criteria)
                    if reviewResult.unmet_criteria:
                        retryContext.criteria_progress['unmet_criteria'].update(reviewResult.unmet_criteria)

                    # Record this attempt's criteria status
                    attemptRecord = {
                        'attempt': state.retry_count,
                        'met_criteria': reviewResult.met_criteria or [],
                        'unmet_criteria': reviewResult.unmet_criteria or [],
                        'quality_score': reviewResult.quality_score,
                        'improvements': reviewResult.improvements or []
                    }
                    retryContext.criteria_progress['attempt_history'].append(attemptRecord)

                # Create retry message
                await self.messageCreator.createRetryMessage(taskStep, workflow, taskIndex, reviewResult)

                continue
            else:
                logger.error(f"=== TASK {taskIndex or '?'} FAILED: {taskStep.objective} after {attempt+1} attempts ===")

                # Create error message
                await self.messageCreator.createErrorMessage(taskStep, workflow, taskIndex, reviewResult.reason)

                return TaskResult(
                    taskId=taskStep.id,
                    status=TaskStatus.FAILED,
                    success=False,
                    feedback=feedback,
                    error=reviewResult.reason if reviewResult and hasattr(reviewResult, 'reason') else "Task failed after retry attempts"
                )

        logger.error(f"=== TASK {taskIndex or '?'} FAILED AFTER ALL RETRIES: {taskStep.objective} ===")

        # Create final error message
        await self.messageCreator.createErrorMessage(taskStep, workflow, taskIndex, "Task failed after all retries")

        return TaskResult(
            taskId=taskStep.id,
            status=TaskStatus.FAILED,
            success=False,
            feedback="Task failed after all retries.",
            error="Task failed after all retries."
        )

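    # Illustrative sketch (not produced by the code at runtime): the shape that
    # retryContext.criteria_progress converges to after two retry attempts. The
    # criteria names and scores below are hypothetical examples; only the keys
    # mirror the tracking logic above.
    _EXAMPLE_CRITERIA_PROGRESS = {
        'met_criteria': {'document created'},
        'unmet_criteria': {'summary under 200 words'},
        'attempt_history': [
            {'attempt': 1, 'met_criteria': [], 'unmet_criteria': ['document created'],
             'quality_score': 3, 'improvements': ['retry with an explicit output format']},
            {'attempt': 2, 'met_criteria': ['document created'], 'unmet_criteria': ['summary under 200 words'],
             'quality_score': 6, 'improvements': ['shorten the summary']}
        ]
    }
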
    async def _reviewTaskCompletion(self, taskStep: TaskStep, taskActions: List[ActionItem],
                                    actionResults: List[ActionResult], workflow: ChatWorkflow) -> ReviewResult:
        """Review task completion and determine success/failure/retry"""
        try:
            # Check workflow status before reviewing task completion
            self._checkWorkflowStopped(workflow)

            logger.info("=== STARTING TASK COMPLETION REVIEW ===")
            logger.info(f"Task: {taskStep.objective}")
            logger.info(f"Actions executed: {len(taskActions) if taskActions else 0}")
            logger.info(f"Action results: {len(actionResults) if actionResults else 0}")

            # Create a proper context object for result review
            reviewContext = ReviewContext(
                task_step=taskStep,
                task_actions=taskActions,
                action_results=actionResults,
                step_result={
                    'successful_actions': sum(1 for result in actionResults if result.success),
                    'total_actions': len(actionResults),
                    'results': [self._extractResultText(result) for result in actionResults if result.success],
                    'errors': [result.error for result in actionResults if not result.success],
                    'documents': [
                        {
                            'action_index': i,
                            'documents_count': len(result.documents) if result.documents else 0,
                            'documents': result.documents if result.documents else []
                        }
                        for i, result in enumerate(actionResults)
                    ]
                },
                workflow_id=workflow.id,
                previous_results=[]
            )

            # Check workflow status before calling the AI service
            self._checkWorkflowStopped(workflow)

            # Build prompt bundle for result review
            bundle = generateResultReviewPrompt(reviewContext)
            promptTemplate = bundle.prompt
            placeholders = bundle.placeholders

            # Log the result review prompt sent to the AI
            logger.info("=== RESULT REVIEW PROMPT SENT TO AI ===")
            logger.info(f"Task: {taskStep.objective}")
            logger.info(f"Action Results Count: {len(reviewContext.action_results) if reviewContext.action_results else 0}")
            logger.info(f"Task Actions Count: {len(reviewContext.task_actions) if reviewContext.task_actions else 0}")
            # Trace result review prompt
            self._writeTraceLog("Result Review Prompt", promptTemplate)
            self._writeTraceLog("Result Review Placeholders", placeholders)

            # Centralized AI call: result validation (balanced analysis) with placeholders
            options = AiCallOptions(
                operationType=OperationType.ANALYSE_CONTENT,
                priority=Priority.BALANCED,
                compressPrompt=True,
                compressContext=False,
                processingMode=ProcessingMode.ADVANCED,
                maxCost=0.05,
                maxProcessingTime=30
            )

            response = await self.services.ai.callAi(prompt=promptTemplate, placeholders=placeholders, options=options)

            # Log the result review response received
            logger.info("=== RESULT REVIEW AI RESPONSE RECEIVED ===")
            logger.info(f"Response length: {len(response) if response else 0}")
            # Trace result review response
            self._writeTraceLog("Result Review Response", response)

            # Parse review response
            jsonStart = response.find('{')
            jsonEnd = response.rfind('}') + 1
            if jsonStart == -1 or jsonEnd == 0:
                raise ValueError("No JSON found in review response")
            jsonStr = response[jsonStart:jsonEnd]

            try:
                review = json.loads(jsonStr)
            except Exception as e:
                logger.error(f"Error parsing review response JSON: {str(e)}")
                review = {}
            # A missing 'status' field is unrecoverable, so fail fast here;
            # the remaining fields get sensible defaults below.
            if 'status' not in review:
                raise ValueError("Review response missing 'status' field")
            review.setdefault('reason', 'No reason provided')
            review.setdefault('quality_score', 5)

            # Ensure improvements is a list
            improvements = review.get('improvements', [])
            if isinstance(improvements, str):
                # Wrap a single improvement string in a list
                improvements = [improvements.strip()] if improvements.strip() else []
            elif not isinstance(improvements, list):
                improvements = []

            # Ensure all list fields are properly typed
            metCriteria = review.get('met_criteria', [])
            if not isinstance(metCriteria, list):
                metCriteria = []

            unmetCriteria = review.get('unmet_criteria', [])
            if not isinstance(unmetCriteria, list):
                unmetCriteria = []

            reviewResult = ReviewResult(
                status=review.get('status', 'unknown'),
                reason=review.get('reason', 'No reason provided'),
                improvements=improvements,
                quality_score=review.get('quality_score', 5),
                missing_outputs=[],
                met_criteria=metCriteria,
                unmet_criteria=unmetCriteria,
                confidence=review.get('confidence', 0.5),
                # Extract user-friendly message if available
                userMessage=review.get('userMessage', None)
            )

            # Enhanced validation logging
            logger.info(f"VALIDATION RESULT - Task: '{taskStep.objective}' - Status: {reviewResult.status.upper()}, Quality: {reviewResult.quality_score}/10")
            if reviewResult.status == 'success':
                logger.info("VALIDATION SUCCESS - Task completed successfully")
                if reviewResult.met_criteria:
                    logger.info(f"Met criteria: {', '.join(reviewResult.met_criteria)}")
            elif reviewResult.status == 'retry':
                logger.warning(f"VALIDATION RETRY - Task requires retry: {reviewResult.improvements}")
                if reviewResult.unmet_criteria:
                    logger.warning(f"Unmet criteria: {', '.join(reviewResult.unmet_criteria)}")
            else:
                logger.error(f"VALIDATION FAILED - Task failed: {reviewResult.reason}")

            logger.info("=== TASK COMPLETION REVIEW FINISHED ===")
            logger.info(f"Final Status: {reviewResult.status}")
            logger.info(f"Quality Score: {reviewResult.quality_score}/10")
            logger.info(f"Improvements: {reviewResult.improvements}")
            logger.info("=== END REVIEW ===")

            return reviewResult
        except Exception as e:
            logger.error(f"Error in reviewTaskCompletion: {str(e)}")
            return ReviewResult(
                status='failed',
                reason=str(e),
                quality_score=0
            )

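    # Illustrative sketch of the review JSON that _reviewTaskCompletion expects
    # from the AI. The field values are hypothetical; only the keys are taken
    # from the parsing logic above.
    _EXAMPLE_REVIEW_RESPONSE = {
        "status": "retry",  # 'success' | 'retry' | anything else is treated as failure
        "reason": "Summary missing key figures",
        "quality_score": 6,
        "improvements": ["Include revenue figures in the summary"],
        "met_criteria": ["document created"],
        "unmet_criteria": ["figures included"],
        "confidence": 0.7,
        "userMessage": "I need one more pass to add the figures."
    }
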
    def _createActionItem(self, actionData: Dict[str, Any]) -> ActionItem:
        """Creates a new task action"""
        try:
            import uuid

            # Ensure ID is present
            if "id" not in actionData or not actionData["id"]:
                actionData["id"] = f"action_{uuid.uuid4()}"

            # Ensure required fields
            if "status" not in actionData:
                actionData["status"] = TaskStatus.PENDING

            if "execMethod" not in actionData:
                logger.error("execMethod is required for task action")
                return None

            if "execAction" not in actionData:
                logger.error("execAction is required for task action")
                return None

            if "execParameters" not in actionData:
                actionData["execParameters"] = {}

            # Use generic field separation based on the ActionItem model
            simpleFields, objectFields = self.services.interfaceDbChat._separate_object_fields(ActionItem, actionData)

            # Create action in database
            createdAction = self.services.interfaceDbChat.db.recordCreate(ActionItem, simpleFields)

            # Convert to ActionItem model
            return ActionItem(
                id=createdAction["id"],
                execMethod=createdAction["execMethod"],
                execAction=createdAction["execAction"],
                execParameters=createdAction.get("execParameters", {}),
                execResultLabel=createdAction.get("execResultLabel"),
                expectedDocumentFormats=createdAction.get("expectedDocumentFormats"),
                status=createdAction.get("status", TaskStatus.PENDING),
                error=createdAction.get("error"),
                retryCount=createdAction.get("retryCount", 0),
                retryMax=createdAction.get("retryMax", 3),
                processingTime=createdAction.get("processingTime"),
                timestamp=float(createdAction.get("timestamp", self.services.utils.getUtcTimestamp())),
                result=createdAction.get("result"),
                resultDocuments=createdAction.get("resultDocuments", []),
                userMessage=createdAction.get("userMessage")
            )

        except Exception as e:
            logger.error(f"Error creating task action: {str(e)}")
            return None

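    # Minimal usage sketch for _createActionItem (the parameter values are
    # hypothetical): only execMethod and execAction are mandatory; id, status
    # and execParameters are filled in with defaults when absent.
    #
    #   action = self._createActionItem({
    #       "execMethod": "ai",
    #       "execAction": "webResearch",
    #       "execParameters": {"query": "quarterly revenue"}
    #   })
    #   if action is None:
    #       logger.error("Action creation failed")
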
    def _extractResultText(self, result: ActionResult) -> str:
        """Extract result text from ActionResult documents"""
        if not result.success or not result.documents:
            return ""

        # Extract text directly from ActionDocument objects
        resultParts = []
        for doc in result.documents:
            if hasattr(doc, 'documentData') and doc.documentData:
                resultParts.append(str(doc.documentData))

        # Join all document results with separators
        return "\n\n---\n\n".join(resultParts) if resultParts else ""

    def _updateWorkflowBeforeExecutingTask(self, taskNumber: int):
        """Update workflow object before executing a task"""
        try:
            updateData = {
                "currentTask": taskNumber,
                "currentAction": 0,
                "totalActions": 0
            }

            # Update workflow object
            self.workflow.currentTask = taskNumber
            self.workflow.currentAction = 0
            self.workflow.totalActions = 0

            # Update in database
            self.services.interfaceDbChat.updateWorkflow(self.workflow.id, updateData)
            logger.info(f"Updated workflow {self.workflow.id} before executing task {taskNumber}: {updateData}")

        except Exception as e:
            logger.error(f"Error updating workflow before executing task: {str(e)}")

    def _updateWorkflowAfterActionPlanning(self, totalActions: int):
        """Update workflow object after action planning for the current task"""
        try:
            updateData = {
                "totalActions": totalActions
            }

            # Update workflow object
            self.workflow.totalActions = totalActions

            # Update in database
            self.services.interfaceDbChat.updateWorkflow(self.workflow.id, updateData)
            logger.info(f"Updated workflow {self.workflow.id} after action planning: {updateData}")

        except Exception as e:
            logger.error(f"Error updating workflow after action planning: {str(e)}")

    def _updateWorkflowBeforeExecutingAction(self, actionNumber: int):
        """Update workflow object before executing an action"""
        try:
            updateData = {
                "currentAction": actionNumber
            }

            # Update workflow object
            self.workflow.currentAction = actionNumber

            # Update in database
            self.services.interfaceDbChat.updateWorkflow(self.workflow.id, updateData)
            logger.info(f"Updated workflow {self.workflow.id} before executing action {actionNumber}: {updateData}")

        except Exception as e:
            logger.error(f"Error updating workflow before executing action: {str(e)}")

    def _setWorkflowTotals(self, totalTasks: int = None, totalActions: int = None):
        """Set total counts for workflow progress tracking and update the database"""
        try:
            updateData = {}

            if totalTasks is not None:
                self.workflow.totalTasks = totalTasks
                updateData["totalTasks"] = totalTasks

            if totalActions is not None:
                self.workflow.totalActions = totalActions
                updateData["totalActions"] = totalActions

            # Update workflow object in database if we have changes
            if updateData:
                self.services.interfaceDbChat.updateWorkflow(self.workflow.id, updateData)
                logger.info(f"Updated workflow {self.workflow.id} totals in database: {updateData}")

            logger.debug(f"Updated workflow totals: Tasks {getattr(self.workflow, 'totalTasks', 'N/A')}, Actions {getattr(self.workflow, 'totalActions', 'N/A')}")
        except Exception as e:
            logger.error(f"Error setting workflow totals: {str(e)}")

    def _writeTraceLog(self, contextText: str, data: Any) -> None:
        """Write trace data to the configured trace file if in debug mode, with improved JSON formatting"""
        try:
            import os
            import json
            from datetime import datetime, UTC

            # Only write if the logger is in debug mode
            if logger.level > logging.DEBUG:
                return

            # Get log directory from configuration
            logDir = self.services.utils.configGet("APP_LOGGING_LOG_DIR", "./")
            if not os.path.isabs(logDir):
                # If relative path, make it relative to the gateway directory
                gatewayDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
                logDir = os.path.join(gatewayDir, logDir)

            # Ensure log directory exists
            os.makedirs(logDir, exist_ok=True)

            # Create trace file path
            traceFile = os.path.join(logDir, "log_trace.log")

            # Format the trace entry with better structure
            timestamp = datetime.fromtimestamp(self.services.utils.getUtcTimestamp(), UTC).strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]

            # Create a structured trace entry
            traceEntry = f"[{timestamp}] {contextText}\n"
            traceEntry += "=" * 80 + "\n"

            # Add data if provided, with improved formatting
            if data is not None:
                try:
                    if isinstance(data, (dict, list)):
                        # Format as pretty JSON
                        jsonStr = json.dumps(data, indent=2, default=str, ensure_ascii=False, sort_keys=False)
                        traceEntry += f"JSON Data:\n{jsonStr}\n"
                    elif isinstance(data, str):
                        # For string data, try to parse as JSON first, then fall back to plain text
                        try:
                            parsed = json.loads(data)
                            jsonStr = json.dumps(parsed, indent=2, default=str, ensure_ascii=False, sort_keys=False)
                            traceEntry += f"JSON Data (parsed from string):\n{jsonStr}\n"
                        except (json.JSONDecodeError, TypeError):
                            # Not valid JSON, show as plain text with proper line breaks
                            formatted_data = data.replace('\\n', '\n')
                            traceEntry += f"Text Data:\n{formatted_data}\n"
                    else:
                        # For other types, convert to string and try to parse as JSON
                        dataStr = str(data)
                        try:
                            parsed = json.loads(dataStr)
                            jsonStr = json.dumps(parsed, indent=2, default=str, ensure_ascii=False, sort_keys=False)
                            traceEntry += f"JSON Data (parsed from object):\n{jsonStr}\n"
                        except (json.JSONDecodeError, TypeError):
                            # Not valid JSON, show as plain text with proper line breaks
                            formatted_data = dataStr.replace('\\n', '\n')
                            traceEntry += f"Object Data:\n{formatted_data}\n"
                except Exception:
                    # Fall back to a simple string representation
                    traceEntry += f"Data (fallback): {str(data)}\n"
            else:
                traceEntry += "No data provided\n"

            traceEntry += "=" * 80 + "\n\n"

            # Write to trace file
            with open(traceFile, "a", encoding="utf-8") as f:
                f.write(traceEntry)

        except Exception:
            # Don't log trace errors to avoid recursion
            pass

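# Illustrative only: a trace entry as _writeTraceLog would append it to
# log_trace.log, assuming a small dict payload. The timestamp is made up;
# the 80-character '=' separators follow the code above.
#
# [2025-01-01 12:00:00.000] Result Review Response
# ================================================================================
# JSON Data:
# {
#   "status": "success",
#   "quality_score": 8
# }
# ================================================================================
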
63
modules/workflows/processing/modes/modeBase.py
Normal file
@ -0,0 +1,63 @@
# modeBase.py
# Abstract base class for workflow modes

from abc import ABC, abstractmethod
import logging
from typing import List, Dict, Any
from modules.datamodels.datamodelChat import TaskStep, TaskContext, TaskResult, ActionItem
from modules.datamodels.datamodelChat import ChatWorkflow
from modules.workflows.processing.core.taskPlanner import TaskPlanner
from modules.workflows.processing.core.actionExecutor import ActionExecutor
from modules.workflows.processing.core.messageCreator import MessageCreator
from modules.workflows.processing.core.validator import WorkflowValidator

logger = logging.getLogger(__name__)

class BaseMode(ABC):
    """Abstract base class for workflow execution modes"""

    def __init__(self, services, workflow):
        self.services = services
        self.workflow = workflow
        self.taskPlanner = TaskPlanner(services)
        self.actionExecutor = ActionExecutor(services)
        self.messageCreator = MessageCreator(services)
        self.validator = WorkflowValidator(services)

    def _checkWorkflowStopped(self, workflow):
        """Check if the workflow has been stopped by the user and raise an exception if so"""
        try:
            # Get the current workflow status from the database to avoid stale data
            current_workflow = self.services.interfaceDbChat.getWorkflow(workflow.id)
            if current_workflow and current_workflow.status == "stopped":
                logger.info("Workflow stopped by user, aborting execution")
                raise Exception("Workflow was stopped by user")
        except Exception as e:
            # If this was the explicit stop signal, re-raise to abort immediately
            if str(e) == "Workflow was stopped by user":
                raise
            # If we can't get the current status due to other database issues, fall back to the in-memory object
            logger.warning(f"Could not check current workflow status from database: {str(e)}")
            if workflow and workflow.status == "stopped":
                logger.info("Workflow stopped by user (from in-memory object), aborting execution")
                raise Exception("Workflow was stopped by user")

    @abstractmethod
    async def executeTask(self, taskStep: TaskStep, workflow: ChatWorkflow, context: TaskContext,
                          taskIndex: int = None, totalTasks: int = None) -> TaskResult:
        """Execute a task step - must be implemented by concrete modes"""
        pass

    @abstractmethod
    async def generateActionItems(self, taskStep: TaskStep, workflow: ChatWorkflow,
                                  previousResults: List = None, enhancedContext: TaskContext = None) -> List[ActionItem]:
        """Generate actions for a task step - must be implemented by concrete modes"""
        pass

    async def generateTaskPlan(self, userInput: str, workflow: ChatWorkflow):
        """Generate task plan - common to all modes"""
        return await self.taskPlanner.generateTaskPlan(userInput, workflow)

    async def createTaskPlanMessage(self, taskPlan, workflow: ChatWorkflow):
        """Create task plan message - common to all modes"""
        return await self.messageCreator.createTaskPlanMessage(taskPlan, workflow)

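# Minimal sketch (illustrative, not part of this file): how a concrete mode
# would subclass BaseMode. The class name and method bodies below are
# hypothetical; a real mode would also import TaskStatus for the result status.
#
# class EchoMode(BaseMode):
#     async def generateActionItems(self, taskStep, workflow, previousResults=None, enhancedContext=None):
#         return []  # no batch planning in this toy mode
#
#     async def executeTask(self, taskStep, workflow, context, taskIndex=None, totalTasks=None):
#         self._checkWorkflowStopped(workflow)
#         return TaskResult(taskId=taskStep.id, status=TaskStatus.COMPLETED,
#                           success=True, feedback="echoed", error=None)
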
938
modules/workflows/processing/modes/modeReact.py
Normal file
@ -0,0 +1,938 @@
# modeReact.py
# React mode implementation for workflows

import json
import logging
import re
import time
from datetime import datetime, timezone
from typing import List, Dict, Any
from modules.datamodels.datamodelChat import (
    TaskStep, TaskContext, TaskResult, ActionItem, TaskStatus,
    ActionResult
)
from modules.datamodels.datamodelChat import ChatWorkflow
from modules.datamodels.datamodelAi import AiCallOptions, OperationType, ProcessingMode, Priority
from modules.workflows.processing.modes.modeBase import BaseMode
from modules.workflows.processing.shared.executionState import TaskExecutionState, shouldContinue
from modules.workflows.processing.shared.promptGenerationActionsReact import (
    generateReactPlanSelectionPrompt,
    generateReactParametersPrompt,
    generateReactRefinementPrompt
)
from modules.workflows.processing.shared.placeholderFactory import extractReviewContent
from modules.workflows.processing.adaptive import IntentAnalyzer, ContentValidator, LearningEngine, ProgressTracker

logger = logging.getLogger(__name__)

class ReactMode(BaseMode):
    """React mode implementation - iterative plan-act-observe-refine loop"""

    def __init__(self, services, workflow):
        super().__init__(services, workflow)
        # Initialize adaptive components
        self.intentAnalyzer = IntentAnalyzer()
        self.contentValidator = ContentValidator()
        self.learningEngine = LearningEngine()
        self.progressTracker = ProgressTracker()
        self.currentIntent = None
        # Placeholder service no longer used; prompts are generated directly

    async def generateActionItems(self, taskStep: TaskStep, workflow: ChatWorkflow,
                                  previousResults: List = None, enhancedContext: TaskContext = None) -> List[ActionItem]:
        """React mode doesn't use batch action generation - actions are generated iteratively"""
        # React mode generates actions one at a time in the execution loop
        return []

    async def executeTask(self, taskStep: TaskStep, workflow: ChatWorkflow, context: TaskContext,
                          taskIndex: int = None, totalTasks: int = None) -> TaskResult:
        """Execute task using React mode - an iterative plan-act-observe-refine loop"""
        logger.info(f"=== STARTING TASK {taskIndex or '?'}: {taskStep.objective} ===")

        # Analyze user intent with both the original prompt and the task objective.
        # Get the original user prompt from services (clean and reliable).
        original_prompt = self.services.currentUserPrompt if self.services and hasattr(self.services, 'currentUserPrompt') else taskStep.objective
        combined_context = f"Original request: {original_prompt}\n\nCurrent task: {taskStep.objective}"

        self.currentIntent = self.intentAnalyzer.analyzeUserIntent(combined_context, context)
        logger.info(f"Intent analysis (original + task): {self.currentIntent}")

        # Reset progress tracking for the new task
        self.progressTracker.reset()

        # Update workflow object before executing task
        if taskIndex is not None:
            self._updateWorkflowBeforeExecutingTask(taskIndex)

        # Update workflow context for this task
        if taskIndex is not None:
            self.services.workflow.setWorkflowContext(task_number=taskIndex)

        # Create task start message
        await self.messageCreator.createTaskStartMessage(taskStep, workflow, taskIndex, totalTasks)

        state = TaskExecutionState(taskStep)
        # React mode uses max_steps instead of max_retries
        state.max_steps = max(1, int(getattr(workflow, 'maxSteps', 5)))
        logger.info(f"Using React mode execution with max_steps: {state.max_steps}")

        step = 1
        lastReviewDict = None

        while step <= state.max_steps:
            self._checkWorkflowStopped(workflow)

            # Update workflow['currentAction'] for the UI
            self._updateWorkflowBeforeExecutingAction(step)
            self.services.workflow.setWorkflowContext(action_number=step)

            try:
                t0 = time.time()
                selection = await self._planSelect(context)
                logger.info(f"React step {step}: Selected action: {selection}")

                # The action intention message is handled by the standard message creator in _actExecute

                result = await self._actExecute(context, selection, taskStep, workflow, step)
                observation = self._observeBuild(result)
                # Attach deterministic label for clarity
                observation['resultLabel'] = result.resultLabel

                # Content validation (default to an empty result so the feedback
                # and progress calls below never see an unbound name)
                validationResult = {}
                if self.currentIntent and result.documents:
                    validationResult = self.contentValidator.validateContent(result.documents, self.currentIntent)
                    observation['contentValidation'] = validationResult
                    logger.info(f"Content validation: {validationResult['overallSuccess']} (quality: {validationResult['qualityScore']:.2f})")

                # Learn from feedback
                feedback = self._collectFeedback(result, validationResult, self.currentIntent)
                self.learningEngine.learnFromFeedback(feedback, context, self.currentIntent)

                # Update progress
                self.progressTracker.updateProgress(result, validationResult, self.currentIntent)

                decision = await self._refineDecide(context, observation)

                # Store refinement decision in context for the next iteration
                if not hasattr(context, 'previous_review_result') or context.previous_review_result is None:
                    context.previous_review_result = []
                if decision:  # Only append if decision is not None
                    context.previous_review_result.append(decision)

                # Update context with learnings from this step
                if decision and decision.get('reason'):
                    if not hasattr(context, 'improvements'):
                        context.improvements = []
                    context.improvements.append(f"Step {step}: {decision.get('reason')}")

                # Telemetry: simple duration per step
                duration = time.time() - t0
                self.services.interfaceDbChat.createLog({
                    "workflowId": workflow.id,
                    "message": f"react_step_duration_sec={duration:.3f}",
                    "type": "info"
                })
                lastReviewDict = decision

                # The action completion message is handled by the standard message creator in _actExecute

            except Exception as e:
                logger.error(f"React step {step} error: {e}")
                break

            # Adaptive stopping logic
            progressState = self.progressTracker.getCurrentProgress()
            continueByProgress = self.progressTracker.shouldContinue(progressState, observation.get('contentValidation', {}))
            continueByReview = shouldContinue(observation, lastReviewDict, step, state.max_steps)

            if not continueByProgress or not continueByReview:
                logger.info(f"Stopping at step {step}: progress={continueByProgress}, review={continueByReview}")
                break
            step += 1

        # Summarize task result for React mode
        status = TaskStatus.COMPLETED
        success = True
        feedback = lastReviewDict.get('reason') if lastReviewDict and isinstance(lastReviewDict, dict) else 'Completed'
        if lastReviewDict and isinstance(lastReviewDict, dict) and lastReviewDict.get('decision') == 'stop':
            success = True

        # Create task completion message
        await self.messageCreator.createTaskCompletionMessage(taskStep, workflow, taskIndex, totalTasks,
                                                              type('ReviewResult', (), {'reason': feedback, 'met_criteria': [], 'quality_score': 8})())

        return TaskResult(
            taskId=taskStep.id,
            status=status,
            success=success,
            feedback=feedback,
            error=None if success else feedback
        )

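    # Note on the dynamic type above: type('ReviewResult', (), {...})() builds a
    # one-off stand-in object carrying just the attributes the message creator
    # reads. An equivalent, more explicit sketch (illustrative only):
    #
    #   class _StubReview:
    #       reason = feedback
    #       met_criteria = []
    #       quality_score = 8
    #   await self.messageCreator.createTaskCompletionMessage(taskStep, workflow,
    #                                                         taskIndex, totalTasks, _StubReview())
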
    async def _planSelect(self, context: TaskContext) -> Dict[str, Any]:
        """Plan: select exactly one action. Returns {"action": "method.action", ...} with no 'parameters' key (Stage 1)"""
        bundle = generateReactPlanSelectionPrompt(self.services, context)
        promptTemplate = bundle.prompt
        placeholders = bundle.placeholders

        self._writeTraceLog("React Plan Selection Prompt", promptTemplate)
        self._writeTraceLog("React Plan Selection Placeholders", placeholders)

        # Centralized AI call for plan selection (use plan generation quality)
        options = AiCallOptions(
            operationType=OperationType.GENERATE_PLAN,
            priority=Priority.QUALITY,
            compressPrompt=False,
            compressContext=False,
            processingMode=ProcessingMode.DETAILED,
            maxCost=0.10,
            maxProcessingTime=30
        )

        response = await self.services.ai.callAi(
            prompt=promptTemplate,
            placeholders=placeholders,
            options=options
        )
        self._writeTraceLog("React Plan Selection Response", response)
        jsonStart = response.find('{') if response else -1
        jsonEnd = response.rfind('}') + 1 if response else 0
        if jsonStart == -1 or jsonEnd == 0:
            raise ValueError("No JSON in selection response")
        selection = json.loads(response[jsonStart:jsonEnd])
        if 'action' not in selection or not isinstance(selection['action'], str):
            raise ValueError("Selection missing 'action' as string")
        # Enforce spec: Stage 1 must NOT include 'parameters'
        if 'parameters' in selection:
            # Remove to avoid accidental carryover
            try:
                del selection['parameters']
            except Exception:
                selection['parameters'] = None
        return selection

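    # The brace-slicing pattern above (response.find('{') .. response.rfind('}') + 1)
    # recurs several times in this file. A shared helper would look roughly like
    # this (illustrative sketch; '_extractJsonObject' does not exist in the codebase):
    #
    #   def _extractJsonObject(self, text: str) -> dict:
    #       if not text:
    #           return {}
    #       start, end = text.find('{'), text.rfind('}') + 1
    #       if start == -1 or end == 0:
    #           return {}
    #       try:
    #           return json.loads(text[start:end])
    #       except json.JSONDecodeError:
    #           return {}
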
    async def _actExecute(self, context: TaskContext, selection: Dict[str, Any], taskStep: TaskStep,
                          workflow: ChatWorkflow, stepIndex: int) -> ActionResult:
        """Act: request minimal parameters, then execute the selected action"""
        compoundActionName = selection.get('action', '')

        # Parse compound action name (e.g., "ai.webResearch" -> method="ai", action="webResearch")
        if '.' not in compoundActionName:
            raise ValueError(f"Invalid compound action name: {compoundActionName}. Expected format: method.action")

        methodName, actionName = compoundActionName.split('.', 1)

        # Always request parameters in Stage 2 (spec: Stage 1 must not provide them)
        logger.info("Requesting parameters in Stage 2 based on Stage 1 outputs")

        # Create a permissive Stage 2 context to avoid TaskContext attribute restrictions
        from types import SimpleNamespace
        stage2Context = SimpleNamespace()

        # Copy essential fields from the original context for fallbacks (snake_case for placeholderFactory compatibility)
        stage2Context.task_step = getattr(context, 'task_step', None)
        stage2Context.workflow_id = getattr(context, 'workflow_id', None)

        # Set Stage 1 data directly on the permissive context (snake_case for promptGenerationActionsReact compatibility)
        if isinstance(selection, dict):
            stage2Context.action_objective = selection.get('actionObjective', '')
            stage2Context.parameters_context = selection.get('parametersContext', '')
            stage2Context.learnings = selection.get('learnings', [])
        else:
            stage2Context.action_objective = ''
            stage2Context.parameters_context = ''
            stage2Context.learnings = []

        # Build and send the Stage 2 parameters prompt (always)
        bundle = generateReactParametersPrompt(self.services, stage2Context, compoundActionName)
        promptTemplate = bundle.prompt
        placeholders = bundle.placeholders

        self._writeTraceLog("React Parameters Prompt", promptTemplate)
        self._writeTraceLog("React Parameters Placeholders", placeholders)

        # Centralized AI call for parameter suggestion (balanced analysis)
        options = AiCallOptions(
            operationType=OperationType.ANALYSE_CONTENT,
            priority=Priority.BALANCED,
            compressPrompt=True,
            compressContext=False,
            processingMode=ProcessingMode.ADVANCED,
            maxCost=0.05,
            maxProcessingTime=30,
            temperature=0.3,  # Slightly higher temperature for better instruction following
            # maxTokens not set - use the model's maximum for big JSON responses
            resultFormat="json"  # Explicitly request JSON format
        )

        paramsResp = await self.services.ai.callAi(
            prompt=promptTemplate,
            placeholders=placeholders,
            options=options
        )
        # Parse JSON response
        js = paramsResp[paramsResp.find('{'):paramsResp.rfind('}')+1] if paramsResp else '{}'
        try:
            paramObj = json.loads(js)
            parameters = paramObj.get('parameters', {}) if isinstance(paramObj, dict) else {}
        except Exception as e:
            logger.error(f"Failed to parse AI parameters response as JSON: {str(e)}")
            logger.error(f"Response was: {paramsResp}")
            # Keep paramObj defined so the trace block below never hits an unbound name
            paramObj = {}
            parameters = {}

        # Merge Stage 1 resource selections into Stage 2 parameters (only if the action expects them)
        try:
            from modules.workflows.processing.shared.methodDiscovery import getActionParameterList, methods as _methods
            requiredDocs = selection.get('requiredInputDocuments')
            if requiredDocs and isinstance(requiredDocs, list):
                # Only attach if the target action defines 'documentList'
                expectedParams = getActionParameterList(methodName, actionName, _methods)
                if 'documentList' in expectedParams:
                    parameters['documentList'] = requiredDocs
            requiredConn = selection.get('requiredConnection')
            if requiredConn:
                # Only attach if the target action defines 'connectionReference'
                expectedParams = getActionParameterList(methodName, actionName, _methods)
                if 'connectionReference' in expectedParams:
                    parameters['connectionReference'] = requiredConn
        except Exception:
            pass

        # Apply minimal defaults in-code (language)
        if 'language' not in parameters and hasattr(self.services, 'user') and getattr(self.services.user, 'language', None):
            parameters['language'] = self.services.user.language

        # Write merged parameters to the trace BEFORE continuing
        try:
            mergedParamObj = {
                "schema": (paramObj.get('schema') if isinstance(paramObj, dict) else 'parameters_v1'),
                "parameters": parameters
            }
            self._writeTraceLog("React Parameters Response", mergedParamObj)
        except Exception:
            pass

        # Build a synthetic ActionItem for execution routing and labels
        currentRound = getattr(self.workflow, 'currentRound', 0)
        currentTask = getattr(self.workflow, 'currentTask', 0)
        resultLabel = f"round{currentRound}_task{currentTask}_action{stepIndex}_results"

        taskAction = self._createActionItem({
            "execMethod": methodName,
            "execAction": actionName,
            "execParameters": parameters,
            "execResultLabel": resultLabel,
            "status": TaskStatus.PENDING
        })

        # Execute using the existing single-action flow (message creation is handled internally)
        result = await self.actionExecutor.executeSingleAction(taskAction, workflow, taskStep, currentTask, stepIndex, 1)

        return result

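    # Illustrative only: the two-stage contract _actExecute assumes. The values
    # are hypothetical; the keys mirror the ones read above.
    #
    # Stage 1 selection (from _planSelect):
    #   {"action": "ai.webResearch",
    #    "actionObjective": "find quarterly revenue",
    #    "parametersContext": "focus on 2024 filings",
    #    "learnings": [],
    #    "requiredInputDocuments": ["doc_123"],
    #    "requiredConnection": None}
    #
    # Stage 2 parameters response (parsed into paramObj):
    #   {"schema": "parameters_v1",
    #    "parameters": {"query": "ACME 2024 quarterly revenue", "language": "en"}}
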
    def _observeBuild(self, actionResult: ActionResult) -> Dict[str, Any]:
        """Observe: build a compact observation object from the ActionResult with full document metadata"""
        previews = []
        notes = []
        if actionResult and actionResult.documents:
            # Process all documents and show full metadata
            for doc in actionResult.documents:
                # Extract all available metadata without content
                docMetadata = {
                    "name": getattr(doc, 'fileName', None) or getattr(doc, 'documentName', 'Unknown'),
                    "mimeType": getattr(doc, 'mimeType', 'Unknown'),
                    "size": getattr(doc, 'size', 'Unknown'),
                    "created": getattr(doc, 'created', 'Unknown'),
                    "modified": getattr(doc, 'modified', 'Unknown'),
                    "typeGroup": getattr(doc, 'typeGroup', 'Unknown'),
                    "documentId": getattr(doc, 'documentId', 'Unknown'),
                    "reference": getattr(doc, 'reference', 'Unknown')
                }
                # Remove 'Unknown' values to keep it clean
                docMetadata = {k: v for k, v in docMetadata.items() if v != 'Unknown'}

                # Add a content size indicator instead of the actual content
                if hasattr(doc, 'documentData') and doc.documentData:
                    if isinstance(doc.documentData, dict) and 'content' in doc.documentData:
                        contentLength = len(str(doc.documentData['content']))
                    else:
                        contentLength = len(str(doc.documentData))
                    docMetadata['contentSize'] = f"{contentLength} characters"

                # Extract comment if available
                if hasattr(doc, 'documentData') and doc.documentData:
                    data = getattr(doc, 'documentData', None)
                    if isinstance(data, dict):
                        comment = data.get("comment", "")
                        if comment:
                            notes.append(f"Document '{docMetadata.get('name', 'Unknown')}': {comment}")

                previews.append(docMetadata)

        observation = {
            "success": bool(actionResult.success),
            "resultLabel": actionResult.resultLabel or "",
            "documentsCount": len(actionResult.documents) if actionResult.documents else 0,
            "previews": previews,
            "notes": notes
        }

        # Add content analysis if intent is available
        if self.currentIntent and actionResult.documents:
            contentAnalysis = self._analyzeContent(actionResult.documents)
            observation['contentAnalysis'] = contentAnalysis

        return observation

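    # Illustrative shape of the observation built above (the values are
    # hypothetical; the keys and the resultLabel format come from this file):
    _EXAMPLE_OBSERVATION = {
        "success": True,
        "resultLabel": "round0_task1_action1_results",
        "documentsCount": 1,
        "previews": [{"name": "report.md", "mimeType": "text/markdown", "contentSize": "1024 characters"}],
        "notes": ["Document 'report.md': generated from web research"]
    }
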
    def _analyzeContent(self, documents: List[Any]) -> Dict[str, Any]:
        """Analyzes the content of documents for adaptive learning"""
        try:
            if not documents:
                return {"contentType": "none", "contentSnippet": "", "intentMatch": False}

            # Extract content from the first document
            firstDoc = documents[0]
            content = ""
            if hasattr(firstDoc, 'documentData'):
                data = firstDoc.documentData
                if isinstance(data, dict) and 'content' in data:
                    content = str(data['content'])
                else:
                    content = str(data)

            # Classify content type
            contentType = self._classifyContent(content)

            # Create content snippet
            contentSnippet = content[:200] + "..." if len(content) > 200 else content

            # Assess intent match
            intentMatch = self._assessIntentMatch(content, self.currentIntent)

            return {
                "contentType": contentType,
                "contentSnippet": contentSnippet,
                "intentMatch": intentMatch
            }

        except Exception as e:
            logger.error(f"Error analyzing content: {str(e)}")
            return {"contentType": "error", "contentSnippet": "", "intentMatch": False}

    def _classifyContent(self, content: str) -> str:
        """Classifies the type of content"""
        if not content:
            return "empty"

        # Check for code
        codeIndicators = ['def ', 'function', 'import ', 'class ', 'for ', 'while ', 'if ']
        if any(indicator in content.lower() for indicator in codeIndicators):
            return "code"

        # Check for numbers
        if re.search(r'\b\d+\b', content):
            return "numbers"

        # Check for structured content
        if any(indicator in content for indicator in ['\n', '\t', '|', '-', '*', '1.', '2.']):
            return "structured"

        # Default to text
        return "text"

    def _assessIntentMatch(self, content: str, intent: Dict[str, Any]) -> bool:
        """Assesses whether content matches the user intent"""
        if not intent:
            return False

        dataType = intent.get("dataType", "unknown")

        if dataType == "numbers":
            # Check if content contains actual numbers, not code
            hasNumbers = bool(re.search(r'\b\d+\b', content))
            isNotCode = not any(keyword in content.lower() for keyword in ['def ', 'function', 'import '])
            return hasNumbers and isNotCode

        elif dataType == "text":
            # Check if content is readable text
            words = re.findall(r'\b\w+\b', content)
            return len(words) > 5

        elif dataType == "documents":
            # Check if content is suitable for document creation
            hasStructure = any(indicator in content for indicator in ['\n', '\t', '|', '-', '*'])
            hasContent = len(content.strip()) > 50
            return hasStructure and hasContent

        return True  # Default to match for unknown types

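    # Illustrative classification results for _classifyContent. Each line shows
    # a hypothetical input and the label the checks above assign, in order:
    # code indicators, digits, structure markers, then plain text.
    #
    #   _classifyContent("")                        -> "empty"
    #   _classifyContent("def foo(): pass")         -> "code"
    #   _classifyContent("Revenue was 42 million")  -> "numbers"
    #   _classifyContent("alpha beta gamma delta")  -> "text"
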
    def _collectFeedback(self, result: Any, validation: Dict[str, Any], intent: Dict[str, Any]) -> Dict[str, Any]:
        """Collects comprehensive feedback from action execution"""
        try:
            # Extract content summary
            contentDelivered = ""
            if result.documents:
                firstDoc = result.documents[0]
                if hasattr(firstDoc, 'documentData'):
                    data = firstDoc.documentData
                    if isinstance(data, dict) and 'content' in data:
                        content = str(data['content'])
                        contentDelivered = content[:100] + "..." if len(content) > 100 else content
                    else:
                        contentDelivered = str(data)[:100] + "..." if len(str(data)) > 100 else str(data)

            return {
                "actionAttempted": result.resultLabel or "unknown",
                "parametersUsed": {},  # Would be extracted from the action context
                "contentDelivered": contentDelivered,
                "intentMatchScore": validation.get('qualityScore', 0),
                "qualityScore": validation.get('qualityScore', 0),
                "issuesFound": validation.get('improvementSuggestions', []),
                "learningOpportunities": validation.get('improvementSuggestions', []),
                "userSatisfaction": None,  # Would be collected from user feedback
                "timestamp": datetime.now(timezone.utc).timestamp()
            }

        except Exception as e:
            logger.error(f"Error collecting feedback: {str(e)}")
            return {
                "actionAttempted": "unknown",
                "parametersUsed": {},
                "contentDelivered": "",
                "intentMatchScore": 0,
                "qualityScore": 0,
                "issuesFound": [],
                "learningOpportunities": [],
                "userSatisfaction": None,
                "timestamp": datetime.now(timezone.utc).timestamp()
            }

    async def _refineDecide(self, context: TaskContext, observation: Dict[str, Any]) -> Dict[str, Any]:
        """Refine: decide to continue or stop, with a reason"""
        # Create a proper ReviewContext for extractReviewContent
        from modules.datamodels.datamodelChat import ReviewContext
        reviewContext = ReviewContext(
            task_step=context.task_step,
            task_actions=[],
            action_results=[],  # React mode doesn't have action results in this context
            step_result={'observation': observation},
            workflow_id=context.workflow_id,
            previous_results=[]
        )

        enhancedReviewContent = extractReviewContent(reviewContext)

        # Add content validation to the review content
        if 'contentValidation' in observation:
            validation = observation['contentValidation']
            enhancedReviewContent += "\n\nCONTENT VALIDATION:\n"
            enhancedReviewContent += f"Overall Success: {validation['overallSuccess']}\n"
            enhancedReviewContent += f"Quality Score: {validation['qualityScore']:.2f}\n"
            if validation['improvementSuggestions']:
                enhancedReviewContent += f"Improvement Suggestions: {', '.join(validation['improvementSuggestions'])}\n"

        # Add content analysis to the review content
        if 'contentAnalysis' in observation:
            analysis = observation['contentAnalysis']
            enhancedReviewContent += "\nCONTENT ANALYSIS:\n"
            enhancedReviewContent += f"Content Type: {analysis['contentType']}\n"
            enhancedReviewContent += f"Intent Match: {analysis['intentMatch']}\n"
            if analysis['contentSnippet']:
                enhancedReviewContent += f"Content Preview: {analysis['contentSnippet']}\n"

        # Add progress state to the review content
        progressState = self.progressTracker.getCurrentProgress()
        enhancedReviewContent += "\nPROGRESS STATE:\n"
        enhancedReviewContent += f"Completed Objectives: {len(progressState['completedObjectives'])}\n"
        enhancedReviewContent += f"Partial Achievements: {len(progressState['partialAchievements'])}\n"
        enhancedReviewContent += f"Failed Attempts: {len(progressState['failedAttempts'])}\n"
        enhancedReviewContent += f"Current Phase: {progressState['currentPhase']}\n"
        if progressState['nextActionsSuggested']:
            enhancedReviewContent += f"Next Action Suggestions: {', '.join(progressState['nextActionsSuggested'])}\n"

        # The prompt bundle receives the enhanced review content directly and
        # supplies the placeholders used for the AI call
        bundle = generateReactRefinementPrompt(self.services, context, enhancedReviewContent)
        promptTemplate = bundle.prompt
        placeholders = bundle.placeholders

        self._writeTraceLog("React Refinement Prompt", promptTemplate)
        self._writeTraceLog("React Refinement Placeholders", placeholders)

        # Centralized AI call for the refinement decision (balanced analysis)
        options = AiCallOptions(
            operationType=OperationType.ANALYSE_CONTENT,
            priority=Priority.BALANCED,
            compressPrompt=True,
            compressContext=False,
            processingMode=ProcessingMode.ADVANCED,
            maxCost=0.05,
            maxProcessingTime=30
        )

        resp = await self.services.ai.callAi(
            prompt=promptTemplate,
            placeholders=placeholders,
            options=options
        )
        self._writeTraceLog("React Refinement Response", resp)
        js = resp[resp.find('{'):resp.rfind('}')+1] if resp else '{}'
        try:
            decision = json.loads(js)
        except Exception:
            decision = {"decision": "continue", "reason": "default"}
        return decision

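    # Illustrative only: the decision JSON _refineDecide expects back. The keys
    # match what executeTask reads ('decision' and 'reason'); the values are
    # hypothetical.
    _EXAMPLE_REFINEMENT_DECISION = {
        "decision": "stop",  # "continue" keeps the loop going, "stop" ends it
        "reason": "All requested figures are present in the generated document"
    }
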
async def _createReactActionMessage(self, workflow: ChatWorkflow, selection: Dict[str, Any],
|
||||
step: int, maxSteps: int, taskIndex: int, messageType: str,
|
||||
result: ActionResult = None, observation: Dict[str, Any] = None):
|
||||
"""Create user-friendly messages for React workflow actions"""
|
||||
try:
|
||||
action = selection.get('action', {})
|
||||
method = action.get('method', '')
|
||||
actionName = action.get('name', '')
|
||||
|
||||
# Get user language
|
||||
userLanguage = self.services.user.language if self.services and self.services.user else 'en'
|
||||
|
||||
if messageType == "before":
|
||||
# Message BEFORE action execution
|
||||
userMessage = await self._generateActionIntentionMessage(method, actionName, userLanguage)
|
||||
messageContent = f"🔄 **Step {step}/{maxSteps}**\n\n{userMessage}"
|
||||
status = "step"
|
||||
actionProgress = "pending"
|
||||
documentsLabel = f"action_{step}_intention"
|
||||
|
||||
elif messageType == "after":
|
||||
# Message AFTER action execution
|
||||
userMessage = await self._generateActionResultMessage(method, actionName, result, observation, userLanguage)
|
||||
successIcon = "✅" if result and result.success else "❌"
|
||||
messageContent = f"{successIcon} **Step {step}/{maxSteps} Complete**\n\n{userMessage}"
|
||||
status = "step"
|
||||
actionProgress = "success" if result and result.success else "fail"
|
||||
documentsLabel = observation.get('resultLabel') if observation else f"action_{step}_result"
|
||||
else:
|
||||
return
|
||||
|
||||
# Create workflow message
|
||||
messageData = {
|
||||
"workflowId": workflow.id,
|
||||
"role": "assistant",
|
||||
"message": messageContent,
|
||||
"status": status,
|
||||
"sequenceNr": len(workflow.messages) + 1,
|
||||
"publishedAt": self.services.utils.getUtcTimestamp(),
|
||||
"documentsLabel": documentsLabel,
|
||||
"documents": [],
|
||||
"roundNumber": workflow.currentRound,
|
||||
"taskNumber": taskIndex,
|
||||
"actionNumber": step,
|
||||
"actionProgress": actionProgress
|
||||
}
|
||||
|
||||
message = self.services.interfaceDbChat.createMessage(messageData)
|
||||
if message:
|
||||
workflow.messages.append(message)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error creating React action message: {str(e)}")
|
||||
|
||||
async def _generateActionIntentionMessage(self, method: str, actionName: str, userLanguage: str):
|
||||
"""Generate user-friendly message explaining what action will do"""
|
||||
try:
|
||||
# Create a simple AI prompt to generate user-friendly action descriptions
|
||||
prompt = f"""Generate a brief, user-friendly message explaining what the {method}.{actionName} action will do.
|
||||
|
||||
User language: {userLanguage}
|
||||
|
||||
|
||||
Return only the user-friendly message, no technical details."""
|
||||
|
||||
# Call AI to generate user-friendly message
|
||||
response = await self.services.ai.callAi(
|
||||
prompt=prompt,
|
||||
options=AiCallOptions(
|
||||
operationType=OperationType.GENERATE_CONTENT,
|
||||
priority=Priority.SPEED,
|
||||
compressPrompt=True,
|
||||
maxCost=0.01,
|
||||
maxProcessingTime=5
|
||||
)
|
||||
)
|
||||
|
||||
return response.strip() if response else f"Executing {method}.{actionName} action..."
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error generating action intention message: {str(e)}")
|
||||
return f"Executing {method}.{actionName} action..."
|
||||

    async def _generateActionResultMessage(self, method: str, actionName: str, result: ActionResult,
                                            observation: Dict[str, Any], userLanguage: str):
        """Generate a user-friendly message explaining action results"""
        try:
            # Build result context
            resultContext = ""
            if result and result.documents:
                docCount = len(result.documents)
                resultContext = f"Generated {docCount} document(s)"
            elif observation and observation.get('documentsCount', 0) > 0:
                docCount = observation.get('documentsCount', 0)
                resultContext = f"Generated {docCount} document(s)"

            # Create AI prompt for the result message
            prompt = f"""Generate a brief, user-friendly message explaining the result of the {method}.{actionName} action.

User language: {userLanguage}
Success: {result.success if result else 'Unknown'}
Result context: {resultContext}

Return only the user-friendly message, no technical details."""

            # Call AI to generate the user-friendly result message
            response = await self.services.ai.callAi(
                prompt=prompt,
                options=AiCallOptions(
                    operationType=OperationType.GENERATE_CONTENT,
                    priority=Priority.SPEED,
                    compressPrompt=True,
                    maxCost=0.01,
                    maxProcessingTime=5
                )
            )

            return response.strip() if response else f"{method}.{actionName} action completed"

        except Exception as e:
            logger.error(f"Error generating action result message: {str(e)}")
            return f"{method}.{actionName} action completed"

    def _createActionItem(self, actionData: Dict[str, Any]) -> ActionItem:
        """Creates a new task action for React mode"""
        try:
            import uuid

            # Ensure ID is present
            if "id" not in actionData or not actionData["id"]:
                actionData["id"] = f"action_{uuid.uuid4()}"

            # Ensure required fields
            if "status" not in actionData:
                actionData["status"] = TaskStatus.PENDING

            if "execMethod" not in actionData:
                logger.error("execMethod is required for task action")
                return None

            if "execAction" not in actionData:
                logger.error("execAction is required for task action")
                return None

            if "execParameters" not in actionData:
                actionData["execParameters"] = {}

            # Use generic field separation based on ActionItem model
            simpleFields, objectFields = self.services.interfaceDbChat._separate_object_fields(ActionItem, actionData)

            # Create action in database
            createdAction = self.services.interfaceDbChat.db.recordCreate(ActionItem, simpleFields)

            # Convert to ActionItem model
            return ActionItem(
                id=createdAction["id"],
                execMethod=createdAction["execMethod"],
                execAction=createdAction["execAction"],
                execParameters=createdAction.get("execParameters", {}),
                execResultLabel=createdAction.get("execResultLabel"),
                expectedDocumentFormats=createdAction.get("expectedDocumentFormats"),
                status=createdAction.get("status", TaskStatus.PENDING),
                error=createdAction.get("error"),
                retryCount=createdAction.get("retryCount", 0),
                retryMax=createdAction.get("retryMax", 3),
                processingTime=createdAction.get("processingTime"),
                timestamp=float(createdAction.get("timestamp", self.services.utils.getUtcTimestamp())),
                result=createdAction.get("result"),
                resultDocuments=createdAction.get("resultDocuments", []),
                userMessage=createdAction.get("userMessage")
            )

        except Exception as e:
            logger.error(f"Error creating task action: {str(e)}")
            return None
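For reference, a minimal `actionData` payload that would pass the guards above might look like the following (the field values are invented for illustration; only `id`, `status`, and `execParameters` are filled in automatically when absent):

```python
actionData = {
    "execMethod": "web",     # required, validated above
    "execAction": "search",  # required, validated above
    "execParameters": {"query": "quarterly revenue 2024"},
    "execResultLabel": "round1_task1_action1_search_results",
}
# Inside the class:
action = self._createActionItem(actionData)  # returns an ActionItem, or None on error
```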

    def _updateWorkflowBeforeExecutingTask(self, taskNumber: int):
        """Update workflow object before executing a task"""
        try:
            updateData = {
                "currentTask": taskNumber,
                "currentAction": 0,
                "totalActions": 0
            }

            # Update workflow object
            self.workflow.currentTask = taskNumber
            self.workflow.currentAction = 0
            self.workflow.totalActions = 0

            # Update in database
            self.services.interfaceDbChat.updateWorkflow(self.workflow.id, updateData)
            logger.info(f"Updated workflow {self.workflow.id} before executing task {taskNumber}: {updateData}")

        except Exception as e:
            logger.error(f"Error updating workflow before executing task: {str(e)}")

    def _updateWorkflowBeforeExecutingAction(self, actionNumber: int):
        """Update workflow object before executing an action"""
        try:
            updateData = {
                "currentAction": actionNumber
            }

            # Update workflow object
            self.workflow.currentAction = actionNumber

            # Update in database
            self.services.interfaceDbChat.updateWorkflow(self.workflow.id, updateData)
            logger.info(f"Updated workflow {self.workflow.id} before executing action {actionNumber}: {updateData}")

        except Exception as e:
            logger.error(f"Error updating workflow before executing action: {str(e)}")

    def _writeTraceLog(self, contextText: str, data: Any) -> None:
        """Write trace data to configured trace file if in debug mode with improved JSON formatting"""
        try:
            import os
            import json
            from datetime import datetime, UTC

            # Only write if logger is in debug mode
            if logger.level > logging.DEBUG:
                return

            # Get log directory from configuration
            logDir = self.services.utils.configGet("APP_LOGGING_LOG_DIR", "./")
            if not os.path.isabs(logDir):
                # If relative path, make it relative to the gateway directory
                gatewayDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
                logDir = os.path.join(gatewayDir, logDir)

            # Ensure log directory exists
            os.makedirs(logDir, exist_ok=True)

            # Create trace file path
            traceFile = os.path.join(logDir, "log_trace.log")

            # Format the trace entry with better structure
            timestamp = datetime.fromtimestamp(self.services.utils.getUtcTimestamp(), UTC).strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]

            # Create a structured trace entry
            traceEntry = f"[{timestamp}] {contextText}\n"
            traceEntry += "=" * 80 + "\n"

            # Add data if provided, with improved formatting
            if data is not None:
                try:
                    if isinstance(data, (dict, list)):
                        # Format as pretty JSON with better settings
                        jsonStr = json.dumps(data, indent=2, default=str, ensure_ascii=False, sort_keys=False)
                        traceEntry += f"JSON Data:\n{jsonStr}\n"
                    elif isinstance(data, str):
                        # For string data, try to parse as JSON first, then fall back to plain text
                        try:
                            parsed = json.loads(data)
                            jsonStr = json.dumps(parsed, indent=2, default=str, ensure_ascii=False, sort_keys=False)
                            traceEntry += f"JSON Data (parsed from string):\n{jsonStr}\n"
                        except (json.JSONDecodeError, TypeError):
                            # Not valid JSON, show as plain text with proper line breaks
                            formatted_data = data.replace('\\n', '\n')
                            traceEntry += f"Text Data:\n{formatted_data}\n"
                    else:
                        # For other types, convert to string and try to parse as JSON
                        dataStr = str(data)
                        try:
                            parsed = json.loads(dataStr)
                            jsonStr = json.dumps(parsed, indent=2, default=str, ensure_ascii=False, sort_keys=False)
                            traceEntry += f"JSON Data (parsed from object):\n{jsonStr}\n"
                        except (json.JSONDecodeError, TypeError):
                            # Not valid JSON, show as plain text with proper line breaks
                            formatted_data = dataStr.replace('\\n', '\n')
                            traceEntry += f"Object Data:\n{formatted_data}\n"
                except Exception:
                    # Fallback to simple string representation
                    traceEntry += f"Data (fallback): {str(data)}\n"
            else:
                traceEntry += "No data provided\n"

            traceEntry += "=" * 80 + "\n\n"

            # Write to trace file
            with open(traceFile, "a", encoding="utf-8") as f:
                f.write(traceEntry)

        except Exception:
            # Don't log trace errors to avoid recursion
            pass
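For orientation, a call site might look like the following (the context string and payload are invented for illustration; the method is a no-op unless the logger is at DEBUG level):

```python
# Hypothetical call inside a workflow step, e.g. while debugging task planning:
self._writeTraceLog("Task planning AI response", {"status": "ok", "tasks": 3})
```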
File diff suppressed because it is too large

@ -1,418 +0,0 @@
"""
|
||||
Placeholder-based prompt factory for dynamic AI calls.
|
||||
This module provides prompt templates with placeholders that can be filled dynamically.
|
||||
"""
|
||||
|
||||
import json
|
||||
from typing import Dict, Any
|
||||
from modules.workflows.processing.promptFactory import (
|
||||
_getAvailableDocuments,
|
||||
_getPreviousRoundContext,
|
||||
getMethodsList,
|
||||
getEnhancedDocumentContext,
|
||||
_getConnectionReferenceList,
|
||||
methods
|
||||
)
|
||||
|
||||
|
||||
def createTaskPlanningPromptTemplate() -> str:
|
||||
"""Create task planning prompt template with placeholders."""
|
||||
return """You are a task planning AI that analyzes user requests and creates structured, self-contained task plans with user-friendly feedback messages.
|
||||
|
||||
USER REQUEST: {{KEY:USER_PROMPT}}
|
||||
|
||||
AVAILABLE DOCUMENTS: {{KEY:AVAILABLE_DOCUMENTS}}
|
||||
|
||||
PREVIOUS WORKFLOW ROUNDS CONTEXT:
|
||||
{{KEY:WORKFLOW_HISTORY}}
|
||||
|
||||
INSTRUCTIONS:
|
||||
1. Analyze the user request, available documents, and previous workflow rounds context
|
||||
2. If the user request appears to be a follow-up (like "try again", "versuche es nochmals", "retry", etc.),
|
||||
use the PREVIOUS WORKFLOW ROUNDS CONTEXT to understand what the user wants to retry or continue
|
||||
3. Group related topics and sequential steps into single, comprehensive tasks
|
||||
4. Focus on business outcomes, not technical operations
|
||||
5. Make each task self-contained: clearly state what to do and what outputs are expected
|
||||
6. Ensure proper handover between tasks (later actions will use your task outputs)
|
||||
7. Detect the language of the user request and include it in languageUserDetected
|
||||
8. Generate user-friendly messages for each task in the user's request language
|
||||
9. Return a JSON object with the exact structure shown below
|
||||
|
||||
TASK GROUPING PRINCIPLES:
|
||||
- COMBINE RELATED TOPICS: Group related subjects, sequential steps, or workflow-structured activities into single tasks
|
||||
- SEQUENTIAL WORKFLOWS: If the user says "first do this, then that, then that" → create ONE task that handles the entire sequence
|
||||
- SIMILAR CONTENT: If multiple items deal with the same subject matter → combine into ONE comprehensive task
|
||||
- ONLY SPLIT WHEN DIFFERENT: Create separate tasks ONLY when the user explicitly wants different, independent things
|
||||
|
||||
EXAMPLES OF GOOD TASK GROUPING:
|
||||
|
||||
COMBINE INTO ONE TASK:
|
||||
- "Analyze the documents, extract key insights, and create a summary report" → ONE task: "Analyze documents and create comprehensive summary report"
|
||||
- "First check my emails, then respond to urgent ones, then organize my inbox" → ONE task: "Process and organize email inbox with priority responses"
|
||||
- "Review the budget, analyze spending patterns, and suggest cost-cutting measures" → ONE task: "Comprehensive budget analysis with optimization recommendations"
|
||||
- "Create a business strategy, develop marketing plan, and prepare presentation" → ONE task: "Develop complete business strategy with marketing plan and presentation"
|
||||
|
||||
SPLIT INTO MULTIPLE TASKS:
|
||||
- "Create a business strategy for Q4" AND "Check my emails for messages from my assistant" → TWO separate tasks (different subjects)
|
||||
- "Analyze customer feedback" AND "Prepare quarterly financial report" → TWO separate tasks (different business areas)
|
||||
- "Review project timeline" AND "Update employee handbook" → TWO separate tasks (unrelated activities)
|
||||
|
||||
TASK PLANNING PRINCIPLES:
|
||||
- Break down complex requests into logical, sequential steps
|
||||
- Focus on business value and outcomes
|
||||
- Keep tasks at a meaningful level of abstraction (not implementation details)
|
||||
- Each task should produce results that can be used by subsequent tasks
|
||||
- Ensure clear dependencies and handovers between tasks
|
||||
- Provide clear, actionable user messages in the user's request language
|
||||
- Group related activities to minimize task fragmentation
|
||||
- Only create multiple tasks when dealing with truly different, independent objectives
|
||||
- Make task objectives action-oriented and specific (include scope, data sources to consider, and output intent at high level)
|
||||
- Write success_criteria as measurable acceptance criteria focusing on outputs (what artifacts or insights will exist and how they are validated)
|
||||
|
||||
FOLLOW-UP PROMPT HANDLING:
|
||||
- If the user request is a follow-up (e.g., "try again", "versuche es nochmals", "retry", "continue", "proceed"),
|
||||
analyze the PREVIOUS WORKFLOW ROUNDS CONTEXT to understand what failed or was incomplete
|
||||
- Use the previous round's user requests and task outcomes to determine what the user wants to retry
|
||||
- If previous rounds failed due to missing documents, and documents are now available,
|
||||
create tasks that use the newly available documents to accomplish the original request
|
||||
- Maintain the same business objective from previous rounds but adapt to current available resources
|
||||
|
||||
SPECIFIC SCENARIO HANDLING:
|
||||
- If previous round failed with "documents missing" error and current round has documents available,
|
||||
the user likely wants to retry the same operation with the newly provided documents
|
||||
- Example: Previous round "speichere mir die 3 dokumente im sharepoint unter xxx" failed due to missing documents,
|
||||
current round "versuche es nochmals" with documents should retry the SharePoint save operation
|
||||
- Always check if the current request is a retry by looking for retry keywords and previous round context
|
||||
|
||||
REQUIRED JSON STRUCTURE:
|
||||
{{
|
||||
"overview": "Brief description of the overall plan",
|
||||
"languageUserDetected": "en", // Language code detected from user request (en, de, fr, it, es, etc.)
|
||||
"userMessage": "User-friendly message explaining the task plan in user's request language",
|
||||
"tasks": [
|
||||
{{
|
||||
"id": "task_1",
|
||||
"objective": "Clear business objective this task accomplishes (combining related activities)",
|
||||
"dependencies": ["task_0"], // IDs of tasks that must complete first
|
||||
"success_criteria": ["criteria1", "criteria2"],
|
||||
"estimated_complexity": "low|medium|high",
|
||||
"userMessage": "User-friendly message explaining what this task will accomplish in user's request language"
|
||||
}}
|
||||
]
|
||||
}}
|
||||
|
||||
EXAMPLES OF GOOD TASK OBJECTIVES (COMBINING RELATED ACTIVITIES):
|
||||
- "Analyze documents and extract key insights for business communication"
|
||||
- "Create professional business communication incorporating analyzed information"
|
||||
- "Execute business communication using specified channels and document outcomes"
|
||||
- "Develop comprehensive business strategy with implementation roadmap and success metrics"
|
||||
|
||||
EXAMPLES OF WELL-FORMED SUCCESS CRITERIA (OUTPUT-FOCUSED):
|
||||
- "Deliver a prioritized list of 10–20 candidates with justification"
|
||||
- "Provide a structured JSON with fields: company, ticker, rationale, metrics"
|
||||
- "Produce a presentation outline with 5 sections and bullet points per section"
|
||||
- "Include data sources and date stamped references for traceability"
|
||||
|
||||
EXAMPLES OF GOOD SUCCESS CRITERIA:
|
||||
- "Key insights extracted and ready for business use"
|
||||
- "Professional communication created with clear business value"
|
||||
- "Business communication successfully delivered and documented"
|
||||
- "All outcomes properly documented and accessible"
|
||||
|
||||
EXAMPLES OF BAD TASK OBJECTIVES:
|
||||
- "Read the PDF file" (too granular - should be "Analyze document content")
|
||||
- "Convert data to CSV" (implementation detail - should be "Structure data for analysis")
|
||||
- "Send email" (too specific - should be "Deliver business communication")
|
||||
|
||||
LANGUAGE DETECTION:
|
||||
- Analyze the user request text to identify the language
|
||||
- Use standard language codes: en (English), de (German), fr (French), it (Italian), es (Spanish), etc.
|
||||
- If the language cannot be determined, use "en" as default
|
||||
- Include the detected language in the languageUserDetected field
|
||||
|
||||
NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
|
||||
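Taken together, the template is meant to be filled and then parsed back as strict JSON. A minimal sketch of that round trip (the plain `.replace` fill shown here is a simplification and an assumption; the doubled braces in the template suggest the real pipeline may instead run a `.format()`-style pass that also collapses `{{` to `{`, and the `raw` response is invented for illustration):

```python
import json

template = createTaskPlanningPromptTemplate()
prompt = (template
          .replace("{{KEY:USER_PROMPT}}", "Summarize the attached report")
          .replace("{{KEY:AVAILABLE_DOCUMENTS}}", "docItem: report.pdf")
          .replace("{{KEY:WORKFLOW_HISTORY}}", "No previous workflow rounds - this is the first round."))

# raw = await services.ai.callAi(prompt=prompt, options=...)  # hypothetical call
raw = '{"overview": "...", "languageUserDetected": "en", "userMessage": "...", "tasks": []}'
plan = json.loads(raw)  # the NOTE above demands a JSON-only response
```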


def createActionDefinitionPromptTemplate() -> str:
    """Create action definition prompt template with placeholders."""
    return """You are an action planning AI that generates specific, executable actions for task steps.

TASK OBJECTIVE: {{KEY:USER_PROMPT}}

AVAILABLE DOCUMENTS: {{KEY:AVAILABLE_DOCUMENTS}}

WORKFLOW HISTORY: {{KEY:WORKFLOW_HISTORY}}

AVAILABLE METHODS: {{KEY:AVAILABLE_METHODS}}

USER LANGUAGE: {{KEY:USER_LANGUAGE}}

INSTRUCTIONS:
- Generate actions to accomplish this task step using available documents, connections, and previous results
- Use docItem for single documents and docList for groups of documents, as shown in AVAILABLE DOCUMENTS
- If there are no documents available, do not create document extraction actions. Select methods strictly based on the task objective; choose web actions when external information is required. Otherwise, generate a status/information report requesting the needed inputs.
- Always pass documentList as a LIST of references (docItem and/or docList) - this list CANNOT be empty for document extraction actions
- For referencing documents from previous actions, use the format "round{current_round}_task{current_task}_action{action_number}_{descriptive_label}"
- Each action must be self-contained and executable with the provided parameters
- For document extraction, ensure prompts are specific and detailed
- Include validation steps in extraction prompts where relevant
- If this is a retry, learn from previous failures and improve the approach
- Address specific issues mentioned in previous review feedback
- When specifying expectedDocumentFormats, ensure AI prompts explicitly request pure data without markdown formatting
- Generate user-friendly messages for each action in the user's language

PARAMETER COMPLETENESS REQUIREMENTS:
- Every parameter must contain all information needed to execute without implicit context
- Use explicit, concrete values (units, languages, formats, limits, date ranges, IDs) when applicable
- For search-like parameters (if any method requires a query), derive the query from the task objective AND ALL success criteria dimensions. Include:
  - Key entities and domain terms from the objective
  - All distinct facets from success_criteria (e.g., valuation AND AI potential AND know-how needs)
  - Geography/localization (e.g., Schweiz/Suisse/Switzerland; use multilingual synonyms when helpful)
  - Time horizon or recency if relevant
  - Boolean operators and synonyms to increase precision (use AND/OR, quotes, parentheses)
- Avoid single-topic or generic queries focused only on one facet (e.g., pure valuation metrics)
- When facets are truly distinct, create 1–3 focused actions with precise queries rather than one vague catch-all
- Document list parameters must reference only existing labels or prior action outputs; do not reference future outputs

DOCUMENT ROUTING GUIDANCE:
- Each action should produce documents with a clear resultLabel for routing
- Use consistent naming: "round{current_round}_task{current_task}_action{action_number}_{descriptive_label}"
- Ensure document flow: Action A produces documents that Action B can consume
- Document labels should be descriptive of content, not just "results" or "output"
- Consider what subsequent actions will need and structure outputs accordingly

REQUIRED JSON STRUCTURE:
{{
  "actions": [
    {{
      "method": "method_name",
      "action": "action_name",
      "parameters": {{}},
      "resultLabel": "round{current_round}_task{current_task}_action{action_number}_{descriptive_label}",
      "description": "Brief description of what this action accomplishes",
      "userMessage": "User-friendly message explaining what this action will do in user's language"
    }}
  ]
}}

IMPORTANT NOTES:
- Respond with ONLY the JSON object. Do not include any explanatory text.
- Before creating any document extraction action, verify that AVAILABLE DOCUMENTS contains actual document references.
- Always include a user-friendly userMessage for each action in the user's language.
- Write each userMessage in the USER LANGUAGE specified above rather than defaulting to English."""


def createActionSelectionPromptTemplate() -> str:
    """Create action selection prompt template with placeholders."""
    return """Select exactly one action to advance the task.

OBJECTIVE: {{KEY:USER_PROMPT}}
AVAILABLE DOCUMENTS: {{KEY:AVAILABLE_DOCUMENTS}}
USER LANGUAGE: {{KEY:USER_LANGUAGE}}

MINIMAL TOOL CATALOG (method -> action -> [parameterNames]):
{{KEY:AVAILABLE_METHODS}}

BUSINESS RULES:
- Pick exactly one action per step.
- Derive choice from objective and success criteria.
- Prefer user language.
- Keep it minimal; avoid provider specifics.

RESPONSE FORMAT (JSON only):
{{"action":{{"method":"web","name":"search"}}}}"""
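On the consuming side, the JSON-only contract makes the selection trivially machine-readable. A minimal parsing sketch (assuming the model honored the format; a production parser would need error handling for malformed output):

```python
import json

raw = '{"action":{"method":"web","name":"search"}}'  # illustrative model output
selection = json.loads(raw)["action"]
compound_name = f"{selection['method']}.{selection['name']}"  # e.g. "web.search"
```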


def createActionParameterPromptTemplate() -> str:
    """Create action parameter prompt template with placeholders."""
    return """Provide only the required parameters for this action.

SELECTED ACTION: {{KEY:SELECTED_ACTION}}
ACTION SIGNATURE: {{KEY:ACTION_SIGNATURE}}
OBJECTIVE: {{KEY:USER_PROMPT}}
AVAILABLE DOCUMENTS: {{KEY:AVAILABLE_DOCUMENTS}}
USER LANGUAGE: {{KEY:USER_LANGUAGE}}

RULES:
- Return only the parameters object.
- Include user language if relevant.
- Reference documents only by exact labels available.
- Avoid unnecessary fields; host applies defaults.
- Use the ACTION SIGNATURE above to understand what parameters are required.
- Convert the objective into appropriate parameter values as needed.

RESPONSE FORMAT (JSON only):
{{"parameters":{{}}}}"""


def createRefinementPromptTemplate() -> str:
    """Create refinement prompt template with placeholders."""
    return """Decide next step based on observation.

OBJECTIVE: {{KEY:USER_PROMPT}}
OBSERVATION:
{{KEY:REVIEW_CONTENT}}

RULES:
- If criteria are met or no further action helps, decide stop.
- Else decide continue.

RESPONSE FORMAT (JSON only):
{{"decision":"continue","reason":"Need more data"}}"""


def createResultReviewPromptTemplate() -> str:
    """Create result review prompt template with placeholders."""
    return """You are a result validation AI that reviews task execution outcomes and determines success, retry needs, or failure.

TASK OBJECTIVE: {{KEY:USER_PROMPT}}

EXECUTION RESULTS:
{{KEY:REVIEW_CONTENT}}

VALIDATION CRITERIA:
- Review each action's success/failure status
- Check if required documents were produced
- Validate document quality and completeness
- Assess if success criteria were met
- Identify any missing or incomplete outputs
- Determine if retry would help or if the task should be marked as failed

REQUIRED JSON STRUCTURE:
{{
  "status": "success|retry|failed",
  "reason": "Detailed explanation of the validation decision",
  "improvements": ["specific improvement 1", "specific improvement 2"],
  "quality_score": 8, // 1-10 scale
  "met_criteria": ["criteria1", "criteria2"],
  "unmet_criteria": ["criteria3", "criteria4"],
  "confidence": 0.85, // 0.0-1.0 scale
  "userMessage": "User-friendly message explaining the validation result"
}}

VALIDATION PRINCIPLES:
- Be thorough but fair in assessment
- Focus on business value and outcomes
- Consider both technical execution and business results
- Provide specific, actionable improvement suggestions
- Use quality scores to track progress across retries
- Clearly identify which success criteria were met vs. unmet
- Set appropriate confidence levels based on evidence quality

NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
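A consumer of this template must branch on the returned status. A minimal sketch of that handling (the field names follow the JSON structure above; the `json.loads` step assumes the model honored the JSON-only instruction):

```python
import json

def handleReview(raw: str) -> str:
    """Map a result-review response to the next workflow step."""
    review = json.loads(raw)
    status = review.get("status", "failed")
    if status == "success":
        return "complete"
    if status == "retry" and review.get("improvements"):
        return "retry with improvements"
    return "mark failed"
```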


# Helper functions to extract content for placeholders

def extractUserPrompt(context) -> str:
    """Extract user prompt from context."""
    if hasattr(context, 'task_step') and context.task_step:
        return context.task_step.objective or 'No request specified'
    return 'No request specified'


def extractAvailableDocuments(context) -> str:
    """Extract available documents from context."""
    if hasattr(context, 'workflow') and context.workflow:
        return _getAvailableDocuments(context.workflow)
    return "No documents available"


def extractWorkflowHistory(service, context) -> str:
    """Extract workflow history from context."""
    if hasattr(context, 'workflow') and context.workflow:
        return _getPreviousRoundContext(service, context.workflow) or "No previous workflow rounds - this is the first round."
    return "No previous workflow rounds - this is the first round."


def extractAvailableMethods(service) -> str:
    """Extract available methods for action planning."""
    methodList = getMethodsList(service)
    method_actions = {}
    for sig in methodList:
        if '.' in sig:
            method, rest = sig.split('.', 1)
            action = rest.split('(')[0]
            method_actions.setdefault(method, []).append((action, sig))

    # Create a structured JSON format for better AI parsing
    available_methods_json = {}
    for method, actions in method_actions.items():
        available_methods_json[method] = {}
        # Get the method instance for accessing docstrings
        method_instance = methods.get(method, {}).get('instance') if methods else None

        for action, sig in actions:
            if '(' in sig and ')' in sig:
                # Parse parameters directly from the docstring - much simpler and more
                # reliable than re-parsing the rendered signature string.
                parameters = []

                # Get the actual function's docstring
                if method_instance and hasattr(method_instance, action):
                    func = getattr(method_instance, action)
                    if hasattr(func, '__doc__') and func.__doc__:
                        docstring = func.__doc__

                        # Parse the Parameters section from the docstring
                        in_parameters = False
                        for line in docstring.split('\n'):
                            line = line.strip()

                            if line.startswith('Parameters:'):
                                in_parameters = True
                                continue
                            elif line.startswith(('Returns:', 'Raises:', 'Note:', 'Example:', 'Examples:')):
                                in_parameters = False
                                continue
                            elif in_parameters and line and not line.startswith('-') and not line.startswith('*'):
                                # This is a parameter line of the form "name: description"
                                if ':' in line:
                                    param_name = line.split(':')[0].strip()
                                    param_desc = line.split(':', 1)[1].strip()
                                    parameters.append(f"{param_name}: {param_desc}")

                available_methods_json[method][action] = parameters
            else:
                available_methods_json[method][action] = []

    return json.dumps(available_methods_json, indent=2, ensure_ascii=False)


def extractUserLanguage(service) -> str:
    """Extract user language from service."""
    return service.user.language if service and service.user else 'en'


def extractReviewContent(context) -> str:
    """Extract review content from context."""
    if hasattr(context, 'action_results') and context.action_results:
        # Build result summary
        result_summary = ""
        for i, result in enumerate(context.action_results):
            result_summary += f"\nRESULT {i+1}:\n"
            result_summary += f"  Success: {result.success}\n"
            if result.error:
                result_summary += f"  Error: {result.error}\n"

            if result.documents:
                result_summary += f"  Documents: {len(result.documents)} document(s)\n"
                for doc in result.documents:
                    doc_name = getattr(doc, 'documentName', 'Unknown')
                    doc_mime = getattr(doc, 'mimeType', 'Unknown')
                    result_summary += f"    - {doc_name} ({doc_mime})\n"
            else:
                result_summary += "  Documents: None\n"

        return result_summary
    elif hasattr(context, 'observation') and context.observation:
        return json.dumps(context.observation, ensure_ascii=False)
    else:
        return "No review content available"
1
modules/workflows/processing/shared/__init__.py
Normal file

@ -0,0 +1 @@
# Shared workflow utilities

@ -4,8 +4,8 @@
import logging
from typing import List
from datetime import datetime, UTC
from modules.datamodels.datamodelWorkflow import TaskStep
from modules.datamodels.datamodelWorkflow import ActionResult
from modules.datamodels.datamodelChat import TaskStep
from modules.datamodels.datamodelChat import ActionResult

logger = logging.getLogger(__name__)

@ -58,7 +58,7 @@ class TaskExecutionState:
        patterns.append("permission_issues")
    return list(set(patterns))

def should_continue(observation, review=None, current_step: int = 0, max_steps: int = 5) -> bool:
def shouldContinue(observation, review=None, current_step: int = 0, max_steps: int = 5) -> bool:
    """Helper to decide if the iterative loop should continue
    - Stop if review indicates 'stop' or success criteria are met
    - Stop on failure with no retry path
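The rename from `should_continue` to `shouldContinue` aligns the helper with the codebase's camelCase convention. A rough usage sketch, going only by the docstring above (the observation shape and loop body are assumptions; the hunk truncates the actual implementation):

```python
step = 0
while shouldContinue(observation={"success": True}, review=None, current_step=step, max_steps=5):
    step += 1  # execute the next action, observe the result, optionally review
```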
131
modules/workflows/processing/shared/methodDiscovery.py
Normal file

@ -0,0 +1,131 @@
# methodDiscovery.py
# Method discovery and management for workflow execution

import json
import logging
import importlib
import pkgutil
import inspect
from typing import Any, Dict, List
from modules.datamodels.datamodelChat import TaskContext, ReviewContext, DocumentExchange
from modules.workflows.methods.methodBase import MethodBase

# Set up logger
logger = logging.getLogger(__name__)

# Global methods catalog - moved from serviceCenter
methods = {}

def discoverMethods(serviceCenter):
    """Dynamically discover all method classes and their actions in the modules methods package"""
    try:
        # Import the methods package
        methodsPackage = importlib.import_module('modules.workflows.methods')

        # Discover all modules in the package
        for _, name, isPkg in pkgutil.iter_modules(methodsPackage.__path__):
            if not isPkg and name.startswith('method'):
                try:
                    # Import the module
                    module = importlib.import_module(f'modules.workflows.methods.{name}')

                    # Find all classes in the module that inherit from MethodBase
                    for itemName, item in inspect.getmembers(module):
                        if (inspect.isclass(item) and
                                issubclass(item, MethodBase) and
                                item != MethodBase):
                            # Instantiate the method
                            methodInstance = item(serviceCenter)

                            # Use the actions property from MethodBase, which handles the @action decorator
                            actions = methodInstance.actions

                            # Create method info
                            methodInfo = {
                                'instance': methodInstance,
                                'actions': actions,
                                'description': item.__doc__ or f"Method {itemName}"
                            }

                            # Store the method with its full class name
                            methods[itemName] = methodInfo

                            # Also store it with the short name for action executor access
                            shortName = itemName.replace('Method', '').lower()
                            methods[shortName] = methodInfo

                            logger.info(f"Discovered method {itemName} (short: {shortName}) with {len(actions)} actions")

                except Exception as e:
                    logger.error(f"Error discovering method {name}: {str(e)}")
                    continue

        logger.info(f"Discovered {len(methods)} method entries total")

    except Exception as e:
        logger.error(f"Error discovering methods: {str(e)}")
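As a rough usage sketch of the catalog (the `FakeServiceCenter` stub is an assumption; real discovery instantiates each `MethodBase` subclass with the live service center, and each class is listed twice, under its full and short names):

```python
# Hypothetical smoke test for the discovery catalog.
class FakeServiceCenter:
    pass

discoverMethods(FakeServiceCenter())
for name, info in methods.items():
    print(name, "->", sorted(info['actions'].keys()))
```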

def getMethodsList(serviceCenter):
    """Get a list of available methods with their signatures"""
    if not methods:
        discoverMethods(serviceCenter)

    methodsList = []
    for methodName, methodInfo in methods.items():
        methodDescription = methodInfo['description']
        actionsList = []

        for actionName, actionInfo in methodInfo['actions'].items():
            actionDescription = actionInfo['description']
            parameters = actionInfo['parameters']

            # Build parameter signature
            paramSig = []
            for paramName, paramInfo in parameters.items():
                paramType = paramInfo['type']
                paramRequired = paramInfo['required']
                paramDefault = paramInfo['default']

                if paramRequired:
                    paramSig.append(f"{paramName}: {paramType}")
                else:
                    defaultStr = f" = {paramDefault}" if paramDefault is not None else " = None"
                    paramSig.append(f"{paramName}: {paramType}{defaultStr}")

            paramSignature = f"({', '.join(paramSig)})" if paramSig else "()"
            actionsList.append(f"- {actionName}{paramSignature}: {actionDescription}")

        actionsStr = "\n".join(actionsList)
        methodsList.append(f"**{methodName}**: {methodDescription}\n{actionsStr}")

    return "\n\n".join(methodsList)
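The rendered catalog is markdown-ish text, one block per method. Illustratively, an entry might look like this (the method, action names, and descriptions below are invented for the example):

```
**MethodWeb**: Web access method
- search(query: str, maxResults: int = 10): Search the web for a query
- fetch(url: str): Fetch a page and return its text content
```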

def getActionParameterList(methodName: str, actionName: str, methods: Dict[str, Any]) -> str:
    """Get action parameter list from method docstring for AI parameter generation (list only)."""
    try:
        if not methods or methodName not in methods:
            return ""

        methodInstance = methods[methodName]['instance']
        if actionName not in methodInstance.actions:
            return ""

        action_info = methodInstance.actions[actionName]
        # Extract parameter descriptions from the docstring
        docstring = action_info.get('description', '')
        paramDescriptions, paramTypes = methodInstance._extractParameterDetails(docstring)

        param_list = []
        for paramName, paramDesc in paramDescriptions.items():
            paramType = paramTypes.get(paramName, 'Any')
            if paramDesc:
                param_list.append(f"- {paramName} ({paramType}): {paramDesc}")
            else:
                param_list.append(f"- {paramName} ({paramType})")

        # Return the list only, without leading headings or trailing text
        return "\n".join(param_list)
    except Exception as e:
        logger.error(f"Error getting action parameter list for {methodName}.{actionName}: {str(e)}")
        return ""
411
modules/workflows/processing/shared/placeholderFactory.py
Normal file

@ -0,0 +1,411 @@
"""
|
||||
Placeholder Factory
|
||||
Centralized placeholder extraction functions for all workflow modes.
|
||||
Each function corresponds to a {{KEY:PLACEHOLDER_NAME}} in prompt templates.
|
||||
|
||||
NAMING CONVENTION:
|
||||
- All functions follow pattern: extract{PlaceholderName}()
|
||||
- Placeholder names are in UPPER_CASE with underscores
|
||||
- Function names are in camelCase
|
||||
|
||||
MAPPING TABLE (keys → function) with usage [taskplan | actionplan | react]:
|
||||
{{KEY:USER_PROMPT}} -> extractUserPrompt() [taskplan, actionplan, react]
|
||||
{{KEY:USER_LANGUAGE}} -> extractUserLanguage() [actionplan, react]
|
||||
{{KEY:WORKFLOW_HISTORY}} -> extractWorkflowHistory() [taskplan, actionplan, react]
|
||||
{{KEY:AVAILABLE_CONNECTIONS_INDEX}} -> extractAvailableConnectionsIndex() [actionplan, react]
|
||||
{{KEY:AVAILABLE_CONNECTIONS_SUMMARY}} -> extractAvailableConnectionsSummary() []
|
||||
{{KEY:AVAILABLE_DOCUMENTS_SUMMARY}} -> extractAvailableDocumentsSummary() [taskplan, actionplan, react]
|
||||
{{KEY:AVAILABLE_DOCUMENTS_INDEX}} -> extractAvailableDocumentsIndex() [react]
|
||||
{{KEY:AVAILABLE_METHODS}} -> extractAvailableMethods() [actionplan, react]
|
||||
{{KEY:REVIEW_CONTENT}} -> extractReviewContent() [actionplan, react]
|
||||
{{KEY:PREVIOUS_ACTION_RESULTS}} -> extractPreviousActionResults() [react]
|
||||
{{KEY:LEARNINGS_AND_IMPROVEMENTS}} -> extractLearningsAndImprovements() [react]
|
||||
{{KEY:LATEST_REFINEMENT_FEEDBACK}} -> extractLatestRefinementFeedback() [react]
|
||||
|
||||
Following placeholders are populated directly by prompt builders with according context in promptGenerationActionsReact module:
|
||||
- ACTION_OBJECTIVE,
|
||||
- SELECTED_ACTION,
|
||||
- ACTION_SIGNATURE
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
from typing import Dict, Any, List
|
||||
from modules.datamodels.datamodelChat import ChatDocument
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
from modules.workflows.processing.shared.methodDiscovery import (methods, discoverMethods)
|
||||
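The `{{KEY:NAME}}` convention implies a simple textual substitution step somewhere in the prompt builders. A minimal sketch of that step (the `fillTemplate` helper is an assumption for illustration, not part of this module):

```python
def fillTemplate(template: str, placeholders: Dict[str, str]) -> str:
    """Replace each {{KEY:NAME}} marker with its extracted content."""
    for name, content in placeholders.items():
        template = template.replace("{{KEY:" + name + "}}", content)
    return template

# e.g. fillTemplate("USER: {{KEY:USER_PROMPT}}", {"USER_PROMPT": "Summarize the report"})
```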


def extractUserPrompt(context: Any) -> str:
    """Extract user prompt from context. Maps to {{KEY:USER_PROMPT}}.
    Prefer the cleaned intent stored on the services object if available via context.
    Fall back to the task_step objective.
    """
    try:
        # Prefer services.currentUserPrompt when accessible through context
        services = getattr(context, 'services', None)
        if services and getattr(services, 'currentUserPrompt', None):
            return services.currentUserPrompt
    except Exception:
        pass

    if hasattr(context, 'task_step') and context.task_step:
        return context.task_step.objective or 'No request specified'
    return 'No request specified'


def extractWorkflowHistory(service: Any, context: Any) -> str:
    """Extract workflow history from context. Maps to {{KEY:WORKFLOW_HISTORY}}.
    Reverse-chronological, enriched with message summaries and document labels.
    """
    # Prefer the explicit workflow on the context; else fall back to services.workflow
    workflow = None
    try:
        if hasattr(context, 'workflow') and context.workflow:
            workflow = context.workflow
        elif hasattr(service, 'workflow') and service.workflow:
            workflow = service.workflow
    except Exception:
        workflow = None

    if workflow:
        history = getPreviousRoundContext(service, workflow)
        return history or "No previous workflow rounds available"
    return "No previous workflow rounds available"


def extractAvailableMethods(service: Any) -> str:
    """Extract available methods for action planning. Maps to {{KEY:AVAILABLE_METHODS}}"""
    try:
        # Get the methods dictionary directly from the global methods variable
        if not methods:
            discoverMethods(service)

        # Create a flat JSON format with compound action names for better AI parsing
        available_actions_json = {}
        for methodName, methodInfo in methods.items():
            # Convert MethodAi -> ai, MethodDocument -> document, etc.
            shortName = methodName.replace('Method', '').lower()

            for actionName, actionInfo in methodInfo['actions'].items():
                # Create compound action name: method.action
                compoundActionName = f"{shortName}.{actionName}"
                # Get the action description
                action_description = actionInfo.get('description', f"Execute {actionName} action")
                available_actions_json[compoundActionName] = action_description

        return json.dumps(available_actions_json, indent=2, ensure_ascii=False)
    except Exception as e:
        logger.error(f"Error extracting available methods: {str(e)}")
        return json.dumps({}, indent=2, ensure_ascii=False)


def extractUserLanguage(service: Any) -> str:
    """Extract user language from service. Maps to {{KEY:USER_LANGUAGE}}"""
    return service.user.language if service and service.user else 'en'


def _computeMessageSummary(msg) -> str:
    """Create a concise summary for a ChatMessage with documents only.
    Fields: documentCount, roundNumber, documentsLabel, document names, message (full), success flag.
    """
    try:
        docs = getattr(msg, 'documents', []) or []
        if not docs:
            return ""  # Only summarize messages that contain documents
        document_count = len(docs)
        round_number = getattr(msg, 'roundNumber', None) or 0
        label = getattr(msg, 'documentsLabel', None) or ""
        # Collect ALL document names (supports ChatDocument objects and dicts)
        doc_names = []
        for d in docs:
            name = None
            try:
                if isinstance(d, dict):
                    # For dict objects, try multiple possible field names
                    name = d.get('fileName') or d.get('documentName') or d.get('name') or d.get('filename')
                else:
                    # For ChatDocument objects, use the fileName field
                    name = getattr(d, 'fileName', None) or getattr(d, 'documentName', None) or getattr(d, 'name', None) or getattr(d, 'filename', None)
            except Exception:
                name = None
            doc_names.append(name or "(unnamed)")
        # Format document names in brackets
        if doc_names:
            names_part = f"({', '.join(doc_names)})"
        else:
            names_part = "(no documents)"

        # Don't truncate the message - show full content
        user_message = (getattr(msg, 'message', '') or '').strip().replace("\n", " ")
        # Read success from the ChatMessage.success field
        success_flag = getattr(msg, 'success', None)
        success_text = "success=True" if success_flag is True else ("success=False" if success_flag is False else "success=Unknown")
        label_part = f" label='{label}'" if label else ""

        # Add learning/feedback if available
        learning_part = ""
        if hasattr(msg, 'summary') and msg.summary and 'learnings' in msg.summary.lower():
            learning_part = " | learnings available"

        return f"Round {round_number}: {document_count} docs {names_part}{label_part} | {success_text}{learning_part} | msg='{user_message}'"
    except Exception:
        return ""


def getMessageSummary(msg) -> str:
    """Return the existing ChatMessage.summary or compute, set, and return it (documents only)."""
    try:
        if getattr(msg, 'summary', None):
            return msg.summary
        summary = _computeMessageSummary(msg)
        # Persist in-memory only; the caller can store it if desired
        if summary:
            try:
                setattr(msg, 'summary', summary)
            except Exception:
                pass
        return summary
    except Exception:
        return ""


def getPreviousRoundContext(services, workflow: Any) -> str:
    """Get enriched context:
    - Reverse-chronological ordering
    - Current round first (newest → oldest), then older rounds
    - Only messages with documents summarized
    - Include an available-documents snapshot at the end
    """
    try:
        if not workflow:
            return "No previous round context available"

        lines: List[str] = []

        # Reverse-chronological, current round first
        try:
            msgs = getattr(workflow, 'messages', []) or []
            current_round = getattr(workflow, 'currentRound', None)
            current_round_msgs: List[Any] = []
            previous_round_msgs: List[Any] = []
            for m in msgs:
                if current_round is not None and getattr(m, 'roundNumber', None) == current_round:
                    current_round_msgs.append(m)
                else:
                    previous_round_msgs.append(m)

            for m in reversed(current_round_msgs):
                s = getMessageSummary(m)
                if s:
                    lines.append(f"- {s}")
            for m in reversed(previous_round_msgs):
                s = getMessageSummary(m)
                if s:
                    lines.append(f"- {s}")
        except Exception:
            pass

        # Include an available-documents snapshot at the end
        try:
            if hasattr(services, 'workflow'):
                docs_index = services.workflow.getAvailableDocuments(workflow)
                if docs_index and docs_index != "No documents available":
                    doc_count = docs_index.count("docItem:")  # Only count actual documents, not document list labels
                    lines.append(f"Available documents: {doc_count}")
        except Exception:
            pass

        if not lines:
            return "No previous round context available"
        return "\n".join(lines)
    except Exception as e:
        logger.error(f"Error getting previous round context: {str(e)}")
        return "Error retrieving previous round context"


def extractReviewContent(context: Any) -> str:
    """Extract review content for result validation. Maps to {{KEY:REVIEW_CONTENT}}"""
    try:
        if hasattr(context, 'action_results') and context.action_results:
            # Build result summary
            result_summary = ""
            for i, result in enumerate(context.action_results):
                result_summary += f"\nRESULT {i+1}:\n"
                result_summary += f"  Success: {result.success}\n"
                if result.error:
                    result_summary += f"  Error: {result.error}\n"

                if result.documents:
                    result_summary += f"  Documents: {len(result.documents)} document(s)\n"
                    for doc in result.documents:
                        # Extract all available metadata without content
                        doc_metadata = {
                            "name": getattr(doc, 'fileName', None) or getattr(doc, 'documentName', 'Unknown'),
                            "mimeType": getattr(doc, 'mimeType', 'Unknown'),
                            "size": getattr(doc, 'size', 'Unknown'),
                            "created": getattr(doc, 'created', 'Unknown'),
                            "modified": getattr(doc, 'modified', 'Unknown'),
                            "typeGroup": getattr(doc, 'typeGroup', 'Unknown'),
                            "documentId": getattr(doc, 'documentId', 'Unknown'),
                            "reference": getattr(doc, 'reference', 'Unknown')
                        }
                        # Remove 'Unknown' values to keep it clean
                        doc_metadata = {k: v for k, v in doc_metadata.items() if v != 'Unknown'}
                        result_summary += f"    - {json.dumps(doc_metadata, indent=6, ensure_ascii=False)}\n"
                else:
                    result_summary += "  Documents: None\n"

            return result_summary
        elif hasattr(context, 'observation') and context.observation:
            # For observation data, show full content but handle documents specially
            if isinstance(context.observation, dict):
                # Deep copy so the snippet redaction below does not mutate the caller's observation
                obs_copy = copy.deepcopy(context.observation)

                # If there are previews with documents, show only metadata
                if 'previews' in obs_copy and isinstance(obs_copy['previews'], list):
                    for preview in obs_copy['previews']:
                        if isinstance(preview, dict) and 'snippet' in preview:
                            # Replace the snippet with a metadata indicator
                            preview['snippet'] = f"[Content: {len(preview.get('snippet', ''))} characters]"

                return json.dumps(obs_copy, indent=2, ensure_ascii=False)
            else:
                return json.dumps(context.observation, ensure_ascii=False)
        elif hasattr(context, 'step_result') and context.step_result and 'observation' in context.step_result:
            # Same handling for observation data nested in step_result
            observation = context.step_result['observation']
            if isinstance(observation, dict):
                # Deep copy so the snippet redaction below does not mutate the caller's observation
                obs_copy = copy.deepcopy(observation)

                # If there are previews with documents, show only metadata
                if 'previews' in obs_copy and isinstance(obs_copy['previews'], list):
                    for preview in obs_copy['previews']:
                        if isinstance(preview, dict) and 'snippet' in preview:
                            # Replace the snippet with a metadata indicator
                            preview['snippet'] = f"[Content: {len(preview.get('snippet', ''))} characters]"

                return json.dumps(obs_copy, indent=2, ensure_ascii=False)
            else:
                return json.dumps(observation, ensure_ascii=False)
        else:
            return "No review content available"
    except Exception as e:
        logger.error(f"Error extracting review content: {str(e)}")
        return "No review content available"
|
||||
def extractPreviousActionResults(context: Any) -> str:
|
||||
"""Extract previous action results for learning context. Maps to {{KEY:PREVIOUS_ACTION_RESULTS}}"""
|
||||
try:
|
||||
if not hasattr(context, 'previous_action_results') or not context.previous_action_results:
|
||||
return "No previous actions executed yet"
|
||||
|
||||
results = []
|
||||
for i, result in enumerate(context.previous_action_results[-5:], 1): # Last 5 results
|
||||
if hasattr(result, 'resultLabel') and hasattr(result, 'status'):
|
||||
status = "SUCCESS" if result.status == "completed" else "FAILED"
|
||||
results.append(f"Action {i}: {result.resultLabel} - {status}")
|
||||
if hasattr(result, 'error') and result.error:
|
||||
results.append(f" Error: {result.error}")
|
||||
|
||||
return "\n".join(results) if results else "No previous actions executed yet"
|
||||
except Exception as e:
|
||||
logger.error(f"Error extracting previous action results: {str(e)}")
|
||||
return "No previous actions executed yet"
|
||||
|
||||
def extractLearningsAndImprovements(context: Any) -> str:
|
||||
"""Extract learnings and improvements from previous actions. Maps to {{KEY:LEARNINGS_AND_IMPROVEMENTS}}"""
|
||||
try:
|
||||
learnings = []
|
||||
|
||||
# Get improvements from context
|
||||
if hasattr(context, 'improvements') and context.improvements and isinstance(context.improvements, list):
|
||||
learnings.append("IMPROVEMENTS:")
|
||||
for improvement in context.improvements[-3:]: # Last 3 improvements
|
||||
learnings.append(f"- {improvement}")
|
||||
|
||||
# Get failure patterns
|
||||
if hasattr(context, 'failure_patterns') and context.failure_patterns and isinstance(context.failure_patterns, list):
|
||||
learnings.append("FAILURE PATTERNS TO AVOID:")
|
||||
for pattern in context.failure_patterns[-3:]: # Last 3 patterns
|
||||
learnings.append(f"- {pattern}")
|
||||
|
||||
# Get successful actions
|
||||
if hasattr(context, 'successful_actions') and context.successful_actions and isinstance(context.successful_actions, list):
|
||||
learnings.append("SUCCESSFUL APPROACHES:")
|
||||
for action in context.successful_actions[-3:]: # Last 3 successful
|
||||
learnings.append(f"- {action}")
|
||||
|
||||
return "\n".join(learnings) if learnings else "No learnings available yet"
|
||||
except Exception as e:
|
||||
logger.error(f"Error extracting learnings and improvements: {str(e)}")
|
||||
return "No learnings available yet"
|
||||
|
||||
def extractLatestRefinementFeedback(context: Any) -> str:
|
||||
"""Extract the latest refinement feedback. Maps to {{KEY:LATEST_REFINEMENT_FEEDBACK}}"""
|
||||
try:
|
||||
if not hasattr(context, 'previous_review_result') or not context.previous_review_result or not isinstance(context.previous_review_result, list):
|
||||
return "No previous refinement feedback available"
|
||||
|
||||
# Get the most recent refinement decision
|
||||
latest_decision = context.previous_review_result[-1]
|
||||
if not isinstance(latest_decision, dict):
|
||||
return "No previous refinement feedback available"
|
||||
|
||||
feedback_parts = []
|
||||
|
||||
# Add decision and reason
|
||||
decision = latest_decision.get('decision', 'unknown')
|
||||
reason = latest_decision.get('reason', 'No reason provided')
|
||||
feedback_parts.append(f"Latest Decision: {decision}")
|
||||
feedback_parts.append(f"Reason: {reason}")
|
||||
|
||||
# Add any specific feedback or suggestions
|
||||
if 'feedback' in latest_decision:
|
||||
feedback_parts.append(f"Feedback: {latest_decision['feedback']}")
|
||||
|
||||
if 'suggestions' in latest_decision:
|
||||
feedback_parts.append(f"Suggestions: {latest_decision['suggestions']}")
|
||||
|
||||
return "\n".join(feedback_parts)
|
||||
except Exception as e:
|
||||
logger.error(f"Error extracting latest refinement feedback: {str(e)}")
|
||||
return "No previous refinement feedback available"
|
||||
|
||||
def extractAvailableDocumentsSummary(service: Any, context: Any) -> str:
|
||||
"""Summary of available documents (count only)."""
|
||||
try:
|
||||
documents = service.workflow.getAvailableDocuments(context.workflow)
|
||||
if documents and documents != "No documents available":
|
||||
doc_count = documents.count("docList:") + documents.count("docItem:")
|
||||
return f"{doc_count} documents available from previous tasks"
|
||||
return "No documents available"
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting document summary: {str(e)}")
|
||||
return "No documents available"
|
||||
|
||||
def extractAvailableDocumentsIndex(service: Any, context: Any) -> str:
|
||||
"""Index of available documents with detailed references for parameter generation."""
|
||||
try:
|
||||
return service.workflow.getAvailableDocuments(context.workflow)
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting document index: {str(e)}")
|
||||
return "No documents available"
|
||||
|
||||
def extractAvailableConnectionsSummary(service: Any) -> str:
|
||||
"""Summary of available connections (count only)."""
|
||||
try:
|
||||
connections = service.workflow.getConnectionReferenceList()
|
||||
if connections:
|
||||
return f"{len(connections)} connections available"
|
||||
return "No connections available"
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting connection summary: {str(e)}")
|
||||
return "No connections available"
|
||||
|
||||
def extractAvailableConnectionsIndex(service: Any) -> str:
|
||||
"""Index of available connections with detailed references for parameter generation."""
|
||||
try:
|
||||
connections = service.workflow.getConnectionReferenceList()
|
||||
if connections:
|
||||
return '\n'.join(f"- {conn}" for conn in connections)
|
||||
return "No connections available"
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting connection index: {str(e)}")
|
||||
return "No connections available"
|
||||
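
# Illustrative output sketch (assumed label format, based on the example used in the
# React plan-selection prompt later in this change): with two registered connections,
# extractAvailableConnectionsIndex would return one bullet per connection, e.g.
#
#   - connection:msft:p.motsch@valueon.ch:1ae8b8e5-128b-49b8-b1cb-7c632669eeae
#   - connection:smtp:mailer@example.com:...
#
# The "connection:<provider>:<account>:<uuid>" shape is an assumption; only the
# bullet-per-line formatting is guaranteed by the code above.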

@@ -0,0 +1,236 @@
"""
Actionplan Mode Prompt Generation
Handles prompt templates and extraction functions for actionplan mode action handling.
"""

import json
import logging
from typing import Dict, Any, List
from modules.datamodels.datamodelChat import PromptBundle, PromptPlaceholder
from modules.workflows.processing.shared.placeholderFactory import (
    extractUserPrompt,
    extractAvailableDocumentsSummary,
    extractWorkflowHistory,
    extractAvailableMethods,
    extractUserLanguage,
    extractAvailableConnectionsIndex,
    extractReviewContent,
)

logger = logging.getLogger(__name__)


def generateActionDefinitionPrompt(services, context: Any) -> PromptBundle:
    """Define placeholders first, then the template; return PromptBundle."""
    placeholders: List[PromptPlaceholder] = [
        PromptPlaceholder(label="USER_PROMPT", content=extractUserPrompt(context), summaryAllowed=False),
        PromptPlaceholder(label="AVAILABLE_DOCUMENTS_SUMMARY", content=extractAvailableDocumentsSummary(services, context), summaryAllowed=True),
        PromptPlaceholder(label="AVAILABLE_CONNECTIONS_INDEX", content=extractAvailableConnectionsIndex(services), summaryAllowed=False),
        PromptPlaceholder(label="WORKFLOW_HISTORY", content=extractWorkflowHistory(services, context), summaryAllowed=True),
        PromptPlaceholder(label="AVAILABLE_METHODS", content=extractAvailableMethods(services), summaryAllowed=False),
        PromptPlaceholder(label="USER_LANGUAGE", content=extractUserLanguage(services), summaryAllowed=False),
    ]

    template = """# Action Definition

Generate the next action to advance toward completing the task objective.

## 📋 Context

### Task Objective
{{KEY:USER_PROMPT}}

### Available Documents
{{KEY:AVAILABLE_DOCUMENTS_SUMMARY}}

### Available Connections
{{KEY:AVAILABLE_CONNECTIONS_INDEX}}

### User Language
{{KEY:USER_LANGUAGE}}

### Workflow History
{{KEY:WORKFLOW_HISTORY}}

### Available Methods
{{KEY:AVAILABLE_METHODS}}

## ⚠️ RULES

### Action Names
- **Use EXACT compound action names** from AVAILABLE_METHODS (e.g., "ai.process", "document.extract", "web.search")
- **DO NOT create** new action names - only use those listed in AVAILABLE_METHODS
- **DO NOT separate** method and action names - use the full compound name

### Parameter Guidelines
- **Use exact document references** from AVAILABLE_DOCUMENTS_INDEX
- **Use exact connection references** from AVAILABLE_CONNECTIONS_INDEX
- **Include user language** if relevant
- **Avoid unnecessary fields** - host applies defaults

## 📊 Required JSON Structure

```json
{
  "actions": [
    {
      "action": "method.action_name",
      "parameters": {},
      "resultLabel": "round{current_round}_task{current_task}_action{action_number}_{descriptive_label}",
      "description": "What this action accomplishes",
      "userMessage": "User-friendly message in {{KEY:USER_LANGUAGE}}"
    }
  ]
}
```

## ✅ Correct Example

```json
{
  "actions": [
    {
      "action": "document.extract",
      "parameters": {"documentList": ["docList:msg_123:results"]},
      "resultLabel": "round1_task1_action1_extract_results",
      "description": "Extract data from documents",
      "userMessage": "Extracting data from documents"
    }
  ]
}
```

## 🎯 Action Planning Guidelines

### Method Selection
- **Choose appropriate method** based on task requirements
- **Consider available resources** (documents, connections)
- **Match method capabilities** to task objectives

### Parameter Design
- **Use ACTION SIGNATURE** to understand required parameters
- **Convert objective** into appropriate parameter values
- **Include all required parameters** for the action

### Result Labeling
- **Use descriptive labels** that explain what the action produces
- **Follow naming convention**: `round{round}_task{task}_action{action}_{label}`
- **Make labels meaningful** for future reference

### User Messages
- **Write in user language** ({{KEY:USER_LANGUAGE}})
- **Explain what's happening** in user-friendly terms
- **Keep messages concise** but informative

## 🚀 Response Format
Return ONLY the JSON object."""

    return PromptBundle(prompt=template, placeholders=placeholders)


def generateResultReviewPrompt(context: Any) -> PromptBundle:
    """Define placeholders first, then the template; return PromptBundle."""
    placeholders: List[PromptPlaceholder] = [
        PromptPlaceholder(label="USER_PROMPT", content=extractUserPrompt(context), summaryAllowed=False),
        PromptPlaceholder(label="REVIEW_CONTENT", content=extractReviewContent(context), summaryAllowed=True),
    ]

    template = """# Result Review & Validation

Review task execution outcomes and determine success, retry needs, or failure.

## 📋 Context

### Task Objective
{{KEY:USER_PROMPT}}

### Execution Results
{{KEY:REVIEW_CONTENT}}

## 🔍 Validation Criteria

### Action Assessment
- **Review each action's success/failure status**
- **Check if required documents were produced**
- **Validate document quality and completeness**
- **Assess if success criteria were met**
- **Identify any missing or incomplete outputs**

### Decision Making
- **Determine if retry would help** or if task should be marked as failed
- **Consider business value** and user satisfaction
- **Evaluate technical execution** and results quality

## 📊 Required JSON Structure

```json
{
  "status": "success|retry|failed",
  "reason": "Detailed explanation of the validation decision",
  "improvements": ["specific improvement 1", "specific improvement 2"],
  "quality_score": 8,
  "met_criteria": ["criteria1", "criteria2"],
  "unmet_criteria": ["criteria3", "criteria4"],
  "confidence": 0.85,
  "userMessage": "User-friendly message explaining the validation result"
}
```

## 🎯 Validation Principles

### Assessment Approach
- **Be thorough but fair** in assessment
- **Focus on business value** and outcomes
- **Consider both technical execution** and business results
- **Provide specific, actionable** improvement suggestions

### Quality Scoring
- **Use quality scores** to track progress across retries
- **Scale 1-10**: 1 = Poor, 5 = Average, 10 = Excellent
- **Consider completeness, accuracy, and usefulness**

### Criteria Evaluation
- **Clearly identify** which success criteria were met vs. unmet
- **List specific criteria** that were achieved
- **Note missing requirements** that need attention

### Confidence Levels
- **Set appropriate confidence levels** based on evidence quality
- **Scale 0.0-1.0**: 0.0 = No confidence, 1.0 = Complete confidence
- **Consider data quality** and result reliability

## 📝 Status Definitions

### Success
- **All objectives met** - User got what they asked for
- **Quality standards met** - Results are complete and accurate
- **No retry needed** - Task is fully complete

### Retry
- **Partial success** - Some but not all objectives met
- **Improvement possible** - Retry could lead to better results
- **Technical issues** - Action failures that can be resolved

### Failed
- **No progress made** - Objectives not achieved
- **Technical limitations** - Cannot be resolved with retry
- **Resource constraints** - Missing required inputs

## 💡 Improvement Suggestions

### Actionable Improvements
- **Be specific** - Don't just say "improve quality"
- **Focus on process** - How to do better next time
- **Consider resources** - What additional inputs might help
- **Technical fixes** - Address specific technical issues

### Examples
- "Use more specific document references from AVAILABLE_DOCUMENTS_INDEX"
- "Include user language parameter for better localization"
- "Break down complex objective into smaller, focused actions"
- "Verify document references before processing"

## 🚀 Response Format
Return ONLY the JSON object. Do not include any explanatory text."""

    return PromptBundle(prompt=template, placeholders=placeholders)
@@ -0,0 +1,237 @@
"""
React Mode Prompt Generation
Handles prompt templates for react mode action handling.
"""

from typing import Any, List
from modules.datamodels.datamodelChat import PromptBundle, PromptPlaceholder
from modules.workflows.processing.shared.placeholderFactory import (
    extractUserPrompt,
    extractUserLanguage,
    extractAvailableMethods,
    extractAvailableDocumentsSummary,
    extractAvailableDocumentsIndex,
    extractAvailableConnectionsIndex,
    extractPreviousActionResults,
    extractLearningsAndImprovements,
    extractLatestRefinementFeedback,
    extractWorkflowHistory,
)
from modules.workflows.processing.shared.methodDiscovery import methods, getActionParameterList


def generateReactPlanSelectionPrompt(services, context: Any) -> PromptBundle:
    """Define placeholders first, then the template; return PromptBundle."""
    placeholders: List[PromptPlaceholder] = [
        PromptPlaceholder(label="USER_PROMPT", content=extractUserPrompt(context), summaryAllowed=False),
        PromptPlaceholder(label="AVAILABLE_DOCUMENTS_SUMMARY", content=extractAvailableDocumentsSummary(services, context), summaryAllowed=True),
        PromptPlaceholder(label="AVAILABLE_METHODS", content=extractAvailableMethods(services), summaryAllowed=False),
        # Provide enriched history context for Stage 1 to craft parametersContext
        PromptPlaceholder(label="WORKFLOW_HISTORY", content=extractWorkflowHistory(services, context), summaryAllowed=True),
        # Provide deterministic indexes so the planner can choose exact labels
        PromptPlaceholder(label="AVAILABLE_DOCUMENTS_INDEX", content=extractAvailableDocumentsIndex(services, context), summaryAllowed=True),
        PromptPlaceholder(label="AVAILABLE_CONNECTIONS_INDEX", content=extractAvailableConnectionsIndex(services), summaryAllowed=False),
    ]

    template = """Select exactly one action to advance the task.

OBJECTIVE:
{{KEY:USER_PROMPT}}

AVAILABLE_DOCUMENTS_SUMMARY:
{{KEY:AVAILABLE_DOCUMENTS_SUMMARY}}

AVAILABLE_METHODS:
{{KEY:AVAILABLE_METHODS}}

WORKFLOW_HISTORY (reverse-chronological, enriched):
{{KEY:WORKFLOW_HISTORY}}

AVAILABLE_DOCUMENTS_INDEX:
{{KEY:AVAILABLE_DOCUMENTS_INDEX}}

AVAILABLE_CONNECTIONS_INDEX:
{{KEY:AVAILABLE_CONNECTIONS_INDEX}}

REPLY: Return ONLY a JSON object with the following structure (no comments, no extra text):
{{
  "action": "method.action_name",
  "actionObjective": "...",
  "learnings": ["..."],
  "requiredInputDocuments": ["docList:..."],
  "requiredConnection": "connection:..." | null,
  "parametersContext": "concise text that Stage 2 will use to set business parameters"
}}

EXAMPLE how to assign references from AVAILABLE_DOCUMENTS_INDEX and AVAILABLE_CONNECTIONS_INDEX:
"requiredInputDocuments": ["docList:msg_47a7a578-e8f2-4ba8-ac66-0dbff40605e0:round8_task1_action1_results","docItem:5d8b7aee-b546-4487-b6a8-835c86f7b186:AI_Generated_Document_20251006-104256.docx"],
"requiredConnection": "connection:msft:p.motsch@valueon.ch:1ae8b8e5-128b-49b8-b1cb-7c632669eeae",

RULES:
1. Use EXACT action names from AVAILABLE_METHODS
2. Do NOT output a "parameters" object
3. parametersContext must be short and sufficient for Stage 2
4. Return ONLY JSON - no markdown, no explanations
5. For requiredInputDocuments, use ONLY exact references from AVAILABLE_DOCUMENTS_INDEX (docList:... or docItem:...)
6. For requiredConnection, use ONLY an exact label from AVAILABLE_CONNECTIONS_INDEX
"""

    return PromptBundle(prompt=template, placeholders=placeholders)


def generateReactParametersPrompt(services, context: Any, compoundActionName: str) -> PromptBundle:
    """Define placeholders first, then the template; return PromptBundle.

    Minimal Stage 2 (no fallback): consumes actionObjective, selectedAction, parametersContext only.
    Excludes documents/connections/history entirely.
    """
    # derive method/action and parameter list
    methodName, actionName = (compoundActionName.split('.', 1) if '.' in compoundActionName else (compoundActionName, ''))
    actionParameterList = getActionParameterList(methodName, actionName, methods)

    def _formatBusinessParameters(params) -> str:
        excluded = {"documentList", "connectionReference"}
        # Case 1: params is a list of dicts or objects with 'name'
        if isinstance(params, (list, tuple)):
            entries = []
            for p in params:
                try:
                    if isinstance(p, dict):
                        name = p.get("name")
                        if not name or name in excluded:
                            continue
                        ptype = p.get("type") or p.get("dataType") or ""
                        req = p.get("required")
                        reqTxt = "required" if (req is True or str(req).lower() == "true") else "optional"
                        desc = p.get("description") or p.get("desc") or ""
                        entry = f"- {name} ({ptype}, {reqTxt})" + (f": {desc}" if desc else "")
                        entries.append(entry)
                    else:
                        # Try attribute access
                        name = getattr(p, "name", None)
                        if not name or name in excluded:
                            continue
                        ptype = getattr(p, "type", "") or getattr(p, "dataType", "")
                        req = getattr(p, "required", False)
                        reqTxt = "required" if (req is True or str(req).lower() == "true") else "optional"
                        desc = getattr(p, "description", None) or getattr(p, "desc", None) or ""
                        entry = f"- {name} ({ptype}, {reqTxt})" + (f": {desc}" if desc else "")
                        entries.append(entry)
                except Exception:
                    continue
            return "\n".join(entries)
        # Case 2: params is a string description: filter out lines mentioning excluded names
        if isinstance(params, str):
            lines = [ln for ln in params.splitlines() if not any(ex in ln for ex in excluded)]
            return "\n".join(lines).strip()
        # Fallback: plain string
        try:
            return str(params)
        except Exception:
            return ""

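    # Illustrative only (assumed parameter shape, not from the source): if
    # getActionParameterList returned
    #   [{"name": "query", "type": "string", "required": True, "description": "Search text"},
    #    {"name": "documentList", "type": "list", "required": True}]
    # then _formatBusinessParameters would emit the single line
    #   - query (string, required): Search text
    # and drop documentList/connectionReference, which the host wires in separately.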
    actionParametersText = _formatBusinessParameters(actionParameterList)

    # determine action objective if available, else fall back to user prompt
    if hasattr(context, 'action_objective') and context.action_objective:
        actionObjective = context.action_objective
    elif hasattr(context, 'task_step') and context.task_step and getattr(context.task_step, 'objective', None):
        actionObjective = context.task_step.objective
    else:
        actionObjective = extractUserPrompt(context)

    # Minimal Stage 2 (no fallback)
    parametersContext = getattr(context, 'parameters_context', None)
    learningsText = ""
    try:
        # If Stage 1 learnings were attached to context, pass them textually
        if hasattr(context, 'learnings') and context.learnings:
            if isinstance(context.learnings, (list, tuple)):
                learningsText = "\n".join(f"- {str(x)}" for x in context.learnings)
            else:
                learningsText = str(context.learnings)
    except Exception:
        learningsText = ""

    placeholders: List[PromptPlaceholder] = [
        PromptPlaceholder(label="ACTION_OBJECTIVE", content=actionObjective, summaryAllowed=False),
        PromptPlaceholder(label="SELECTED_ACTION", content=compoundActionName, summaryAllowed=False),
        PromptPlaceholder(label="PARAMETERS_CONTEXT", content=(parametersContext or ""), summaryAllowed=True),
        PromptPlaceholder(label="ACTION_PARAMETERS", content=actionParametersText, summaryAllowed=False),
        PromptPlaceholder(label="LEARNINGS", content=learningsText, summaryAllowed=True),
    ]

    template = """You are a parameter generator. Set the parameters for this specific action.

CONTEXT AND OBJECTIVE:
-----------------
{{KEY:ACTION_OBJECTIVE}}
-----------------

SELECTED_ACTION:
{{KEY:SELECTED_ACTION}}


REPLY (ONLY JSON):
{{
  "schema": "parameters_v1",
  "parameters": {{
    "paramName": "value"
  }}
}}


CONTEXT FOR PARAMETER VALUES:
-----------------
{{KEY:PARAMETERS_CONTEXT}}
-----------------

LEARNINGS (from prior attempts, if any):
{{KEY:LEARNINGS}}

REQUIRED PARAMETERS FOR THIS ACTION (use these exact parameter names):
{{KEY:ACTION_PARAMETERS}}

INSTRUCTIONS:
- Use ONLY the parameter names listed in section REQUIRED PARAMETERS FOR THIS ACTION
- Fill in appropriate values based on the context and objective
- Do NOT invent new parameters
- Do NOT include: documentList, connectionReference, history, documents, connections

RULES:
- Return ONLY JSON (no markdown, no prose)
- Use ONLY the exact parameter names listed in REQUIRED PARAMETERS FOR THIS ACTION
- Do NOT add any parameters not listed above
- Do NOT add nested objects or custom fields
"""

    return PromptBundle(prompt=template, placeholders=placeholders)


def generateReactRefinementPrompt(services, context: Any, reviewContent: str) -> PromptBundle:
    """Define placeholders first, then the template; return PromptBundle."""
    placeholders: List[PromptPlaceholder] = [
        PromptPlaceholder(label="USER_PROMPT", content=extractUserPrompt(context), summaryAllowed=False),
        PromptPlaceholder(label="REVIEW_CONTENT", content=reviewContent, summaryAllowed=True),
    ]

    template = """Decide the next step based on the observation.

OBJECTIVE:
{{KEY:USER_PROMPT}}

OBSERVATION:
{{KEY:REVIEW_CONTENT}}

REPLY: Return only a JSON object with your decision:
{{
  "decision": "continue|stop",
  "reason": "brief explanation"
}}

RULES:
1. Use "continue" if objective NOT fulfilled
2. Use "stop" if objective fulfilled
3. Return ONLY JSON - no other text
4. Do NOT use markdown code blocks
5. Do NOT add explanations
"""

    return PromptBundle(prompt=template, placeholders=placeholders)
121
modules/workflows/processing/shared/promptGenerationTaskplan.py
Normal file
@@ -0,0 +1,121 @@
"""
Task Planning Prompt Generation
Handles prompt templates and extraction functions for task planning phase.
"""

import json
import logging
from typing import Dict, Any, List
from modules.datamodels.datamodelChat import PromptBundle, PromptPlaceholder
from modules.workflows.processing.shared.placeholderFactory import (
    extractUserPrompt,
    extractAvailableDocumentsSummary,
    extractWorkflowHistory,
)

logger = logging.getLogger(__name__)


def generateTaskPlanningPrompt(services, context: Any) -> PromptBundle:
    """Define placeholders first, then the template; return PromptBundle."""
    placeholders: List[PromptPlaceholder] = [
        PromptPlaceholder(label="USER_PROMPT", content=extractUserPrompt(context), summaryAllowed=False),
        PromptPlaceholder(label="AVAILABLE_DOCUMENTS_SUMMARY", content=extractAvailableDocumentsSummary(services, context), summaryAllowed=True),
        PromptPlaceholder(label="WORKFLOW_HISTORY", content=extractWorkflowHistory(services, context), summaryAllowed=True),
    ]

    template = """# Task Planning

Break down user requests into logical, executable task steps.

## 📋 Context

### User Request
{{KEY:USER_PROMPT}}

### Available Documents
{{KEY:AVAILABLE_DOCUMENTS_SUMMARY}}

### Previous Workflow Rounds
{{KEY:WORKFLOW_HISTORY}}

## 📝 Task Planning Rules

### Strategic Task Grouping
- **GROUP RELATED ACTIONS** - Combine all actions for the same business topic into ONE task
- **ONE TOPIC PER TASK** - Each task should handle one complete business objective
- **HIGH-LEVEL FOCUS** - Plan strategic outcomes, not implementation steps
- **AVOID MICRO-TASKS** - Don't create separate tasks for each small action

### Task Grouping Examples
- **Research + Analysis + Report** → ONE task: "Web research report"
- **Data Collection + Processing + Visualization** → ONE task: "Collect and present data"
- **Different topics** (email + flowers) → SEPARATE tasks: "Send formal email..." + "Order flowers from Fleurop for delivery to 123 Main St, include card message"

### Retry Handling
- **If retry request**: Analyze previous rounds to understand what failed
- **Learn from mistakes**: Improve the plan based on previous failures

## 📊 Required JSON Structure

```json
{
  "overview": "Brief description of the overall plan",
  "languageUserDetected": "en",
  "userMessage": "User-friendly message explaining the task plan",
  "tasks": [
    {
      "id": "task_1",
      "objective": "Clear business objective focusing on what to deliver",
      "dependencies": ["task_0"],
      "success_criteria": ["measurable criteria 1", "measurable criteria 2"],
      "estimated_complexity": "low|medium|high",
      "userMessage": "What this task will accomplish"
    }
  ]
}
```

## 🎯 Task Structure Guidelines

### Task ID Format
- Use sequential numbering: `task_1`, `task_2`, `task_3`
- Keep IDs simple and clear

### Objective Writing
- **Be VERY SPECIFIC** - Include exact details needed for action planning
- **Include all requirements** - recipients, attachments, format, etc.
- **State the complete deliverable** - What exactly will be produced
- **Include context and constraints** - When, where, how, with what
- **Make it actionable** - Clear enough to plan specific actions

### Specific Objective Examples
- **Good**: "Send formal email to CEO and board of directors with annual report as attachment"
- **Bad**: "Handle email communication"
- **Good**: "Order flowers from Fleurop for delivery to 123 Main St, include card message 'Happy Birthday', deliver on March 15th"
- **Bad**: "Order flowers"

### Action Planning Requirements
- **Include all necessary details** - The objective must contain everything needed to plan actions
- **Specify recipients and destinations** - Who should receive what
- **Include file names and formats** - What documents to use/create
- **State timing and deadlines** - When things need to be done
- **Include context and constraints** - Any special requirements or limitations

### Success Criteria
- **Make them measurable** - specific, quantifiable outcomes
- **Focus on deliverables** - what the user will receive
- **Keep criteria realistic** - achievable within the task scope
- **Include all related actions** - success means completing the entire business objective
- **Be specific about requirements** - Include exact details like recipients, formats, deadlines
- **State clear completion criteria** - How to know the task is fully done

### Complexity Estimation
- **Low**: Simple, single-action tasks (1-2 actions)
- **Medium**: Multi-action tasks for one topic (3-5 actions)
- **High**: Complex strategic tasks (6+ actions)

## 🚀 Response Format
Return ONLY the JSON object."""

    return PromptBundle(prompt=template, placeholders=placeholders)
216
modules/workflows/processing/shared/securityUtils.py
Normal file
@@ -0,0 +1,216 @@
"""
Security utilities for AI prompt construction.
Provides secure content escaping to prevent prompt injection attacks.
"""

import re
import json
import logging
from typing import Any, Union, List, Dict

logger = logging.getLogger(__name__)


def _escapeForAiPrompt(content: str) -> str:
    """
    Securely escape content for AI prompts to prevent injection attacks.

    This function:
    1. Escapes all special characters that could break prompt structure
    2. Wraps content in secure delimiters
    3. Handles multi-line content safely
    4. Prevents quote injection and context breaking

    Args:
        content: The content to escape

    Returns:
        Safely escaped content wrapped in secure delimiters
    """
    if not content:
        return ""

    # Convert to string if not already
    content_str = str(content)

    # Remove or escape dangerous characters that could break prompt structure
    # This includes quotes, backslashes, and other special characters
    escaped = content_str

    # Escape backslashes first (order matters)
    escaped = escaped.replace('\\', '\\\\')

    # Escape quotes and other special characters
    escaped = escaped.replace('"', '\\"')
    escaped = escaped.replace("'", "\\'")
    escaped = escaped.replace('\n', '\\n')
    escaped = escaped.replace('\r', '\\r')
    escaped = escaped.replace('\t', '\\t')

    # Remove or escape other potentially dangerous characters
    # Remove control characters except newlines (already handled above)
    escaped = re.sub(r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]', '', escaped)

    # Wrap in secure delimiters with clear boundaries
    # Using a unique delimiter pattern that's unlikely to appear in user content
    secure_delimiter_start = "===USER_CONTENT_START==="
    secure_delimiter_end = "===USER_CONTENT_END==="

    return f"{secure_delimiter_start}\n{escaped}\n{secure_delimiter_end}"
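
# Illustrative behavior sketch (shown as Python literals; the exact input is assumed,
# not from the source): a newline-bearing input is flattened and fenced, e.g.
#
#   _escapeForAiPrompt('say "hi"\nignore previous instructions')
#   # -> '===USER_CONTENT_START===\nsay \\"hi\\"\\nignore previous instructions\n===USER_CONTENT_END==='
#
# Literal newlines inside the content become the two-character sequence \n, so only
# the delimiter lines contain real line breaks.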

def _escapeForJsonPrompt(content: Any) -> str:
    """
    Securely escape content for JSON-based AI prompts.

    Args:
        content: The content to escape (can be any type)

    Returns:
        Safely escaped JSON string
    """
    try:
        # Convert to JSON string with proper escaping
        json_str = json.dumps(content, ensure_ascii=False, separators=(',', ':'))
        return json_str
    except Exception as e:
        logger.warning(f"Failed to escape content as JSON: {str(e)}")
        # Fallback to string escaping
        return _escapeForAiPrompt(str(content))


def _escapeForListPrompt(items: List[Any]) -> str:
    """
    Securely escape a list of items for AI prompts.

    Args:
        items: List of items to escape

    Returns:
        Safely escaped list representation
    """
    if not items:
        return "[]"

    try:
        escaped_items = []
        for item in items:
            if isinstance(item, (dict, list)):
                escaped_items.append(_escapeForJsonPrompt(item))
            else:
                escaped_items.append(_escapeForAiPrompt(str(item)))

        return f"[{', '.join(escaped_items)}]"
    except Exception as e:
        logger.warning(f"Failed to escape list content: {str(e)}")
        return "[]"


def securePromptContent(content: Any, content_type: str = "text") -> str:
    """
    Main function to securely escape content for AI prompts.

    Args:
        content: The content to escape
        content_type: Type of content ("text", "json", "list", "user_prompt", "document_content")

    Returns:
        Safely escaped content ready for AI prompt insertion
    """
    if content is None:
        return ""

    try:
        if content_type == "json":
            return _escapeForJsonPrompt(content)
        elif content_type == "list":
            if isinstance(content, list):
                return _escapeForListPrompt(content)
            else:
                return _escapeForAiPrompt(str(content))
        elif content_type in ["user_prompt", "document_content"]:
            # Extra security for user-controlled content
            escaped = _escapeForAiPrompt(str(content))
            # Add additional warning for AI
            return f"⚠️ USER_CONTROLLED_CONTENT: {escaped}"
        else:  # content_type == "text" or default
            return _escapeForAiPrompt(str(content))

    except Exception as e:
        logger.error(f"Error escaping content for AI prompt: {str(e)}")
        # Return a safe fallback
        return "[ERROR: Content could not be safely escaped]"
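
# Minimal usage sketch (assumed call sites, not from the source):
#
#   securePromptContent({"q": "weather"}, "json")     # -> '{"q":"weather"}'
#   securePromptContent(["a", "b"], "list")           # -> '[===USER_CONTENT_START===\na\n===USER_CONTENT_END===, ...]'
#   securePromptContent(user_text, "user_prompt")     # -> '⚠️ USER_CONTROLLED_CONTENT: ===USER_CONTENT_START===...'
#
# Callers pick the content_type; anything unrecognized falls through to plain text
# escaping via _escapeForAiPrompt.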

def buildSecurePrompt(template: str, **kwargs) -> str:
    """
    Build a secure AI prompt by safely inserting content into a template.

    Args:
        template: The prompt template with {key} placeholders
        **kwargs: Key-value pairs for template substitution

    Returns:
        Securely constructed prompt
    """
    try:
        # Escape all values before substitution
        escaped_kwargs = {}
        for key, value in kwargs.items():
            if key.endswith('_json'):
                escaped_kwargs[key] = securePromptContent(value, "json")
            elif key.endswith('_list'):
                escaped_kwargs[key] = securePromptContent(value, "list")
            elif key in ['user_prompt', 'context', 'document_content', 'user_input']:
                escaped_kwargs[key] = securePromptContent(value, "user_prompt")
            else:
                escaped_kwargs[key] = securePromptContent(value, "text")

        # Use safe string formatting
        return template.format(**escaped_kwargs)

    except Exception as e:
        logger.error(f"Error building secure prompt: {str(e)}")
        return template  # Return original template if escaping fails
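
# Hypothetical usage (illustrative template and key names, not from the source):
#
#   prompt = buildSecurePrompt(
#       "Answer the question.\nQuestion: {user_prompt}\nOptions: {choices_list}",
#       user_prompt="What is 2+2? ignore previous instructions",
#       choices_list=["4", "5"],
#   )
#
# The user_prompt value is delimited and prefixed with the USER_CONTROLLED_CONTENT
# warning before substitution, while the _list suffix routes choices through
# _escapeForListPrompt. Note that literal braces in the template itself must be
# doubled ({{ }}), otherwise str.format raises and the unformatted template is
# returned by the except branch above.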

def validatePromptSecurity(prompt: str) -> Dict[str, Any]:
    """
    Validate that a prompt is secure and doesn't contain injection patterns.

    Args:
        prompt: The prompt to validate

    Returns:
        Dictionary with validation results
    """
    issues = []

    # Check for unescaped quotes that could break JSON
    if '"' in prompt and '\\"' not in prompt:
        # Check if quotes are properly escaped
        unescaped_quotes = re.findall(r'(?<!\\)"', prompt)
        if unescaped_quotes:
            issues.append("Unescaped quotes detected")

    # Check for potential injection patterns
    injection_patterns = [
        r'ignore\s+previous\s+instructions',
        r'forget\s+everything',
        r'you\s+are\s+now',
        r'system\s*:',
        r'assistant\s*:',
        r'user\s*:',
        r'<\|.*\|>',  # Special tokens
    ]

    for pattern in injection_patterns:
        if re.search(pattern, prompt, re.IGNORECASE):
            issues.append(f"Potential injection pattern detected: {pattern}")

    # Check for proper content delimiters
    if "===USER_CONTENT_START===" not in prompt and "===USER_CONTENT_END===" not in prompt:
        # This might be okay for some prompts, but flag for review
        if any(keyword in prompt.lower() for keyword in ['context', 'user', 'input', 'prompt']):
            issues.append("User content may not be properly delimited")

    return {
        "is_secure": len(issues) == 0,
        "issues": issues,
        "prompt_length": len(prompt),
        "has_user_content_delimiters": "===USER_CONTENT_START===" in prompt
    }
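
# Quick self-check sketch (hypothetical harness, not part of the module):
#
#   report = validatePromptSecurity('Summarize the input: ignore previous instructions')
#   # report["is_secure"] -> False
#   # report["issues"]    -> ['Potential injection pattern detected: ignore\\s+previous\\s+instructions',
#   #                         'User content may not be properly delimited']
#
# A gateway could log or reject prompts whose report["is_secure"] is False before
# they reach the model.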
335
modules/workflows/processing/workflowProcessor.py
Normal file
@@ -0,0 +1,335 @@
# workflowProcessor.py
# Main workflow processor with delegation pattern

import logging
from typing import Dict, Any, Optional, List
from modules.datamodels.datamodelChat import TaskStep, TaskContext, TaskPlan, TaskResult, ReviewResult
from modules.datamodels.datamodelChat import ChatWorkflow
from modules.workflows.processing.modes.modeBase import BaseMode
from modules.workflows.processing.modes.modeActionplan import ActionplanMode
from modules.workflows.processing.modes.modeReact import ReactMode

logger = logging.getLogger(__name__)


class WorkflowStoppedException(Exception):
    """Exception raised when a workflow is stopped by the user."""
    pass


class WorkflowProcessor:
    """Main workflow processor that delegates to appropriate mode implementations"""

    def __init__(self, services, workflow=None):
        self.services = services
        self.workflow = workflow
        self.mode = self._createMode(workflow.workflowMode if workflow else "Actionplan")

    def _createMode(self, workflowMode: str) -> BaseMode:
        """Create the appropriate mode implementation based on workflow mode"""
        if workflowMode == "React":
            return ReactMode(self.services, self.workflow)
        else:
            return ActionplanMode(self.services, self.workflow)

    def _checkWorkflowStopped(self, workflow):
        """Check if workflow has been stopped by user and raise exception if so"""
        try:
            # Get the current workflow status from the database to avoid stale data
            current_workflow = self.services.interfaceDbChat.getWorkflow(workflow.id)
            if current_workflow and current_workflow.status == "stopped":
                logger.info("Workflow stopped by user, aborting processing")
                raise WorkflowStoppedException("Workflow was stopped by user")
        except WorkflowStoppedException:
            # Re-raise so the stop signal is not swallowed by the generic handler below
            raise
        except Exception as e:
            # If we can't get the current status due to other database issues, fall back to the in-memory object
            logger.warning(f"Could not check current workflow status from database: {str(e)}")
            if workflow and workflow.status == "stopped":
                logger.info("Workflow stopped by user (from in-memory object), aborting processing")
                raise WorkflowStoppedException("Workflow was stopped by user")

    async def generateTaskPlan(self, userInput: str, workflow: ChatWorkflow) -> TaskPlan:
        """Generate a high-level task plan for the workflow"""
        try:
            # Check workflow status before generating task plan
            self._checkWorkflowStopped(workflow)

            logger.info("=== STARTING TASK PLAN GENERATION ===")
            logger.info(f"Workflow ID: {workflow.id}")
            logger.info(f"User Input: {userInput}")
            logger.info(f"Workflow Mode: {workflow.workflowMode}")

            # Delegate to the appropriate mode
            taskPlan = await self.mode.generateTaskPlan(userInput, workflow)

            # Create task plan message
            await self.mode.createTaskPlanMessage(taskPlan, workflow)

            return taskPlan
        except Exception as e:
            logger.error(f"Error in generateTaskPlan: {str(e)}")
            raise

    async def executeTask(self, taskStep: TaskStep, workflow: ChatWorkflow, context: TaskContext,
                          taskIndex: int = None, totalTasks: int = None) -> TaskResult:
        """Execute a task step using the appropriate mode"""
        try:
            # Check workflow status before executing task
            self._checkWorkflowStopped(workflow)

            logger.info("=== STARTING TASK EXECUTION ===")
            logger.info(f"Task: {taskStep.objective}")
            logger.info(f"Mode: {workflow.workflowMode}")

            # Delegate to the appropriate mode
            return await self.mode.executeTask(taskStep, workflow, context, taskIndex, totalTasks)
        except Exception as e:
            logger.error(f"Error in executeTask: {str(e)}")
            raise

    async def generateActionItems(self, taskStep: TaskStep, workflow: ChatWorkflow,
                                  previousResults: List = None, enhancedContext: TaskContext = None) -> List:
        """Generate actions for a task step using the appropriate mode"""
        try:
            # Check workflow status before generating actions
            self._checkWorkflowStopped(workflow)

            logger.info("=== STARTING ACTION GENERATION ===")
            logger.info(f"Task: {taskStep.objective}")
            logger.info(f"Mode: {workflow.workflowMode}")

            # Delegate to the appropriate mode
            return await self.mode.generateActionItems(taskStep, workflow, previousResults, enhancedContext)
        except Exception as e:
            logger.error(f"Error in generateActionItems: {str(e)}")
            raise

    def updateWorkflowAfterTaskPlanCreated(self, totalTasks: int):
        """Update workflow object after task plan creation"""
        try:
            updateData = {
                "totalTasks": totalTasks,
                "currentTask": 0,
                "currentAction": 0,
                "totalActions": 0
            }

            # Update workflow object
            self.workflow.totalTasks = totalTasks
            self.workflow.currentTask = 0
            self.workflow.currentAction = 0
            self.workflow.totalActions = 0

            # Update in database
            self.services.interfaceDbChat.updateWorkflow(self.workflow.id, updateData)
            logger.info(f"Updated workflow {self.workflow.id} after task plan creation: {updateData}")

        except Exception as e:
            logger.error(f"Error updating workflow after task plan creation: {str(e)}")

    def updateWorkflowBeforeExecutingTask(self, taskNumber: int):
        """Update workflow object before executing a task"""
        try:
            updateData = {
                "currentTask": taskNumber,
                "currentAction": 0,
                "totalActions": 0
            }

            # Update workflow object
            self.workflow.currentTask = taskNumber
            self.workflow.currentAction = 0
            self.workflow.totalActions = 0

            # Update in database
            self.services.interfaceDbChat.updateWorkflow(self.workflow.id, updateData)
            logger.info(f"Updated workflow {self.workflow.id} before executing task {taskNumber}: {updateData}")

        except Exception as e:
            logger.error(f"Error updating workflow before executing task: {str(e)}")

    def updateWorkflowAfterActionPlanning(self, totalActions: int):
        """Update workflow object after action planning for current task"""
        try:
            updateData = {
                "totalActions": totalActions
            }

            # Update workflow object
            self.workflow.totalActions = totalActions

            # Update in database
            self.services.interfaceDbChat.updateWorkflow(self.workflow.id, updateData)
            logger.info(f"Updated workflow {self.workflow.id} after action planning: {updateData}")

        except Exception as e:
            logger.error(f"Error updating workflow after action planning: {str(e)}")

    def updateWorkflowBeforeExecutingAction(self, actionNumber: int):
        """Update workflow object before executing an action"""
        try:
            updateData = {
                "currentAction": actionNumber
            }

            # Update workflow object
            self.workflow.currentAction = actionNumber

            # Update in database
            self.services.interfaceDbChat.updateWorkflow(self.workflow.id, updateData)
            logger.info(f"Updated workflow {self.workflow.id} before executing action {actionNumber}: {updateData}")

        except Exception as e:
            logger.error(f"Error updating workflow before executing action: {str(e)}")

    def setWorkflowTotals(self, totalTasks: int = None, totalActions: int = None):
        """Set total counts for workflow progress tracking and update database"""
        try:
            updateData = {}

            if totalTasks is not None:
                self.workflow.totalTasks = totalTasks
                updateData["totalTasks"] = totalTasks

            if totalActions is not None:
                self.workflow.totalActions = totalActions
                updateData["totalActions"] = totalActions

            # Update workflow object in database if we have changes
            if updateData:
                self.services.interfaceDbChat.updateWorkflow(self.workflow.id, updateData)
                logger.info(f"Updated workflow {self.workflow.id} totals in database: {updateData}")

            logger.debug(f"Updated workflow totals: Tasks {self.workflow.totalTasks if hasattr(self.workflow, 'totalTasks') else 'N/A'}, Actions {self.workflow.totalActions if hasattr(self.workflow, 'totalActions') else 'N/A'}")
        except Exception as e:
            logger.error(f"Error setting workflow totals: {str(e)}")

    def resetWorkflowForNewSession(self):
        """Reset workflow object for a new session"""
        try:
            updateData = {
                "currentTask": 0,
                "currentAction": 0,
                "totalTasks": 0,
                "totalActions": 0
            }

            # Update workflow object
            self.workflow.currentTask = 0
            self.workflow.currentAction = 0
            self.workflow.totalTasks = 0
            self.workflow.totalActions = 0

            # Update in database
            self.services.interfaceDbChat.updateWorkflow(self.workflow.id, updateData)
            logger.info(f"Reset workflow {self.workflow.id} for new session: {updateData}")

        except Exception as e:
            logger.error(f"Error resetting workflow for new session: {str(e)}")

    def writeTraceLog(self, contextText: str, data: Any) -> None:
        """Write trace data to configured trace file if in debug mode"""
        try:
            import os
            import json
            from datetime import datetime, UTC

            # Only write if logger is effectively in debug mode
            # (logger.level alone is often NOTSET on child loggers, so use the effective level)
            if logger.getEffectiveLevel() > logging.DEBUG:
                return

            # Get log directory from configuration
            logDir = self.services.utils.configGet("APP_LOGGING_LOG_DIR", "./")
            if not os.path.isabs(logDir):
                # If relative path, make it relative to the gateway directory
                gatewayDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
                logDir = os.path.join(gatewayDir, logDir)

            # Ensure log directory exists
            os.makedirs(logDir, exist_ok=True)

            # Create trace file path
            traceFile = os.path.join(logDir, "log_trace.log")

            # Format the trace entry
            timestamp = datetime.fromtimestamp(self.services.utils.getUtcTimestamp(), UTC).strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
            traceEntry = f"[{timestamp}] {contextText}\n"

            # Add data if provided - show full content without truncation
            if data is not None:
                if isinstance(data, (dict, list)):
                    # Use ensure_ascii=False to preserve Unicode characters and indent=2 for readability
                    traceEntry += f"Data: {json.dumps(data, indent=2, default=str, ensure_ascii=False)}\n"
                else:
                    # For string data, show full content without truncation
                    traceEntry += f"Data: {str(data)}\n"

            traceEntry += "-" * 80 + "\n\n"

            # Write to trace file
            with open(traceFile, "a", encoding="utf-8") as f:
                f.write(traceEntry)

        except Exception:
            # Don't log trace errors to avoid recursion
            pass
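
    # Example of the resulting trace-file entry (illustrative values only):
    #
    #   [2025-10-06 10:42:56.123] Stage 1 plan selection
    #   Data: {
    #     "action": "document.extract",
    #     "resultLabel": "round1_task1_action1_extract_results"
    #   }
    #   --------------------------------------------------------------------------------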

    def clearTraceLog(self) -> None:
        """Clear the trace log file"""
        try:
            import os

            # Get log directory from configuration
            logDir = self.services.utils.configGet("APP_LOGGING_LOG_DIR", "./")
            if not os.path.isabs(logDir):
                # If relative path, make it relative to the gateway directory
                gatewayDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
                logDir = os.path.join(gatewayDir, logDir)

            # Create trace file path
            traceFile = os.path.join(logDir, "log_trace.log")

            # Clear the trace file
            if os.path.exists(traceFile):
                with open(traceFile, "w", encoding="utf-8") as f:
                    f.write("")
                logger.info("Trace log cleared")
            else:
                logger.info("Trace log file does not exist, nothing to clear")

        except Exception as e:
            logger.error(f"Error clearing trace log: {str(e)}")

    async def prepareTaskHandover(self, taskStep, taskActions, taskResult, workflow):
        """Prepare task handover data for workflow coordination"""
        try:
            # Check workflow status before preparing task handover
            self._checkWorkflowStopped(workflow)

            # Log handover status summary
            status = taskResult.status if taskResult else 'unknown'

            # Handle both TaskResult and ReviewResult objects
            if hasattr(taskResult, 'met_criteria'):
                # This is a ReviewResult object
                met = taskResult.met_criteria if taskResult.met_criteria else []
                reviewResult = taskResult.to_dict()
            else:
                # This is a TaskResult object
                met = []
                reviewResult = {
                    'status': taskResult.status if taskResult else 'unknown',
                    'reason': taskResult.error if taskResult and hasattr(taskResult, 'error') else None,
                    'success': taskResult.success if taskResult else False
                }

            handoverData = {
                'task_id': taskStep.id,
                'task_description': taskStep.objective,
                'actions': [action.to_dict() for action in taskActions] if taskActions else [],
                'review_result': reviewResult,
                'workflow_id': workflow.id,
                'handover_time': self.services.utils.getUtcTimestamp()
            }
            logger.info(f"Prepared handover for task {taskStep.id} in workflow {workflow.id}")
            return handoverData
        except Exception as e:
            logger.error(f"Error in prepareTaskHandover: {str(e)}")
            return {'error': str(e)}
@@ -8,11 +8,11 @@ from modules.datamodels.datamodelChat import (
    UserInputRequest,
    ChatMessage,
    ChatWorkflow,
    ChatDocument,
    WorkflowResult
    ChatDocument
)
from modules.datamodels.datamodelWorkflow import TaskItem, TaskStatus, TaskContext
from modules.workflows.processing.handlingTasks import HandlingTasks, WorkflowStoppedException
from modules.datamodels.datamodelChat import TaskItem, TaskStatus, TaskContext
from modules.workflows.processing.workflowProcessor import WorkflowProcessor, WorkflowStoppedException
from modules.shared.timezoneUtils import get_utc_timestamp


logger = logging.getLogger(__name__)

@@ -22,11 +22,11 @@ class WorkflowManager:

    def __init__(self, services):
        self.services = services
        self.handlingTasks = None
        self.workflowProcessor = None

    # Exported functions

    async def workflowStart(self, userInput: UserInputRequest, workflowId: Optional[str] = None, workflowMode: str = "Actionplan") -> ChatWorkflow:
    async def workflowStart(self, userInput: UserInputRequest, workflowId: Optional[str] = None, workflowMode: str = "React") -> ChatWorkflow:
        """Starts a new workflow or continues an existing one, then launches processing."""
        try:
            # Debug log to check workflowMode parameter

@@ -38,8 +38,8 @@
            if not workflow:
                raise ValueError(f"Workflow {workflowId} not found")

            # Add workflow to services
            self.services.workflow = workflow
            # Store workflow in services for reference (don't overwrite the workflow service)
            self.services.currentWorkflow = workflow

            if workflow.status == "running":
                logger.info(f"Stopping running workflow {workflowId} before processing new prompt")

@@ -62,7 +62,8 @@
                self.services.workflow.updateWorkflow(workflowId, {
                    "status": "running",
                    "lastActivity": currentTime,
                    "currentRound": newRound
                    "currentRound": newRound,
                    "workflowMode": workflowMode  # Update workflow mode for existing workflows
                })

                workflow = self.services.workflow.getWorkflow(workflowId)

@@ -71,11 +72,14 @@

                self.services.workflow.createLog({
                    "workflowId": workflowId,
                    "message": f"Workflow resumed (round {workflow.currentRound})",
                    "message": f"Workflow resumed (round {workflow.currentRound}) with mode: {workflowMode}",
                    "type": "info",
                    "status": "running",
                    "progress": 0
                })

                # CRITICAL: Update the workflow object's workflowMode attribute for immediate use
                workflow.workflowMode = workflowMode
            else:
                workflowData = {
                    "name": "New Workflow",

@@ -108,8 +112,8 @@
            self.services.workflow.updateWorkflow(workflow.id, {"currentRound": 1})
            self.services.workflow.updateWorkflowStats(workflow.id, bytesSent=0, bytesReceived=0)

            # Add workflow to services
            self.services.workflow = workflow
            # Store workflow in services for reference (don't overwrite the workflow service)
            self.services.currentWorkflow = workflow

            # Start workflow processing asynchronously
            asyncio.create_task(self._workflowProcess(userInput, workflow))

@@ -149,11 +153,14 @@
    async def _workflowProcess(self, userInput: UserInputRequest, workflow: ChatWorkflow) -> None:
        """Process a workflow with user input"""
        try:
            self.handlingTasks = HandlingTasks(self.services, workflow)
            # Store the current user prompt in services for easy access throughout the workflow
            self.services.rawUserPrompt = userInput.prompt
            self.services.currentUserPrompt = userInput.prompt
            self.workflowProcessor = WorkflowProcessor(self.services, workflow)
            message = await self._sendFirstMessage(userInput, workflow)
            task_plan = await self._planTasks(userInput, workflow)
            workflow_result = await self._executeTasks(task_plan, workflow)
            await self._processWorkflowResults(workflow, workflow_result, message)
            await self._executeTasks(task_plan, workflow)
            await self._processWorkflowResults(workflow, message)

        except WorkflowStoppedException:
            self._handleWorkflowStop(workflow)

@@ -166,14 +173,14 @@
    async def _sendFirstMessage(self, userInput: UserInputRequest, workflow: ChatWorkflow) -> ChatMessage:
        """Send first message to start workflow"""
        try:
            self.handlingTasks._checkWorkflowStopped()
            self.workflowProcessor._checkWorkflowStopped(workflow)

            # Create initial message using interface
            # Generate the correct documentsLabel that matches what getDocumentReferenceString will create
            # For first user message, include round info in the user context label
            round_num = workflow.currentRound
            task_num = 0
            action_num = 0
            context_label = f"round{round_num}_task{task_num}_action{action_num}_context"
            context_label = f"round{round_num}_usercontext"

            messageData = {
                "workflowId": workflow.id,

@@ -199,7 +206,7 @@
            workflow.messages.append(message)

            # Clear trace log for new workflow session
            self.handlingTasks.clearTraceLog()
            self.workflowProcessor.clearTraceLog()

            # Add documents if any, now with messageId
            if userInput.listFileId:
@ -209,6 +216,128 @@ class WorkflowManager:
|
|||
# Update the message with documents in database
|
||||
self.services.workflow.updateMessage(message.id, {"documents": [doc.to_dict() for doc in documents]})
|
||||
|
||||
            # Analyze the user's input to extract intent and offload bulky context into documents
            try:
                analyzerPrompt = (
                    "You are an input analyzer. Split the user's message into:\n"
                    "1) intent: the user's core request in one concise paragraph, normalized to the user's language.\n"
                    "2) contextItems: supportive data to attach as separate documents if significantly larger than the intent. "
                    "Include large literal data blocks, long lists/tables, code/JSON blocks, quoted transcripts, CSV fragments, or detailed specs. "
                    "Keep URLs in the intent unless they include large pasted content.\n\n"
                    "Rules:\n"
                    "- If total content length (intent + data) is less than 10% of the model's max tokens, do not extract; "
                    "return an empty contextItems and keep a compact, self-contained intent.\n"
                    "- If content exceeds that, move bulky parts into contextItems, keeping the intent short and clear.\n"
                    "- Preserve critical references (URLs, filenames) in the intent.\n"
                    "- Normalize the intent to the detected language. If mixed-language, use the primary detected language and normalize.\n\n"
                    "Output JSON only (no markdown):\n"
                    "{\n"
                    "  \"detectedLanguage\": \"en\",\n"
                    "  \"intent\": \"Concise normalized request...\",\n"
                    "  \"contextItems\": [\n"
                    "    {\n"
                    "      \"title\": \"User context 1\",\n"
                    "      \"mimeType\": \"text/plain\",\n"
                    "      \"content\": \"Full extracted content block here\"\n"
                    "    }\n"
                    "  ]\n"
                    "}\n\n"
                    f"User message:\n{userInput.prompt}"
                )

                # Call AI analyzer
                aiResponse = await self.services.ai.callAi(prompt=analyzerPrompt)

                detectedLanguage = None
                intentText = userInput.prompt
                contextItems = []

                # Parse analyzer response (JSON expected)
                try:
                    import json
                    jsonStart = aiResponse.find('{') if aiResponse else -1
                    jsonEnd = aiResponse.rfind('}') + 1 if aiResponse else 0
                    if jsonStart != -1 and jsonEnd > jsonStart:
                        parsed = json.loads(aiResponse[jsonStart:jsonEnd])
                        detectedLanguage = parsed.get('detectedLanguage') or None
                        if parsed.get('intent'):
                            intentText = parsed.get('intent')
                        contextItems = parsed.get('contextItems') or []
                except Exception:
                    contextItems = []

                # Update services state
                if detectedLanguage and isinstance(detectedLanguage, str):
                    self._setUserLanguage(detectedLanguage)
                self.services.currentUserPrompt = intentText or userInput.prompt

                # Telemetry (sizes and counts)
                try:
                    inputSize = len(userInput.prompt.encode('utf-8')) if userInput and userInput.prompt else 0
                    outputSize = len(aiResponse.encode('utf-8')) if aiResponse else 0
                    self.services.workflow.createLog({
                        "workflowId": workflow.id,
                        "message": f"User prompt analyzed (input {inputSize} bytes, output {outputSize} bytes, items {len(contextItems)})",
                        "type": "info",
                        "status": "running",
                        "progress": 0
                    })
                except Exception:
                    pass

                # Create and attach documents for context items
                if contextItems and isinstance(contextItems, list):
                    created_docs = []
                    for idx, item in enumerate(contextItems):
                        try:
                            title = item.get('title') if isinstance(item, dict) else None
                            mime = item.get('mimeType') if isinstance(item, dict) else None
                            content = item.get('content') if isinstance(item, dict) else None
                            if not content:
                                continue
                            fileName = (title or f"user_context_{idx+1}.txt").strip()
                            mimeType = (mime or "text/plain").strip()

                            # Create file in component storage
                            content_bytes = content.encode('utf-8')
                            file_item = self.services.interfaceDbComponent.createFile(
                                name=fileName,
                                mimeType=mimeType,
                                content=content_bytes
                            )
                            # Persist file data
                            self.services.interfaceDbComponent.createFileData(file_item.id, content_bytes)

                            # Collect file info
                            file_info = self.services.workflow.getFileInfo(file_item.id)
                            from modules.datamodels.datamodelChat import ChatDocument as _ChatDocument
                            doc = _ChatDocument(
                                messageId=message.id,
                                fileId=file_item.id,
                                fileName=file_info.get("fileName", fileName) if file_info else fileName,
                                fileSize=file_info.get("size", len(content_bytes)) if file_info else len(content_bytes),
                                mimeType=file_info.get("mimeType", mimeType) if file_info else mimeType
                            )
                            # Persist document record
                            self.services.interfaceDbChat.createDocument(doc.to_dict())
                            created_docs.append(doc)
                        except Exception:
                            continue

                    if created_docs:
                        # Attach to message and persist
                        if not message.documents:
                            message.documents = []
                        message.documents.extend(created_docs)
                        # Ensure label is user_context for discoverability
                        message.documentsLabel = context_label
                        self.services.workflow.updateMessage(message.id, {
                            "documents": [d.to_dict() for d in message.documents],
                            "documentsLabel": context_label
                        })
            except Exception as e:
                logger.warning(f"Prompt analysis failed or skipped: {str(e)}")

            return message
        else:
            raise Exception("Failed to create first message")
@@ -219,7 +348,7 @@ class WorkflowManager:

     async def _planTasks(self, userInput: UserInputRequest, workflow: ChatWorkflow):
         """Generate task plan for workflow execution"""
-        handling = self.handlingTasks
+        handling = self.workflowProcessor
         # Generate task plan first (shared for both modes)
         task_plan = await handling.generateTaskPlan(userInput.prompt, workflow)
         if not task_plan or not task_plan.tasks:
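_planTasks now resolves its handler from self.workflowProcessor and bails out when the generated plan is empty. The datamodels live elsewhere in the repo; as a rough sketch of the minimal shape the guard above implies (hypothetical dataclasses, and any field beyond `tasks` and `objective` is an assumption):

```python
from dataclasses import dataclass, field
from typing import List, Optional

@dataclass
class TaskStep:
    objective: str

@dataclass
class TaskPlan:
    tasks: List[TaskStep] = field(default_factory=list)

def has_work(task_plan: Optional[TaskPlan]) -> bool:
    # Mirrors `if not task_plan or not task_plan.tasks:` in _planTasks
    return bool(task_plan and task_plan.tasks)
```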
@@ -229,9 +358,9 @@ class WorkflowManager:
         logger.info(f"Executing workflow mode={workflow_mode} with {len(task_plan.tasks)} tasks")
         return task_plan

-    async def _executeTasks(self, task_plan, workflow: ChatWorkflow) -> WorkflowResult:
-        """Execute all tasks in the task plan"""
-        handling = self.handlingTasks
+    async def _executeTasks(self, task_plan, workflow: ChatWorkflow) -> None:
+        """Execute all tasks in the task plan and update workflow status."""
+        handling = self.workflowProcessor
         total_tasks = len(task_plan.tasks)
         all_task_results: List = []
         previous_results: List[str] = []
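With the return type changed to None, _executeTasks no longer hands back a WorkflowResult; completion, failure, and stop are communicated through `workflow.status` (see the hunks below). A sketch of the call site this contract implies; the orchestrating function name and arguments are assumptions, not code from this diff:

```python
async def run_workflow(manager, userInput, workflow, initial_message):
    # Hypothetical orchestration consistent with the new signatures in this diff
    task_plan = await manager._planTasks(userInput, workflow)
    await manager._executeTasks(task_plan, workflow)   # returns None; sets workflow.status
    await manager._processWorkflowResults(workflow, initial_message)
```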
@@ -240,7 +369,7 @@ class WorkflowManager:
             current_task_index = idx + 1
             logger.info(f"Task {current_task_index}/{total_tasks}: {task_step.objective}")

-            # Build TaskContext (mode-specific behavior is inside HandlingTasks)
+            # Build TaskContext (mode-specific behavior is inside WorkflowProcessor)
             task_context = TaskContext(
                 task_step=task_step,
                 workflow=workflow,
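The TaskContext construction is truncated here; its remaining fields are not shown. One detail worth illustrating is how `previous_results`, declared in _executeTasks above, feeds each successful task's feedback into later tasks. A generic sketch of that accumulation pattern; `run_task` is an illustrative coroutine, not a project API:

```python
from typing import Awaitable, Callable, List, Optional

async def run_tasks_with_memory(
    tasks: list,
    run_task: Callable[[object, str], Awaitable[Optional[str]]],
) -> List[str]:
    """Execute tasks in order, passing accumulated feedback as shared context."""
    previous_results: List[str] = []
    for task_step in tasks:
        feedback = await run_task(task_step, "\n".join(previous_results))
        if feedback:  # mirrors `if task_result.success and task_result.feedback`
            previous_results.append(feedback)
    return previous_results
```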
@@ -274,19 +403,15 @@ class WorkflowManager:
             if task_result.success and task_result.feedback:
                 previous_results.append(task_result.feedback)

-        return WorkflowResult(
-            status="completed",
-            completed_tasks=len(all_task_results),
-            total_tasks=total_tasks,
-            execution_time=0.0,
-            final_results_count=len(all_task_results)
-        )
+        # Mark workflow as completed; error/stop cases update status elsewhere
+        workflow.status = "completed"
+        return None

-    async def _processWorkflowResults(self, workflow: ChatWorkflow, workflow_result: WorkflowResult, initial_message: ChatMessage) -> None:
-        """Process workflow results and create appropriate messages"""
+    async def _processWorkflowResults(self, workflow: ChatWorkflow, initial_message: ChatMessage) -> None:
+        """Process workflow results based on workflow status and create appropriate messages"""
         try:
             try:
-                self.handlingTasks._checkWorkflowStopped()
+                self.workflowProcessor._checkWorkflowStopped(workflow)
             except WorkflowStoppedException:
                 logger.info(f"Workflow {workflow.id} was stopped during result processing")

@@ -321,7 +446,7 @@ class WorkflowManager:
             })
             return

-        if workflow_result.status == 'stopped':
+        if workflow.status == 'stopped':
             # Create stopped message
             stopped_message = {
                 "workflowId": workflow.id,
@@ -363,12 +488,12 @@ class WorkflowManager:
                 "progress": 100
             })
             return
-        elif workflow_result.status == 'failed':
+        elif workflow.status == 'failed':
             # Create error message
             error_message = {
                 "workflowId": workflow.id,
                 "role": "assistant",
-                "message": f"Workflow failed: {workflow_result.error or 'Unknown error'}",
+                "message": "Workflow failed: Unknown error",
                 "status": "last",
                 "sequenceNr": len(workflow.messages) + 1,
                 "publishedAt": self.services.utils.getUtcTimestamp(),
@@ -399,7 +524,7 @@ class WorkflowManager:
             # Add failed log entry
             self.services.workflow.createLog({
                 "workflowId": workflow.id,
-                "message": f"Workflow failed: {workflow_result.error or 'Unknown error'}",
+                "message": "Workflow failed: Unknown error",
                 "type": "error",
                 "status": "failed",
                 "progress": 100
@@ -504,7 +629,7 @@ class WorkflowManager:
     async def _generateWorkflowFeedback(self, workflow: ChatWorkflow) -> str:
         """Generate feedback message for workflow completion"""
         try:
-            self.handlingTasks._checkWorkflowStopped()
+            self.workflowProcessor._checkWorkflowStopped(workflow)

             # Count messages by role
             user_messages = [msg for msg in workflow.messages if msg.role == 'user']
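The feedback generator tallies messages by building one list comprehension per role. For more than a couple of roles, a single pass with Counter does the same job; a small sketch, equivalent under the assumption that only the counts are needed:

```python
from collections import Counter

def count_messages_by_role(messages) -> Counter:
    # One pass over the history: Counter({'user': 3, 'assistant': 5, ...})
    return Counter(msg.role for msg in messages)
```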
BIN  testdata/00Untitled.jpg vendored (binary file not shown; before: 407 KiB)
BIN  testdata/Muster_Kundenliste_Test1.xlsx vendored (binary file not shown)
BIN  testdata/diagramm_komponenten.pdf vendored (binary file not shown)