standard parameter set for all ai plugins
This commit is contained in:
parent
36947b6d7e
commit
64131f65ce
18 changed files with 450 additions and 653 deletions
|
|
@ -5,7 +5,7 @@ from typing import Dict, Any, List, Union
|
||||||
from fastapi import HTTPException
|
from fastapi import HTTPException
|
||||||
from modules.shared.configuration import APP_CONFIG
|
from modules.shared.configuration import APP_CONFIG
|
||||||
from modules.aicore.aicoreBase import BaseConnectorAi
|
from modules.aicore.aicoreBase import BaseConnectorAi
|
||||||
from modules.datamodels.datamodelAi import AiModel, ModelCapabilitiesEnum, PriorityEnum, ProcessingModeEnum, OperationTypeEnum
|
from modules.datamodels.datamodelAi import AiModel, ModelCapabilitiesEnum, PriorityEnum, ProcessingModeEnum, OperationTypeEnum, AiModelCall, AiModelResponse
|
||||||
|
|
||||||
# Configure logger
|
# Configure logger
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
@ -88,28 +88,26 @@ class AiAnthropic(BaseConnectorAi):
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
async def callAiBasic(self, messages: List[Dict[str, Any]], temperature: float = None, maxTokens: int = None) -> Dict[str, Any]:
|
async def callAiBasic(self, modelCall: AiModelCall) -> AiModelResponse:
|
||||||
"""
|
"""
|
||||||
Calls the Anthropic API with the given messages.
|
Calls the Anthropic API with the given messages using standardized pattern.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
messages: List of messages in OpenAI format (role, content)
|
modelCall: AiModelCall with messages and options
|
||||||
temperature: Temperature for response generation (0.0-1.0)
|
|
||||||
maxTokens: Maximum number of tokens in the response
|
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
The response in OpenAI format
|
AiModelResponse with content and metadata
|
||||||
|
|
||||||
Raises:
|
Raises:
|
||||||
HTTPException: For errors in API communication
|
HTTPException: For errors in API communication
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
# Use parameters from configuration if none were overridden
|
# Extract parameters from modelCall
|
||||||
if temperature is None:
|
messages = modelCall.messages
|
||||||
temperature = self.config.get("temperature", 0.2)
|
model = modelCall.model
|
||||||
|
options = modelCall.options
|
||||||
# Don't set maxTokens from config - let the model use its full context length
|
temperature = options.get("temperature", self.config.get("temperature", 0.2))
|
||||||
# Our continuation system handles stopping early via prompt engineering
|
maxTokens = model.maxTokens
|
||||||
|
|
||||||
# Transform OpenAI-style messages to Anthropic format:
|
# Transform OpenAI-style messages to Anthropic format:
|
||||||
# - Move any 'system' role content to top-level 'system'
|
# - Move any 'system' role content to top-level 'system'
|
||||||
|
|
@ -205,23 +203,13 @@ class AiAnthropic(BaseConnectorAi):
|
||||||
logger.warning(f"Anthropic API returned empty content. Full response: {anthropicResponse}")
|
logger.warning(f"Anthropic API returned empty content. Full response: {anthropicResponse}")
|
||||||
content = "[Anthropic API returned empty response]"
|
content = "[Anthropic API returned empty response]"
|
||||||
|
|
||||||
# Return in OpenAI format
|
# Return standardized response
|
||||||
return {
|
return AiModelResponse(
|
||||||
"id": anthropicResponse.get("id", ""),
|
content=content,
|
||||||
"object": "chat.completion",
|
success=True,
|
||||||
"created": anthropicResponse.get("created", 0),
|
modelId=self.modelName,
|
||||||
"model": anthropicResponse.get("model", self.modelName),
|
metadata={"response_id": anthropicResponse.get("id", "")}
|
||||||
"choices": [
|
)
|
||||||
{
|
|
||||||
"message": {
|
|
||||||
"role": "assistant",
|
|
||||||
"content": content
|
|
||||||
},
|
|
||||||
"index": 0,
|
|
||||||
"finish_reason": "stop"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error calling Anthropic API: {str(e)}")
|
logger.error(f"Error calling Anthropic API: {str(e)}")
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
import logging
|
import logging
|
||||||
from typing import Dict, Any, List, Union
|
from typing import Dict, Any, List, Union
|
||||||
from modules.aicore.aicoreBase import BaseConnectorAi
|
from modules.aicore.aicoreBase import BaseConnectorAi
|
||||||
from modules.datamodels.datamodelAi import AiModel, ModelCapabilitiesEnum, PriorityEnum, ProcessingModeEnum, OperationTypeEnum
|
from modules.datamodels.datamodelAi import AiModel, ModelCapabilitiesEnum, PriorityEnum, ProcessingModeEnum, OperationTypeEnum, AiModelCall, AiModelResponse
|
||||||
|
|
||||||
# Configure logger
|
# Configure logger
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
@ -76,158 +76,36 @@ class AiInternal(BaseConnectorAi):
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
|
|
||||||
async def extractDocument(self, documentData: Union[str, bytes], extractionType: str = "basic") -> Dict[str, Any]:
|
async def extractDocument(self, modelCall: AiModelCall) -> AiModelResponse:
|
||||||
"""
|
"""
|
||||||
Extract content from a document.
|
NOP - we only need the model for price calculations
|
||||||
|
|
||||||
Args:
|
|
||||||
documentData: The document data to extract from
|
|
||||||
extractionType: Type of extraction (basic, advanced, detailed)
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Dictionary with extraction results
|
|
||||||
"""
|
"""
|
||||||
try:
|
logger.error(f"Document extraction not to call here")
|
||||||
logger.info(f"Starting document extraction with type: {extractionType}")
|
return AiModelResponse(
|
||||||
|
content="",
|
||||||
# Simulate document extraction processing
|
success=False,
|
||||||
# In a real implementation, this would use actual document processing libraries
|
error="Internal connector should not be called directly"
|
||||||
|
)
|
||||||
if isinstance(documentData, bytes):
|
|
||||||
content = documentData.decode('utf-8', errors='ignore')
|
|
||||||
else:
|
|
||||||
content = str(documentData)
|
|
||||||
|
|
||||||
# Basic extraction logic
|
|
||||||
extractedContent = {
|
|
||||||
"text": content,
|
|
||||||
"metadata": {
|
|
||||||
"extraction_type": extractionType,
|
|
||||||
"content_length": len(content),
|
|
||||||
"processing_time": 0.1 # Simulated
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
logger.info(f"Document extraction completed successfully")
|
|
||||||
return extractedContent
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error during document extraction: {str(e)}")
|
|
||||||
return {
|
|
||||||
"error": str(e),
|
|
||||||
"success": False
|
|
||||||
}
|
|
||||||
|
|
||||||
async def generateDocument(self, template: str, data: Dict[str, Any], format: str = "html") -> Dict[str, Any]:
|
async def generateDocument(self, modelCall: AiModelCall) -> AiModelResponse:
|
||||||
"""
|
"""
|
||||||
Generate a document from a template and data.
|
NOP - we only need the model for price calculations
|
||||||
|
|
||||||
Args:
|
|
||||||
template: The document template
|
|
||||||
data: Data to populate the template
|
|
||||||
format: Output format (html, pdf, docx, etc.)
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Dictionary with generated document
|
|
||||||
"""
|
"""
|
||||||
try:
|
logger.error(f"Document generation not to call here")
|
||||||
logger.info(f"Starting document generation with format: {format}")
|
return AiModelResponse(
|
||||||
|
content="",
|
||||||
# Simulate document generation processing
|
success=False,
|
||||||
# In a real implementation, this would use actual templating engines
|
error="Internal connector should not be called directly"
|
||||||
|
)
|
||||||
# Basic template processing
|
|
||||||
generatedContent = template
|
|
||||||
for key, value in data.items():
|
|
||||||
placeholder = f"{{{key}}}"
|
|
||||||
generatedContent = generatedContent.replace(placeholder, str(value))
|
|
||||||
|
|
||||||
result = {
|
|
||||||
"content": generatedContent,
|
|
||||||
"format": format,
|
|
||||||
"metadata": {
|
|
||||||
"template_length": len(template),
|
|
||||||
"data_keys": list(data.keys()),
|
|
||||||
"processing_time": 0.2 # Simulated
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
logger.info(f"Document generation completed successfully")
|
|
||||||
return result
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error during document generation: {str(e)}")
|
|
||||||
return {
|
|
||||||
"error": str(e),
|
|
||||||
"success": False
|
|
||||||
}
|
|
||||||
|
|
||||||
async def renderDocument(self, content: str, targetFormat: str, options: Dict[str, Any] = None) -> Dict[str, Any]:
|
async def renderDocument(self, modelCall: AiModelCall) -> AiModelResponse:
|
||||||
"""
|
"""
|
||||||
Render a document to a specific format.
|
NOP - we only need the model for price calculations
|
||||||
|
|
||||||
Args:
|
|
||||||
content: The content to render
|
|
||||||
targetFormat: Target format (html, pdf, docx, etc.)
|
|
||||||
options: Rendering options
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Dictionary with rendered document
|
|
||||||
"""
|
"""
|
||||||
try:
|
logger.error(f"Document rendering not to call here")
|
||||||
logger.info(f"Starting document rendering to format: {targetFormat}")
|
return AiModelResponse(
|
||||||
|
content="",
|
||||||
if options is None:
|
success=False,
|
||||||
options = {}
|
error="Internal connector should not be called directly"
|
||||||
|
)
|
||||||
# Simulate document rendering processing
|
|
||||||
# In a real implementation, this would use actual rendering libraries
|
|
||||||
|
|
||||||
# Basic rendering logic based on target format
|
|
||||||
if targetFormat.lower() == "html":
|
|
||||||
renderedContent = f"<html><body>{content}</body></html>"
|
|
||||||
elif targetFormat.lower() == "pdf":
|
|
||||||
# Simulate PDF rendering
|
|
||||||
renderedContent = f"PDF_CONTENT_PLACEHOLDER: {content}"
|
|
||||||
else:
|
|
||||||
# Default to plain text
|
|
||||||
renderedContent = content
|
|
||||||
|
|
||||||
result = {
|
|
||||||
"content": renderedContent,
|
|
||||||
"format": targetFormat,
|
|
||||||
"metadata": {
|
|
||||||
"input_length": len(content),
|
|
||||||
"output_length": len(renderedContent),
|
|
||||||
"processing_time": 0.3, # Simulated
|
|
||||||
"options": options
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
logger.info(f"Document rendering completed successfully")
|
|
||||||
return result
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error during document rendering: {str(e)}")
|
|
||||||
return {
|
|
||||||
"error": str(e),
|
|
||||||
"success": False
|
|
||||||
}
|
|
||||||
|
|
||||||
async def _testConnection(self) -> bool:
|
|
||||||
"""
|
|
||||||
Tests the internal processing capabilities.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
True if internal processing is working, False otherwise
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
# Test basic functionality
|
|
||||||
testContent = "Test document content"
|
|
||||||
result = await self.extractDocument(testContent)
|
|
||||||
|
|
||||||
return result.get("success", True) and "error" not in result
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Internal connector test failed: {str(e)}")
|
|
||||||
return False
|
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,7 @@ from typing import Dict, Any, List, Union
|
||||||
from fastapi import HTTPException
|
from fastapi import HTTPException
|
||||||
from modules.shared.configuration import APP_CONFIG
|
from modules.shared.configuration import APP_CONFIG
|
||||||
from modules.aicore.aicoreBase import BaseConnectorAi
|
from modules.aicore.aicoreBase import BaseConnectorAi
|
||||||
from modules.datamodels.datamodelAi import AiModel, ModelCapabilitiesEnum, PriorityEnum, ProcessingModeEnum, OperationTypeEnum
|
from modules.datamodels.datamodelAi import AiModel, ModelCapabilitiesEnum, PriorityEnum, ProcessingModeEnum, OperationTypeEnum, AiModelCall, AiModelResponse
|
||||||
|
|
||||||
# Configure logger
|
# Configure logger
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
@ -125,40 +125,34 @@ class AiOpenai(BaseConnectorAi):
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
|
|
||||||
async def callAiBasic(self, messages: List[Dict[str, Any]], temperature: float = None, maxTokens: int = None) -> str:
|
async def callAiBasic(self, modelCall: AiModelCall) -> AiModelResponse:
|
||||||
"""
|
"""
|
||||||
Calls the OpenAI API with the given messages.
|
Calls the OpenAI API with the given messages using standardized pattern.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
messages: List of messages in OpenAI format (role, content)
|
modelCall: AiModelCall with messages and options
|
||||||
temperature: Temperature for response generation (0.0-1.0)
|
|
||||||
maxTokens: Maximum number of tokens in the response
|
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
The response from the OpenAI API
|
AiModelResponse with content and metadata
|
||||||
|
|
||||||
Raises:
|
Raises:
|
||||||
HTTPException: For errors in API communication
|
HTTPException: For errors in API communication
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
# Use parameters from configuration if none were overridden
|
# Extract parameters from modelCall
|
||||||
if temperature is None:
|
messages = modelCall.messages
|
||||||
temperature = self.config.get("temperature", 0.2)
|
model = modelCall.model
|
||||||
|
options = modelCall.options
|
||||||
# Don't set maxTokens from config - let the model use its full context length
|
temperature = options.get("temperature", self.config.get("temperature", 0.2))
|
||||||
# Our continuation system handles stopping early via prompt engineering
|
maxTokens = model.maxTokens
|
||||||
|
|
||||||
payload = {
|
payload = {
|
||||||
"model": self.modelName,
|
"model": self.modelName,
|
||||||
"messages": messages,
|
"messages": messages,
|
||||||
"temperature": temperature
|
"temperature": temperature,
|
||||||
|
"max_tokens": maxTokens
|
||||||
}
|
}
|
||||||
|
|
||||||
# Add max_tokens - use provided value or throw error
|
|
||||||
if maxTokens is None:
|
|
||||||
raise ValueError("maxTokens must be provided for OpenAI API calls")
|
|
||||||
payload["max_tokens"] = maxTokens
|
|
||||||
|
|
||||||
response = await self.httpClient.post(
|
response = await self.httpClient.post(
|
||||||
self.apiUrl,
|
self.apiUrl,
|
||||||
json=payload
|
json=payload
|
||||||
|
|
@ -186,7 +180,13 @@ class AiOpenai(BaseConnectorAi):
|
||||||
|
|
||||||
responseJson = response.json()
|
responseJson = response.json()
|
||||||
content = responseJson["choices"][0]["message"]["content"]
|
content = responseJson["choices"][0]["message"]["content"]
|
||||||
return content
|
|
||||||
|
return AiModelResponse(
|
||||||
|
content=content,
|
||||||
|
success=True,
|
||||||
|
modelId=self.modelName,
|
||||||
|
metadata={"response_id": responseJson.get("id", "")}
|
||||||
|
)
|
||||||
|
|
||||||
except ContextLengthExceededException:
|
except ContextLengthExceededException:
|
||||||
# Re-raise context length exceptions without wrapping
|
# Re-raise context length exceptions without wrapping
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,7 @@ from typing import Dict, Any, List, Union, Optional
|
||||||
from fastapi import HTTPException
|
from fastapi import HTTPException
|
||||||
from modules.shared.configuration import APP_CONFIG
|
from modules.shared.configuration import APP_CONFIG
|
||||||
from modules.aicore.aicoreBase import BaseConnectorAi
|
from modules.aicore.aicoreBase import BaseConnectorAi
|
||||||
from modules.datamodels.datamodelAi import AiModel, ModelCapabilitiesEnum, PriorityEnum, ProcessingModeEnum, OperationTypeEnum
|
from modules.datamodels.datamodelAi import AiModel, ModelCapabilitiesEnum, PriorityEnum, ProcessingModeEnum, OperationTypeEnum, AiModelCall, AiModelResponse
|
||||||
|
|
||||||
# Configure logger
|
# Configure logger
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
@ -141,40 +141,34 @@ class AiPerplexity(BaseConnectorAi):
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
|
|
||||||
async def callAiBasic(self, messages: List[Dict[str, Any]], temperature: float = None, maxTokens: int = None) -> str:
|
async def callAiBasic(self, modelCall: AiModelCall) -> AiModelResponse:
|
||||||
"""
|
"""
|
||||||
Calls the Perplexity API with the given messages.
|
Calls the Perplexity API with the given messages using standardized pattern.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
messages: List of messages in OpenAI format (role, content)
|
modelCall: AiModelCall with messages and options
|
||||||
temperature: Temperature for response generation (0.0-1.0)
|
|
||||||
maxTokens: Maximum number of tokens in the response
|
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
The response from the Perplexity API
|
AiModelResponse with content and metadata
|
||||||
|
|
||||||
Raises:
|
Raises:
|
||||||
HTTPException: For errors in API communication
|
HTTPException: For errors in API communication
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
# Use parameters from configuration if none were overridden
|
# Extract parameters from modelCall
|
||||||
if temperature is None:
|
messages = modelCall.messages
|
||||||
temperature = self.config.get("temperature", 0.2)
|
model = modelCall.model
|
||||||
|
options = modelCall.options
|
||||||
# Don't set maxTokens from config - let the model use its full context length
|
temperature = options.get("temperature", self.config.get("temperature", 0.2))
|
||||||
# Our continuation system handles stopping early via prompt engineering
|
maxTokens = model.maxTokens
|
||||||
|
|
||||||
payload = {
|
payload = {
|
||||||
"model": self.modelName,
|
"model": self.modelName,
|
||||||
"messages": messages,
|
"messages": messages,
|
||||||
"temperature": temperature
|
"temperature": temperature,
|
||||||
|
"max_tokens": maxTokens
|
||||||
}
|
}
|
||||||
|
|
||||||
# Add max_tokens - use provided value or throw error
|
|
||||||
if maxTokens is None:
|
|
||||||
raise ValueError("maxTokens must be provided for Perplexity API calls")
|
|
||||||
payload["max_tokens"] = maxTokens
|
|
||||||
|
|
||||||
response = await self.httpClient.post(
|
response = await self.httpClient.post(
|
||||||
self.apiUrl,
|
self.apiUrl,
|
||||||
json=payload
|
json=payload
|
||||||
|
|
@ -198,7 +192,13 @@ class AiPerplexity(BaseConnectorAi):
|
||||||
|
|
||||||
responseJson = response.json()
|
responseJson = response.json()
|
||||||
content = responseJson["choices"][0]["message"]["content"]
|
content = responseJson["choices"][0]["message"]["content"]
|
||||||
return content
|
|
||||||
|
return AiModelResponse(
|
||||||
|
content=content,
|
||||||
|
success=True,
|
||||||
|
modelId=self.modelName,
|
||||||
|
metadata={"response_id": responseJson.get("id", "")}
|
||||||
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error calling Perplexity API: {str(e)}")
|
logger.error(f"Error calling Perplexity API: {str(e)}")
|
||||||
|
|
|
||||||
|
|
@ -9,21 +9,7 @@ from tavily import AsyncTavilyClient
|
||||||
from modules.shared.configuration import APP_CONFIG
|
from modules.shared.configuration import APP_CONFIG
|
||||||
from modules.shared.timezoneUtils import get_utc_timestamp
|
from modules.shared.timezoneUtils import get_utc_timestamp
|
||||||
from modules.aicore.aicoreBase import BaseConnectorAi
|
from modules.aicore.aicoreBase import BaseConnectorAi
|
||||||
from modules.datamodels.datamodelAi import AiModel, ModelCapabilitiesEnum, PriorityEnum, ProcessingModeEnum, OperationTypeEnum
|
from modules.datamodels.datamodelAi import AiModel, ModelCapabilitiesEnum, PriorityEnum, ProcessingModeEnum, OperationTypeEnum, AiModelResponse
|
||||||
from modules.datamodels.datamodelWeb import (
|
|
||||||
WebSearchActionResult,
|
|
||||||
WebSearchActionDocument,
|
|
||||||
WebSearchDocumentData,
|
|
||||||
WebSearchResultItem,
|
|
||||||
WebCrawlActionResult,
|
|
||||||
WebCrawlActionDocument,
|
|
||||||
WebCrawlDocumentData,
|
|
||||||
WebCrawlResultItem,
|
|
||||||
WebScrapeActionResult,
|
|
||||||
WebScrapeActionDocument,
|
|
||||||
WebSearchDocumentData as WebScrapeDocumentData,
|
|
||||||
WebScrapeResultItem,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
@ -39,6 +25,32 @@ class WebCrawlResult:
|
||||||
url: str
|
url: str
|
||||||
content: str
|
content: str
|
||||||
|
|
||||||
|
@dataclass
class WebResearchRequest:
    """Ultra-simplified web research request.

    Plain value container: only ``user_prompt`` is required, every other
    field carries a default. Field order is part of the positional
    constructor signature and must not be rearranged.
    """

    # The only mandatory field: the user's prompt.
    user_prompt: str

    # Optional list of URLs; None when the caller supplies none.
    urls: Optional[List[str]] = None

    # Numeric limits.
    max_results: int = 5
    max_pages: int = 10

    # Depth / output selectors.
    # NOTE(review): presumably forwarded to the search/crawl backend —
    # confirm the accepted values against the consumer of this request.
    search_depth: str = "basic"
    extract_depth: str = "advanced"
    format: str = "markdown"

    # Optional filters; None means "not set".
    country: Optional[str] = None
    time_range: Optional[str] = None
    topic: Optional[str] = None
    language: Optional[str] = None
|
||||||
|
|
||||||
|
@dataclass
class WebResearchResult:
    """Ultra-simplified web research result - just success/error + documents.

    ``documents`` is always a list after construction: a missing or
    explicit ``None`` value is replaced by a fresh empty list in
    ``__post_init__``, so instances never share one default list.
    """

    success: bool = True
    # Error message; None when no error was recorded.
    error: Optional[str] = None
    # Simple dicts instead of ActionDocument. The default really is None
    # (normalised below), so the annotation is Optional rather than List.
    documents: Optional[List[dict]] = None

    def __post_init__(self):
        """Replace a ``None`` ``documents`` value with a new empty list."""
        if self.documents is None:
            self.documents = []
|
||||||
|
|
||||||
class ConnectorWeb(BaseConnectorAi):
|
class ConnectorWeb(BaseConnectorAi):
|
||||||
"""Tavily web search connector."""
|
"""Tavily web search connector."""
|
||||||
|
|
||||||
|
|
@ -152,127 +164,167 @@ class ConnectorWeb(BaseConnectorAi):
|
||||||
webSearchMaxResults=int(APP_CONFIG.get("Web_Search_MAX_RESULTS", "20")),
|
webSearchMaxResults=int(APP_CONFIG.get("Web_Search_MAX_RESULTS", "20")),
|
||||||
)
|
)
|
||||||
|
|
||||||
# Standardized methods returning ActionResults for the interface to consume
|
# Standardized method using AiModelCall/AiModelResponse pattern
|
||||||
async def search(self, request) -> "WebSearchActionResult":
|
|
||||||
|
async def search(self, modelCall) -> "AiModelResponse":
|
||||||
|
"""Search using standardized AiModelCall/AiModelResponse pattern"""
|
||||||
try:
|
try:
|
||||||
|
# Extract parameters from modelCall
|
||||||
|
query = modelCall.messages[0]["content"] if modelCall.messages else ""
|
||||||
|
options = modelCall.options
|
||||||
|
|
||||||
raw_results = await self._search(
|
raw_results = await self._search(
|
||||||
query=request.query,
|
query=query,
|
||||||
max_results=request.max_results,
|
max_results=options.get("max_results", 5),
|
||||||
search_depth=request.search_depth,
|
search_depth=options.get("search_depth"),
|
||||||
time_range=request.time_range,
|
time_range=options.get("time_range"),
|
||||||
topic=request.topic,
|
topic=options.get("topic"),
|
||||||
include_domains=request.include_domains,
|
include_domains=options.get("include_domains"),
|
||||||
exclude_domains=request.exclude_domains,
|
exclude_domains=options.get("exclude_domains"),
|
||||||
language=request.language,
|
language=options.get("language"),
|
||||||
include_answer=request.include_answer,
|
include_answer=options.get("include_answer"),
|
||||||
include_raw_content=request.include_raw_content,
|
include_raw_content=options.get("include_raw_content"),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Convert to JSON string
|
||||||
|
results_json = {
|
||||||
|
"query": query,
|
||||||
|
"results": [
|
||||||
|
{
|
||||||
|
"title": result.title,
|
||||||
|
"url": result.url,
|
||||||
|
"content": getattr(result, 'raw_content', None)
|
||||||
|
}
|
||||||
|
for result in raw_results
|
||||||
|
],
|
||||||
|
"total_count": len(raw_results)
|
||||||
|
}
|
||||||
|
|
||||||
|
import json
|
||||||
|
content = json.dumps(results_json, indent=2)
|
||||||
|
|
||||||
|
return AiModelResponse(
|
||||||
|
content=content,
|
||||||
|
success=True,
|
||||||
|
metadata={
|
||||||
|
"total_count": len(raw_results),
|
||||||
|
"search_depth": options.get("search_depth", "basic")
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return WebSearchActionResult(success=False, error=str(e))
|
return AiModelResponse(
|
||||||
|
content="",
|
||||||
result_items = [
|
success=False,
|
||||||
WebSearchResultItem(
|
error=str(e)
|
||||||
title=result.title,
|
|
||||||
url=result.url,
|
|
||||||
raw_content=getattr(result, 'raw_content', None)
|
|
||||||
)
|
)
|
||||||
for result in raw_results
|
|
||||||
]
|
|
||||||
|
|
||||||
document_data = WebSearchDocumentData(
|
async def crawl(self, modelCall) -> "AiModelResponse":
|
||||||
query=request.query,
|
"""Crawl using standardized AiModelCall/AiModelResponse pattern"""
|
||||||
results=result_items,
|
|
||||||
total_count=len(result_items),
|
|
||||||
)
|
|
||||||
|
|
||||||
document = WebSearchActionDocument(
|
|
||||||
documentName=f"web_search_results_{get_utc_timestamp()}.json",
|
|
||||||
documentData=document_data,
|
|
||||||
mimeType="application/json",
|
|
||||||
)
|
|
||||||
|
|
||||||
return WebSearchActionResult(
|
|
||||||
success=True, documents=[document], resultLabel="web_search_results"
|
|
||||||
)
|
|
||||||
|
|
||||||
async def crawl(self, request) -> "WebCrawlActionResult":
|
|
||||||
try:
|
try:
|
||||||
|
# Extract parameters from modelCall
|
||||||
|
options = modelCall.options
|
||||||
|
urls = options.get("urls", [])
|
||||||
|
|
||||||
raw_results = await self._crawl(
|
raw_results = await self._crawl(
|
||||||
[str(u) for u in request.urls],
|
urls,
|
||||||
extract_depth=request.extract_depth,
|
extract_depth=options.get("extract_depth"),
|
||||||
format=request.format,
|
format=options.get("format"),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Convert to JSON string
|
||||||
|
results_json = {
|
||||||
|
"urls": urls,
|
||||||
|
"results": [
|
||||||
|
{
|
||||||
|
"url": result.url,
|
||||||
|
"content": result.content
|
||||||
|
}
|
||||||
|
for result in raw_results
|
||||||
|
],
|
||||||
|
"total_count": len(raw_results)
|
||||||
|
}
|
||||||
|
|
||||||
|
import json
|
||||||
|
content = json.dumps(results_json, indent=2)
|
||||||
|
|
||||||
|
return AiModelResponse(
|
||||||
|
content=content,
|
||||||
|
success=True,
|
||||||
|
metadata={
|
||||||
|
"total_count": len(raw_results),
|
||||||
|
"extract_depth": options.get("extract_depth", "basic")
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return WebCrawlActionResult(success=False, error=str(e))
|
return AiModelResponse(
|
||||||
|
content="",
|
||||||
|
success=False,
|
||||||
|
error=str(e)
|
||||||
|
)
|
||||||
|
|
||||||
result_items = [
|
async def scrape(self, modelCall) -> "AiModelResponse":
|
||||||
WebCrawlResultItem(url=result.url, content=result.content)
|
"""Scrape using standardized AiModelCall/AiModelResponse pattern"""
|
||||||
for result in raw_results
|
|
||||||
]
|
|
||||||
|
|
||||||
document_data = WebCrawlDocumentData(
|
|
||||||
urls=[str(u) for u in request.urls],
|
|
||||||
results=result_items,
|
|
||||||
total_count=len(result_items),
|
|
||||||
)
|
|
||||||
|
|
||||||
document = WebCrawlActionDocument(
|
|
||||||
documentName=f"web_crawl_results_{get_utc_timestamp()}.json",
|
|
||||||
documentData=document_data,
|
|
||||||
mimeType="application/json",
|
|
||||||
)
|
|
||||||
|
|
||||||
return WebCrawlActionResult(
|
|
||||||
success=True, documents=[document], resultLabel="web_crawl_results"
|
|
||||||
)
|
|
||||||
|
|
||||||
async def scrape(self, request) -> "WebScrapeActionResult":
|
|
||||||
try:
|
try:
|
||||||
|
# Extract parameters from modelCall
|
||||||
|
query = modelCall.messages[0]["content"] if modelCall.messages else ""
|
||||||
|
options = modelCall.options
|
||||||
|
|
||||||
search_results = await self._search(
|
search_results = await self._search(
|
||||||
query=request.query,
|
query=query,
|
||||||
max_results=request.max_results,
|
max_results=options.get("max_results", 5),
|
||||||
search_depth=request.search_depth,
|
search_depth=options.get("search_depth"),
|
||||||
time_range=request.time_range,
|
time_range=options.get("time_range"),
|
||||||
topic=request.topic,
|
topic=options.get("topic"),
|
||||||
include_domains=request.include_domains,
|
include_domains=options.get("include_domains"),
|
||||||
exclude_domains=request.exclude_domains,
|
exclude_domains=options.get("exclude_domains"),
|
||||||
language=request.language,
|
language=options.get("language"),
|
||||||
include_answer=request.include_answer,
|
include_answer=options.get("include_answer"),
|
||||||
include_raw_content=request.include_raw_content,
|
include_raw_content=options.get("include_raw_content"),
|
||||||
)
|
)
|
||||||
except Exception as e:
|
|
||||||
return WebScrapeActionResult(success=False, error=str(e))
|
|
||||||
|
|
||||||
try:
|
|
||||||
urls = [result.url for result in search_results]
|
urls = [result.url for result in search_results]
|
||||||
crawl_results = await self._crawl(
|
crawl_results = await self._crawl(
|
||||||
urls,
|
urls,
|
||||||
extract_depth=request.extract_depth,
|
extract_depth=options.get("extract_depth"),
|
||||||
format=request.format,
|
format=options.get("format"),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Convert to JSON string
|
||||||
|
results_json = {
|
||||||
|
"query": query,
|
||||||
|
"results": [
|
||||||
|
{
|
||||||
|
"url": result.url,
|
||||||
|
"content": result.content
|
||||||
|
}
|
||||||
|
for result in crawl_results
|
||||||
|
],
|
||||||
|
"total_count": len(crawl_results)
|
||||||
|
}
|
||||||
|
|
||||||
|
import json
|
||||||
|
content = json.dumps(results_json, indent=2)
|
||||||
|
|
||||||
|
return AiModelResponse(
|
||||||
|
content=content,
|
||||||
|
success=True,
|
||||||
|
metadata={
|
||||||
|
"total_count": len(crawl_results),
|
||||||
|
"search_depth": options.get("search_depth", "basic"),
|
||||||
|
"extract_depth": options.get("extract_depth", "basic")
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return WebScrapeActionResult(success=False, error=str(e))
|
return AiModelResponse(
|
||||||
|
content="",
|
||||||
|
success=False,
|
||||||
|
error=str(e)
|
||||||
|
)
|
||||||
|
|
||||||
result_items = [
|
# Helper Functions
|
||||||
WebScrapeResultItem(url=result.url, content=result.content)
|
|
||||||
for result in crawl_results
|
|
||||||
]
|
|
||||||
|
|
||||||
document_data = WebScrapeDocumentData(
|
|
||||||
query=request.query,
|
|
||||||
results=result_items,
|
|
||||||
total_count=len(result_items),
|
|
||||||
)
|
|
||||||
|
|
||||||
document = WebScrapeActionDocument(
|
|
||||||
documentName=f"web_scrape_results_{get_utc_timestamp()}.json",
|
|
||||||
documentData=document_data,
|
|
||||||
mimeType="application/json",
|
|
||||||
)
|
|
||||||
|
|
||||||
return WebScrapeActionResult(
|
|
||||||
success=True, documents=[document], resultLabel="web_scrape_results"
|
|
||||||
)
|
|
||||||
|
|
||||||
async def _search_urls_raw(self,
|
async def _search_urls_raw(self,
|
||||||
*,
|
*,
|
||||||
|
|
|
||||||
|
|
@ -185,3 +185,31 @@ class EnhancedAiCallOptions(AiCallOptions):
|
||||||
description="Separator between chunks in merged output"
|
description="Separator between chunks in merged output"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class AiModelCall(BaseModel):
    """Standardized input for AI model calls.

    Bundles the messages, the optional target model and a free-form
    options dict into one object.
    """

    messages: List[Dict[str, Any]] = Field(description="Messages in OpenAI format (role, content)")
    model: Optional[AiModel] = Field(default=None, description="The AI model being called")
    options: Dict[str, Any] = Field(default_factory=dict, description="Additional model-specific options")

    class Config:
        # Pydantic only recognises the snake_case key; the camelCase
        # spelling `arbitraryTypesAllowed` is silently ignored.
        arbitrary_types_allowed = True
|
||||||
|
|
||||||
|
|
||||||
|
class AiModelResponse(BaseModel):
    """Standardized output from AI model calls.

    ``content`` holds the response text; ``success`` and ``error`` report the
    outcome of the call. The remaining fields are optional metadata a model
    may include.
    """

    content: str = Field(description="The AI response content")
    success: bool = Field(default=True, description="Whether the call was successful")
    error: Optional[str] = Field(default=None, description="Error message if success=False")

    # Optional metadata that models can include
    modelId: Optional[str] = Field(default=None, description="Model identifier used")
    processingTime: Optional[float] = Field(default=None, description="Processing time in seconds")
    tokensUsed: Optional[Dict[str, int]] = Field(default=None, description="Token usage (input, output, total)")
    metadata: Optional[Dict[str, Any]] = Field(default=None, description="Additional model-specific metadata")

    class Config:
        # Pydantic only recognizes the snake_case key; the previous camelCase
        # spelling (arbitraryTypesAllowed) is not a known config option.
        arbitrary_types_allowed = True
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -121,10 +121,5 @@ class JsonMergeResult(BaseModel):
|
||||||
metadata: Dict[str, Any] = Field(default_factory=dict, description="Merge process metadata")
|
metadata: Dict[str, Any] = Field(default_factory=dict, description="Merge process metadata")
|
||||||
|
|
||||||
|
|
||||||
# Update forward references (compatible with Pydantic v1 and v2)
|
# Update forward references
|
||||||
try:
|
ListItem.model_rebuild()
|
||||||
# Pydantic v2
|
|
||||||
ListItem.model_rebuild()
|
|
||||||
except AttributeError:
|
|
||||||
# Pydantic v1
|
|
||||||
ListItem.update_forward_refs()
|
|
||||||
|
|
|
||||||
|
|
@ -1,142 +0,0 @@
|
||||||
"""Web-related modules"""
|
|
||||||
from pydantic import BaseModel, Field, HttpUrl
|
|
||||||
from typing import List, Optional, Literal, Dict, Any
|
|
||||||
from modules.shared.configuration import APP_CONFIG
|
|
||||||
from modules.datamodels.datamodelChat import ActionDocument, ActionResult
|
|
||||||
|
|
||||||
|
|
||||||
WEB_SEARCH_MAX_QUERY_LENGTH: int = int(APP_CONFIG.get("Web_Search_MAX_QUERY_LENGTH", "400"))
|
|
||||||
WEB_SEARCH_MAX_RESULTS: int = int(APP_CONFIG.get("Web_Search_MAX_RESULTS", "20"))
|
|
||||||
WEB_SEARCH_MIN_RESULTS: int = int(APP_CONFIG.get("Web_Search_MIN_RESULTS", "1"))
|
|
||||||
|
|
||||||
|
|
||||||
class WebResearchOptions(BaseModel):
|
|
||||||
"""Advanced options for web research workflow"""
|
|
||||||
max_pages: int = Field(default=10, ge=1, le=50, description="Maximum pages to crawl")
|
|
||||||
search_depth: Literal["basic", "advanced"] = Field(default="basic", description="Tavily search depth")
|
|
||||||
extract_depth: Literal["basic", "advanced"] = Field(default="advanced", description="Tavily extract depth")
|
|
||||||
format: Literal["text", "markdown"] = Field(default="markdown", description="Content format")
|
|
||||||
return_report: bool = Field(default=True, description="Return formatted report or raw data")
|
|
||||||
pages_search_depth: int = Field(default=1, ge=1, le=5, description="How deep to crawl: 1=main pages only, 2=main+sub-pages, 3=main+sub+sub-sub, etc.")
|
|
||||||
country: Optional[str] = Field(default=None, description="Country code for search bias")
|
|
||||||
time_range: Optional[Literal["d", "w", "m", "y"]] = Field(default=None, description="Time range for search")
|
|
||||||
topic: Optional[Literal["general", "news", "academic"]] = Field(default=None, description="Search topic")
|
|
||||||
language: Optional[str] = Field(default=None, description="Language code")
|
|
||||||
include_answer: Optional[bool] = Field(default=None, description="Include AI answer")
|
|
||||||
include_raw_content: Optional[bool] = Field(default=None, description="Include raw content")
|
|
||||||
|
|
||||||
class WebResearchRequest(BaseModel):
|
|
||||||
"""Main web research request"""
|
|
||||||
user_prompt: str = Field(min_length=1, max_length=WEB_SEARCH_MAX_QUERY_LENGTH, description="User's research question or prompt")
|
|
||||||
urls: Optional[List[str]] = Field(default=None, description="Specific URLs to crawl (optional)")
|
|
||||||
max_results: int = Field(default=5, ge=1, le=WEB_SEARCH_MAX_RESULTS, description="Max search results")
|
|
||||||
options: WebResearchOptions = Field(default_factory=WebResearchOptions, description="Advanced options")
|
|
||||||
|
|
||||||
class WebSearchResultItem(BaseModel):
|
|
||||||
"""Individual search result"""
|
|
||||||
title: str
|
|
||||||
url: HttpUrl
|
|
||||||
raw_content: Optional[str] = Field(default=None, description="Raw HTML content")
|
|
||||||
|
|
||||||
class WebCrawlResultItem(BaseModel):
|
|
||||||
"""Individual crawl result"""
|
|
||||||
url: HttpUrl
|
|
||||||
content: str
|
|
||||||
|
|
||||||
class WebResearchDocumentData(BaseModel):
|
|
||||||
"""Complete web research results"""
|
|
||||||
user_prompt: str
|
|
||||||
websites_analyzed: int
|
|
||||||
additional_links_found: int
|
|
||||||
analysis_result: str
|
|
||||||
sources: List[WebSearchResultItem]
|
|
||||||
additional_links: List[str]
|
|
||||||
individual_content: Optional[Dict[str, str]] = None # URL -> content mapping
|
|
||||||
debug_info: Optional[Dict[str, Any]] = None
|
|
||||||
|
|
||||||
class WebResearchActionDocument(ActionDocument):
|
|
||||||
documentData: WebResearchDocumentData
|
|
||||||
|
|
||||||
class WebResearchActionResult(ActionResult):
|
|
||||||
documents: List[WebResearchActionDocument] = Field(default_factory=list)
|
|
||||||
|
|
||||||
# Legacy models for connector compatibility
|
|
||||||
|
|
||||||
class WebSearchDocumentData(BaseModel):
|
|
||||||
"""Search results document data"""
|
|
||||||
query: str
|
|
||||||
results: List[WebSearchResultItem]
|
|
||||||
total_count: int
|
|
||||||
|
|
||||||
class WebSearchActionDocument(ActionDocument):
|
|
||||||
documentData: WebSearchDocumentData
|
|
||||||
|
|
||||||
class WebSearchActionResult(ActionResult):
|
|
||||||
documents: List[WebSearchActionDocument] = Field(default_factory=list)
|
|
||||||
|
|
||||||
class WebCrawlDocumentData(BaseModel):
|
|
||||||
"""Crawl results document data"""
|
|
||||||
urls: List[HttpUrl]
|
|
||||||
results: List[WebCrawlResultItem]
|
|
||||||
total_count: int
|
|
||||||
|
|
||||||
class WebCrawlActionDocument(ActionDocument):
|
|
||||||
documentData: WebCrawlDocumentData
|
|
||||||
|
|
||||||
class WebCrawlActionResult(ActionResult):
|
|
||||||
documents: List[WebCrawlActionDocument] = Field(default_factory=list)
|
|
||||||
|
|
||||||
class WebScrapeDocumentData(BaseModel):
|
|
||||||
"""Scrape results document data"""
|
|
||||||
query: str
|
|
||||||
results: List[WebSearchResultItem]
|
|
||||||
total_count: int
|
|
||||||
|
|
||||||
class WebScrapeActionDocument(ActionDocument):
|
|
||||||
documentData: WebScrapeDocumentData
|
|
||||||
|
|
||||||
class WebScrapeActionResult(ActionResult):
|
|
||||||
documents: List[WebScrapeActionDocument] = Field(default_factory=list)
|
|
||||||
|
|
||||||
class WebSearchRequest(BaseModel):
|
|
||||||
"""Search request for Tavily"""
|
|
||||||
query: str
|
|
||||||
max_results: int = 5
|
|
||||||
search_depth: Optional[Literal["basic", "advanced"]] = None
|
|
||||||
time_range: Optional[Literal["d", "w", "m", "y"]] = None
|
|
||||||
topic: Optional[Literal["general", "news", "academic"]] = None
|
|
||||||
include_domains: Optional[List[str]] = None
|
|
||||||
exclude_domains: Optional[List[str]] = None
|
|
||||||
language: Optional[str] = None
|
|
||||||
include_answer: Optional[bool] = None
|
|
||||||
include_raw_content: Optional[bool] = None
|
|
||||||
auto_parameters: Optional[bool] = None
|
|
||||||
country: Optional[str] = None
|
|
||||||
|
|
||||||
class WebCrawlRequest(BaseModel):
|
|
||||||
"""Crawl request for Tavily"""
|
|
||||||
urls: List[HttpUrl]
|
|
||||||
extract_depth: Optional[Literal["basic", "advanced"]] = None
|
|
||||||
format: Optional[Literal["text", "markdown"]] = None
|
|
||||||
|
|
||||||
class WebScrapeRequest(BaseModel):
|
|
||||||
"""Scrape request for Tavily"""
|
|
||||||
query: str
|
|
||||||
max_results: int = 5
|
|
||||||
search_depth: Optional[Literal["basic", "advanced"]] = None
|
|
||||||
time_range: Optional[Literal["d", "w", "m", "y"]] = None
|
|
||||||
topic: Optional[Literal["general", "news", "academic"]] = None
|
|
||||||
include_domains: Optional[List[str]] = None
|
|
||||||
exclude_domains: Optional[List[str]] = None
|
|
||||||
language: Optional[str] = None
|
|
||||||
include_answer: Optional[bool] = None
|
|
||||||
include_raw_content: Optional[bool] = None
|
|
||||||
auto_parameters: Optional[bool] = None
|
|
||||||
country: Optional[str] = None
|
|
||||||
extract_depth: Optional[Literal["basic", "advanced"]] = None
|
|
||||||
format: Optional[Literal["text", "markdown"]] = None
|
|
||||||
|
|
||||||
class WebScrapeResultItem(BaseModel):
|
|
||||||
"""Individual scrape result"""
|
|
||||||
url: HttpUrl
|
|
||||||
content: str
|
|
||||||
|
|
@ -13,17 +13,10 @@ from modules.datamodels.datamodelAi import (
|
||||||
AiCallOptions,
|
AiCallOptions,
|
||||||
AiCallRequest,
|
AiCallRequest,
|
||||||
AiCallResponse,
|
AiCallResponse,
|
||||||
OperationTypeEnum,
|
OperationTypeEnum,
|
||||||
|
AiModelCall,
|
||||||
|
AiModelResponse,
|
||||||
)
|
)
|
||||||
from modules.datamodels.datamodelWeb import (
|
|
||||||
WebResearchRequest,
|
|
||||||
WebResearchActionResult,
|
|
||||||
WebSearchResultItem,
|
|
||||||
WebCrawlResultItem,
|
|
||||||
WebSearchRequest,
|
|
||||||
WebCrawlRequest,
|
|
||||||
)
|
|
||||||
from modules.datamodels.datamodelChat import ActionDocument
|
|
||||||
|
|
||||||
|
|
||||||
# Dynamic model registry - models are now loaded from connectors via aicore system
|
# Dynamic model registry - models are now loaded from connectors via aicore system
|
||||||
|
|
@ -94,8 +87,7 @@ class AiObjects:
|
||||||
context = request.context or ""
|
context = request.context or ""
|
||||||
options = request.options
|
options = request.options
|
||||||
|
|
||||||
# Calculate input bytes
|
# Input bytes will be calculated inside _callWithModel
|
||||||
inputBytes = len((prompt + context).encode("utf-8"))
|
|
||||||
|
|
||||||
# Compress optionally (prompt/context) - simple truncation fallback kept here
|
# Compress optionally (prompt/context) - simple truncation fallback kept here
|
||||||
def _maybeTruncate(text: str, limit: int) -> str:
|
def _maybeTruncate(text: str, limit: int) -> str:
|
||||||
|
|
@ -109,11 +101,7 @@ class AiObjects:
|
||||||
if options.compressContext and len(context.encode("utf-8")) > 70000:
|
if options.compressContext and len(context.encode("utf-8")) > 70000:
|
||||||
context = _maybeTruncate(context, 70000)
|
context = _maybeTruncate(context, 70000)
|
||||||
|
|
||||||
# Derive generation parameters
|
# Generation parameters are handled inside _callWithModel
|
||||||
temperature = getattr(options, "temperature", None)
|
|
||||||
if temperature is None:
|
|
||||||
temperature = 0.2
|
|
||||||
maxTokens = getattr(options, "maxTokens", None)
|
|
||||||
|
|
||||||
# Get failover models for this operation type
|
# Get failover models for this operation type
|
||||||
availableModels = modelRegistry.getAvailableModels()
|
availableModels = modelRegistry.getAvailableModels()
|
||||||
|
|
@ -127,7 +115,7 @@ class AiObjects:
|
||||||
modelName="error",
|
modelName="error",
|
||||||
priceUsd=0.0,
|
priceUsd=0.0,
|
||||||
processingTime=0.0,
|
processingTime=0.0,
|
||||||
bytesSent=inputBytes,
|
bytesSent=0,
|
||||||
bytesReceived=0,
|
bytesReceived=0,
|
||||||
errorCount=1
|
errorCount=1
|
||||||
)
|
)
|
||||||
|
|
@ -139,7 +127,7 @@ class AiObjects:
|
||||||
logger.info(f"Attempting AI call with model: {model.name} (attempt {attempt + 1}/{len(failoverModelList)})")
|
logger.info(f"Attempting AI call with model: {model.name} (attempt {attempt + 1}/{len(failoverModelList)})")
|
||||||
|
|
||||||
# Call the model
|
# Call the model
|
||||||
response = await self._callWithModel(model, prompt, context, temperature, maxTokens, inputBytes)
|
response = await self._callWithModel(model, prompt, context)
|
||||||
|
|
||||||
logger.info(f"✅ AI call successful with model: {model.name}")
|
logger.info(f"✅ AI call successful with model: {model.name}")
|
||||||
return response
|
return response
|
||||||
|
|
@ -165,7 +153,7 @@ class AiObjects:
|
||||||
modelName="error",
|
modelName="error",
|
||||||
priceUsd=0.0,
|
priceUsd=0.0,
|
||||||
processingTime=0.0,
|
processingTime=0.0,
|
||||||
bytesSent=inputBytes,
|
bytesSent=0,
|
||||||
bytesReceived=0,
|
bytesReceived=0,
|
||||||
errorCount=1
|
errorCount=1
|
||||||
)
|
)
|
||||||
|
|
@ -216,7 +204,7 @@ class AiObjects:
|
||||||
|
|
||||||
if partSize <= modelContextBytes:
|
if partSize <= modelContextBytes:
|
||||||
# Part fits - call AI directly
|
# Part fits - call AI directly
|
||||||
response = await self._callWithModel(model, prompt, contentPart.data, 0.2, None, partSize)
|
response = await self._callWithModel(model, prompt, contentPart.data)
|
||||||
logger.info(f"✅ Content part processed successfully with model: {model.name}")
|
logger.info(f"✅ Content part processed successfully with model: {model.name}")
|
||||||
return response
|
return response
|
||||||
else:
|
else:
|
||||||
|
|
@ -228,7 +216,7 @@ class AiObjects:
|
||||||
# Process each chunk
|
# Process each chunk
|
||||||
chunkResults = []
|
chunkResults = []
|
||||||
for chunk in chunks:
|
for chunk in chunks:
|
||||||
chunkResponse = await self._callWithModel(model, prompt, chunk['data'], 0.2, None, chunk['size'])
|
chunkResponse = await self._callWithModel(model, prompt, chunk['data'])
|
||||||
chunkResults.append(chunkResponse)
|
chunkResults.append(chunkResponse)
|
||||||
|
|
||||||
# Merge chunk results
|
# Merge chunk results
|
||||||
|
|
@ -405,8 +393,11 @@ class AiObjects:
|
||||||
errorCount=1
|
errorCount=1
|
||||||
)
|
)
|
||||||
|
|
||||||
async def _callWithModel(self, model: AiModel, prompt: str, context: str, temperature: float, maxTokens: int, inputBytes: int) -> AiCallResponse:
|
async def _callWithModel(self, model: AiModel, prompt: str, context: str) -> AiCallResponse:
|
||||||
"""Call a specific model and return the response."""
|
"""Call a specific model and return the response."""
|
||||||
|
# Calculate input bytes from prompt and context
|
||||||
|
inputBytes = len((prompt + context).encode('utf-8'))
|
||||||
|
|
||||||
# Replace <TOKEN_LIMIT> placeholder in prompt for this specific model
|
# Replace <TOKEN_LIMIT> placeholder in prompt for this specific model
|
||||||
contextLength = model.contextLength
|
contextLength = model.contextLength
|
||||||
if contextLength > 0:
|
if contextLength > 0:
|
||||||
|
|
@ -434,27 +425,21 @@ class AiObjects:
|
||||||
if not connector:
|
if not connector:
|
||||||
raise ValueError(f"No connector found for model {model.name}")
|
raise ValueError(f"No connector found for model {model.name}")
|
||||||
|
|
||||||
# Call the model's function directly
|
# Call the model's function directly - completely generic
|
||||||
if model.functionCall:
|
if model.functionCall:
|
||||||
# Use the model's function call directly
|
# Create standardized call object
|
||||||
if model.name.startswith("perplexity_callAiWithWebSearch"):
|
modelCall = AiModelCall(
|
||||||
query = modelPrompt
|
messages=messages,
|
||||||
if context:
|
model=model
|
||||||
query = f"Context: {context}\n\nQuery: {modelPrompt}"
|
)
|
||||||
content = await model.functionCall(query, temperature=temperature, maxTokens=maxTokens)
|
|
||||||
elif model.name.startswith("perplexity_researchTopic"):
|
# Call the model with standardized interface
|
||||||
content = await model.functionCall(modelPrompt)
|
modelResponse = await model.functionCall(modelCall)
|
||||||
elif model.name.startswith("perplexity_answerQuestion"):
|
|
||||||
content = await model.functionCall(modelPrompt, context)
|
# Extract content from standardized response
|
||||||
elif model.name.startswith("perplexity_getCurrentNews"):
|
if not modelResponse.success:
|
||||||
content = await model.functionCall(modelPrompt)
|
raise ValueError(f"Model call failed: {modelResponse.error}")
|
||||||
else:
|
content = modelResponse.content
|
||||||
# Standard callAiBasic
|
|
||||||
if model.connectorType == "anthropic":
|
|
||||||
response = await model.functionCall(messages, temperature=temperature, maxTokens=maxTokens)
|
|
||||||
content = response["choices"][0]["message"]["content"]
|
|
||||||
else:
|
|
||||||
content = await model.functionCall(messages, temperature=temperature, maxTokens=maxTokens)
|
|
||||||
else:
|
else:
|
||||||
raise ValueError(f"Model {model.name} has no function call defined")
|
raise ValueError(f"Model {model.name} has no function call defined")
|
||||||
|
|
||||||
|
|
@ -482,9 +467,6 @@ class AiObjects:
|
||||||
if options is None:
|
if options is None:
|
||||||
options = AiCallOptions(operationType=OperationTypeEnum.IMAGE_ANALYSE)
|
options = AiCallOptions(operationType=OperationTypeEnum.IMAGE_ANALYSE)
|
||||||
|
|
||||||
# Calculate input bytes (prompt + image data)
|
|
||||||
inputBytes = len(prompt.encode("utf-8")) + len(imageData) if isinstance(imageData, bytes) else len(prompt.encode("utf-8")) + len(str(imageData).encode("utf-8"))
|
|
||||||
|
|
||||||
# Get fallback models for image analysis
|
# Get fallback models for image analysis
|
||||||
availableModels = modelRegistry.getAvailableModels()
|
availableModels = modelRegistry.getAvailableModels()
|
||||||
failoverModelList = modelSelector.getFailoverModelList(prompt, "", options, availableModels)
|
failoverModelList = modelSelector.getFailoverModelList(prompt, "", options, availableModels)
|
||||||
|
|
@ -497,7 +479,7 @@ class AiObjects:
|
||||||
modelName="error",
|
modelName="error",
|
||||||
priceUsd=0.0,
|
priceUsd=0.0,
|
||||||
processingTime=0.0,
|
processingTime=0.0,
|
||||||
bytesSent=inputBytes,
|
bytesSent=0,
|
||||||
bytesReceived=0,
|
bytesReceived=0,
|
||||||
errorCount=1
|
errorCount=1
|
||||||
)
|
)
|
||||||
|
|
@ -509,7 +491,7 @@ class AiObjects:
|
||||||
logger.info(f"Attempting image analysis with model: {model.name} (attempt {attempt + 1}/{len(failoverModelList)})")
|
logger.info(f"Attempting image analysis with model: {model.name} (attempt {attempt + 1}/{len(failoverModelList)})")
|
||||||
|
|
||||||
# Call the model
|
# Call the model
|
||||||
response = await self._callImageWithModel(model, prompt, imageData, mimeType, inputBytes)
|
response = await self._callImageWithModel(model, prompt, imageData, mimeType)
|
||||||
|
|
||||||
logger.info(f"✅ Image analysis successful with model: {model.name}")
|
logger.info(f"✅ Image analysis successful with model: {model.name}")
|
||||||
return response
|
return response
|
||||||
|
|
@ -535,13 +517,23 @@ class AiObjects:
|
||||||
modelName="error",
|
modelName="error",
|
||||||
priceUsd=0.0,
|
priceUsd=0.0,
|
||||||
processingTime=0.0,
|
processingTime=0.0,
|
||||||
bytesSent=inputBytes,
|
bytesSent=0,
|
||||||
bytesReceived=0,
|
bytesReceived=0,
|
||||||
errorCount=1
|
errorCount=1
|
||||||
)
|
)
|
||||||
|
|
||||||
async def _callImageWithModel(self, model: AiModel, prompt: str, imageData: Union[str, bytes], mimeType: str, inputBytes: int) -> AiCallResponse:
|
async def _callImageWithModel(self, model: AiModel, prompt: str, imageData: Union[str, bytes], mimeType: str) -> AiCallResponse:
|
||||||
"""Call a specific model for image analysis and return the response."""
|
"""Call a specific model for image analysis and return the response."""
|
||||||
|
# Calculate input bytes from prompt and image data
|
||||||
|
promptBytes = len(prompt.encode('utf-8'))
|
||||||
|
if isinstance(imageData, str):
|
||||||
|
# Base64 encoded string
|
||||||
|
imageBytes = len(imageData.encode('utf-8'))
|
||||||
|
else:
|
||||||
|
# Raw bytes
|
||||||
|
imageBytes = len(imageData)
|
||||||
|
inputBytes = promptBytes + imageBytes
|
||||||
|
|
||||||
# Start timing
|
# Start timing
|
||||||
startTime = time.time()
|
startTime = time.time()
|
||||||
|
|
||||||
|
|
@ -633,67 +625,52 @@ class AiObjects:
|
||||||
errorCount=1
|
errorCount=1
|
||||||
)
|
)
|
||||||
|
|
||||||
# Web functionality methods - Simple interface to Tavily connector
|
# Web functionality methods - Now use standardized AiModelCall/AiModelResponse pattern
|
||||||
async def searchWebsites(self, query: str, maxResults: int = 5, **kwargs) -> List[WebSearchResultItem]:
|
async def searchWebsites(self, query: str, maxResults: int = 5, **kwargs) -> str:
|
||||||
"""Search for websites using Tavily."""
|
"""Search for websites using Tavily with standardized pattern."""
|
||||||
request = WebSearchRequest(
|
from modules.datamodels.datamodelAi import AiModelCall
|
||||||
query=query,
|
|
||||||
max_results=maxResults,
|
modelCall = AiModelCall(
|
||||||
**kwargs
|
messages=[{"role": "user", "content": query}],
|
||||||
|
options={"max_results": maxResults, **kwargs}
|
||||||
)
|
)
|
||||||
|
|
||||||
# Get Tavily connector from registry
|
# Get Tavily connector from registry
|
||||||
tavilyConnector = modelRegistry.getConnectorForModel("tavily_search")
|
tavilyConnector = modelRegistry.getConnectorForModel("tavily_search")
|
||||||
if not tavilyConnector:
|
if not tavilyConnector:
|
||||||
raise ValueError("Tavily connector not available")
|
raise ValueError("Tavily connector not available")
|
||||||
result = await tavilyConnector.search(request)
|
|
||||||
|
|
||||||
if result.success and result.documents:
|
result = await tavilyConnector.search(modelCall)
|
||||||
return result.documents[0].documentData.results
|
return result.content if result.success else ""
|
||||||
return []
|
|
||||||
|
|
||||||
async def crawlWebsites(self, urls: List[str], extractDepth: str = "advanced", format: str = "markdown") -> List[WebCrawlResultItem]:
|
async def crawlWebsites(self, urls: List[str], extractDepth: str = "advanced", format: str = "markdown") -> str:
|
||||||
"""Crawl websites using Tavily."""
|
"""Crawl websites using Tavily with standardized pattern."""
|
||||||
from pydantic import HttpUrl
|
from modules.datamodels.datamodelAi import AiModelCall
|
||||||
from urllib.parse import urlparse
|
|
||||||
|
|
||||||
# Safely create HttpUrl objects with proper scheme handling
|
modelCall = AiModelCall(
|
||||||
httpUrls = []
|
messages=[{"role": "user", "content": "crawl websites"}],
|
||||||
for url in urls:
|
options={"urls": urls, "extract_depth": extractDepth, "format": format}
|
||||||
try:
|
|
||||||
# Ensure URL has a scheme
|
|
||||||
parsed = urlparse(url)
|
|
||||||
if not parsed.scheme:
|
|
||||||
url = f"https://{url}"
|
|
||||||
|
|
||||||
# Use HttpUrl with scheme parameter (this works for all URLs)
|
|
||||||
httpUrls.append(HttpUrl(url, scheme="https"))
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning(f"Skipping invalid URL {url}: {e}")
|
|
||||||
continue
|
|
||||||
|
|
||||||
if not httpUrls:
|
|
||||||
return []
|
|
||||||
|
|
||||||
request = WebCrawlRequest(
|
|
||||||
urls=httpUrls,
|
|
||||||
extract_depth=extractDepth,
|
|
||||||
format=format
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# Get Tavily connector from registry
|
# Get Tavily connector from registry
|
||||||
tavilyConnector = modelRegistry.getConnectorForModel("tavily_crawl")
|
tavilyConnector = modelRegistry.getConnectorForModel("tavily_crawl")
|
||||||
if not tavilyConnector:
|
if not tavilyConnector:
|
||||||
raise ValueError("Tavily connector not available")
|
raise ValueError("Tavily connector not available")
|
||||||
result = await tavilyConnector.crawl(request)
|
|
||||||
|
|
||||||
if result.success and result.documents:
|
result = await tavilyConnector.crawl(modelCall)
|
||||||
return result.documents[0].documentData.results
|
return result.content if result.success else ""
|
||||||
return []
|
|
||||||
|
|
||||||
async def extractContent(self, urls: List[str], extractDepth: str = "advanced", format: str = "markdown") -> Dict[str, str]:
|
async def extractContent(self, urls: List[str], extractDepth: str = "advanced", format: str = "markdown") -> Dict[str, str]:
|
||||||
"""Extract content from URLs and return as dictionary."""
|
"""Extract content from URLs and return as dictionary."""
|
||||||
|
import json
|
||||||
crawlResults = await self.crawlWebsites(urls, extractDepth, format)
|
crawlResults = await self.crawlWebsites(urls, extractDepth, format)
|
||||||
return {str(result.url): result.content for result in crawlResults}
|
|
||||||
|
# Parse JSON response and extract content
|
||||||
|
try:
|
||||||
|
data = json.loads(crawlResults)
|
||||||
|
return {result["url"]: result["content"] for result in data.get("results", [])}
|
||||||
|
except (json.JSONDecodeError, KeyError):
|
||||||
|
return {}
|
||||||
|
|
||||||
# Core Web Tools - Clean interface for web operations
|
# Core Web Tools - Clean interface for web operations
|
||||||
async def readPage(self, url: str, extractDepth: str = "advanced") -> Optional[str]:
|
async def readPage(self, url: str, extractDepth: str = "advanced") -> Optional[str]:
|
||||||
|
|
|
||||||
|
|
@ -4,13 +4,7 @@ from modules.datamodels.datamodelChat import PromptPlaceholder, ChatDocument
|
||||||
from modules.services.serviceExtraction.mainServiceExtraction import ExtractionService
|
from modules.services.serviceExtraction.mainServiceExtraction import ExtractionService
|
||||||
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum
|
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum
|
||||||
from modules.datamodels.datamodelExtraction import ChunkResult, ContentExtracted
|
from modules.datamodels.datamodelExtraction import ChunkResult, ContentExtracted
|
||||||
from modules.datamodels.datamodelWeb import (
|
from modules.aicore.aicorePluginTavily import WebResearchRequest, WebResearchResult
|
||||||
WebResearchRequest,
|
|
||||||
WebResearchActionResult,
|
|
||||||
WebResearchDocumentData,
|
|
||||||
WebResearchActionDocument,
|
|
||||||
WebSearchResultItem,
|
|
||||||
)
|
|
||||||
from modules.interfaces.interfaceAiObjects import AiObjects
|
from modules.interfaces.interfaceAiObjects import AiObjects
|
||||||
from modules.services.serviceAi.subCoreAi import SubCoreAi
|
from modules.services.serviceAi.subCoreAi import SubCoreAi
|
||||||
from modules.services.serviceAi.subDocumentProcessing import SubDocumentProcessing
|
from modules.services.serviceAi.subDocumentProcessing import SubDocumentProcessing
|
||||||
|
|
@ -136,7 +130,7 @@ class AiService:
|
||||||
return await self.coreAi.generateImage(prompt, size, quality, style, options)
|
return await self.coreAi.generateImage(prompt, size, quality, style, options)
|
||||||
|
|
||||||
# Web Research
|
# Web Research
|
||||||
async def webResearch(self, request: WebResearchRequest) -> WebResearchActionResult:
|
async def webResearch(self, request: WebResearchRequest) -> WebResearchResult:
|
||||||
"""Perform web research using interface functions."""
|
"""Perform web research using interface functions."""
|
||||||
await self._ensureAiObjectsInitialized()
|
await self._ensureAiObjectsInitialized()
|
||||||
return await self.webResearchService.webResearch(request)
|
return await self.webResearchService.webResearch(request)
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,7 @@ import logging
|
||||||
from typing import Dict, Any, List, Optional, Tuple, Union
|
from typing import Dict, Any, List, Optional, Tuple, Union
|
||||||
from modules.datamodels.datamodelChat import PromptPlaceholder, ChatDocument
|
from modules.datamodels.datamodelChat import PromptPlaceholder, ChatDocument
|
||||||
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum
|
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum
|
||||||
|
from modules.datamodels.datamodelExtraction import ContentPart
|
||||||
from modules.services.serviceAi.subSharedAiUtils import (
|
from modules.services.serviceAi.subSharedAiUtils import (
|
||||||
buildPromptWithPlaceholders,
|
buildPromptWithPlaceholders,
|
||||||
extractTextFromContentParts,
|
extractTextFromContentParts,
|
||||||
|
|
@ -12,6 +13,9 @@ from modules.services.serviceAi.subSharedAiUtils import (
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Rebuild the model to resolve forward references
|
||||||
|
AiCallRequest.model_rebuild()
|
||||||
|
|
||||||
|
|
||||||
# Loop instruction texts for different formats
|
# Loop instruction texts for different formats
|
||||||
LoopInstructionTexts = {
|
LoopInstructionTexts = {
|
||||||
|
|
@ -123,13 +127,23 @@ class SubCoreAi:
|
||||||
)
|
)
|
||||||
|
|
||||||
# Write the ACTUAL prompt sent to AI (including continuation context)
|
# Write the ACTUAL prompt sent to AI (including continuation context)
|
||||||
self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt_iteration_{iteration}")
|
if iteration == 1:
|
||||||
|
# First iteration - use the historic naming pattern
|
||||||
|
self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt")
|
||||||
|
else:
|
||||||
|
# Subsequent iterations - include iteration number
|
||||||
|
self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt_iteration_{iteration}")
|
||||||
|
|
||||||
response = await self.aiObjects.call(request)
|
response = await self.aiObjects.call(request)
|
||||||
result = response.content
|
result = response.content
|
||||||
|
|
||||||
# Write raw AI response to debug file
|
# Write raw AI response to debug file
|
||||||
self.services.utils.writeDebugFile(result, f"{debugPrefix}_response_iteration_{iteration}")
|
if iteration == 1:
|
||||||
|
# First iteration - use the historic naming pattern
|
||||||
|
self.services.utils.writeDebugFile(result, f"{debugPrefix}_response")
|
||||||
|
else:
|
||||||
|
# Subsequent iterations - include iteration number
|
||||||
|
self.services.utils.writeDebugFile(result, f"{debugPrefix}_response_iteration_{iteration}")
|
||||||
|
|
||||||
# Emit stats for this iteration
|
# Emit stats for this iteration
|
||||||
self.services.workflow.storeWorkflowStat(
|
self.services.workflow.storeWorkflowStat(
|
||||||
|
|
@ -436,7 +450,7 @@ CRITICAL REQUIREMENTS:
|
||||||
}
|
}
|
||||||
|
|
||||||
# Log AI response for debugging
|
# Log AI response for debugging
|
||||||
self.services.utils.writeDebugFile(str(result), "documentGenerationResponse", documents)
|
self.services.utils.writeDebugFile(str(result), "document_generation_response", documents)
|
||||||
return result
|
return result
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
@ -483,8 +497,16 @@ CRITICAL REQUIREMENTS:
|
||||||
|
|
||||||
self.services.utils.debugLogToFile(f"Calling aiObjects.callImage with operationType: {options.operationType}", "AI_SERVICE")
|
self.services.utils.debugLogToFile(f"Calling aiObjects.callImage with operationType: {options.operationType}", "AI_SERVICE")
|
||||||
logger.info(f"Calling aiObjects.callImage with operationType: {options.operationType}")
|
logger.info(f"Calling aiObjects.callImage with operationType: {options.operationType}")
|
||||||
|
|
||||||
|
# Write image analysis prompt to debug file
|
||||||
|
self.services.utils.writeDebugFile(prompt, "image_analysis_prompt")
|
||||||
|
|
||||||
response = await self.aiObjects.callImage(prompt, imageData, mimeType, options)
|
response = await self.aiObjects.callImage(prompt, imageData, mimeType, options)
|
||||||
|
|
||||||
|
# Write image analysis response to debug file
|
||||||
|
result = response.content if hasattr(response, 'content') else str(response)
|
||||||
|
self.services.utils.writeDebugFile(result, "image_analysis_response")
|
||||||
|
|
||||||
# Emit stats for image analysis
|
# Emit stats for image analysis
|
||||||
self.services.workflow.storeWorkflowStat(
|
self.services.workflow.storeWorkflowStat(
|
||||||
self.services.currentWorkflow,
|
self.services.currentWorkflow,
|
||||||
|
|
|
||||||
|
|
@ -348,9 +348,15 @@ class SubDocumentGeneration:
|
||||||
options=requestOptions
|
options=requestOptions
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Write document generation prompt to debug file
|
||||||
|
self.services.utils.writeDebugFile(generationPrompt, "document_generation_enhancement_prompt")
|
||||||
|
|
||||||
# Call AI to enhance the content
|
# Call AI to enhance the content
|
||||||
response = await self.aiObjects.call(request)
|
response = await self.aiObjects.call(request)
|
||||||
|
|
||||||
|
# Write document generation response to debug file
|
||||||
|
self.services.utils.writeDebugFile(response.content or '', "document_generation_enhancement_response")
|
||||||
|
|
||||||
if response and response.content:
|
if response and response.content:
|
||||||
# Parse the AI response as JSON
|
# Parse the AI response as JSON
|
||||||
try:
|
try:
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,7 @@ import time
|
||||||
from typing import Dict, Any, List, Optional, Tuple, Union
|
from typing import Dict, Any, List, Optional, Tuple, Union
|
||||||
from modules.datamodels.datamodelChat import ChatDocument
|
from modules.datamodels.datamodelChat import ChatDocument
|
||||||
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum
|
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum
|
||||||
from modules.datamodels.datamodelExtraction import ChunkResult, ContentExtracted
|
from modules.datamodels.datamodelExtraction import ChunkResult, ContentExtracted, PartResult
|
||||||
from modules.services.serviceExtraction.mainServiceExtraction import ExtractionService
|
from modules.services.serviceExtraction.mainServiceExtraction import ExtractionService
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
@ -99,7 +99,7 @@ class SubDocumentProcessing:
|
||||||
mergedContent = self._mergePartResults(partResults, options)
|
mergedContent = self._mergePartResults(partResults, options)
|
||||||
|
|
||||||
# Save merged extraction content to debug
|
# Save merged extraction content to debug
|
||||||
self.services.utils.writeDebugFile(mergedContent or '', "extractionMergedText")
|
self.services.utils.writeDebugFile(mergedContent or '', "extraction_merged_text")
|
||||||
|
|
||||||
return mergedContent
|
return mergedContent
|
||||||
|
|
||||||
|
|
@ -179,7 +179,7 @@ class SubDocumentProcessing:
|
||||||
|
|
||||||
# Save merged JSON extraction content to debug
|
# Save merged JSON extraction content to debug
|
||||||
jsonStr = json.dumps(mergedJsonDocument, ensure_ascii=False, indent=2)
|
jsonStr = json.dumps(mergedJsonDocument, ensure_ascii=False, indent=2)
|
||||||
self.services.utils.writeDebugFile(jsonStr, "extractionMergedJson")
|
self.services.utils.writeDebugFile(jsonStr, "extraction_merged_json")
|
||||||
|
|
||||||
return mergedJsonDocument
|
return mergedJsonDocument
|
||||||
|
|
||||||
|
|
@ -692,7 +692,7 @@ CONTINUATION INSTRUCTIONS:
|
||||||
|
|
||||||
self.services.utils.debugLogToFile(f"Image analysis result for chunk {chunk_index}: length={len(ai_result) if ai_result else 0}, preview={ai_result[:200] if ai_result else 'None'}...", "AI_SERVICE")
|
self.services.utils.debugLogToFile(f"Image analysis result for chunk {chunk_index}: length={len(ai_result) if ai_result else 0}, preview={ai_result[:200] if ai_result else 'None'}...", "AI_SERVICE")
|
||||||
# Save image extraction response to debug file
|
# Save image extraction response to debug file
|
||||||
self.services.utils.writeDebugFile(ai_result or 'No response', f"extraction_image_chunk_{chunk_index}")
|
self.services.utils.writeDebugFile(ai_result or 'No response', f"extraction_image_chunk_{chunk_index}_response")
|
||||||
|
|
||||||
# Check if result is empty or None
|
# Check if result is empty or None
|
||||||
if not ai_result or not ai_result.strip():
|
if not ai_result or not ai_result.strip():
|
||||||
|
|
@ -794,8 +794,8 @@ CONTINUATION INSTRUCTIONS:
|
||||||
self.services.utils.debugLogToFile(f"EXTRACTION RESPONSE LENGTH: {len(ai_result) if ai_result else 0} characters", "AI_SERVICE")
|
self.services.utils.debugLogToFile(f"EXTRACTION RESPONSE LENGTH: {len(ai_result) if ai_result else 0} characters", "AI_SERVICE")
|
||||||
|
|
||||||
# Save extraction prompt and response to debug
|
# Save extraction prompt and response to debug
|
||||||
self.services.utils.writeDebugFile(augmented_prompt, f"extraction-Chunk{chunk_index}-Prompt")
|
self.services.utils.writeDebugFile(augmented_prompt, f"extraction_chunk_{chunk_index}_prompt")
|
||||||
self.services.utils.writeDebugFile(ai_result or '', f"extraction-Chunk{chunk_index}-Response")
|
self.services.utils.writeDebugFile(ai_result or '', f"extraction_chunk_{chunk_index}_response")
|
||||||
|
|
||||||
# If generating JSON, validate the response
|
# If generating JSON, validate the response
|
||||||
if generate_json:
|
if generate_json:
|
||||||
|
|
@ -889,8 +889,8 @@ CONTINUATION INSTRUCTIONS:
|
||||||
self.services.utils.debugLogToFile(f"EXTRACTION RESPONSE LENGTH: {len(ai_result) if ai_result else 0} characters", "AI_SERVICE")
|
self.services.utils.debugLogToFile(f"EXTRACTION RESPONSE LENGTH: {len(ai_result) if ai_result else 0} characters", "AI_SERVICE")
|
||||||
|
|
||||||
# Save extraction prompt and response to debug
|
# Save extraction prompt and response to debug
|
||||||
self.services.utils.writeDebugFile(augmented_prompt_text, f"extractionChunk{chunk_index}-Prompt")
|
self.services.utils.writeDebugFile(augmented_prompt_text, f"extraction_chunk_{chunk_index}_prompt")
|
||||||
self.services.utils.writeDebugFile(ai_result or '', f"extractionChunk{chunk_index}-Response")
|
self.services.utils.writeDebugFile(ai_result or '', f"extraction_chunk_{chunk_index}_response")
|
||||||
|
|
||||||
# If generating JSON, validate the response
|
# If generating JSON, validate the response
|
||||||
if generate_json:
|
if generate_json:
|
||||||
|
|
|
||||||
|
|
@ -1,12 +1,6 @@
|
||||||
import logging
|
import logging
|
||||||
from typing import Dict, Any, List, Optional, Tuple, Union
|
from typing import Dict, Any, List, Optional, Tuple, Union
|
||||||
from modules.datamodels.datamodelWeb import (
|
from modules.aicore.aicorePluginTavily import WebResearchRequest, WebResearchResult
|
||||||
WebResearchRequest,
|
|
||||||
WebResearchActionResult,
|
|
||||||
WebResearchDocumentData,
|
|
||||||
WebResearchActionDocument,
|
|
||||||
WebSearchResultItem,
|
|
||||||
)
|
|
||||||
from modules.interfaces.interfaceAiObjects import AiObjects
|
from modules.interfaces.interfaceAiObjects import AiObjects
|
||||||
from modules.shared.configuration import APP_CONFIG
|
from modules.shared.configuration import APP_CONFIG
|
||||||
|
|
||||||
|
|
@ -26,7 +20,7 @@ class SubWebResearch:
|
||||||
self.services = services
|
self.services = services
|
||||||
self.aiObjects = aiObjects
|
self.aiObjects = aiObjects
|
||||||
|
|
||||||
async def webResearch(self, request: WebResearchRequest) -> WebResearchActionResult:
|
async def webResearch(self, request: WebResearchRequest) -> WebResearchResult:
|
||||||
"""Perform web research using interface functions."""
|
"""Perform web research using interface functions."""
|
||||||
try:
|
try:
|
||||||
logger.info(f"WEB RESEARCH STARTED")
|
logger.info(f"WEB RESEARCH STARTED")
|
||||||
|
|
@ -81,8 +75,15 @@ class SubWebResearch:
|
||||||
prompt=query_optimizer_prompt,
|
prompt=query_optimizer_prompt,
|
||||||
options=AiCallOptions()
|
options=AiCallOptions()
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Write web research query optimization prompt to debug file
|
||||||
|
self.services.utils.writeDebugFile(query_optimizer_prompt, "web_research_query_optimizer_prompt")
|
||||||
|
|
||||||
ai_response_obj = await self.aiObjects.call(ai_request)
|
ai_response_obj = await self.aiObjects.call(ai_request)
|
||||||
ai_response = ai_response_obj.content
|
ai_response = ai_response_obj.content
|
||||||
|
|
||||||
|
# Write web research query optimization response to debug file
|
||||||
|
self.services.utils.writeDebugFile(ai_response, "web_research_query_optimizer_response")
|
||||||
logger.debug(f"AI query optimizer response: {ai_response}")
|
logger.debug(f"AI query optimizer response: {ai_response}")
|
||||||
|
|
||||||
# Parse AI response to extract search query
|
# Parse AI response to extract search query
|
||||||
|
|
@ -222,7 +223,7 @@ class SubWebResearch:
|
||||||
|
|
||||||
if not search_urls:
|
if not search_urls:
|
||||||
logger.error("No relevant websites found")
|
logger.error("No relevant websites found")
|
||||||
return WebResearchActionResult(success=False, error="No relevant websites found")
|
return WebResearchResult(success=False, error="No relevant websites found")
|
||||||
|
|
||||||
# Now use AI to determine the main URLs based on user's intention
|
# Now use AI to determine the main URLs based on user's intention
|
||||||
logger.info(f"AI selecting main URLs from {len(search_urls)} search results based on user intent")
|
logger.info(f"AI selecting main URLs from {len(search_urls)} search results based on user intent")
|
||||||
|
|
@ -241,8 +242,15 @@ class SubWebResearch:
|
||||||
prompt=ai_prompt,
|
prompt=ai_prompt,
|
||||||
options=AiCallOptions()
|
options=AiCallOptions()
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Write web research URL selection prompt to debug file
|
||||||
|
self.services.utils.writeDebugFile(ai_prompt, "web_research_url_selection_prompt")
|
||||||
|
|
||||||
ai_response_obj = await self.aiObjects.call(ai_request)
|
ai_response_obj = await self.aiObjects.call(ai_request)
|
||||||
ai_response = ai_response_obj.content
|
ai_response = ai_response_obj.content
|
||||||
|
|
||||||
|
# Write web research URL selection response to debug file
|
||||||
|
self.services.utils.writeDebugFile(ai_response, "web_research_url_selection_response")
|
||||||
logger.debug(f"AI response for main URL selection: {ai_response}")
|
logger.debug(f"AI response for main URL selection: {ai_response}")
|
||||||
|
|
||||||
# Parse AI response to extract URLs
|
# Parse AI response to extract URLs
|
||||||
|
|
@ -331,14 +339,14 @@ class SubWebResearch:
|
||||||
|
|
||||||
if not allContent:
|
if not allContent:
|
||||||
logger.error("Could not extract content from any websites")
|
logger.error("Could not extract content from any websites")
|
||||||
return WebResearchActionResult(success=False, error="Could not extract content from any websites")
|
return WebResearchResult(success=False, error="Could not extract content from any websites")
|
||||||
|
|
||||||
logger.info(f"=== WEB RESEARCH COMPLETED ===")
|
logger.info(f"=== WEB RESEARCH COMPLETED ===")
|
||||||
logger.info(f"Successfully crawled {len(allContent)} URLs total")
|
logger.info(f"Successfully crawled {len(allContent)} URLs total")
|
||||||
logger.info(f"Crawl depth: {effective_depth} levels")
|
logger.info(f"Crawl depth: {effective_depth} levels")
|
||||||
|
|
||||||
# Create simple result with raw content
|
# Create simple result with raw content
|
||||||
sources = [WebSearchResultItem(title=url, url=url) for url in selectedWebsites]
|
sources = [{"title": url, "url": url} for url in selectedWebsites]
|
||||||
|
|
||||||
# Get all additional links (all URLs except main ones)
|
# Get all additional links (all URLs except main ones)
|
||||||
additional_links = [url for url in allContent.keys() if url not in selectedWebsites]
|
additional_links = [url for url in allContent.keys() if url not in selectedWebsites]
|
||||||
|
|
@ -348,37 +356,34 @@ class SubWebResearch:
|
||||||
for url, content in allContent.items():
|
for url, content in allContent.items():
|
||||||
combinedContent += f"\n\n=== {url} ===\n{content}\n"
|
combinedContent += f"\n\n=== {url} ===\n{content}\n"
|
||||||
|
|
||||||
documentData = WebResearchDocumentData(
|
# Create simplified document structure
|
||||||
user_prompt=request.user_prompt,
|
document = {
|
||||||
websites_analyzed=len(allContent),
|
"documentName": f"webResearch_{request.user_prompt[:50]}.json",
|
||||||
additional_links_found=len(additional_links),
|
"documentData": {
|
||||||
analysis_result=combinedContent, # Raw content, no analysis
|
"user_prompt": request.user_prompt,
|
||||||
sources=sources,
|
"analysis_result": combinedContent,
|
||||||
additional_links=additional_links,
|
"sources": sources,
|
||||||
individual_content=allContent, # Individual URL -> content mapping
|
"additional_links": additional_links,
|
||||||
debug_info={
|
"metadata": {
|
||||||
"crawl_depth": effective_depth,
|
"websites_analyzed": len(allContent),
|
||||||
"max_configured_depth": max_depth,
|
"additional_links_found": len(additional_links),
|
||||||
"max_links_per_domain": max_links_per_domain,
|
"crawl_depth": effective_depth,
|
||||||
"crawl_timeout_minutes": crawl_timeout_minutes,
|
"max_configured_depth": max_depth,
|
||||||
"total_urls_crawled": len(allContent),
|
"max_links_per_domain": max_links_per_domain,
|
||||||
"main_urls": len(selectedWebsites),
|
"crawl_timeout_minutes": crawl_timeout_minutes,
|
||||||
"additional_urls": len(additional_links)
|
"total_urls_crawled": len(allContent),
|
||||||
}
|
"main_urls": len(selectedWebsites),
|
||||||
)
|
"additional_urls": len(additional_links)
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"mimeType": "application/json"
|
||||||
|
}
|
||||||
|
|
||||||
document = WebResearchActionDocument(
|
return WebResearchResult(
|
||||||
documentName=f"webResearch_{request.user_prompt[:50]}.json",
|
|
||||||
documentData=documentData,
|
|
||||||
mimeType="application/json"
|
|
||||||
)
|
|
||||||
|
|
||||||
return WebResearchActionResult(
|
|
||||||
success=True,
|
success=True,
|
||||||
documents=[document],
|
documents=[document]
|
||||||
resultLabel="webResearch_results"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error in web research: {str(e)}")
|
logger.error(f"Error in web research: {str(e)}")
|
||||||
return WebResearchActionResult(success=False, error=str(e))
|
return WebResearchResult(success=False, error=str(e))
|
||||||
|
|
|
||||||
|
|
@ -337,8 +337,8 @@ class BaseRenderer(ABC):
|
||||||
response = await ai_service.aiObjects.call(request)
|
response = await ai_service.aiObjects.call(request)
|
||||||
|
|
||||||
# Save styling prompt and response to debug
|
# Save styling prompt and response to debug
|
||||||
self.services.utils.writeDebugFile(style_template, "rendererStylingPrompt")
|
self.services.utils.writeDebugFile(style_template, "renderer_styling_prompt")
|
||||||
self.services.utils.writeDebugFile(response.content or '', "rendererStylingResponse")
|
self.services.utils.writeDebugFile(response.content or '', "renderer_styling_response")
|
||||||
|
|
||||||
# Clean and parse JSON
|
# Clean and parse JSON
|
||||||
result = response.content.strip() if response and response.content else ""
|
result = response.content.strip() if response and response.content else ""
|
||||||
|
|
|
||||||
|
|
@ -60,7 +60,7 @@ class RendererImage(BaseRenderer):
|
||||||
image_prompt = await self._create_imageGenerate_prompt(extracted_content, document_title, user_prompt, ai_service)
|
image_prompt = await self._create_imageGenerate_prompt(extracted_content, document_title, user_prompt, ai_service)
|
||||||
|
|
||||||
# Save image generation prompt to debug
|
# Save image generation prompt to debug
|
||||||
ai_service.services.utils.writeDebugFile(image_prompt, "rendererImageGenerationPrompt")
|
ai_service.services.utils.writeDebugFile(image_prompt, "image_generation_prompt")
|
||||||
|
|
||||||
# Generate image using AI
|
# Generate image using AI
|
||||||
image_result = await ai_service.aiObjects.generateImage(
|
image_result = await ai_service.aiObjects.generateImage(
|
||||||
|
|
@ -71,7 +71,7 @@ class RendererImage(BaseRenderer):
|
||||||
)
|
)
|
||||||
|
|
||||||
# Save image generation response to debug
|
# Save image generation response to debug
|
||||||
ai_service.services.utils.writeDebugFile(str(image_result), "rendererImageGenerationResponse")
|
ai_service.services.utils.writeDebugFile(str(image_result), "image_generation_response")
|
||||||
|
|
||||||
# Extract base64 image data from result
|
# Extract base64 image data from result
|
||||||
if image_result and image_result.get("success", False):
|
if image_result and image_result.get("success", False):
|
||||||
|
|
|
||||||
|
|
@ -12,7 +12,7 @@ from modules.workflows.methods.methodBase import MethodBase, action
|
||||||
from modules.datamodels.datamodelChat import ActionResult
|
from modules.datamodels.datamodelChat import ActionResult
|
||||||
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum, ModelCapabilitiesEnum
|
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum, ModelCapabilitiesEnum
|
||||||
from modules.datamodels.datamodelChat import ChatDocument
|
from modules.datamodels.datamodelChat import ChatDocument
|
||||||
from modules.datamodels.datamodelWeb import WebResearchRequest, WebResearchOptions
|
from modules.aicore.aicorePluginTavily import WebResearchRequest
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
@ -263,24 +263,18 @@ class MethodAi(MethodBase):
|
||||||
error="Search query is required"
|
error="Search query is required"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Build WebResearchOptions
|
# Build WebResearchRequest (simplified dataclass)
|
||||||
options = WebResearchOptions(
|
|
||||||
max_pages=max_pages,
|
|
||||||
search_depth=search_depth,
|
|
||||||
extract_depth=extract_depth,
|
|
||||||
pages_search_depth=pages_search_depth,
|
|
||||||
country=country,
|
|
||||||
time_range=time_range,
|
|
||||||
topic=topic,
|
|
||||||
language=language
|
|
||||||
)
|
|
||||||
|
|
||||||
# Build WebResearchRequest
|
|
||||||
request = WebResearchRequest(
|
request = WebResearchRequest(
|
||||||
user_prompt=user_prompt,
|
user_prompt=user_prompt,
|
||||||
urls=urls,
|
urls=urls,
|
||||||
max_results=max_results,
|
max_results=max_results,
|
||||||
options=options
|
max_pages=max_pages,
|
||||||
|
search_depth=search_depth,
|
||||||
|
extract_depth=extract_depth,
|
||||||
|
country=country,
|
||||||
|
time_range=time_range,
|
||||||
|
topic=topic,
|
||||||
|
language=language
|
||||||
)
|
)
|
||||||
|
|
||||||
# Call web research service
|
# Call web research service
|
||||||
|
|
@ -294,7 +288,7 @@ class MethodAi(MethodBase):
|
||||||
if not result.success:
|
if not result.success:
|
||||||
return ActionResult.isFailure(error=result.error)
|
return ActionResult.isFailure(error=result.error)
|
||||||
|
|
||||||
# Convert WebResearchActionResult to ActionResult format
|
# Convert WebResearchResult to ActionResult format
|
||||||
documents = []
|
documents = []
|
||||||
for doc in result.documents:
|
for doc in result.documents:
|
||||||
documents.append({
|
documents.append({
|
||||||
|
|
|
||||||
|
|
@ -14,7 +14,7 @@ sys.path.append(os.path.dirname(__file__))
|
||||||
|
|
||||||
# Import the service initialization
|
# Import the service initialization
|
||||||
from modules.features.chatPlayground.mainChatPlayground import getServices
|
from modules.features.chatPlayground.mainChatPlayground import getServices
|
||||||
from modules.datamodels.datamodelAi import AiCallOptions, OperationType
|
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum
|
||||||
from modules.datamodels.datamodelUam import User
|
from modules.datamodels.datamodelUam import User
|
||||||
|
|
||||||
# The test uses the AI service which handles JSON template internally
|
# The test uses the AI service which handles JSON template internally
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue