diff --git a/modules/aicore/aicorePluginAnthropic.py b/modules/aicore/aicorePluginAnthropic.py index 6091f872..b5d552cd 100644 --- a/modules/aicore/aicorePluginAnthropic.py +++ b/modules/aicore/aicorePluginAnthropic.py @@ -5,7 +5,7 @@ from typing import Dict, Any, List, Union from fastapi import HTTPException from modules.shared.configuration import APP_CONFIG from modules.aicore.aicoreBase import BaseConnectorAi -from modules.datamodels.datamodelAi import AiModel, ModelCapabilitiesEnum, PriorityEnum, ProcessingModeEnum, OperationTypeEnum +from modules.datamodels.datamodelAi import AiModel, ModelCapabilitiesEnum, PriorityEnum, ProcessingModeEnum, OperationTypeEnum, AiModelCall, AiModelResponse # Configure logger logger = logging.getLogger(__name__) @@ -88,28 +88,26 @@ class AiAnthropic(BaseConnectorAi): ] - async def callAiBasic(self, messages: List[Dict[str, Any]], temperature: float = None, maxTokens: int = None) -> Dict[str, Any]: + async def callAiBasic(self, modelCall: AiModelCall) -> AiModelResponse: """ - Calls the Anthropic API with the given messages. + Calls the Anthropic API with the given messages using standardized pattern. Args: - messages: List of messages in OpenAI format (role, content) - temperature: Temperature for response generation (0.0-1.0) - maxTokens: Maximum number of tokens in the response + modelCall: AiModelCall with messages and options Returns: - The response in OpenAI format + AiModelResponse with content and metadata Raises: HTTPException: For errors in API communication """ try: - # Use parameters from configuration if none were overridden - if temperature is None: - temperature = self.config.get("temperature", 0.2) - - # Don't set maxTokens from config - let the model use its full context length - # Our continuation system handles stopping early via prompt engineering + # Extract parameters from modelCall + messages = modelCall.messages + model = modelCall.model + options = modelCall.options + temperature = options.get("temperature", self.config.get("temperature", 0.2)) + maxTokens = model.maxTokens # Transform OpenAI-style messages to Anthropic format: # - Move any 'system' role content to top-level 'system' @@ -205,23 +203,13 @@ class AiAnthropic(BaseConnectorAi): logger.warning(f"Anthropic API returned empty content. 
Full response: {anthropicResponse}") content = "[Anthropic API returned empty response]" - # Return in OpenAI format - return { - "id": anthropicResponse.get("id", ""), - "object": "chat.completion", - "created": anthropicResponse.get("created", 0), - "model": anthropicResponse.get("model", self.modelName), - "choices": [ - { - "message": { - "role": "assistant", - "content": content - }, - "index": 0, - "finish_reason": "stop" - } - ] - } + # Return standardized response + return AiModelResponse( + content=content, + success=True, + modelId=self.modelName, + metadata={"response_id": anthropicResponse.get("id", "")} + ) except Exception as e: logger.error(f"Error calling Anthropic API: {str(e)}") diff --git a/modules/aicore/aicorePluginInternal.py b/modules/aicore/aicorePluginInternal.py index e0473678..b121f595 100644 --- a/modules/aicore/aicorePluginInternal.py +++ b/modules/aicore/aicorePluginInternal.py @@ -1,7 +1,7 @@ import logging from typing import Dict, Any, List, Union from modules.aicore.aicoreBase import BaseConnectorAi -from modules.datamodels.datamodelAi import AiModel, ModelCapabilitiesEnum, PriorityEnum, ProcessingModeEnum, OperationTypeEnum +from modules.datamodels.datamodelAi import AiModel, ModelCapabilitiesEnum, PriorityEnum, ProcessingModeEnum, OperationTypeEnum, AiModelCall, AiModelResponse # Configure logger logger = logging.getLogger(__name__) @@ -76,158 +76,36 @@ class AiInternal(BaseConnectorAi): ) ] - async def extractDocument(self, documentData: Union[str, bytes], extractionType: str = "basic") -> Dict[str, Any]: + async def extractDocument(self, modelCall: AiModelCall) -> AiModelResponse: """ - Extract content from a document. - - Args: - documentData: The document data to extract from - extractionType: Type of extraction (basic, advanced, detailed) - - Returns: - Dictionary with extraction results + No-op - the internal connector exists only so its model can be used for price calculations """ - try: - logger.info(f"Starting document extraction with type: {extractionType}") - - # Simulate document extraction processing - # In a real implementation, this would use actual document processing libraries - - if isinstance(documentData, bytes): - content = documentData.decode('utf-8', errors='ignore') - else: - content = str(documentData) - - # Basic extraction logic - extractedContent = { - "text": content, - "metadata": { - "extraction_type": extractionType, - "content_length": len(content), - "processing_time": 0.1 # Simulated - } - } - - logger.info(f"Document extraction completed successfully") - return extractedContent - - except Exception as e: - logger.error(f"Error during document extraction: {str(e)}") - return { - "error": str(e), - "success": False - } + logger.error("extractDocument should not be called on the internal connector") + return AiModelResponse( + content="", + success=False, + error="Internal connector should not be called directly" + ) - async def generateDocument(self, template: str, data: Dict[str, Any], format: str = "html") -> Dict[str, Any]: + async def generateDocument(self, modelCall: AiModelCall) -> AiModelResponse: """ - Generate a document from a template and data. - - Args: - template: The document template - data: Data to populate the template - format: Output format (html, pdf, docx, etc.) 
- - Returns: - Dictionary with generated document + No-op - the internal connector exists only so its model can be used for price calculations """ - try: - logger.info(f"Starting document generation with format: {format}") - - # Simulate document generation processing - # In a real implementation, this would use actual templating engines - - # Basic template processing - generatedContent = template - for key, value in data.items(): - placeholder = f"{{{key}}}" - generatedContent = generatedContent.replace(placeholder, str(value)) - - result = { - "content": generatedContent, - "format": format, - "metadata": { - "template_length": len(template), - "data_keys": list(data.keys()), - "processing_time": 0.2 # Simulated - } - } - - logger.info(f"Document generation completed successfully") - return result - - except Exception as e: - logger.error(f"Error during document generation: {str(e)}") - return { - "error": str(e), - "success": False - } + logger.error("generateDocument should not be called on the internal connector") + return AiModelResponse( + content="", + success=False, + error="Internal connector should not be called directly" + ) - async def renderDocument(self, content: str, targetFormat: str, options: Dict[str, Any] = None) -> Dict[str, Any]: + async def renderDocument(self, modelCall: AiModelCall) -> AiModelResponse: """ - Render a document to a specific format. - - Args: - content: The content to render - targetFormat: Target format (html, pdf, docx, etc.) - options: Rendering options - - Returns: - Dictionary with rendered document + No-op - the internal connector exists only so its model can be used for price calculations """ - try: - logger.info(f"Starting document rendering to format: {targetFormat}") - - if options is None: - options = {} - - # Simulate document rendering processing - # In a real implementation, this would use actual rendering libraries - - # Basic rendering logic based on target format - if targetFormat.lower() == "html": - renderedContent = f"{content}" - elif targetFormat.lower() == "pdf": - # Simulate PDF rendering - renderedContent = f"PDF_CONTENT_PLACEHOLDER: {content}" - else: - # Default to plain text - renderedContent = content - - result = { - "content": renderedContent, - "format": targetFormat, - "metadata": { - "input_length": len(content), - "output_length": len(renderedContent), - "processing_time": 0.3, # Simulated - "options": options - } - } - - logger.info(f"Document rendering completed successfully") - return result - - except Exception as e: - logger.error(f"Error during document rendering: {str(e)}") - return { - "error": str(e), - "success": False - } + logger.error("renderDocument should not be called on the internal connector") + return AiModelResponse( + content="", + success=False, + error="Internal connector should not be called directly" + ) - async def _testConnection(self) -> bool: - """ - Tests the internal processing capabilities. 
- - Returns: - True if internal processing is working, False otherwise - """ - try: - # Test basic functionality - testContent = "Test document content" - result = await self.extractDocument(testContent) - - return result.get("success", True) and "error" not in result - - except Exception as e: - logger.error(f"Internal connector test failed: {str(e)}") - return False diff --git a/modules/aicore/aicorePluginOpenai.py b/modules/aicore/aicorePluginOpenai.py index 1202d004..849a4600 100644 --- a/modules/aicore/aicorePluginOpenai.py +++ b/modules/aicore/aicorePluginOpenai.py @@ -5,7 +5,7 @@ from typing import Dict, Any, List, Union from fastapi import HTTPException from modules.shared.configuration import APP_CONFIG from modules.aicore.aicoreBase import BaseConnectorAi -from modules.datamodels.datamodelAi import AiModel, ModelCapabilitiesEnum, PriorityEnum, ProcessingModeEnum, OperationTypeEnum +from modules.datamodels.datamodelAi import AiModel, ModelCapabilitiesEnum, PriorityEnum, ProcessingModeEnum, OperationTypeEnum, AiModelCall, AiModelResponse # Configure logger logger = logging.getLogger(__name__) @@ -125,40 +125,34 @@ class AiOpenai(BaseConnectorAi): ) ] - async def callAiBasic(self, messages: List[Dict[str, Any]], temperature: float = None, maxTokens: int = None) -> str: + async def callAiBasic(self, modelCall: AiModelCall) -> AiModelResponse: """ - Calls the OpenAI API with the given messages. + Calls the OpenAI API with the given messages using standardized pattern. Args: - messages: List of messages in OpenAI format (role, content) - temperature: Temperature for response generation (0.0-1.0) - maxTokens: Maximum number of tokens in the response + modelCall: AiModelCall with messages and options Returns: - The response from the OpenAI API + AiModelResponse with content and metadata Raises: HTTPException: For errors in API communication """ try: - # Use parameters from configuration if none were overridden - if temperature is None: - temperature = self.config.get("temperature", 0.2) - - # Don't set maxTokens from config - let the model use its full context length - # Our continuation system handles stopping early via prompt engineering + # Extract parameters from modelCall + messages = modelCall.messages + model = modelCall.model + options = modelCall.options + temperature = options.get("temperature", self.config.get("temperature", 0.2)) + maxTokens = model.maxTokens payload = { "model": self.modelName, "messages": messages, - "temperature": temperature + "temperature": temperature, + "max_tokens": maxTokens } - # Add max_tokens - use provided value or throw error - if maxTokens is None: - raise ValueError("maxTokens must be provided for OpenAI API calls") - payload["max_tokens"] = maxTokens - response = await self.httpClient.post( self.apiUrl, json=payload @@ -186,7 +180,13 @@ class AiOpenai(BaseConnectorAi): responseJson = response.json() content = responseJson["choices"][0]["message"]["content"] - return content + + return AiModelResponse( + content=content, + success=True, + modelId=self.modelName, + metadata={"response_id": responseJson.get("id", "")} + ) except ContextLengthExceededException: # Re-raise context length exceptions without wrapping diff --git a/modules/aicore/aicorePluginPerplexity.py b/modules/aicore/aicorePluginPerplexity.py index f2f80f3d..8ce4e9da 100644 --- a/modules/aicore/aicorePluginPerplexity.py +++ b/modules/aicore/aicorePluginPerplexity.py @@ -5,7 +5,7 @@ from typing import Dict, Any, List, Union, Optional from fastapi import HTTPException from 
modules.shared.configuration import APP_CONFIG from modules.aicore.aicoreBase import BaseConnectorAi -from modules.datamodels.datamodelAi import AiModel, ModelCapabilitiesEnum, PriorityEnum, ProcessingModeEnum, OperationTypeEnum +from modules.datamodels.datamodelAi import AiModel, ModelCapabilitiesEnum, PriorityEnum, ProcessingModeEnum, OperationTypeEnum, AiModelCall, AiModelResponse # Configure logger logger = logging.getLogger(__name__) @@ -141,40 +141,34 @@ class AiPerplexity(BaseConnectorAi): ) ] - async def callAiBasic(self, messages: List[Dict[str, Any]], temperature: float = None, maxTokens: int = None) -> str: + async def callAiBasic(self, modelCall: AiModelCall) -> AiModelResponse: """ - Calls the Perplexity API with the given messages. + Calls the Perplexity API with the given messages using standardized pattern. Args: - messages: List of messages in OpenAI format (role, content) - temperature: Temperature for response generation (0.0-1.0) - maxTokens: Maximum number of tokens in the response + modelCall: AiModelCall with messages and options Returns: - The response from the Perplexity API + AiModelResponse with content and metadata Raises: HTTPException: For errors in API communication """ try: - # Use parameters from configuration if none were overridden - if temperature is None: - temperature = self.config.get("temperature", 0.2) - - # Don't set maxTokens from config - let the model use its full context length - # Our continuation system handles stopping early via prompt engineering + # Extract parameters from modelCall + messages = modelCall.messages + model = modelCall.model + options = modelCall.options + temperature = options.get("temperature", self.config.get("temperature", 0.2)) + maxTokens = model.maxTokens payload = { "model": self.modelName, "messages": messages, - "temperature": temperature + "temperature": temperature, + "max_tokens": maxTokens } - # Add max_tokens - use provided value or throw error - if maxTokens is None: - raise ValueError("maxTokens must be provided for Perplexity API calls") - payload["max_tokens"] = maxTokens - response = await self.httpClient.post( self.apiUrl, json=payload @@ -198,7 +192,13 @@ class AiPerplexity(BaseConnectorAi): responseJson = response.json() content = responseJson["choices"][0]["message"]["content"] - return content + + return AiModelResponse( + content=content, + success=True, + modelId=self.modelName, + metadata={"response_id": responseJson.get("id", "")} + ) except Exception as e: logger.error(f"Error calling Perplexity API: {str(e)}") diff --git a/modules/aicore/aicorePluginTavily.py b/modules/aicore/aicorePluginTavily.py index 73966dca..b6d25f74 100644 --- a/modules/aicore/aicorePluginTavily.py +++ b/modules/aicore/aicorePluginTavily.py @@ -9,21 +9,7 @@ from tavily import AsyncTavilyClient from modules.shared.configuration import APP_CONFIG from modules.shared.timezoneUtils import get_utc_timestamp from modules.aicore.aicoreBase import BaseConnectorAi -from modules.datamodels.datamodelAi import AiModel, ModelCapabilitiesEnum, PriorityEnum, ProcessingModeEnum, OperationTypeEnum -from modules.datamodels.datamodelWeb import ( - WebSearchActionResult, - WebSearchActionDocument, - WebSearchDocumentData, - WebSearchResultItem, - WebCrawlActionResult, - WebCrawlActionDocument, - WebCrawlDocumentData, - WebCrawlResultItem, - WebScrapeActionResult, - WebScrapeActionDocument, - WebSearchDocumentData as WebScrapeDocumentData, - WebScrapeResultItem, -) +from modules.datamodels.datamodelAi import AiModel, ModelCapabilitiesEnum, 
PriorityEnum, ProcessingModeEnum, OperationTypeEnum, AiModelResponse logger = logging.getLogger(__name__) @@ -39,6 +25,32 @@ class WebCrawlResult: url: str content: str +@dataclass +class WebResearchRequest: + """Ultra-simplified web research request""" + user_prompt: str + urls: Optional[List[str]] = None + max_results: int = 5 + max_pages: int = 10 + search_depth: str = "basic" + extract_depth: str = "advanced" + format: str = "markdown" + country: Optional[str] = None + time_range: Optional[str] = None + topic: Optional[str] = None + language: Optional[str] = None + +@dataclass +class WebResearchResult: + """Ultra-simplified web research result - just success/error + documents""" + success: bool = True + error: Optional[str] = None + documents: List[dict] = None # Simple dict instead of ActionDocument + + def __post_init__(self): + if self.documents is None: + self.documents = [] + class ConnectorWeb(BaseConnectorAi): """Tavily web search connector.""" @@ -152,127 +164,167 @@ class ConnectorWeb(BaseConnectorAi): webSearchMaxResults=int(APP_CONFIG.get("Web_Search_MAX_RESULTS", "20")), ) - # Standardized methods returning ActionResults for the interface to consume - async def search(self, request) -> "WebSearchActionResult": + # Standardized method using AiModelCall/AiModelResponse pattern + + async def search(self, modelCall) -> "AiModelResponse": + """Search using standardized AiModelCall/AiModelResponse pattern""" try: + # Extract parameters from modelCall + query = modelCall.messages[0]["content"] if modelCall.messages else "" + options = modelCall.options + raw_results = await self._search( - query=request.query, - max_results=request.max_results, - search_depth=request.search_depth, - time_range=request.time_range, - topic=request.topic, - include_domains=request.include_domains, - exclude_domains=request.exclude_domains, - language=request.language, - include_answer=request.include_answer, - include_raw_content=request.include_raw_content, + query=query, + max_results=options.get("max_results", 5), + search_depth=options.get("search_depth"), + time_range=options.get("time_range"), + topic=options.get("topic"), + include_domains=options.get("include_domains"), + exclude_domains=options.get("exclude_domains"), + language=options.get("language"), + include_answer=options.get("include_answer"), + include_raw_content=options.get("include_raw_content"), ) + + # Convert to JSON string + results_json = { + "query": query, + "results": [ + { + "title": result.title, + "url": result.url, + "content": getattr(result, 'raw_content', None) + } + for result in raw_results + ], + "total_count": len(raw_results) + } + + import json + content = json.dumps(results_json, indent=2) + + return AiModelResponse( + content=content, + success=True, + metadata={ + "total_count": len(raw_results), + "search_depth": options.get("search_depth", "basic") + } + ) + except Exception as e: - return WebSearchActionResult(success=False, error=str(e)) - - result_items = [ - WebSearchResultItem( - title=result.title, - url=result.url, - raw_content=getattr(result, 'raw_content', None) + return AiModelResponse( + content="", + success=False, + error=str(e) ) - for result in raw_results - ] - document_data = WebSearchDocumentData( - query=request.query, - results=result_items, - total_count=len(result_items), - ) - - document = WebSearchActionDocument( - documentName=f"web_search_results_{get_utc_timestamp()}.json", - documentData=document_data, - mimeType="application/json", - ) - - return WebSearchActionResult( - 
success=True, documents=[document], resultLabel="web_search_results" - ) - - async def crawl(self, request) -> "WebCrawlActionResult": + async def crawl(self, modelCall) -> "AiModelResponse": + """Crawl using standardized AiModelCall/AiModelResponse pattern""" try: + # Extract parameters from modelCall + options = modelCall.options + urls = options.get("urls", []) + raw_results = await self._crawl( - [str(u) for u in request.urls], - extract_depth=request.extract_depth, - format=request.format, + urls, + extract_depth=options.get("extract_depth"), + format=options.get("format"), ) + + # Convert to JSON string + results_json = { + "urls": urls, + "results": [ + { + "url": result.url, + "content": result.content + } + for result in raw_results + ], + "total_count": len(raw_results) + } + + import json + content = json.dumps(results_json, indent=2) + + return AiModelResponse( + content=content, + success=True, + metadata={ + "total_count": len(raw_results), + "extract_depth": options.get("extract_depth", "basic") + } + ) + except Exception as e: - return WebCrawlActionResult(success=False, error=str(e)) + return AiModelResponse( + content="", + success=False, + error=str(e) + ) - result_items = [ - WebCrawlResultItem(url=result.url, content=result.content) - for result in raw_results - ] - - document_data = WebCrawlDocumentData( - urls=[str(u) for u in request.urls], - results=result_items, - total_count=len(result_items), - ) - - document = WebCrawlActionDocument( - documentName=f"web_crawl_results_{get_utc_timestamp()}.json", - documentData=document_data, - mimeType="application/json", - ) - - return WebCrawlActionResult( - success=True, documents=[document], resultLabel="web_crawl_results" - ) - - async def scrape(self, request) -> "WebScrapeActionResult": + async def scrape(self, modelCall) -> "AiModelResponse": + """Scrape using standardized AiModelCall/AiModelResponse pattern""" try: + # Extract parameters from modelCall + query = modelCall.messages[0]["content"] if modelCall.messages else "" + options = modelCall.options + search_results = await self._search( - query=request.query, - max_results=request.max_results, - search_depth=request.search_depth, - time_range=request.time_range, - topic=request.topic, - include_domains=request.include_domains, - exclude_domains=request.exclude_domains, - language=request.language, - include_answer=request.include_answer, - include_raw_content=request.include_raw_content, + query=query, + max_results=options.get("max_results", 5), + search_depth=options.get("search_depth"), + time_range=options.get("time_range"), + topic=options.get("topic"), + include_domains=options.get("include_domains"), + exclude_domains=options.get("exclude_domains"), + language=options.get("language"), + include_answer=options.get("include_answer"), + include_raw_content=options.get("include_raw_content"), ) - except Exception as e: - return WebScrapeActionResult(success=False, error=str(e)) - try: urls = [result.url for result in search_results] crawl_results = await self._crawl( urls, - extract_depth=request.extract_depth, - format=request.format, + extract_depth=options.get("extract_depth"), + format=options.get("format"), ) + + # Convert to JSON string + results_json = { + "query": query, + "results": [ + { + "url": result.url, + "content": result.content + } + for result in crawl_results + ], + "total_count": len(crawl_results) + } + + import json + content = json.dumps(results_json, indent=2) + + return AiModelResponse( + content=content, + success=True, + metadata={ 
+ "total_count": len(crawl_results), + "search_depth": options.get("search_depth", "basic"), + "extract_depth": options.get("extract_depth", "basic") + } + ) + except Exception as e: - return WebScrapeActionResult(success=False, error=str(e)) + return AiModelResponse( + content="", + success=False, + error=str(e) + ) - result_items = [ - WebScrapeResultItem(url=result.url, content=result.content) - for result in crawl_results - ] - - document_data = WebScrapeDocumentData( - query=request.query, - results=result_items, - total_count=len(result_items), - ) - - document = WebScrapeActionDocument( - documentName=f"web_scrape_results_{get_utc_timestamp()}.json", - documentData=document_data, - mimeType="application/json", - ) - - return WebScrapeActionResult( - success=True, documents=[document], resultLabel="web_scrape_results" - ) + # Helper Functions async def _search_urls_raw(self, *, diff --git a/modules/datamodels/datamodelAi.py b/modules/datamodels/datamodelAi.py index f154a79c..da5c1228 100644 --- a/modules/datamodels/datamodelAi.py +++ b/modules/datamodels/datamodelAi.py @@ -185,3 +185,31 @@ class EnhancedAiCallOptions(AiCallOptions): description="Separator between chunks in merged output" ) + +class AiModelCall(BaseModel): + """Standardized input for AI model calls.""" + + messages: List[Dict[str, Any]] = Field(description="Messages in OpenAI format (role, content)") + model: Optional[AiModel] = Field(default=None, description="The AI model being called") + options: Dict[str, Any] = Field(default_factory=dict, description="Additional model-specific options") + + class Config: + arbitrary_types_allowed = True + + +class AiModelResponse(BaseModel): + """Standardized output from AI model calls.""" + + content: str = Field(description="The AI response content") + success: bool = Field(default=True, description="Whether the call was successful") + error: Optional[str] = Field(default=None, description="Error message if success=False") + + # Optional metadata that models can include + modelId: Optional[str] = Field(default=None, description="Model identifier used") + processingTime: Optional[float] = Field(default=None, description="Processing time in seconds") + tokensUsed: Optional[Dict[str, int]] = Field(default=None, description="Token usage (input, output, total)") + metadata: Optional[Dict[str, Any]] = Field(default=None, description="Additional model-specific metadata") + + class Config: + arbitrary_types_allowed = True + diff --git a/modules/datamodels/datamodelDocument.py b/modules/datamodels/datamodelDocument.py index 4c37c106..a437b6f1 100644 --- a/modules/datamodels/datamodelDocument.py +++ b/modules/datamodels/datamodelDocument.py @@ -121,10 +121,5 @@ class JsonMergeResult(BaseModel): metadata: Dict[str, Any] = Field(default_factory=dict, description="Merge process metadata") -# Update forward references (compatible with Pydantic v1 and v2) -try: - # Pydantic v2 - ListItem.model_rebuild() -except AttributeError: - # Pydantic v1 - ListItem.update_forward_refs() +# Update forward references +ListItem.model_rebuild() diff --git a/modules/datamodels/datamodelWeb.py b/modules/datamodels/datamodelWeb.py deleted file mode 100644 index bc1e03e3..00000000 --- a/modules/datamodels/datamodelWeb.py +++ /dev/null @@ -1,142 +0,0 @@ -"""Web-related modules""" -from pydantic import BaseModel, Field, HttpUrl -from typing import List, Optional, Literal, Dict, Any -from modules.shared.configuration import APP_CONFIG -from modules.datamodels.datamodelChat import ActionDocument, ActionResult - - 
-WEB_SEARCH_MAX_QUERY_LENGTH: int = int(APP_CONFIG.get("Web_Search_MAX_QUERY_LENGTH", "400")) -WEB_SEARCH_MAX_RESULTS: int = int(APP_CONFIG.get("Web_Search_MAX_RESULTS", "20")) -WEB_SEARCH_MIN_RESULTS: int = int(APP_CONFIG.get("Web_Search_MIN_RESULTS", "1")) - - -class WebResearchOptions(BaseModel): - """Advanced options for web research workflow""" - max_pages: int = Field(default=10, ge=1, le=50, description="Maximum pages to crawl") - search_depth: Literal["basic", "advanced"] = Field(default="basic", description="Tavily search depth") - extract_depth: Literal["basic", "advanced"] = Field(default="advanced", description="Tavily extract depth") - format: Literal["text", "markdown"] = Field(default="markdown", description="Content format") - return_report: bool = Field(default=True, description="Return formatted report or raw data") - pages_search_depth: int = Field(default=1, ge=1, le=5, description="How deep to crawl: 1=main pages only, 2=main+sub-pages, 3=main+sub+sub-sub, etc.") - country: Optional[str] = Field(default=None, description="Country code for search bias") - time_range: Optional[Literal["d", "w", "m", "y"]] = Field(default=None, description="Time range for search") - topic: Optional[Literal["general", "news", "academic"]] = Field(default=None, description="Search topic") - language: Optional[str] = Field(default=None, description="Language code") - include_answer: Optional[bool] = Field(default=None, description="Include AI answer") - include_raw_content: Optional[bool] = Field(default=None, description="Include raw content") - -class WebResearchRequest(BaseModel): - """Main web research request""" - user_prompt: str = Field(min_length=1, max_length=WEB_SEARCH_MAX_QUERY_LENGTH, description="User's research question or prompt") - urls: Optional[List[str]] = Field(default=None, description="Specific URLs to crawl (optional)") - max_results: int = Field(default=5, ge=1, le=WEB_SEARCH_MAX_RESULTS, description="Max search results") - options: WebResearchOptions = Field(default_factory=WebResearchOptions, description="Advanced options") - -class WebSearchResultItem(BaseModel): - """Individual search result""" - title: str - url: HttpUrl - raw_content: Optional[str] = Field(default=None, description="Raw HTML content") - -class WebCrawlResultItem(BaseModel): - """Individual crawl result""" - url: HttpUrl - content: str - -class WebResearchDocumentData(BaseModel): - """Complete web research results""" - user_prompt: str - websites_analyzed: int - additional_links_found: int - analysis_result: str - sources: List[WebSearchResultItem] - additional_links: List[str] - individual_content: Optional[Dict[str, str]] = None # URL -> content mapping - debug_info: Optional[Dict[str, Any]] = None - -class WebResearchActionDocument(ActionDocument): - documentData: WebResearchDocumentData - -class WebResearchActionResult(ActionResult): - documents: List[WebResearchActionDocument] = Field(default_factory=list) - -# Legacy models for connector compatibility - -class WebSearchDocumentData(BaseModel): - """Search results document data""" - query: str - results: List[WebSearchResultItem] - total_count: int - -class WebSearchActionDocument(ActionDocument): - documentData: WebSearchDocumentData - -class WebSearchActionResult(ActionResult): - documents: List[WebSearchActionDocument] = Field(default_factory=list) - -class WebCrawlDocumentData(BaseModel): - """Crawl results document data""" - urls: List[HttpUrl] - results: List[WebCrawlResultItem] - total_count: int - -class 
WebCrawlActionDocument(ActionDocument): - documentData: WebCrawlDocumentData - -class WebCrawlActionResult(ActionResult): - documents: List[WebCrawlActionDocument] = Field(default_factory=list) - -class WebScrapeDocumentData(BaseModel): - """Scrape results document data""" - query: str - results: List[WebSearchResultItem] - total_count: int - -class WebScrapeActionDocument(ActionDocument): - documentData: WebScrapeDocumentData - -class WebScrapeActionResult(ActionResult): - documents: List[WebScrapeActionDocument] = Field(default_factory=list) - -class WebSearchRequest(BaseModel): - """Search request for Tavily""" - query: str - max_results: int = 5 - search_depth: Optional[Literal["basic", "advanced"]] = None - time_range: Optional[Literal["d", "w", "m", "y"]] = None - topic: Optional[Literal["general", "news", "academic"]] = None - include_domains: Optional[List[str]] = None - exclude_domains: Optional[List[str]] = None - language: Optional[str] = None - include_answer: Optional[bool] = None - include_raw_content: Optional[bool] = None - auto_parameters: Optional[bool] = None - country: Optional[str] = None - -class WebCrawlRequest(BaseModel): - """Crawl request for Tavily""" - urls: List[HttpUrl] - extract_depth: Optional[Literal["basic", "advanced"]] = None - format: Optional[Literal["text", "markdown"]] = None - -class WebScrapeRequest(BaseModel): - """Scrape request for Tavily""" - query: str - max_results: int = 5 - search_depth: Optional[Literal["basic", "advanced"]] = None - time_range: Optional[Literal["d", "w", "m", "y"]] = None - topic: Optional[Literal["general", "news", "academic"]] = None - include_domains: Optional[List[str]] = None - exclude_domains: Optional[List[str]] = None - language: Optional[str] = None - include_answer: Optional[bool] = None - include_raw_content: Optional[bool] = None - auto_parameters: Optional[bool] = None - country: Optional[str] = None - extract_depth: Optional[Literal["basic", "advanced"]] = None - format: Optional[Literal["text", "markdown"]] = None - -class WebScrapeResultItem(BaseModel): - """Individual scrape result""" - url: HttpUrl - content: str diff --git a/modules/interfaces/interfaceAiObjects.py b/modules/interfaces/interfaceAiObjects.py index 918944a7..bc082ed5 100644 --- a/modules/interfaces/interfaceAiObjects.py +++ b/modules/interfaces/interfaceAiObjects.py @@ -13,17 +13,10 @@ from modules.datamodels.datamodelAi import ( AiCallOptions, AiCallRequest, AiCallResponse, - OperationTypeEnum, + OperationTypeEnum, + AiModelCall, + AiModelResponse, ) -from modules.datamodels.datamodelWeb import ( - WebResearchRequest, - WebResearchActionResult, - WebSearchResultItem, - WebCrawlResultItem, - WebSearchRequest, - WebCrawlRequest, -) -from modules.datamodels.datamodelChat import ActionDocument # Dynamic model registry - models are now loaded from connectors via aicore system @@ -94,8 +87,7 @@ class AiObjects: context = request.context or "" options = request.options - # Calculate input bytes - inputBytes = len((prompt + context).encode("utf-8")) + # Input bytes will be calculated inside _callWithModel # Compress optionally (prompt/context) - simple truncation fallback kept here def _maybeTruncate(text: str, limit: int) -> str: @@ -109,11 +101,7 @@ class AiObjects: if options.compressContext and len(context.encode("utf-8")) > 70000: context = _maybeTruncate(context, 70000) - # Derive generation parameters - temperature = getattr(options, "temperature", None) - if temperature is None: - temperature = 0.2 - maxTokens = getattr(options, 
"maxTokens", None) + # Generation parameters are handled inside _callWithModel # Get failover models for this operation type availableModels = modelRegistry.getAvailableModels() @@ -127,7 +115,7 @@ class AiObjects: modelName="error", priceUsd=0.0, processingTime=0.0, - bytesSent=inputBytes, + bytesSent=0, bytesReceived=0, errorCount=1 ) @@ -139,7 +127,7 @@ class AiObjects: logger.info(f"Attempting AI call with model: {model.name} (attempt {attempt + 1}/{len(failoverModelList)})") # Call the model - response = await self._callWithModel(model, prompt, context, temperature, maxTokens, inputBytes) + response = await self._callWithModel(model, prompt, context) logger.info(f"✅ AI call successful with model: {model.name}") return response @@ -165,7 +153,7 @@ class AiObjects: modelName="error", priceUsd=0.0, processingTime=0.0, - bytesSent=inputBytes, + bytesSent=0, bytesReceived=0, errorCount=1 ) @@ -216,7 +204,7 @@ class AiObjects: if partSize <= modelContextBytes: # Part fits - call AI directly - response = await self._callWithModel(model, prompt, contentPart.data, 0.2, None, partSize) + response = await self._callWithModel(model, prompt, contentPart.data) logger.info(f"✅ Content part processed successfully with model: {model.name}") return response else: @@ -228,7 +216,7 @@ class AiObjects: # Process each chunk chunkResults = [] for chunk in chunks: - chunkResponse = await self._callWithModel(model, prompt, chunk['data'], 0.2, None, chunk['size']) + chunkResponse = await self._callWithModel(model, prompt, chunk['data']) chunkResults.append(chunkResponse) # Merge chunk results @@ -405,8 +393,11 @@ class AiObjects: errorCount=1 ) - async def _callWithModel(self, model: AiModel, prompt: str, context: str, temperature: float, maxTokens: int, inputBytes: int) -> AiCallResponse: + async def _callWithModel(self, model: AiModel, prompt: str, context: str) -> AiCallResponse: """Call a specific model and return the response.""" + # Calculate input bytes from prompt and context + inputBytes = len((prompt + context).encode('utf-8')) + # Replace placeholder in prompt for this specific model contextLength = model.contextLength if contextLength > 0: @@ -434,27 +425,21 @@ class AiObjects: if not connector: raise ValueError(f"No connector found for model {model.name}") - # Call the model's function directly + # Call the model's function directly - completely generic if model.functionCall: - # Use the model's function call directly - if model.name.startswith("perplexity_callAiWithWebSearch"): - query = modelPrompt - if context: - query = f"Context: {context}\n\nQuery: {modelPrompt}" - content = await model.functionCall(query, temperature=temperature, maxTokens=maxTokens) - elif model.name.startswith("perplexity_researchTopic"): - content = await model.functionCall(modelPrompt) - elif model.name.startswith("perplexity_answerQuestion"): - content = await model.functionCall(modelPrompt, context) - elif model.name.startswith("perplexity_getCurrentNews"): - content = await model.functionCall(modelPrompt) - else: - # Standard callAiBasic - if model.connectorType == "anthropic": - response = await model.functionCall(messages, temperature=temperature, maxTokens=maxTokens) - content = response["choices"][0]["message"]["content"] - else: - content = await model.functionCall(messages, temperature=temperature, maxTokens=maxTokens) + # Create standardized call object + modelCall = AiModelCall( + messages=messages, + model=model + ) + + # Call the model with standardized interface + modelResponse = await 
model.functionCall(modelCall) + + # Extract content from standardized response + if not modelResponse.success: + raise ValueError(f"Model call failed: {modelResponse.error}") + content = modelResponse.content else: raise ValueError(f"Model {model.name} has no function call defined") @@ -482,9 +467,6 @@ class AiObjects: if options is None: options = AiCallOptions(operationType=OperationTypeEnum.IMAGE_ANALYSE) - # Calculate input bytes (prompt + image data) - inputBytes = len(prompt.encode("utf-8")) + len(imageData) if isinstance(imageData, bytes) else len(prompt.encode("utf-8")) + len(str(imageData).encode("utf-8")) - # Get fallback models for image analysis availableModels = modelRegistry.getAvailableModels() failoverModelList = modelSelector.getFailoverModelList(prompt, "", options, availableModels) @@ -497,7 +479,7 @@ class AiObjects: modelName="error", priceUsd=0.0, processingTime=0.0, - bytesSent=inputBytes, + bytesSent=0, bytesReceived=0, errorCount=1 ) @@ -509,7 +491,7 @@ class AiObjects: logger.info(f"Attempting image analysis with model: {model.name} (attempt {attempt + 1}/{len(failoverModelList)})") # Call the model - response = await self._callImageWithModel(model, prompt, imageData, mimeType, inputBytes) + response = await self._callImageWithModel(model, prompt, imageData, mimeType) logger.info(f"✅ Image analysis successful with model: {model.name}") return response @@ -535,13 +517,23 @@ class AiObjects: modelName="error", priceUsd=0.0, processingTime=0.0, - bytesSent=inputBytes, + bytesSent=0, bytesReceived=0, errorCount=1 ) - async def _callImageWithModel(self, model: AiModel, prompt: str, imageData: Union[str, bytes], mimeType: str, inputBytes: int) -> AiCallResponse: + async def _callImageWithModel(self, model: AiModel, prompt: str, imageData: Union[str, bytes], mimeType: str) -> AiCallResponse: """Call a specific model for image analysis and return the response.""" + # Calculate input bytes from prompt and image data + promptBytes = len(prompt.encode('utf-8')) + if isinstance(imageData, str): + # Base64 encoded string + imageBytes = len(imageData.encode('utf-8')) + else: + # Raw bytes + imageBytes = len(imageData) + inputBytes = promptBytes + imageBytes + # Start timing startTime = time.time() @@ -633,67 +625,52 @@ class AiObjects: errorCount=1 ) - # Web functionality methods - Simple interface to Tavily connector - async def searchWebsites(self, query: str, maxResults: int = 5, **kwargs) -> List[WebSearchResultItem]: - """Search for websites using Tavily.""" - request = WebSearchRequest( - query=query, - max_results=maxResults, - **kwargs + # Web functionality methods - Now use standardized AiModelCall/AiModelResponse pattern + async def searchWebsites(self, query: str, maxResults: int = 5, **kwargs) -> str: + """Search for websites using Tavily with standardized pattern.""" + from modules.datamodels.datamodelAi import AiModelCall + + modelCall = AiModelCall( + messages=[{"role": "user", "content": query}], + options={"max_results": maxResults, **kwargs} ) + # Get Tavily connector from registry tavilyConnector = modelRegistry.getConnectorForModel("tavily_search") if not tavilyConnector: raise ValueError("Tavily connector not available") - result = await tavilyConnector.search(request) - if result.success and result.documents: - return result.documents[0].documentData.results - return [] + result = await tavilyConnector.search(modelCall) + return result.content if result.success else "" - async def crawlWebsites(self, urls: List[str], extractDepth: str = "advanced", format: 
str = "markdown") -> List[WebCrawlResultItem]: - """Crawl websites using Tavily.""" - from pydantic import HttpUrl - from urllib.parse import urlparse + async def crawlWebsites(self, urls: List[str], extractDepth: str = "advanced", format: str = "markdown") -> str: + """Crawl websites using Tavily with standardized pattern.""" + from modules.datamodels.datamodelAi import AiModelCall - # Safely create HttpUrl objects with proper scheme handling - httpUrls = [] - for url in urls: - try: - # Ensure URL has a scheme - parsed = urlparse(url) - if not parsed.scheme: - url = f"https://{url}" - - # Use HttpUrl with scheme parameter (this works for all URLs) - httpUrls.append(HttpUrl(url, scheme="https")) - - except Exception as e: - logger.warning(f"Skipping invalid URL {url}: {e}") - continue - - if not httpUrls: - return [] - - request = WebCrawlRequest( - urls=httpUrls, - extract_depth=extractDepth, - format=format + modelCall = AiModelCall( + messages=[{"role": "user", "content": "crawl websites"}], + options={"urls": urls, "extract_depth": extractDepth, "format": format} ) + # Get Tavily connector from registry tavilyConnector = modelRegistry.getConnectorForModel("tavily_crawl") if not tavilyConnector: raise ValueError("Tavily connector not available") - result = await tavilyConnector.crawl(request) - if result.success and result.documents: - return result.documents[0].documentData.results - return [] + result = await tavilyConnector.crawl(modelCall) + return result.content if result.success else "" async def extractContent(self, urls: List[str], extractDepth: str = "advanced", format: str = "markdown") -> Dict[str, str]: """Extract content from URLs and return as dictionary.""" + import json crawlResults = await self.crawlWebsites(urls, extractDepth, format) - return {str(result.url): result.content for result in crawlResults} + + # Parse JSON response and extract content + try: + data = json.loads(crawlResults) + return {result["url"]: result["content"] for result in data.get("results", [])} + except (json.JSONDecodeError, KeyError): + return {} # Core Web Tools - Clean interface for web operations async def readPage(self, url: str, extractDepth: str = "advanced") -> Optional[str]: diff --git a/modules/services/serviceAi/mainServiceAi.py b/modules/services/serviceAi/mainServiceAi.py index 7d77d9e1..2f1c7443 100644 --- a/modules/services/serviceAi/mainServiceAi.py +++ b/modules/services/serviceAi/mainServiceAi.py @@ -4,13 +4,7 @@ from modules.datamodels.datamodelChat import PromptPlaceholder, ChatDocument from modules.services.serviceExtraction.mainServiceExtraction import ExtractionService from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum from modules.datamodels.datamodelExtraction import ChunkResult, ContentExtracted -from modules.datamodels.datamodelWeb import ( - WebResearchRequest, - WebResearchActionResult, - WebResearchDocumentData, - WebResearchActionDocument, - WebSearchResultItem, -) +from modules.aicore.aicorePluginTavily import WebResearchRequest, WebResearchResult from modules.interfaces.interfaceAiObjects import AiObjects from modules.services.serviceAi.subCoreAi import SubCoreAi from modules.services.serviceAi.subDocumentProcessing import SubDocumentProcessing @@ -136,7 +130,7 @@ class AiService: return await self.coreAi.generateImage(prompt, size, quality, style, options) # Web Research - async def webResearch(self, request: WebResearchRequest) -> WebResearchActionResult: + async def webResearch(self, request: 
WebResearchRequest) -> WebResearchResult: """Perform web research using interface functions.""" await self._ensureAiObjectsInitialized() return await self.webResearchService.webResearch(request) diff --git a/modules/services/serviceAi/subCoreAi.py b/modules/services/serviceAi/subCoreAi.py index 33509eb5..6ab4a260 100644 --- a/modules/services/serviceAi/subCoreAi.py +++ b/modules/services/serviceAi/subCoreAi.py @@ -3,6 +3,7 @@ import logging from typing import Dict, Any, List, Optional, Tuple, Union from modules.datamodels.datamodelChat import PromptPlaceholder, ChatDocument from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum +from modules.datamodels.datamodelExtraction import ContentPart from modules.services.serviceAi.subSharedAiUtils import ( buildPromptWithPlaceholders, extractTextFromContentParts, @@ -12,6 +13,9 @@ from modules.services.serviceAi.subSharedAiUtils import ( logger = logging.getLogger(__name__) +# Rebuild the model to resolve forward references +AiCallRequest.model_rebuild() + # Loop instruction texts for different formats LoopInstructionTexts = { @@ -123,13 +127,23 @@ class SubCoreAi: ) # Write the ACTUAL prompt sent to AI (including continuation context) - self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt_iteration_{iteration}") + if iteration == 1: + # First iteration - use the historic naming pattern + self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt") + else: + # Subsequent iterations - include iteration number + self.services.utils.writeDebugFile(iterationPrompt, f"{debugPrefix}_prompt_iteration_{iteration}") response = await self.aiObjects.call(request) result = response.content # Write raw AI response to debug file - self.services.utils.writeDebugFile(result, f"{debugPrefix}_response_iteration_{iteration}") + if iteration == 1: + # First iteration - use the historic naming pattern + self.services.utils.writeDebugFile(result, f"{debugPrefix}_response") + else: + # Subsequent iterations - include iteration number + self.services.utils.writeDebugFile(result, f"{debugPrefix}_response_iteration_{iteration}") # Emit stats for this iteration self.services.workflow.storeWorkflowStat( @@ -436,7 +450,7 @@ CRITICAL REQUIREMENTS: } # Log AI response for debugging - self.services.utils.writeDebugFile(str(result), "documentGenerationResponse", documents) + self.services.utils.writeDebugFile(str(result), "document_generation_response", documents) return result except Exception as e: @@ -483,8 +497,16 @@ CRITICAL REQUIREMENTS: self.services.utils.debugLogToFile(f"Calling aiObjects.callImage with operationType: {options.operationType}", "AI_SERVICE") logger.info(f"Calling aiObjects.callImage with operationType: {options.operationType}") + + # Write image analysis prompt to debug file + self.services.utils.writeDebugFile(prompt, "image_analysis_prompt") + response = await self.aiObjects.callImage(prompt, imageData, mimeType, options) + # Write image analysis response to debug file + result = response.content if hasattr(response, 'content') else str(response) + self.services.utils.writeDebugFile(result, "image_analysis_response") + # Emit stats for image analysis self.services.workflow.storeWorkflowStat( self.services.currentWorkflow, diff --git a/modules/services/serviceAi/subDocumentGeneration.py b/modules/services/serviceAi/subDocumentGeneration.py index 6ec7b932..3db28da2 100644 --- a/modules/services/serviceAi/subDocumentGeneration.py +++ 
b/modules/services/serviceAi/subDocumentGeneration.py @@ -348,9 +348,15 @@ class SubDocumentGeneration: options=requestOptions ) + # Write document generation prompt to debug file + self.services.utils.writeDebugFile(generationPrompt, "document_generation_enhancement_prompt") + # Call AI to enhance the content response = await self.aiObjects.call(request) + # Write document generation response to debug file + self.services.utils.writeDebugFile(response.content or '', "document_generation_enhancement_response") + if response and response.content: # Parse the AI response as JSON try: diff --git a/modules/services/serviceAi/subDocumentProcessing.py b/modules/services/serviceAi/subDocumentProcessing.py index 72ac2950..f77fa0be 100644 --- a/modules/services/serviceAi/subDocumentProcessing.py +++ b/modules/services/serviceAi/subDocumentProcessing.py @@ -5,7 +5,7 @@ import time from typing import Dict, Any, List, Optional, Tuple, Union from modules.datamodels.datamodelChat import ChatDocument from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum -from modules.datamodels.datamodelExtraction import ChunkResult, ContentExtracted +from modules.datamodels.datamodelExtraction import ChunkResult, ContentExtracted, PartResult from modules.services.serviceExtraction.mainServiceExtraction import ExtractionService logger = logging.getLogger(__name__) @@ -99,7 +99,7 @@ class SubDocumentProcessing: mergedContent = self._mergePartResults(partResults, options) # Save merged extraction content to debug - self.services.utils.writeDebugFile(mergedContent or '', "extractionMergedText") + self.services.utils.writeDebugFile(mergedContent or '', "extraction_merged_text") return mergedContent @@ -179,7 +179,7 @@ class SubDocumentProcessing: # Save merged JSON extraction content to debug jsonStr = json.dumps(mergedJsonDocument, ensure_ascii=False, indent=2) - self.services.utils.writeDebugFile(jsonStr, "extractionMergedJson") + self.services.utils.writeDebugFile(jsonStr, "extraction_merged_json") return mergedJsonDocument @@ -692,7 +692,7 @@ CONTINUATION INSTRUCTIONS: self.services.utils.debugLogToFile(f"Image analysis result for chunk {chunk_index}: length={len(ai_result) if ai_result else 0}, preview={ai_result[:200] if ai_result else 'None'}...", "AI_SERVICE") # Save image extraction response to debug file - self.services.utils.writeDebugFile(ai_result or 'No response', f"extraction_image_chunk_{chunk_index}") + self.services.utils.writeDebugFile(ai_result or 'No response', f"extraction_image_chunk_{chunk_index}_response") # Check if result is empty or None if not ai_result or not ai_result.strip(): @@ -794,8 +794,8 @@ CONTINUATION INSTRUCTIONS: self.services.utils.debugLogToFile(f"EXTRACTION RESPONSE LENGTH: {len(ai_result) if ai_result else 0} characters", "AI_SERVICE") # Save extraction prompt and response to debug - self.services.utils.writeDebugFile(augmented_prompt, f"extraction-Chunk{chunk_index}-Prompt") - self.services.utils.writeDebugFile(ai_result or '', f"extraction-Chunk{chunk_index}-Response") + self.services.utils.writeDebugFile(augmented_prompt, f"extraction_chunk_{chunk_index}_prompt") + self.services.utils.writeDebugFile(ai_result or '', f"extraction_chunk_{chunk_index}_response") # If generating JSON, validate the response if generate_json: @@ -889,8 +889,8 @@ CONTINUATION INSTRUCTIONS: self.services.utils.debugLogToFile(f"EXTRACTION RESPONSE LENGTH: {len(ai_result) if ai_result else 0} characters", "AI_SERVICE") # Save extraction prompt and 
response to debug - self.services.utils.writeDebugFile(augmented_prompt_text, f"extractionChunk{chunk_index}-Prompt") - self.services.utils.writeDebugFile(ai_result or '', f"extractionChunk{chunk_index}-Response") + self.services.utils.writeDebugFile(augmented_prompt_text, f"extraction_chunk_{chunk_index}_prompt") + self.services.utils.writeDebugFile(ai_result or '', f"extraction_chunk_{chunk_index}_response") # If generating JSON, validate the response if generate_json: diff --git a/modules/services/serviceAi/subWebResearch.py b/modules/services/serviceAi/subWebResearch.py index 000d828a..deb9f9b2 100644 --- a/modules/services/serviceAi/subWebResearch.py +++ b/modules/services/serviceAi/subWebResearch.py @@ -1,12 +1,6 @@ import logging from typing import Dict, Any, List, Optional, Tuple, Union -from modules.datamodels.datamodelWeb import ( - WebResearchRequest, - WebResearchActionResult, - WebResearchDocumentData, - WebResearchActionDocument, - WebSearchResultItem, -) +from modules.aicore.aicorePluginTavily import WebResearchRequest, WebResearchResult from modules.interfaces.interfaceAiObjects import AiObjects from modules.shared.configuration import APP_CONFIG @@ -26,7 +20,7 @@ class SubWebResearch: self.services = services self.aiObjects = aiObjects - async def webResearch(self, request: WebResearchRequest) -> WebResearchActionResult: + async def webResearch(self, request: WebResearchRequest) -> WebResearchResult: """Perform web research using interface functions.""" try: logger.info(f"WEB RESEARCH STARTED") @@ -81,8 +75,15 @@ class SubWebResearch: prompt=query_optimizer_prompt, options=AiCallOptions() ) + + # Write web research query optimization prompt to debug file + self.services.utils.writeDebugFile(query_optimizer_prompt, "web_research_query_optimizer_prompt") + ai_response_obj = await self.aiObjects.call(ai_request) ai_response = ai_response_obj.content + + # Write web research query optimization response to debug file + self.services.utils.writeDebugFile(ai_response, "web_research_query_optimizer_response") logger.debug(f"AI query optimizer response: {ai_response}") # Parse AI response to extract search query @@ -222,7 +223,7 @@ class SubWebResearch: if not search_urls: logger.error("No relevant websites found") - return WebResearchActionResult(success=False, error="No relevant websites found") + return WebResearchResult(success=False, error="No relevant websites found") # Now use AI to determine the main URLs based on user's intention logger.info(f"AI selecting main URLs from {len(search_urls)} search results based on user intent") @@ -241,8 +242,15 @@ class SubWebResearch: prompt=ai_prompt, options=AiCallOptions() ) + + # Write web research URL selection prompt to debug file + self.services.utils.writeDebugFile(ai_prompt, "web_research_url_selection_prompt") + ai_response_obj = await self.aiObjects.call(ai_request) ai_response = ai_response_obj.content + + # Write web research URL selection response to debug file + self.services.utils.writeDebugFile(ai_response, "web_research_url_selection_response") logger.debug(f"AI response for main URL selection: {ai_response}") # Parse AI response to extract URLs @@ -331,14 +339,14 @@ class SubWebResearch: if not allContent: logger.error("Could not extract content from any websites") - return WebResearchActionResult(success=False, error="Could not extract content from any websites") + return WebResearchResult(success=False, error="Could not extract content from any websites") logger.info(f"=== WEB RESEARCH COMPLETED ===") 
logger.info(f"Successfully crawled {len(allContent)} URLs total") logger.info(f"Crawl depth: {effective_depth} levels") # Create simple result with raw content - sources = [WebSearchResultItem(title=url, url=url) for url in selectedWebsites] + sources = [{"title": url, "url": url} for url in selectedWebsites] # Get all additional links (all URLs except main ones) additional_links = [url for url in allContent.keys() if url not in selectedWebsites] @@ -348,37 +356,34 @@ class SubWebResearch: for url, content in allContent.items(): combinedContent += f"\n\n=== {url} ===\n{content}\n" - documentData = WebResearchDocumentData( - user_prompt=request.user_prompt, - websites_analyzed=len(allContent), - additional_links_found=len(additional_links), - analysis_result=combinedContent, # Raw content, no analysis - sources=sources, - additional_links=additional_links, - individual_content=allContent, # Individual URL -> content mapping - debug_info={ - "crawl_depth": effective_depth, - "max_configured_depth": max_depth, - "max_links_per_domain": max_links_per_domain, - "crawl_timeout_minutes": crawl_timeout_minutes, - "total_urls_crawled": len(allContent), - "main_urls": len(selectedWebsites), - "additional_urls": len(additional_links) - } - ) + # Create simplified document structure + document = { + "documentName": f"webResearch_{request.user_prompt[:50]}.json", + "documentData": { + "user_prompt": request.user_prompt, + "analysis_result": combinedContent, + "sources": sources, + "additional_links": additional_links, + "metadata": { + "websites_analyzed": len(allContent), + "additional_links_found": len(additional_links), + "crawl_depth": effective_depth, + "max_configured_depth": max_depth, + "max_links_per_domain": max_links_per_domain, + "crawl_timeout_minutes": crawl_timeout_minutes, + "total_urls_crawled": len(allContent), + "main_urls": len(selectedWebsites), + "additional_urls": len(additional_links) + } + }, + "mimeType": "application/json" + } - document = WebResearchActionDocument( - documentName=f"webResearch_{request.user_prompt[:50]}.json", - documentData=documentData, - mimeType="application/json" - ) - - return WebResearchActionResult( + return WebResearchResult( success=True, - documents=[document], - resultLabel="webResearch_results" + documents=[document] ) except Exception as e: logger.error(f"Error in web research: {str(e)}") - return WebResearchActionResult(success=False, error=str(e)) + return WebResearchResult(success=False, error=str(e)) diff --git a/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py b/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py index 35b950db..6a91b5cf 100644 --- a/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py +++ b/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py @@ -337,8 +337,8 @@ class BaseRenderer(ABC): response = await ai_service.aiObjects.call(request) # Save styling prompt and response to debug - self.services.utils.writeDebugFile(style_template, "rendererStylingPrompt") - self.services.utils.writeDebugFile(response.content or '', "rendererStylingResponse") + self.services.utils.writeDebugFile(style_template, "renderer_styling_prompt") + self.services.utils.writeDebugFile(response.content or '', "renderer_styling_response") # Clean and parse JSON result = response.content.strip() if response and response.content else "" diff --git a/modules/services/serviceGeneration/renderers/rendererImage.py b/modules/services/serviceGeneration/renderers/rendererImage.py index 
6147d42b..71f9272b 100644 --- a/modules/services/serviceGeneration/renderers/rendererImage.py +++ b/modules/services/serviceGeneration/renderers/rendererImage.py @@ -60,7 +60,7 @@ class RendererImage(BaseRenderer): image_prompt = await self._create_imageGenerate_prompt(extracted_content, document_title, user_prompt, ai_service) # Save image generation prompt to debug - ai_service.services.utils.writeDebugFile(image_prompt, "rendererImageGenerationPrompt") + ai_service.services.utils.writeDebugFile(image_prompt, "image_generation_prompt") # Generate image using AI image_result = await ai_service.aiObjects.generateImage( @@ -71,7 +71,7 @@ class RendererImage(BaseRenderer): ) # Save image generation response to debug - ai_service.services.utils.writeDebugFile(str(image_result), "rendererImageGenerationResponse") + ai_service.services.utils.writeDebugFile(str(image_result), "image_generation_response") # Extract base64 image data from result if image_result and image_result.get("success", False): diff --git a/modules/workflows/methods/methodAi.py b/modules/workflows/methods/methodAi.py index e5c4cf71..d2816d92 100644 --- a/modules/workflows/methods/methodAi.py +++ b/modules/workflows/methods/methodAi.py @@ -12,7 +12,7 @@ from modules.workflows.methods.methodBase import MethodBase, action from modules.datamodels.datamodelChat import ActionResult from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum, ModelCapabilitiesEnum from modules.datamodels.datamodelChat import ChatDocument -from modules.datamodels.datamodelWeb import WebResearchRequest, WebResearchOptions +from modules.aicore.aicorePluginTavily import WebResearchRequest logger = logging.getLogger(__name__) @@ -263,24 +263,18 @@ class MethodAi(MethodBase): error="Search query is required" ) - # Build WebResearchOptions - options = WebResearchOptions( - max_pages=max_pages, - search_depth=search_depth, - extract_depth=extract_depth, - pages_search_depth=pages_search_depth, - country=country, - time_range=time_range, - topic=topic, - language=language - ) - - # Build WebResearchRequest + # Build WebResearchRequest (simplified dataclass) request = WebResearchRequest( user_prompt=user_prompt, urls=urls, max_results=max_results, - options=options + max_pages=max_pages, + search_depth=search_depth, + extract_depth=extract_depth, + country=country, + time_range=time_range, + topic=topic, + language=language ) # Call web research service @@ -294,7 +288,7 @@ class MethodAi(MethodBase): if not result.success: return ActionResult.isFailure(error=result.error) - # Convert WebResearchActionResult to ActionResult format + # Convert WebResearchResult to ActionResult format documents = [] for doc in result.documents: documents.append({ diff --git a/test_ai_behavior.py b/test_ai_behavior.py index 34a2ad60..02bf2f54 100644 --- a/test_ai_behavior.py +++ b/test_ai_behavior.py @@ -14,7 +14,7 @@ sys.path.append(os.path.dirname(__file__)) # Import the service initialization from modules.features.chatPlayground.mainChatPlayground import getServices -from modules.datamodels.datamodelAi import AiCallOptions, OperationType +from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum from modules.datamodels.datamodelUam import User # The test uses the AI service which handles JSON template internally
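
Illustrative usage note (not part of the diff): a minimal sketch of how a caller would use the standardized AiModelCall/AiModelResponse pattern introduced above. The connector and model objects are assumed to come from the existing registry; the message text and temperature value are examples only.

from modules.datamodels.datamodelAi import AiModelCall, AiModelResponse

async def example_call(connector, model) -> str:
    # Build the standardized call object: OpenAI-style messages, the AiModel
    # (which carries maxTokens and context limits), and per-call options.
    modelCall = AiModelCall(
        messages=[{"role": "user", "content": "Summarize the release notes."}],
        model=model,
        options={"temperature": 0.2},
    )

    # Every connector's callAiBasic now returns an AiModelResponse instead of
    # a provider-specific dict or a raw string.
    response: AiModelResponse = await connector.callAiBasic(modelCall)

    if not response.success:
        raise RuntimeError(f"Model call failed: {response.error}")
    return response.content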