closed workflow action plan

parent ebfdd9ab03
commit 9ba45952e4

10 changed files with 390 additions and 123 deletions

@@ -158,7 +158,7 @@ def createTaskPlanningPrompt(context: TaskContext, service) -> str:
     # Get previous workflow round context for better understanding of follow-up prompts
     previous_round_context = _getPreviousRoundContext(service, context.workflow)
 
-    return f"""You are a task planning AI that analyzes user requests and creates structured task plans with user-friendly feedback messages.
+    return f"""You are a task planning AI that analyzes user requests and creates structured, self-contained task plans with user-friendly feedback messages.
 
 USER REQUEST: {user_request}

@@ -173,8 +173,8 @@ INSTRUCTIONS:
    use the PREVIOUS WORKFLOW ROUNDS CONTEXT to understand what the user wants to retry or continue
 3. Group related topics and sequential steps into single, comprehensive tasks
 4. Focus on business outcomes, not technical operations
-5. Each task should produce meaningful, usable outputs
-6. Ensure proper handover between tasks using result labels
+5. Make each task self-contained: clearly state what to do and what outputs are expected
+6. Ensure proper handover between tasks (later actions will use your task outputs)
 7. Detect the language of the user request and include it in languageUserDetected
 8. Generate user-friendly messages for each task in the user's request language
 9. Return a JSON object with the exact structure shown below

@@ -201,12 +201,14 @@ SPLIT INTO MULTIPLE TASKS:
 TASK PLANNING PRINCIPLES:
 - Break down complex requests into logical, sequential steps
 - Focus on business value and outcomes
-- Keep tasks at a meaningful level of abstraction
+- Keep tasks at a meaningful level of abstraction (not implementation details)
 - Each task should produce results that can be used by subsequent tasks
 - Ensure clear dependencies and handovers between tasks
 - Provide clear, actionable user messages in the user's request language
 - Group related activities to minimize task fragmentation
 - Only create multiple tasks when dealing with truly different, independent objectives
+- Make task objectives action-oriented and specific (include scope, data sources to consider, and output intent at high level)
+- Write success_criteria as measurable acceptance criteria focusing on outputs (what artifacts or insights will exist and how they are validated)
 
 FOLLOW-UP PROMPT HANDLING:
 - If the user request is a follow-up (e.g., "try again", "versuche es nochmals", "retry", "continue", "proceed"),

@@ -246,6 +248,12 @@ EXAMPLES OF GOOD TASK OBJECTIVES (COMBINING RELATED ACTIVITIES):
 - "Execute business communication using specified channels and document outcomes"
 - "Develop comprehensive business strategy with implementation roadmap and success metrics"
 
+EXAMPLES OF WELL-FORMED SUCCESS CRITERIA (OUTPUT-FOCUSED):
+- "Deliver a prioritized list of 10–20 candidates with justification"
+- "Provide a structured JSON with fields: company, ticker, rationale, metrics"
+- "Produce a presentation outline with 5 sections and bullet points per section"
+- "Include data sources and date stamped references for traceability"
+
 EXAMPLES OF GOOD SUCCESS CRITERIA:
 - "Key insights extracted and ready for business use"
 - "Professional communication created with clear business value"

@@ -417,7 +425,7 @@ USAGE GUIDE:
 CRITICAL DOCUMENT REFERENCE RULES:
 - ONLY use the exact labels listed in AVAILABLE DOCUMENTS below, or result labels from previous actions
 - When generating multiple actions, you may only use as input documents those that are already present in AVAILABLE DOCUMENTS or produced by actions that come earlier in the list. Do NOT use as input any document label that will be produced by a later action.
-- If AVAILABLE DOCUMENTS shows "NO DOCUMENTS AVAILABLE", you CANNOT create document extraction actions. Instead, create actions that generate new content or inform the user that documents are needed, if you miss something.
+- If there are no documents available, you CANNOT create document extraction actions. Instead, prefer using web actions (web.search, web.scrape, web.crawl) when external information can satisfy the request; only generate a status/information report if the task truly requires user-provided documents.
 
 CURRENT WORKFLOW CONTEXT:
 - Current Round: {current_round}

@@ -470,18 +478,30 @@ PREVIOUS TASK HANDOVER CONTEXT:
 
 ACTION GENERATION PRINCIPLES:
 - Create meaningful actions per task step
-- Use comprehensive AI prompts for document processing
 - Focus on business outcomes, not technical operations
 - Combine related operations into single actions when possible
-- Use the task's AI prompt if provided, or create a comprehensive one
-- Each action should produce meaningful, usable outputs
+- Select the method that best fulfills the objective based on context (do not default to any specific method).
+- Each action must be self-contained and executable with the provided parameters
 - For document extraction, ensure prompts are specific and detailed
-- Include validation steps in extraction prompts
+- Include validation steps in extraction prompts where relevant
 - If this is a retry, learn from previous failures and improve the approach
 - Address specific issues mentioned in previous review feedback
 - When specifying expectedDocumentFormats, ensure AI prompts explicitly request pure data without markdown formatting
 - Generate user-friendly messages for each action in the user's language ({user_language})
 
+PARAMETER COMPLETENESS REQUIREMENTS:
+- Every parameter must contain all information needed to execute without implicit context
+- Use explicit, concrete values (units, languages, formats, limits, date ranges, IDs) when applicable
+- For search-like parameters (if any method requires a query), derive the query from the task objective AND ALL success criteria dimensions. Include:
+  - Key entities and domain terms from the objective
+  - All distinct facets from success_criteria (e.g., valuation AND AI potential AND know-how needs)
+  - Geography/localization (e.g., Schweiz/Suisse/Switzerland; use multilingual synonyms when helpful)
+  - Time horizon or recency if relevant
+  - Boolean operators and synonyms to increase precision (use AND/OR, quotes, parentheses)
+- Avoid single-topic or generic queries focused only on one facet (e.g., pure valuation metrics)
+- When facets are truly distinct, create 1–3 focused actions with precise queries rather than one vague catch-all
+- Document list parameters must reference only existing labels or prior action outputs; do not reference future outputs
+
 USER LANGUAGE: {user_language} - All user messages must be generated in this language.
 
 DOCUMENT ROUTING GUIDANCE:

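Review note: to make the query guidance concrete, a multi-facet query following these rules might look like the sketch below (topic and terms are invented for illustration, not taken from the commit):

    # Hypothetical multi-facet query per the guidance above (illustrative only)
    query = (
        '("Schweiz" OR "Suisse" OR "Switzerland") '
        'AND ("company valuation" OR "Unternehmensbewertung") '
        'AND ("AI potential" OR "KI-Potenzial")'
    )
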
@@ -494,7 +514,7 @@ DOCUMENT ROUTING GUIDANCE:
 INSTRUCTIONS:
 - Generate actions to accomplish this task step using available documents, connections, and previous results
 - Use docItem for single documents and docList for groups of documents as shown in AVAILABLE DOCUMENTS
-- If AVAILABLE DOCUMENTS shows "NO DOCUMENTS AVAILABLE", you cannot create document extraction actions. Instead, create actions that generate new content or inform the user that documents are needed.
+- If there are no documents available, do not create document extraction actions. Select methods strictly based on the task objective; choose web actions when external information is required. Otherwise, generate a status/information report requesting needed inputs.
 - Always pass documentList as a LIST of references (docItem and/or docList) - this list CANNOT be empty for document extraction actions
 - For referencing documents from previous actions, use the format "round{{round_number}}_task{{task_number}}_action{{action_number}}_{{context}}"
 - For resultLabel, use the format: "round{current_round}_task{{task_id}}_action{{action_number}}_{{short_label}}" where:

@@ -639,23 +659,9 @@ EXAMPLES OF GOOD ACTIONS:
   ]
 }}
 
-6. When no documents are available (NO DOCUMENTS AVAILABLE scenario):
-{{
-  "method": "document",
-  "action": "generateReport",
-  "parameters": {{
-    "documentList": [],
-    "title": "Workflow Status Report"
-  }},
-  "resultLabel": "round{current_round}_task{current_task}_action1_status_report",
-  "description": "Generate a status report informing the user that no documents are available for processing and requesting document upload or alternative input.",
-  "userMessage": "Ich erstelle einen Statusbericht, der Sie darüber informiert, dass keine Dokumente zur Verarbeitung verfügbar sind und um Dokumente oder alternative Eingaben bittet."
-}}
-
 IMPORTANT NOTES:
 - Respond with ONLY the JSON object. Do not include any explanatory text.
 - Before creating any document extraction action, verify that AVAILABLE DOCUMENTS contains actual document references.
-- If AVAILABLE DOCUMENTS shows "NO DOCUMENTS AVAILABLE", use example 6 above to create a status report action instead of document extraction.
 - Always include a user-friendly userMessage for each action in the user's language ({user_language}).
 - The examples above show German user messages as reference - adapt the language to match the USER LANGUAGE specified above."""

@@ -81,7 +81,18 @@ class ConnectorTavily(WebSearchBase, WebCrawlBase, WebScrapeBase):
         """
         # Step 1: Search
         try:
-            search_results = await self._search(request.query, request.max_results)
+            search_results = await self._search(
+                query=request.query,
+                max_results=request.max_results,
+                search_depth=request.search_depth,
+                time_range=request.time_range,
+                topic=request.topic,
+                include_domains=request.include_domains,
+                exclude_domains=request.exclude_domains,
+                language=request.language,
+                include_answer=request.include_answer,
+                include_raw_content=request.include_raw_content,
+            )
         except Exception as e:
             return WebSearchActionResult(success=False, error=str(e))

@@ -113,14 +124,29 @@ class ConnectorTavily(WebSearchBase, WebCrawlBase, WebScrapeBase):
         """Turns a query in a list of urls with extracted content."""
         # Step 1: Search
         try:
-            search_results = await self._search(request.query, request.max_results)
+            search_results = await self._search(
+                query=request.query,
+                max_results=request.max_results,
+                search_depth=request.search_depth,
+                time_range=request.time_range,
+                topic=request.topic,
+                include_domains=request.include_domains,
+                exclude_domains=request.exclude_domains,
+                language=request.language,
+                include_answer=request.include_answer,
+                include_raw_content=request.include_raw_content,
+            )
         except Exception as e:
             return WebScrapeActionResult(success=False, error=str(e))
 
         # Step 2: Crawl
         try:
             urls = [result.url for result in search_results]
-            crawl_results = await self._crawl(urls)
+            crawl_results = await self._crawl(
+                urls,
+                extract_depth=request.extract_depth,
+                format=request.format,
+            )
         except Exception as e:
             return WebScrapeActionResult(success=False, error=str(e))

@@ -132,7 +158,19 @@ class ConnectorTavily(WebSearchBase, WebCrawlBase, WebScrapeBase):
 
         return result
 
-    async def _search(self, query: str, max_results: int) -> list[TavilySearchResult]:
+    async def _search(
+        self,
+        query: str,
+        max_results: int,
+        search_depth: str | None = None,
+        time_range: str | None = None,
+        topic: str | None = None,
+        include_domains: list[str] | None = None,
+        exclude_domains: list[str] | None = None,
+        language: str | None = None,
+        include_answer: bool | None = None,
+        include_raw_content: bool | None = None,
+    ) -> list[TavilySearchResult]:
         """Calls the Tavily API to perform a web search."""
         # Make sure max_results is within the allowed range
         min_results = get_web_search_min_results()

@@ -141,7 +179,26 @@ class ConnectorTavily(WebSearchBase, WebCrawlBase, WebScrapeBase):
             raise ValueError(f"max_results must be between {min_results} and {max_allowed_results}")
 
         # Perform actual API call
-        response = await self.client.search(query=query, max_results=max_results)
+        # Build kwargs only for provided options to avoid API rejections
+        kwargs: dict = {"query": query, "max_results": max_results}
+        if search_depth is not None:
+            kwargs["search_depth"] = search_depth
+        if time_range is not None:
+            kwargs["time_range"] = time_range
+        if topic is not None:
+            kwargs["topic"] = topic
+        if include_domains is not None:
+            kwargs["include_domains"] = include_domains
+        if exclude_domains is not None:
+            kwargs["exclude_domains"] = exclude_domains
+        if language is not None:
+            kwargs["language"] = language
+        if include_answer is not None:
+            kwargs["include_answer"] = include_answer
+        if include_raw_content is not None:
+            kwargs["include_raw_content"] = include_raw_content
+
+        response = await self.client.search(**kwargs)
 
         return [
             TavilySearchResult(title=result["title"], url=result["url"])

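Review note: the conditional-kwargs block forwards only the options a caller actually set, which the commit's own comment ties to avoiding API rejections. A compact, behaviorally equivalent variant could filter a dict (sketch, not part of the commit):

    # Sketch: compact equivalent of the conditional kwargs block above
    def build_search_kwargs(query: str, max_results: int, **options) -> dict:
        """Start from the required fields and add only options that are not None."""
        kwargs = {"query": query, "max_results": max_results}
        kwargs.update({k: v for k, v in options.items() if v is not None})
        return kwargs

    # build_search_kwargs("swiss banks", 5, search_depth="advanced", topic=None)
    # -> {"query": "swiss banks", "max_results": 5, "search_depth": "advanced"}
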
@@ -174,7 +231,12 @@ class ConnectorTavily(WebSearchBase, WebCrawlBase, WebScrapeBase):
             success=True, documents=[document], resultLabel="web_search_results"
         )
 
-    async def _crawl(self, urls: list) -> list[TavilyCrawlResult]:
+    async def _crawl(
+        self,
+        urls: list,
+        extract_depth: str | None = None,
+        format: str | None = None,
+    ) -> list[TavilyCrawlResult]:
         """Calls the Tavily API to extract text content from URLs with retry logic."""
         import asyncio

@@ -185,8 +247,13 @@ class ConnectorTavily(WebSearchBase, WebCrawlBase, WebScrapeBase):
         for attempt in range(max_retries + 1):
             try:
                 # Use asyncio.wait_for for timeout
+                # Build kwargs for extract
+                kwargs_extract: dict = {"urls": urls}
+                kwargs_extract["extract_depth"] = extract_depth or "advanced"
+                kwargs_extract["format"] = format or "text"
+
                 response = await asyncio.wait_for(
-                    self.client.extract(urls=urls, extract_depth="advanced", format="text"),
+                    self.client.extract(**kwargs_extract),
                     timeout=timeout
                 )

@@ -3,7 +3,7 @@
 from abc import ABC, abstractmethod
 from modules.interfaces.interfaceChatModel import ActionDocument, ActionResult
 from pydantic import BaseModel, Field, HttpUrl
-from typing import List
+from typing import List, Optional, Literal
 from modules.shared.configuration import APP_CONFIG

@@ -31,6 +31,17 @@ def get_web_search_min_results() -> int:
 class WebSearchRequest(BaseModel):
     query: str = Field(min_length=1, max_length=get_web_search_max_query_length())
     max_results: int = Field(ge=get_web_search_min_results(), le=get_web_search_max_results())
+    # Tavily tuning options
+    search_depth: Optional[Literal["basic", "advanced"]] = Field(default=None)
+    time_range: Optional[Literal["d", "w", "m", "y"]] = Field(
+        default=None, description="Limit results to last day/week/month/year"
+    )
+    topic: Optional[Literal["general", "news", "academic"]] = Field(default=None)
+    include_domains: Optional[List[str]] = Field(default=None)
+    exclude_domains: Optional[List[str]] = Field(default=None)
+    language: Optional[str] = Field(default=None, description="ISO language code like 'en', 'de'")
+    include_answer: Optional[bool] = Field(default=None)
+    include_raw_content: Optional[bool] = Field(default=None)
 
 
 class WebSearchResultItem(BaseModel):

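Review note: the Literal-typed fields mean pydantic validates tuning options at construction time. A sketch of building the extended request (values invented):

    # Sketch: constructing the extended request (illustrative values;
    # assumes WebSearchRequest is imported from the interface module above)
    request = WebSearchRequest(
        query="Swiss manufacturing SMEs AND (AI adoption OR automation)",
        max_results=10,
        search_depth="advanced",
        time_range="m",   # last month
        topic="news",
        language="de",
    )
    # An out-of-range value such as time_range="q" would raise a pydantic ValidationError.
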
@@ -68,6 +79,9 @@ class WebSearchBase(ABC):
 
 class WebCrawlRequest(BaseModel):
     urls: List[HttpUrl]
+    # Tavily extract options
+    extract_depth: Optional[Literal["basic", "advanced"]] = Field(default=None)
+    format: Optional[Literal["text", "markdown"]] = Field(default=None)
 
 
 class WebCrawlResultItem(BaseModel):

@@ -108,6 +122,18 @@ class WebCrawlBase(ABC):
 class WebScrapeRequest(BaseModel):
     query: str = Field(min_length=1, max_length=get_web_search_max_query_length())
     max_results: int = Field(ge=get_web_search_min_results(), le=get_web_search_max_results())
+    # Pass-through search options
+    search_depth: Optional[Literal["basic", "advanced"]] = Field(default=None)
+    time_range: Optional[Literal["d", "w", "m", "y"]] = Field(default=None)
+    topic: Optional[Literal["general", "news", "academic"]] = Field(default=None)
+    include_domains: Optional[List[str]] = Field(default=None)
+    exclude_domains: Optional[List[str]] = Field(default=None)
+    language: Optional[str] = Field(default=None)
+    include_answer: Optional[bool] = Field(default=None)
+    include_raw_content: Optional[bool] = Field(default=None)
+    # Extract options
+    extract_depth: Optional[Literal["basic", "advanced"]] = Field(default=None)
+    format: Optional[Literal["text", "markdown"]] = Field(default=None)
 
 
 class WebScrapeResultItem(BaseModel):

@@ -41,6 +41,8 @@ class MethodAi(MethodBase):
         try:
             aiPrompt = parameters.get("aiPrompt")
             documentList = parameters.get("documentList", [])
+            if isinstance(documentList, str):
+                documentList = [documentList]
             expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
             processingMode = parameters.get("processingMode", "basic")
             includeMetadata = parameters.get("includeMetadata", True)

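Review note: this string-to-list normalization recurs below in MethodDocument, MethodOutlook, MethodSharepoint, and MethodWeb. If it keeps spreading, a small shared helper could replace the repeated lines (sketch; the helper name as_list is hypothetical, not in the commit):

    # Sketch: shared normalization helper (hypothetical)
    def as_list(value):
        """Wrap a single string reference in a list; pass lists through unchanged."""
        if isinstance(value, str):
            return [value]
        return value or []

    # usage: documentList = as_list(parameters.get("documentList", []))
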
@@ -171,10 +173,43 @@ class MethodAi(MethodBase):
             if context:
                 logger.info(f"Including context from {len(documentList)} documents")
 
+            # Encourage longer, structured outputs with a min-length hint
+            min_tokens_hint = "\n\nPlease ensure the response is substantial and complete."
+            call_prompt = enhanced_prompt + min_tokens_hint
+
             if processingMode in ["advanced", "detailed"]:
-                result = await self.service.callAiTextAdvanced(enhanced_prompt, context)
+                result = await self.service.callAiTextAdvanced(call_prompt, context)
             else:
-                result = await self.service.callAiTextBasic(enhanced_prompt, context)
+                result = await self.service.callAiTextBasic(call_prompt, context)
 
+            # If expected JSON and too short/not JSON, retry with stricter JSON guardrails
+            if output_extension == ".json":
+                import json
+                cleaned = (result or "").strip()
+                if cleaned.startswith('```json'):
+                    cleaned = cleaned[7:]
+                if cleaned.endswith('```'):
+                    cleaned = cleaned[:-3]
+                cleaned = cleaned.strip()
+                needs_retry = False
+                try:
+                    parsed = json.loads(cleaned)
+                    # Heuristic: small dict -> possibly underfilled
+                    if isinstance(parsed, dict) and len(parsed.keys()) <= 2:
+                        needs_retry = True
+                except Exception:
+                    needs_retry = True
+
+                if needs_retry:
+                    guardrail_prompt = (
+                        enhanced_prompt
+                        + "\n\nCRITICAL: Return ONLY valid JSON, no markdown, no code fences. "
+                        "Include all requested fields with detailed content."
+                    )
+                    try:
+                        result = await self.service.callAiTextAdvanced(guardrail_prompt, context)
+                    except Exception:
+                        result = cleaned  # fallback to first attempt
+
             # Create result document
             fileName = f"ai_{processingMode}_{self._format_timestamp_for_filename()}{output_extension}"

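Review note: a quick trace of the fence-stripping logic added above, applied to a sample fenced response (sketch; sample values invented):

    # Sketch: the fence-stripping steps applied to a sample response
    raw = '```json\n{"company": "ACME", "ticker": "ACM", "rationale": "..."}\n```'
    cleaned = raw.strip()
    if cleaned.startswith('```json'):
        cleaned = cleaned[7:]              # drop opening fence plus language tag
    if cleaned.endswith('```'):
        cleaned = cleaned[:-3]             # drop closing fence
    cleaned = cleaned.strip()
    print(cleaned)  # {"company": "ACME", "ticker": "ACM", "rationale": "..."}
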
@@ -5,7 +5,6 @@ Handles document operations using the document service.
 
 import logging
 import os
-import re
 from typing import Dict, Any, List, Optional
 from datetime import datetime, UTC

@@ -34,13 +33,15 @@ class MethodDocument(MethodBase):
         Extract content from any document using AI prompt.
 
         Parameters:
-            documentList (str): Document list reference
+            documentList (list): Document list reference(s)
             aiPrompt (str): AI prompt for extraction
             expectedDocumentFormats (list, optional): Output formats
             includeMetadata (bool, optional): Include metadata (default: True)
         """
         try:
             documentList = parameters.get("documentList")
+            if isinstance(documentList, str):
+                documentList = [documentList]
             aiPrompt = parameters.get("aiPrompt")
             expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
             includeMetadata = parameters.get("includeMetadata", True)

@@ -188,6 +189,8 @@ class MethodDocument(MethodBase):
         """
         try:
             document_list = parameters.get("documentList", [])
+            if isinstance(document_list, str):
+                document_list = [document_list]
             expected_document_formats = parameters.get("expectedDocumentFormats", [])
             original_documents = parameters.get("originalDocuments", [])
             include_metadata = parameters.get("includeMetadata", True)

@@ -606,13 +609,15 @@ class MethodDocument(MethodBase):
         Generate HTML report from multiple documents using AI.
 
         Parameters:
-            documentList (str): Document list reference
+            documentList (list): Document list reference(s)
             prompt (str): AI prompt for report generation
             title (str, optional): Report title (default: "Summary Report")
             includeMetadata (bool, optional): Include metadata (default: True)
         """
         try:
             documentList = parameters.get("documentList")
+            if isinstance(documentList, str):
+                documentList = [documentList]
             prompt = parameters.get("prompt")
             title = parameters.get("title", "Summary Report")
             includeMetadata = parameters.get("includeMetadata", True)

@@ -708,13 +713,9 @@ class MethodDocument(MethodBase):
                     logger.info(f" Skipping document with no readable text content")
 
             if not validDocuments:
-                # If no valid documents, create a simple report
-                html = ["<html><head><meta charset='utf-8'><title>" + title + "</title></head><body>"]
-                html.append(f"<h1>{title}</h1>")
-                html.append(f"<p><b>Generated:</b> {int(get_utc_timestamp())}</p>")
-                html.append("<p><em>No content available in the provided documents.</em></p>")
-                html.append("</body></html>")
-                return '\n'.join(html)
+                # No readable content; return a minimal valid HTML document
+                timestamp = int(get_utc_timestamp())
+                return f"<!DOCTYPE html><html><head><meta charset=\"UTF-8\"><title>{title}</title></head><body><h1>{title}</h1><p>Keine auswertbaren Inhalte gefunden.</p><p>Generated: {timestamp}</p></body></html>"
 
             # Create AI prompt for comprehensive report generation using user's prompt
             combinedContent = "\n\n".join(allContent)

@@ -723,25 +724,34 @@ class MethodDocument(MethodBase):
 
 Report Title: {title}
 
-Additional Requirements:
-1. Create a professional, well-formatted HTML report
-2. Include an executive summary at the beginning
-3. Organize information logically with clear sections
-4. Highlight key findings and insights
-5. Include relevant data, statistics, and conclusions
-6. Use proper HTML formatting with headers, lists, and styling
-7. Make it readable and professional
+OUTPUT POLICY:
+- Return ONLY a complete, raw HTML document.
+- Start with: <!DOCTYPE html>
+- Must include: <html>, <head> (with <meta charset="UTF-8"> and <title>), and <body>.
+- The response must be valid, self-contained HTML suitable for saving as .html.
 
-Document Content:
----START OF DOCUMENT CONTENT-----------------------------------------------
+Structure:
+- Title and short subtitle
+- Executive summary
+- Sections with clear headings
+- Use tables for structured data when helpful
+- Key findings and recommendations
+- Generation date and number of documents
+
+Quality and design requirements:
+- Use clear, professional, and accessible styling in a <style> block
+- Apply clean layout, spacing, and visual hierarchy for headings
+- Keep HTML and CSS standards-compliant and lightweight
+
+SOURCE DOCUMENT CONTENT:
+---START---
 {combinedContent}
----END OF DOCUMENT CONTENT-----------------------------------------------
-Generate a complete HTML report that addresses the user's specific requirements and integrates all the information into a cohesive, professional document.
+---END---
 """
 
             # Call AI to generate the report
             logger.info(f"Generating AI report for {len(validDocuments)} documents")
-            aiReport = await self.service.callAiTextBasic(aiPrompt, combinedContent)
+            aiReport = await self.service.callAiTextAdvanced(aiPrompt, combinedContent)
 
             # If AI call fails, return error - AI is crucial for report generation
             if not aiReport or aiReport.strip() == "":

@@ -751,39 +761,21 @@ Generate a complete HTML report that addresses the user's specific requirements
             # Clean up the AI response and ensure it's valid HTML
             aiReport = aiReport.strip()
 
-            # Strip fenced code blocks like ```html ... ``` if present
+            # Normalize: strip code fences if present
             if aiReport.startswith("```") and aiReport.endswith("```"):
                 lines = aiReport.split('\n')
                 if len(lines) >= 2:
-                    # remove first and last fence lines (language tag allowed on first)
                     aiReport = '\n'.join(lines[1:-1]).strip()
 
-            # Check if AI response starts with DOCTYPE or html tag (complete HTML document)
-            if aiReport.startswith('<!DOCTYPE') or aiReport.startswith('<html'):
-                # AI returned complete HTML document, use it directly
-                return aiReport
-            else:
-                # AI returned HTML content without document structure, wrap it
-
-                # Check if AI response already contains a title/header
-                has_title = any(title.lower() in aiReport.lower() for title in [title, "outlook", "report", "status"])
-
-                # Wrap the AI content in proper HTML structure
-                html = ["<html><head><meta charset='utf-8'><title>" + title + "</title></head><body>"]
-
-                # Only add the title if the AI response doesn't already have one
-                if not has_title:
-                    html.append(f"<h1>{title}</h1>")
-
-                html.append(f"<p><b>Generated:</b> {int(get_utc_timestamp())}</p>")
-                html.append(f"<p><b>Total Documents Analyzed:</b> {len(validDocuments)}</p>")
-                html.append("<hr>")
-                html.append(aiReport)
-                html.append("</body></html>")
-                return '\n'.join(html)
+            cleaned = aiReport.strip()
+
+            # Return exactly what we have (no wrapping)
+            return cleaned
 
         except Exception as e:
             logger.error(f"Error generating AI report: {str(e)}")
             # Re-raise the error - AI is crucial for report generation
             raise

@@ -1478,7 +1478,11 @@ class MethodOutlook(MethodBase):
             attachments = parameters.get("attachments", [])
             tone = parameters.get("tone", "professional")
             documentList = parameters.get("documentList", [])
+            if isinstance(documentList, str):
+                documentList = [documentList]
             attachmentDocumentList = parameters.get("attachmentDocumentList", [])
+            if isinstance(attachmentDocumentList, str):
+                attachmentDocumentList = [attachmentDocumentList]
             expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
 
             if not context:

@@ -829,7 +829,7 @@ class MethodSharepoint(MethodBase):
         Read documents from SharePoint across all accessible sites
 
         Parameters:
-            documentList (str): Reference to the document list to read
+            documentList (list): Reference(s) to the document list to read
             connectionReference (str): Reference to the Microsoft connection
             pathObject (str, optional): Path object to locate documents. This can ONLY be a reference to a result from sharepoint.findDocumentPath action
             pathQuery (str, optional): Path query to locate documents, only if no pathObject is provided (e.g., "/Documents/Project1", "*" for all sites)

@@ -837,6 +837,8 @@ class MethodSharepoint(MethodBase):
         """
         try:
             documentList = parameters.get("documentList")
+            if isinstance(documentList, str):
+                documentList = [documentList]
             connectionReference = parameters.get("connectionReference")
             pathQuery = parameters.get("pathQuery", "*")
             pathObject = parameters.get("pathObject")

@@ -886,8 +888,7 @@ class MethodSharepoint(MethodBase):
                     return ActionResult.isFailure(error=f"Error resolving pathObject reference: {str(e)}")
 
             # Get documents from reference - ensure documentList is a list, not a string
-            if isinstance(documentList, str):
-                documentList = [documentList]  # Convert string to list
+            # documentList is already normalized above
             chatDocuments = self.service.getChatDocumentsFromDocumentList(documentList)
 
             if not chatDocuments:

@@ -1107,13 +1108,15 @@ class MethodSharepoint(MethodBase):
             connectionReference (str): Reference to the Microsoft connection
             pathObject (str, optional): Path object to locate documents. This can ONLY be a reference to a result from sharepoint.findDocumentPath action
             pathQuery (str, optional): Path query to locate documents, only if no pathObject is provided (e.g., "/Documents/Project1", "*" for all sites)
-            documentList (str): Reference to the document list to upload
+            documentList (list): Reference(s) to the document list to upload
             fileNames (List[str]): List of names for the uploaded files
         """
         try:
             connectionReference = parameters.get("connectionReference")
             pathQuery = parameters.get("pathQuery")
             documentList = parameters.get("documentList")
+            if isinstance(documentList, str):
+                documentList = [documentList]
             fileNames = parameters.get("fileNames")
             pathObject = parameters.get("pathObject")

@@ -25,40 +25,53 @@ class MethodWeb(MethodBase):
 
     @action
     async def search(self, parameters: Dict[str, Any]) -> ActionResult:
-        """Perform a web search and outputs a csv file with a list of found URLs
-
-        Each result contains columns "url" and "title".
+        """Perform a web search and output a CSV with the found URLs. Each result row contains columns "url" and "title".
 
         Parameters:
-            query (str): Search query to perform
-            maxResults (int, optional): Maximum number of results (default: 10)
+            query (str, required): Search query.
+            maxResults (int, optional): Max number of results. Default: 10.
+            searchDepth ("basic"|"advanced", optional): Search depth. Default: provider default.
+            timeRange ("d"|"w"|"m"|"y", optional): Limit to last day/week/month/year.
+            topic ("general"|"news"|"academic", optional): Result domain preference.
+            includeDomains (list[str], optional): Only include these domains.
+            excludeDomains (list[str], optional): Exclude these domains.
+            language (str, optional): ISO code like "de", "en" to bias results.
+            includeAnswer (bool, optional): Ask provider to generate a short answer.
+            includeRawContent (bool, optional): Include raw content where possible.
         """
 
         try:
-            # Prepare request data
+            # Prepare request data (generic, no region/language bias)
+            raw_query = parameters.get("query")
+            max_results = parameters.get("maxResults", 10)
+
+            if not raw_query or not isinstance(raw_query, str):
+                return ActionResult(success=False, error="Search query is required")
+
             web_search_request = WebSearchRequest(
-                query=parameters.get("query"),
-                max_results=parameters.get("maxResults", 10),
+                query=raw_query.strip(),
+                max_results=max_results,
+                search_depth=parameters.get("searchDepth"),
+                time_range=parameters.get("timeRange"),
+                topic=parameters.get("topic"),
+                include_domains=parameters.get("includeDomains"),
+                exclude_domains=parameters.get("excludeDomains"),
+                language=parameters.get("language"),
+                include_answer=parameters.get("includeAnswer"),
+                include_raw_content=parameters.get("includeRawContent"),
             )
 
             # Perform request
             web_interface = await WebInterface.create()
             web_search_result = await web_interface.search(web_search_request)
 
-            # Convert search results to CSV format
+            # Convert search results to CSV format (generic)
             if web_search_result.success and web_search_result.documents:
                 csv_content = web_interface.convert_web_search_result_to_csv(web_search_result)
 
-                # Create CSV document
                 csv_document = web_interface.create_csv_action_document(
-                    csv_content,
-                    f"web_search_results.csv"
+                    csv_content, f"web_search_results.csv"
                 )
-
-                return ActionResult(
-                    success=True,
-                    documents=[csv_document]
-                )
+                return ActionResult(success=True, documents=[csv_document])
             else:
                 return web_search_result

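Review note: with the extended docstring, a planner can emit actions that set the tuning parameters directly. A hypothetical parameters payload (names follow the docstring; values invented):

    # Sketch: example parameters for the extended web.search action
    parameters = {
        "query": '("fintech" OR "insurtech") AND Switzerland AND funding',
        "maxResults": 10,
        "searchDepth": "advanced",
        "timeRange": "w",                  # last week
        "topic": "news",
        "excludeDomains": ["pinterest.com"],
        "language": "en",
    }
    # result = await method_web.search(parameters)  # method_web: a MethodWeb instance
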
@@ -105,15 +118,21 @@ class MethodWeb(MethodBase):
 
     @action
     async def crawl(self, parameters: Dict[str, Any]) -> ActionResult:
-        """Crawls a list of URLs and extracts information from them.
+        """Crawl a list of URLs and extract text content.
 
         Parameters:
-            documentList (str): Document list reference containing URL lists from search results
-            expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description
+            documentList (list[str]|str, required): Reference(s) to documents containing URLs (e.g., CSV from search). Can be a single ref or list.
+            expectedDocumentFormats (list, optional): Hint for downstream handling.
+            extractDepth ("basic"|"advanced", optional): Extraction depth. Default: "advanced".
+            format ("text"|"markdown", optional): Output format. Default: "text".
         """
         try:
             document_list = parameters.get("documentList")
 
+            # Normalize to list if a single string reference is provided
+            if isinstance(document_list, str):
+                document_list = [document_list]
+
             if not document_list:
                 return ActionResult(
                     success=False, error="No document list reference provided."

@@ -214,24 +233,72 @@ class MethodWeb(MethodBase):
             unique_urls = list(dict.fromkeys(all_urls))
             logger.info(f"Extracted {len(unique_urls)} unique URLs from {len(chat_documents)} documents")
 
-            # Prepare request data
-            web_crawl_request = WebCrawlRequest(urls=unique_urls)
+            # Prepare request data with normalization
+            allowed_extract_depth = {"basic", "advanced"}
+            allowed_formats = {"text", "markdown"}
+            extract_depth = parameters.get("extractDepth")
+            if extract_depth and extract_depth not in allowed_extract_depth:
+                logger.warning(f"Invalid extractDepth '{extract_depth}' provided. Falling back to 'advanced'.")
+                extract_depth = "advanced"
+            fmt = parameters.get("format")
+            if fmt and fmt not in allowed_formats:
+                logger.warning(f"Invalid format '{fmt}' provided. Falling back to 'text'.")
+                fmt = "text"
+
+            web_crawl_request = WebCrawlRequest(
+                urls=unique_urls,
+                extract_depth=extract_depth,
+                format=fmt,
+            )
 
             # Perform request
             web_interface = await WebInterface.create()
             web_crawl_result = await web_interface.crawl(web_crawl_request)
 
-            # Convert to proper JSON format
+            # Convert and enrich with concise summaries per URL for better context
             if web_crawl_result.success:
-                json_content = web_interface.convert_web_result_to_json(web_crawl_result)
+                try:
+                    doc = web_crawl_result.documents[0]
+                    results = getattr(doc.documentData, "results", [])
+                    enriched = []
+                    # Summarize each result briefly using AI for added context
+                    for item in results:
+                        url = str(getattr(item, "url", ""))
+                        content = str(getattr(item, "content", ""))
+                        summary = ""
+                        try:
+                            if content:
+                                prompt = (
+                                    "Summarize the following webpage content in 3-5 concise bullet points. "
+                                    "Focus on key points, figures, named entities (companies/institutions), and location context. "
+                                    "Return only bullet points without any preface."
+                                )
+                                context = content[:4000]
+                                summary = await self.service.callAiTextBasic(prompt, context)
+                                summary = summary.strip()
+                        except Exception:
+                            summary = ""
+                        enriched.append({
+                            "url": url,
+                            "summary": summary,
+                            "snippet": content[:500]
+                        })
+
+                    import json as _json
+                    payload = {
+                        "success": True,
+                        "total_count": len(enriched),
+                        "results": enriched,
+                    }
+                    json_content = _json.dumps(payload, ensure_ascii=False, indent=2)
+                except Exception:
+                    # Fallback to original conversion
+                    json_content = web_interface.convert_web_result_to_json(web_crawl_result)
 
                 json_document = web_interface.create_json_action_document(
-                    json_content,
-                    f"web_crawl_results.json"
+                    json_content, f"web_crawl_results.json"
                 )
-                return ActionResult(
-                    success=True,
-                    documents=[json_document]
-                )
+                return ActionResult(success=True, documents=[json_document])
             else:
                 return web_crawl_result

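Review note: the enrichment loop produces a JSON payload of roughly this shape (sketch with invented values; keys mirror those set in the added code):

    # Sketch: shape of the enriched web_crawl_results.json payload
    payload = {
        "success": True,
        "total_count": 1,
        "results": [
            {
                "url": "https://example.com/article",
                "summary": "- Key point one\n- Key point two",
                "snippet": "First 500 characters of the page content...",
            }
        ],
    }
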
@@ -241,17 +308,44 @@ class MethodWeb(MethodBase):
 
     @action
     async def scrape(self, parameters: Dict[str, Any]) -> ActionResult:
-        """Scrapes web content by searching for URLs and then extracting their content.
-
-        Combines search and crawl operations in one step.
+        """Search and then crawl the found URLs in one step. To use for market analysis, web research, internet searches
 
         Parameters:
-            query (str): Search query to perform
-            maxResults (int, optional): Maximum number of results (default: 10)
+            query (str, required): Search query.
+            maxResults (int, optional): Max number of results. Default: 10.
+            searchDepth ("basic"|"advanced", optional): Search depth.
+            timeRange ("d"|"w"|"m"|"y", optional): Time window.
+            topic ("general"|"news"|"academic", optional): Result domain preference.
+            includeDomains (list[str], optional): Only include these domains.
+            excludeDomains (list[str], optional): Exclude these domains.
+            language (str, optional): ISO language bias.
+            includeAnswer (bool, optional): Ask provider to include an answer.
+            includeRawContent (bool, optional): Include raw content where possible.
+            extractDepth ("basic"|"advanced", optional): Crawl extraction depth. Default: "advanced".
+            format ("text"|"markdown", optional): Crawl output format. Default: "text".
         """
         try:
             query = parameters.get("query")
             max_results = parameters.get("maxResults", 10)
+            # Normalize optional enums to avoid validation errors
+            allowed_search_depth = {"basic", "advanced"}
+            allowed_extract_depth = {"basic", "advanced"}
+            allowed_formats = {"text", "markdown"}
+
+            search_depth = parameters.get("searchDepth")
+            if search_depth and search_depth not in allowed_search_depth:
+                logger.warning(f"Invalid searchDepth '{search_depth}' provided. Falling back to None.")
+                search_depth = None
+
+            extract_depth = parameters.get("extractDepth")
+            if extract_depth and extract_depth not in allowed_extract_depth:
+                logger.warning(f"Invalid extractDepth '{extract_depth}' provided. Falling back to 'advanced'.")
+                extract_depth = "advanced"
+
+            fmt = parameters.get("format")
+            if fmt and fmt not in allowed_formats:
+                logger.warning(f"Invalid format '{fmt}' provided. Falling back to 'text'.")
+                fmt = "text"
+
             if not query:
                 return ActionResult(success=False, error="Search query is required")

@@ -260,6 +354,16 @@ class MethodWeb(MethodBase):
             web_scrape_request = WebScrapeRequest(
                 query=query,
                 max_results=max_results,
+                search_depth=search_depth,
+                time_range=parameters.get("timeRange"),
+                topic=parameters.get("topic"),
+                include_domains=parameters.get("includeDomains"),
+                exclude_domains=parameters.get("excludeDomains"),
+                language=parameters.get("language"),
+                include_answer=parameters.get("includeAnswer"),
+                include_raw_content=parameters.get("includeRawContent"),
+                extract_depth=extract_depth,
+                format=fmt,
             )
 
             # Perform request

@@ -174,6 +174,19 @@ def _getUserBase(token: str = Depends(cookieAuth)) -> User:
         headers={"WWW-Authenticate": "Bearer"},
     )
 
+    # Guard: token may be None or malformed when cookie/header is missing or bad
+    if not token or not isinstance(token, str):
+        logger.warning("Missing JWT Token (no cookie/header)")
+        raise credentialsException
+    # Basic JWT format check (header.payload.signature)
+    try:
+        if token.count(".") != 2:
+            logger.warning("Malformed JWT token format")
+            raise credentialsException
+    except Exception:
+        # If anything odd happens while checking format, treat as invalid creds
+        raise credentialsException
+
     try:
         # Decode token
         payload = jwt.decode(token, SECRET_KEY, algorithms=[ALGORITHM])

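Review note: the new guard checks only JWT shape, not validity. A compact JWT is three base64url segments joined by dots, so the count check rejects structurally broken input before jwt.decode runs. A small illustration (sketch; sample tokens invented):

    # Sketch: what the structural gate accepts and rejects
    well_formed = "eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjMifQ.signature"
    malformed = "not-a-jwt"
    assert well_formed.count(".") == 2   # passes on to jwt.decode for real validation
    assert malformed.count(".") != 2     # rejected early with credentialsException
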
@@ -163,6 +163,23 @@ class TokenManager:
         logger.debug(f"refresh_token: Starting refresh for token {old_token.id}, authority: {old_token.authority}")
         logger.debug(f"refresh_token: Token details: userId={old_token.userId}, connectionId={old_token.connectionId}, hasRefreshToken={bool(old_token.tokenRefresh)}")
 
+        # Cooldown: avoid refreshing too frequently if a workflow triggers refresh repeatedly
+        # Only allow a new refresh if at least 10 minutes passed since the token was created/refreshed
+        try:
+            now_ts = get_utc_timestamp()
+            created_ts = float(old_token.createdAt) if old_token.createdAt is not None else 0.0
+            seconds_since_last_refresh = now_ts - created_ts
+            if seconds_since_last_refresh < 10 * 60:
+                logger.info(
+                    f"refresh_token: Skipping refresh for connection {old_token.connectionId} due to cooldown. "
+                    f"Last refresh {int(seconds_since_last_refresh)}s ago (< 600s)."
+                )
+                # Return the existing token to avoid caller errors while preventing provider rate limits
+                return old_token
+        except Exception:
+            # If any issue reading timestamps, proceed with normal refresh to be safe
+            pass
+
         if not old_token.tokenRefresh:
             logger.warning(f"No refresh token available for {old_token.authority}")
             return None

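Review note: the 10-minute cooldown is hard-coded as 10 * 60. If it needs per-environment tuning, it could be lifted into configuration (sketch; the environment variable name is hypothetical, not in the commit):

    # Sketch: configurable cooldown (hypothetical setting)
    import os

    REFRESH_COOLDOWN_SECONDS = int(os.environ.get("TOKEN_REFRESH_COOLDOWN_SECONDS", "600"))

    def within_cooldown(seconds_since_last_refresh: float) -> bool:
        """True when the last refresh is recent enough to skip a new one."""
        return seconds_since_last_refresh < REFRESH_COOLDOWN_SECONDS
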