closed workflow action plan
This commit is contained in:
parent
ebfdd9ab03
commit
9ba45952e4
10 changed files with 390 additions and 123 deletions
|
|
@ -158,7 +158,7 @@ def createTaskPlanningPrompt(context: TaskContext, service) -> str:
|
|||
# Get previous workflow round context for better understanding of follow-up prompts
|
||||
previous_round_context = _getPreviousRoundContext(service, context.workflow)
|
||||
|
||||
return f"""You are a task planning AI that analyzes user requests and creates structured task plans with user-friendly feedback messages.
|
||||
return f"""You are a task planning AI that analyzes user requests and creates structured, self-contained task plans with user-friendly feedback messages.
|
||||
|
||||
USER REQUEST: {user_request}
|
||||
|
||||
|
|
@ -173,8 +173,8 @@ INSTRUCTIONS:
|
|||
use the PREVIOUS WORKFLOW ROUNDS CONTEXT to understand what the user wants to retry or continue
|
||||
3. Group related topics and sequential steps into single, comprehensive tasks
|
||||
4. Focus on business outcomes, not technical operations
|
||||
5. Each task should produce meaningful, usable outputs
|
||||
6. Ensure proper handover between tasks using result labels
|
||||
5. Make each task self-contained: clearly state what to do and what outputs are expected
|
||||
6. Ensure proper handover between tasks (later actions will use your task outputs)
|
||||
7. Detect the language of the user request and include it in languageUserDetected
|
||||
8. Generate user-friendly messages for each task in the user's request language
|
||||
9. Return a JSON object with the exact structure shown below
|
||||
|
|
@ -201,12 +201,14 @@ SPLIT INTO MULTIPLE TASKS:
|
|||
TASK PLANNING PRINCIPLES:
|
||||
- Break down complex requests into logical, sequential steps
|
||||
- Focus on business value and outcomes
|
||||
- Keep tasks at a meaningful level of abstraction
|
||||
- Keep tasks at a meaningful level of abstraction (not implementation details)
|
||||
- Each task should produce results that can be used by subsequent tasks
|
||||
- Ensure clear dependencies and handovers between tasks
|
||||
- Provide clear, actionable user messages in the user's request language
|
||||
- Group related activities to minimize task fragmentation
|
||||
- Only create multiple tasks when dealing with truly different, independent objectives
|
||||
- Make task objectives action-oriented and specific (include scope, data sources to consider, and output intent at high level)
|
||||
- Write success_criteria as measurable acceptance criteria focusing on outputs (what artifacts or insights will exist and how they are validated)
|
||||
|
||||
FOLLOW-UP PROMPT HANDLING:
|
||||
- If the user request is a follow-up (e.g., "try again", "versuche es nochmals", "retry", "continue", "proceed"),
|
||||
|
|
@ -246,6 +248,12 @@ EXAMPLES OF GOOD TASK OBJECTIVES (COMBINING RELATED ACTIVITIES):
|
|||
- "Execute business communication using specified channels and document outcomes"
|
||||
- "Develop comprehensive business strategy with implementation roadmap and success metrics"
|
||||
|
||||
EXAMPLES OF WELL-FORMED SUCCESS CRITERIA (OUTPUT-FOCUSED):
|
||||
- "Deliver a prioritized list of 10–20 candidates with justification"
|
||||
- "Provide a structured JSON with fields: company, ticker, rationale, metrics"
|
||||
- "Produce a presentation outline with 5 sections and bullet points per section"
|
||||
- "Include data sources and date stamped references for traceability"
|
||||
|
||||
EXAMPLES OF GOOD SUCCESS CRITERIA:
|
||||
- "Key insights extracted and ready for business use"
|
||||
- "Professional communication created with clear business value"
|
||||
|
|
@ -417,7 +425,7 @@ USAGE GUIDE:
|
|||
CRITICAL DOCUMENT REFERENCE RULES:
|
||||
- ONLY use the exact labels listed in AVAILABLE DOCUMENTS below, or result labels from previous actions
|
||||
- When generating multiple actions, you may only use as input documents those that are already present in AVAILABLE DOCUMENTS or produced by actions that come earlier in the list. Do NOT use as input any document label that will be produced by a later action.
|
||||
- If AVAILABLE DOCUMENTS shows "NO DOCUMENTS AVAILABLE", you CANNOT create document extraction actions. Instead, create actions that generate new content or inform the user that documents are needed, if you miss something.
|
||||
- If there are no documents available, you CANNOT create document extraction actions. Instead, prefer using web actions (web.search, web.scrape, web.crawl) when external information can satisfy the request; only generate a status/information report if the task truly requires user-provided documents.
|
||||
|
||||
CURRENT WORKFLOW CONTEXT:
|
||||
- Current Round: {current_round}
|
||||
|
|
@ -470,18 +478,30 @@ PREVIOUS TASK HANDOVER CONTEXT:
|
|||
|
||||
ACTION GENERATION PRINCIPLES:
|
||||
- Create meaningful actions per task step
|
||||
- Use comprehensive AI prompts for document processing
|
||||
- Focus on business outcomes, not technical operations
|
||||
- Combine related operations into single actions when possible
|
||||
- Use the task's AI prompt if provided, or create a comprehensive one
|
||||
- Each action should produce meaningful, usable outputs
|
||||
- Select the method that best fulfills the objective based on context (do not default to any specific method).
|
||||
- Each action must be self-contained and executable with the provided parameters
|
||||
- For document extraction, ensure prompts are specific and detailed
|
||||
- Include validation steps in extraction prompts
|
||||
- Include validation steps in extraction prompts where relevant
|
||||
- If this is a retry, learn from previous failures and improve the approach
|
||||
- Address specific issues mentioned in previous review feedback
|
||||
- When specifying expectedDocumentFormats, ensure AI prompts explicitly request pure data without markdown formatting
|
||||
- Generate user-friendly messages for each action in the user's language ({user_language})
|
||||
|
||||
PARAMETER COMPLETENESS REQUIREMENTS:
|
||||
- Every parameter must contain all information needed to execute without implicit context
|
||||
- Use explicit, concrete values (units, languages, formats, limits, date ranges, IDs) when applicable
|
||||
- For search-like parameters (if any method requires a query), derive the query from the task objective AND ALL success criteria dimensions. Include:
|
||||
- Key entities and domain terms from the objective
|
||||
- All distinct facets from success_criteria (e.g., valuation AND AI potential AND know-how needs)
|
||||
- Geography/localization (e.g., Schweiz/Suisse/Switzerland; use multilingual synonyms when helpful)
|
||||
- Time horizon or recency if relevant
|
||||
- Boolean operators and synonyms to increase precision (use AND/OR, quotes, parentheses)
|
||||
- Avoid single-topic or generic queries focused only on one facet (e.g., pure valuation metrics)
|
||||
- When facets are truly distinct, create 1–3 focused actions with precise queries rather than one vague catch-all
|
||||
- Document list parameters must reference only existing labels or prior action outputs; do not reference future outputs
|
||||
|
||||
USER LANGUAGE: {user_language} - All user messages must be generated in this language.
|
||||
|
||||
DOCUMENT ROUTING GUIDANCE:
|
||||
|
|
@ -494,7 +514,7 @@ DOCUMENT ROUTING GUIDANCE:
|
|||
INSTRUCTIONS:
|
||||
- Generate actions to accomplish this task step using available documents, connections, and previous results
|
||||
- Use docItem for single documents and docList for groups of documents as shown in AVAILABLE DOCUMENTS
|
||||
- If AVAILABLE DOCUMENTS shows "NO DOCUMENTS AVAILABLE", you cannot create document extraction actions. Instead, create actions that generate new content or inform the user that documents are needed.
|
||||
- If there are no documents available, do not create document extraction actions. Select methods strictly based on the task objective; choose web actions when external information is required. Otherwise, generate a status/information report requesting needed inputs.
|
||||
- Always pass documentList as a LIST of references (docItem and/or docList) - this list CANNOT be empty for document extraction actions
|
||||
- For referencing documents from previous actions, use the format "round{{round_number}}_task{{task_number}}_action{{action_number}}_{{context}}"
|
||||
- For resultLabel, use the format: "round{current_round}_task{{task_id}}_action{{action_number}}_{{short_label}}" where:
|
||||
|
|
@ -639,23 +659,9 @@ EXAMPLES OF GOOD ACTIONS:
|
|||
]
|
||||
}}
|
||||
|
||||
6. When no documents are available (NO DOCUMENTS AVAILABLE scenario):
|
||||
{{
|
||||
"method": "document",
|
||||
"action": "generateReport",
|
||||
"parameters": {{
|
||||
"documentList": [],
|
||||
"title": "Workflow Status Report"
|
||||
}},
|
||||
"resultLabel": "round{current_round}_task{current_task}_action1_status_report",
|
||||
"description": "Generate a status report informing the user that no documents are available for processing and requesting document upload or alternative input.",
|
||||
"userMessage": "Ich erstelle einen Statusbericht, der Sie darüber informiert, dass keine Dokumente zur Verarbeitung verfügbar sind und um Dokumente oder alternative Eingaben bittet."
|
||||
}}
|
||||
|
||||
IMPORTANT NOTES:
|
||||
- Respond with ONLY the JSON object. Do not include any explanatory text.
|
||||
- Before creating any document extraction action, verify that AVAILABLE DOCUMENTS contains actual document references.
|
||||
- If AVAILABLE DOCUMENTS shows "NO DOCUMENTS AVAILABLE", use example 6 above to create a status report action instead of document extraction.
|
||||
- Always include a user-friendly userMessage for each action in the user's language ({user_language}).
|
||||
- The examples above show German user messages as reference - adapt the language to match the USER LANGUAGE specified above."""
|
||||
|
||||
|
|
|
|||
|
|
@ -81,7 +81,18 @@ class ConnectorTavily(WebSearchBase, WebCrawlBase, WebScrapeBase):
|
|||
"""
|
||||
# Step 1: Search
|
||||
try:
|
||||
search_results = await self._search(request.query, request.max_results)
|
||||
search_results = await self._search(
|
||||
query=request.query,
|
||||
max_results=request.max_results,
|
||||
search_depth=request.search_depth,
|
||||
time_range=request.time_range,
|
||||
topic=request.topic,
|
||||
include_domains=request.include_domains,
|
||||
exclude_domains=request.exclude_domains,
|
||||
language=request.language,
|
||||
include_answer=request.include_answer,
|
||||
include_raw_content=request.include_raw_content,
|
||||
)
|
||||
except Exception as e:
|
||||
return WebSearchActionResult(success=False, error=str(e))
|
||||
|
||||
|
|
@ -113,14 +124,29 @@ class ConnectorTavily(WebSearchBase, WebCrawlBase, WebScrapeBase):
|
|||
"""Turns a query in a list of urls with extracted content."""
|
||||
# Step 1: Search
|
||||
try:
|
||||
search_results = await self._search(request.query, request.max_results)
|
||||
search_results = await self._search(
|
||||
query=request.query,
|
||||
max_results=request.max_results,
|
||||
search_depth=request.search_depth,
|
||||
time_range=request.time_range,
|
||||
topic=request.topic,
|
||||
include_domains=request.include_domains,
|
||||
exclude_domains=request.exclude_domains,
|
||||
language=request.language,
|
||||
include_answer=request.include_answer,
|
||||
include_raw_content=request.include_raw_content,
|
||||
)
|
||||
except Exception as e:
|
||||
return WebScrapeActionResult(success=False, error=str(e))
|
||||
|
||||
# Step 2: Crawl
|
||||
try:
|
||||
urls = [result.url for result in search_results]
|
||||
crawl_results = await self._crawl(urls)
|
||||
crawl_results = await self._crawl(
|
||||
urls,
|
||||
extract_depth=request.extract_depth,
|
||||
format=request.format,
|
||||
)
|
||||
except Exception as e:
|
||||
return WebScrapeActionResult(success=False, error=str(e))
|
||||
|
||||
|
|
@ -132,7 +158,19 @@ class ConnectorTavily(WebSearchBase, WebCrawlBase, WebScrapeBase):
|
|||
|
||||
return result
|
||||
|
||||
async def _search(self, query: str, max_results: int) -> list[TavilySearchResult]:
|
||||
async def _search(
|
||||
self,
|
||||
query: str,
|
||||
max_results: int,
|
||||
search_depth: str | None = None,
|
||||
time_range: str | None = None,
|
||||
topic: str | None = None,
|
||||
include_domains: list[str] | None = None,
|
||||
exclude_domains: list[str] | None = None,
|
||||
language: str | None = None,
|
||||
include_answer: bool | None = None,
|
||||
include_raw_content: bool | None = None,
|
||||
) -> list[TavilySearchResult]:
|
||||
"""Calls the Tavily API to perform a web search."""
|
||||
# Make sure max_results is within the allowed range
|
||||
min_results = get_web_search_min_results()
|
||||
|
|
@ -141,7 +179,26 @@ class ConnectorTavily(WebSearchBase, WebCrawlBase, WebScrapeBase):
|
|||
raise ValueError(f"max_results must be between {min_results} and {max_allowed_results}")
|
||||
|
||||
# Perform actual API call
|
||||
response = await self.client.search(query=query, max_results=max_results)
|
||||
# Build kwargs only for provided options to avoid API rejections
|
||||
kwargs: dict = {"query": query, "max_results": max_results}
|
||||
if search_depth is not None:
|
||||
kwargs["search_depth"] = search_depth
|
||||
if time_range is not None:
|
||||
kwargs["time_range"] = time_range
|
||||
if topic is not None:
|
||||
kwargs["topic"] = topic
|
||||
if include_domains is not None:
|
||||
kwargs["include_domains"] = include_domains
|
||||
if exclude_domains is not None:
|
||||
kwargs["exclude_domains"] = exclude_domains
|
||||
if language is not None:
|
||||
kwargs["language"] = language
|
||||
if include_answer is not None:
|
||||
kwargs["include_answer"] = include_answer
|
||||
if include_raw_content is not None:
|
||||
kwargs["include_raw_content"] = include_raw_content
|
||||
|
||||
response = await self.client.search(**kwargs)
|
||||
|
||||
return [
|
||||
TavilySearchResult(title=result["title"], url=result["url"])
|
||||
|
|
@ -174,7 +231,12 @@ class ConnectorTavily(WebSearchBase, WebCrawlBase, WebScrapeBase):
|
|||
success=True, documents=[document], resultLabel="web_search_results"
|
||||
)
|
||||
|
||||
async def _crawl(self, urls: list) -> list[TavilyCrawlResult]:
|
||||
async def _crawl(
|
||||
self,
|
||||
urls: list,
|
||||
extract_depth: str | None = None,
|
||||
format: str | None = None,
|
||||
) -> list[TavilyCrawlResult]:
|
||||
"""Calls the Tavily API to extract text content from URLs with retry logic."""
|
||||
import asyncio
|
||||
|
||||
|
|
@ -185,8 +247,13 @@ class ConnectorTavily(WebSearchBase, WebCrawlBase, WebScrapeBase):
|
|||
for attempt in range(max_retries + 1):
|
||||
try:
|
||||
# Use asyncio.wait_for for timeout
|
||||
# Build kwargs for extract
|
||||
kwargs_extract: dict = {"urls": urls}
|
||||
kwargs_extract["extract_depth"] = extract_depth or "advanced"
|
||||
kwargs_extract["format"] = format or "text"
|
||||
|
||||
response = await asyncio.wait_for(
|
||||
self.client.extract(urls=urls, extract_depth="advanced", format="text"),
|
||||
self.client.extract(**kwargs_extract),
|
||||
timeout=timeout
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@
|
|||
from abc import ABC, abstractmethod
|
||||
from modules.interfaces.interfaceChatModel import ActionDocument, ActionResult
|
||||
from pydantic import BaseModel, Field, HttpUrl
|
||||
from typing import List
|
||||
from typing import List, Optional, Literal
|
||||
from modules.shared.configuration import APP_CONFIG
|
||||
|
||||
|
||||
|
|
@ -31,6 +31,17 @@ def get_web_search_min_results() -> int:
|
|||
class WebSearchRequest(BaseModel):
    """Validated input for a web search.

    ``query`` and ``max_results`` are bounded by the limits returned by the
    ``get_web_search_*`` helpers; those helpers are called once, when this
    class body is evaluated. Every other field is an optional tuning option
    that defaults to ``None``.
    """

    # Required search input, bounded by the configured limits.
    query: str = Field(min_length=1, max_length=get_web_search_max_query_length())
    max_results: int = Field(ge=get_web_search_min_results(), le=get_web_search_max_results())

    # Tavily tuning options
    search_depth: Optional[Literal["basic", "advanced"]] = Field(default=None)
    time_range: Optional[Literal["d", "w", "m", "y"]] = Field(
        default=None, description="Limit results to last day/week/month/year"
    )
    # "finance" is accepted in addition to the original values so that a
    # documented Tavily topic is no longer rejected by validation.
    # NOTE(review): "academic" is kept only for backward compatibility; it
    # does not appear among Tavily's documented topics - confirm and deprecate.
    topic: Optional[Literal["general", "news", "academic", "finance"]] = Field(default=None)
    include_domains: Optional[List[str]] = Field(default=None)
    exclude_domains: Optional[List[str]] = Field(default=None)
    language: Optional[str] = Field(default=None, description="ISO language code like 'en', 'de'")
    include_answer: Optional[bool] = Field(default=None)
    include_raw_content: Optional[bool] = Field(default=None)
|
||||
|
||||
|
||||
class WebSearchResultItem(BaseModel):
|
||||
|
|
@ -68,6 +79,9 @@ class WebSearchBase(ABC):
|
|||
|
||||
class WebCrawlRequest(BaseModel):
    """Validated input for a crawl/extract call over a list of URLs.

    ``urls`` is required; the two extract options are optional and default
    to ``None``.
    """

    # Pages to extract content from; each entry is validated as an HTTP URL.
    urls: List[HttpUrl]

    # Tavily extract options
    extract_depth: Optional[Literal["basic", "advanced"]] = None
    format: Optional[Literal["text", "markdown"]] = None
|
||||
|
||||
|
||||
class WebCrawlResultItem(BaseModel):
|
||||
|
|
@ -108,6 +122,18 @@ class WebCrawlBase(ABC):
|
|||
class WebScrapeRequest(BaseModel):
    """Validated input for a scrape: a web search followed by content extraction.

    ``query`` and ``max_results`` are bounded by the limits returned by the
    ``get_web_search_*`` helpers; those helpers are called once, when this
    class body is evaluated. The remaining fields are optional and default
    to ``None``; they mirror the search options of ``WebSearchRequest`` and
    the extract options of ``WebCrawlRequest``.
    """

    # Required search input, bounded by the configured limits.
    query: str = Field(min_length=1, max_length=get_web_search_max_query_length())
    max_results: int = Field(ge=get_web_search_min_results(), le=get_web_search_max_results())

    # Pass-through search options
    search_depth: Optional[Literal["basic", "advanced"]] = Field(default=None)
    time_range: Optional[Literal["d", "w", "m", "y"]] = Field(
        default=None, description="Limit results to last day/week/month/year"
    )
    # "finance" is accepted in addition to the original values so that a
    # documented Tavily topic is no longer rejected by validation.
    # NOTE(review): "academic" is kept only for backward compatibility; it
    # does not appear among Tavily's documented topics - confirm and deprecate.
    topic: Optional[Literal["general", "news", "academic", "finance"]] = Field(default=None)
    include_domains: Optional[List[str]] = Field(default=None)
    exclude_domains: Optional[List[str]] = Field(default=None)
    language: Optional[str] = Field(default=None, description="ISO language code like 'en', 'de'")
    include_answer: Optional[bool] = Field(default=None)
    include_raw_content: Optional[bool] = Field(default=None)

    # Extract options
    extract_depth: Optional[Literal["basic", "advanced"]] = Field(default=None)
    format: Optional[Literal["text", "markdown"]] = Field(default=None)
|
||||
|
||||
|
||||
class WebScrapeResultItem(BaseModel):
|
||||
|
|
|
|||
|
|
@ -41,6 +41,8 @@ class MethodAi(MethodBase):
|
|||
try:
|
||||
aiPrompt = parameters.get("aiPrompt")
|
||||
documentList = parameters.get("documentList", [])
|
||||
if isinstance(documentList, str):
|
||||
documentList = [documentList]
|
||||
expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
|
||||
processingMode = parameters.get("processingMode", "basic")
|
||||
includeMetadata = parameters.get("includeMetadata", True)
|
||||
|
|
@ -171,10 +173,43 @@ class MethodAi(MethodBase):
|
|||
if context:
|
||||
logger.info(f"Including context from {len(documentList)} documents")
|
||||
|
||||
# Encourage longer, structured outputs with a min-length hint
|
||||
min_tokens_hint = "\n\nPlease ensure the response is substantial and complete."
|
||||
call_prompt = enhanced_prompt + min_tokens_hint
|
||||
|
||||
if processingMode in ["advanced", "detailed"]:
|
||||
result = await self.service.callAiTextAdvanced(enhanced_prompt, context)
|
||||
result = await self.service.callAiTextAdvanced(call_prompt, context)
|
||||
else:
|
||||
result = await self.service.callAiTextBasic(enhanced_prompt, context)
|
||||
result = await self.service.callAiTextBasic(call_prompt, context)
|
||||
|
||||
# If expected JSON and too short/not JSON, retry with stricter JSON guardrails
|
||||
if output_extension == ".json":
|
||||
import json
|
||||
cleaned = (result or "").strip()
|
||||
if cleaned.startswith('```json'):
|
||||
cleaned = cleaned[7:]
|
||||
if cleaned.endswith('```'):
|
||||
cleaned = cleaned[:-3]
|
||||
cleaned = cleaned.strip()
|
||||
needs_retry = False
|
||||
try:
|
||||
parsed = json.loads(cleaned)
|
||||
# Heuristic: small dict -> possibly underfilled
|
||||
if isinstance(parsed, dict) and len(parsed.keys()) <= 2:
|
||||
needs_retry = True
|
||||
except Exception:
|
||||
needs_retry = True
|
||||
|
||||
if needs_retry:
|
||||
guardrail_prompt = (
|
||||
enhanced_prompt
|
||||
+ "\n\nCRITICAL: Return ONLY valid JSON, no markdown, no code fences. "
|
||||
"Include all requested fields with detailed content."
|
||||
)
|
||||
try:
|
||||
result = await self.service.callAiTextAdvanced(guardrail_prompt, context)
|
||||
except Exception:
|
||||
result = cleaned # fallback to first attempt
|
||||
|
||||
# Create result document
|
||||
fileName = f"ai_{processingMode}_{self._format_timestamp_for_filename()}{output_extension}"
|
||||
|
|
|
|||
|
|
@ -5,7 +5,6 @@ Handles document operations using the document service.
|
|||
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from typing import Dict, Any, List, Optional
|
||||
from datetime import datetime, UTC
|
||||
|
||||
|
|
@ -34,13 +33,15 @@ class MethodDocument(MethodBase):
|
|||
Extract content from any document using AI prompt.
|
||||
|
||||
Parameters:
|
||||
documentList (str): Document list reference
|
||||
documentList (list): Document list reference(s)
|
||||
aiPrompt (str): AI prompt for extraction
|
||||
expectedDocumentFormats (list, optional): Output formats
|
||||
includeMetadata (bool, optional): Include metadata (default: True)
|
||||
"""
|
||||
try:
|
||||
documentList = parameters.get("documentList")
|
||||
if isinstance(documentList, str):
|
||||
documentList = [documentList]
|
||||
aiPrompt = parameters.get("aiPrompt")
|
||||
expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
|
||||
includeMetadata = parameters.get("includeMetadata", True)
|
||||
|
|
@ -188,6 +189,8 @@ class MethodDocument(MethodBase):
|
|||
"""
|
||||
try:
|
||||
document_list = parameters.get("documentList", [])
|
||||
if isinstance(document_list, str):
|
||||
document_list = [document_list]
|
||||
expected_document_formats = parameters.get("expectedDocumentFormats", [])
|
||||
original_documents = parameters.get("originalDocuments", [])
|
||||
include_metadata = parameters.get("includeMetadata", True)
|
||||
|
|
@ -606,13 +609,15 @@ class MethodDocument(MethodBase):
|
|||
Generate HTML report from multiple documents using AI.
|
||||
|
||||
Parameters:
|
||||
documentList (str): Document list reference
|
||||
documentList (list): Document list reference(s)
|
||||
prompt (str): AI prompt for report generation
|
||||
title (str, optional): Report title (default: "Summary Report")
|
||||
includeMetadata (bool, optional): Include metadata (default: True)
|
||||
"""
|
||||
try:
|
||||
documentList = parameters.get("documentList")
|
||||
if isinstance(documentList, str):
|
||||
documentList = [documentList]
|
||||
prompt = parameters.get("prompt")
|
||||
title = parameters.get("title", "Summary Report")
|
||||
includeMetadata = parameters.get("includeMetadata", True)
|
||||
|
|
@ -708,13 +713,9 @@ class MethodDocument(MethodBase):
|
|||
logger.info(f" Skipping document with no readable text content")
|
||||
|
||||
if not validDocuments:
|
||||
# If no valid documents, create a simple report
|
||||
html = ["<html><head><meta charset='utf-8'><title>" + title + "</title></head><body>"]
|
||||
html.append(f"<h1>{title}</h1>")
|
||||
html.append(f"<p><b>Generated:</b> {int(get_utc_timestamp())}</p>")
|
||||
html.append("<p><em>No content available in the provided documents.</em></p>")
|
||||
html.append("</body></html>")
|
||||
return '\n'.join(html)
|
||||
# No readable content; return a minimal valid HTML document
|
||||
timestamp = int(get_utc_timestamp())
|
||||
return f"<!DOCTYPE html><html><head><meta charset=\"UTF-8\"><title>{title}</title></head><body><h1>{title}</h1><p>Keine auswertbaren Inhalte gefunden.</p><p>Generated: {timestamp}</p></body></html>"
|
||||
|
||||
# Create AI prompt for comprehensive report generation using user's prompt
|
||||
combinedContent = "\n\n".join(allContent)
|
||||
|
|
@ -723,25 +724,34 @@ class MethodDocument(MethodBase):
|
|||
|
||||
Report Title: {title}
|
||||
|
||||
Additional Requirements:
|
||||
1. Create a professional, well-formatted HTML report
|
||||
2. Include an executive summary at the beginning
|
||||
3. Organize information logically with clear sections
|
||||
4. Highlight key findings and insights
|
||||
5. Include relevant data, statistics, and conclusions
|
||||
6. Use proper HTML formatting with headers, lists, and styling
|
||||
7. Make it readable and professional
|
||||
OUTPUT POLICY:
|
||||
- Return ONLY a complete, raw HTML document.
|
||||
- Start with: <!DOCTYPE html>
|
||||
- Must include: <html>, <head> (with <meta charset="UTF-8"> and <title>), and <body>.
|
||||
- The response must be valid, self-contained HTML suitable for saving as .html.
|
||||
|
||||
Document Content:
|
||||
---START OF DOCUMENT CONTENT-----------------------------------------------
|
||||
Structure:
|
||||
- Title and short subtitle
|
||||
- Executive summary
|
||||
- Sections with clear headings
|
||||
- Use tables for structured data when helpful
|
||||
- Key findings and recommendations
|
||||
- Generation date and number of documents
|
||||
|
||||
Quality and design requirements:
|
||||
- Use clear, professional, and accessible styling in a <style> block
|
||||
- Apply clean layout, spacing, and visual hierarchy for headings
|
||||
- Keep HTML and CSS standards-compliant and lightweight
|
||||
|
||||
SOURCE DOCUMENT CONTENT:
|
||||
---START---
|
||||
{combinedContent}
|
||||
---END OF DOCUMENT CONTENT-----------------------------------------------
|
||||
Generate a complete HTML report that addresses the user's specific requirements and integrates all the information into a cohesive, professional document.
|
||||
---END---
|
||||
"""
|
||||
|
||||
# Call AI to generate the report
|
||||
logger.info(f"Generating AI report for {len(validDocuments)} documents")
|
||||
aiReport = await self.service.callAiTextBasic(aiPrompt, combinedContent)
|
||||
aiReport = await self.service.callAiTextAdvanced(aiPrompt, combinedContent)
|
||||
|
||||
# If AI call fails, return error - AI is crucial for report generation
|
||||
if not aiReport or aiReport.strip() == "":
|
||||
|
|
@ -751,39 +761,21 @@ Generate a complete HTML report that addresses the user's specific requirements
|
|||
# Clean up the AI response and ensure it's valid HTML
|
||||
aiReport = aiReport.strip()
|
||||
|
||||
# Strip fenced code blocks like ```html ... ``` if present
|
||||
# Normalize: strip code fences if present
|
||||
if aiReport.startswith("```") and aiReport.endswith("```"):
|
||||
lines = aiReport.split('\n')
|
||||
if len(lines) >= 2:
|
||||
# remove first and last fence lines (language tag allowed on first)
|
||||
aiReport = '\n'.join(lines[1:-1]).strip()
|
||||
|
||||
# Check if AI response starts with DOCTYPE or html tag (complete HTML document)
|
||||
if aiReport.startswith('<!DOCTYPE') or aiReport.startswith('<html'):
|
||||
# AI returned complete HTML document, use it directly
|
||||
return aiReport
|
||||
else:
|
||||
# AI returned HTML content without document structure, wrap it
|
||||
|
||||
# Check if AI response already contains a title/header
|
||||
has_title = any(title.lower() in aiReport.lower() for title in [title, "outlook", "report", "status"])
|
||||
|
||||
# Wrap the AI content in proper HTML structure
|
||||
html = ["<html><head><meta charset='utf-8'><title>" + title + "</title></head><body>"]
|
||||
|
||||
# Only add the title if the AI response doesn't already have one
|
||||
if not has_title:
|
||||
html.append(f"<h1>{title}</h1>")
|
||||
|
||||
html.append(f"<p><b>Generated:</b> {int(get_utc_timestamp())}</p>")
|
||||
html.append(f"<p><b>Total Documents Analyzed:</b> {len(validDocuments)}</p>")
|
||||
html.append("<hr>")
|
||||
html.append(aiReport)
|
||||
html.append("</body></html>")
|
||||
return '\n'.join(html)
|
||||
cleaned = aiReport.strip()
|
||||
|
||||
# Return exactly what we have (no wrapping)
|
||||
return cleaned
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error generating AI report: {str(e)}")
|
||||
# Re-raise the error - AI is crucial for report generation
|
||||
raise
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1478,7 +1478,11 @@ class MethodOutlook(MethodBase):
|
|||
attachments = parameters.get("attachments", [])
|
||||
tone = parameters.get("tone", "professional")
|
||||
documentList = parameters.get("documentList", [])
|
||||
if isinstance(documentList, str):
|
||||
documentList = [documentList]
|
||||
attachmentDocumentList = parameters.get("attachmentDocumentList", [])
|
||||
if isinstance(attachmentDocumentList, str):
|
||||
attachmentDocumentList = [attachmentDocumentList]
|
||||
expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
|
||||
|
||||
if not context:
|
||||
|
|
|
|||
|
|
@ -829,7 +829,7 @@ class MethodSharepoint(MethodBase):
|
|||
Read documents from SharePoint across all accessible sites
|
||||
|
||||
Parameters:
|
||||
documentList (str): Reference to the document list to read
|
||||
documentList (list): Reference(s) to the document list to read
|
||||
connectionReference (str): Reference to the Microsoft connection
|
||||
pathObject (str, optional): Path object to locate documents. This can ONLY be a reference to a result from sharepoint.findDocumentPath action
|
||||
pathQuery (str, optional): Path query to locate documents, only if no pathObject is provided (e.g., "/Documents/Project1", "*" for all sites)
|
||||
|
|
@ -837,6 +837,8 @@ class MethodSharepoint(MethodBase):
|
|||
"""
|
||||
try:
|
||||
documentList = parameters.get("documentList")
|
||||
if isinstance(documentList, str):
|
||||
documentList = [documentList]
|
||||
connectionReference = parameters.get("connectionReference")
|
||||
pathQuery = parameters.get("pathQuery", "*")
|
||||
pathObject = parameters.get("pathObject")
|
||||
|
|
@ -886,8 +888,7 @@ class MethodSharepoint(MethodBase):
|
|||
return ActionResult.isFailure(error=f"Error resolving pathObject reference: {str(e)}")
|
||||
|
||||
# Get documents from reference - ensure documentList is a list, not a string
|
||||
if isinstance(documentList, str):
|
||||
documentList = [documentList] # Convert string to list
|
||||
# documentList is already normalized above
|
||||
chatDocuments = self.service.getChatDocumentsFromDocumentList(documentList)
|
||||
|
||||
if not chatDocuments:
|
||||
|
|
@ -1107,13 +1108,15 @@ class MethodSharepoint(MethodBase):
|
|||
connectionReference (str): Reference to the Microsoft connection
|
||||
pathObject (str, optional): Path object to locate documents. This can ONLY be a reference to a result from sharepoint.findDocumentPath action
|
||||
pathQuery (str, optional): Path query to locate documents, only if no pathObject is provided (e.g., "/Documents/Project1", "*" for all sites)
|
||||
documentList (str): Reference to the document list to upload
|
||||
documentList (list): Reference(s) to the document list to upload
|
||||
fileNames (List[str]): List of names for the uploaded files
|
||||
"""
|
||||
try:
|
||||
connectionReference = parameters.get("connectionReference")
|
||||
pathQuery = parameters.get("pathQuery")
|
||||
documentList = parameters.get("documentList")
|
||||
if isinstance(documentList, str):
|
||||
documentList = [documentList]
|
||||
fileNames = parameters.get("fileNames")
|
||||
pathObject = parameters.get("pathObject")
|
||||
|
||||
|
|
|
|||
|
|
@ -25,40 +25,53 @@ class MethodWeb(MethodBase):
|
|||
|
||||
@action
|
||||
async def search(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||
"""Perform a web search and outputs a csv file with a list of found URLs
|
||||
|
||||
Each result contains columns "url" and "title".
|
||||
"""Perform a web search and output a CSV with the found URLs. Each result row contains columns "url" and "title".
|
||||
|
||||
Parameters:
|
||||
query (str): Search query to perform
|
||||
maxResults (int, optional): Maximum number of results (default: 10)
|
||||
query (str, required): Search query.
|
||||
maxResults (int, optional): Max number of results. Default: 10.
|
||||
searchDepth ("basic"|"advanced", optional): Search depth. Default: provider default.
|
||||
timeRange ("d"|"w"|"m"|"y", optional): Limit to last day/week/month/year.
|
||||
topic ("general"|"news"|"academic", optional): Result domain preference.
|
||||
includeDomains (list[str], optional): Only include these domains.
|
||||
excludeDomains (list[str], optional): Exclude these domains.
|
||||
language (str, optional): ISO code like "de", "en" to bias results.
|
||||
includeAnswer (bool, optional): Ask provider to generate a short answer.
|
||||
includeRawContent (bool, optional): Include raw content where possible.
|
||||
"""
|
||||
|
||||
try:
|
||||
# Prepare request data
|
||||
# Prepare request data (no hard-coded region/language bias; optional filters are passed through from parameters)
|
||||
raw_query = parameters.get("query")
|
||||
max_results = parameters.get("maxResults", 10)
|
||||
|
||||
if not raw_query or not isinstance(raw_query, str):
|
||||
return ActionResult(success=False, error="Search query is required")
|
||||
|
||||
web_search_request = WebSearchRequest(
|
||||
query=parameters.get("query"),
|
||||
max_results=parameters.get("maxResults", 10),
|
||||
query=raw_query.strip(),
|
||||
max_results=max_results,
|
||||
search_depth=parameters.get("searchDepth"),
|
||||
time_range=parameters.get("timeRange"),
|
||||
topic=parameters.get("topic"),
|
||||
include_domains=parameters.get("includeDomains"),
|
||||
exclude_domains=parameters.get("excludeDomains"),
|
||||
language=parameters.get("language"),
|
||||
include_answer=parameters.get("includeAnswer"),
|
||||
include_raw_content=parameters.get("includeRawContent"),
|
||||
)
|
||||
|
||||
# Perform request
|
||||
web_interface = await WebInterface.create()
|
||||
web_search_result = await web_interface.search(web_search_request)
|
||||
|
||||
# Convert search results to CSV format
|
||||
# Convert search results to CSV format (generic)
|
||||
if web_search_result.success and web_search_result.documents:
|
||||
csv_content = web_interface.convert_web_search_result_to_csv(web_search_result)
|
||||
|
||||
# Create CSV document
|
||||
csv_document = web_interface.create_csv_action_document(
|
||||
csv_content,
|
||||
f"web_search_results.csv"
|
||||
)
|
||||
|
||||
return ActionResult(
|
||||
success=True,
|
||||
documents=[csv_document]
|
||||
csv_content, f"web_search_results.csv"
|
||||
)
|
||||
return ActionResult(success=True, documents=[csv_document])
|
||||
else:
|
||||
return web_search_result
|
||||
|
||||
|
|
@ -105,15 +118,21 @@ class MethodWeb(MethodBase):
|
|||
|
||||
@action
|
||||
async def crawl(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||
"""Crawls a list of URLs and extracts information from them.
|
||||
"""Crawl a list of URLs and extract text content.
|
||||
|
||||
Parameters:
|
||||
documentList (str): Document list reference containing URL lists from search results
|
||||
expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description
|
||||
documentList (list[str]|str, required): Reference(s) to documents containing URLs (e.g., CSV from search). Can be a single ref or list.
|
||||
expectedDocumentFormats (list, optional): Hint for downstream handling.
|
||||
extractDepth ("basic"|"advanced", optional): Extraction depth. Default: "advanced".
|
||||
format ("text"|"markdown", optional): Output format. Default: "text".
|
||||
"""
|
||||
try:
|
||||
document_list = parameters.get("documentList")
|
||||
|
||||
# Normalize to list if a single string reference is provided
|
||||
if isinstance(document_list, str):
|
||||
document_list = [document_list]
|
||||
|
||||
if not document_list:
|
||||
return ActionResult(
|
||||
success=False, error="No document list reference provided."
|
||||
|
|
@ -214,24 +233,72 @@ class MethodWeb(MethodBase):
|
|||
unique_urls = list(dict.fromkeys(all_urls))
|
||||
logger.info(f"Extracted {len(unique_urls)} unique URLs from {len(chat_documents)} documents")
|
||||
|
||||
# Prepare request data
|
||||
web_crawl_request = WebCrawlRequest(urls=unique_urls)
|
||||
# Prepare request data with normalization
|
||||
allowed_extract_depth = {"basic", "advanced"}
|
||||
allowed_formats = {"text", "markdown"}
|
||||
extract_depth = parameters.get("extractDepth")
|
||||
if extract_depth and extract_depth not in allowed_extract_depth:
|
||||
logger.warning(f"Invalid extractDepth '{extract_depth}' provided. Falling back to 'advanced'.")
|
||||
extract_depth = "advanced"
|
||||
fmt = parameters.get("format")
|
||||
if fmt and fmt not in allowed_formats:
|
||||
logger.warning(f"Invalid format '{fmt}' provided. Falling back to 'text'.")
|
||||
fmt = "text"
|
||||
|
||||
web_crawl_request = WebCrawlRequest(
|
||||
urls=unique_urls,
|
||||
extract_depth=extract_depth,
|
||||
format=fmt,
|
||||
)
|
||||
|
||||
# Perform request
|
||||
web_interface = await WebInterface.create()
|
||||
web_crawl_result = await web_interface.crawl(web_crawl_request)
|
||||
|
||||
# Convert to proper JSON format
|
||||
# Convert and enrich with concise summaries per URL for better context
|
||||
if web_crawl_result.success:
|
||||
json_content = web_interface.convert_web_result_to_json(web_crawl_result)
|
||||
try:
|
||||
doc = web_crawl_result.documents[0]
|
||||
results = getattr(doc.documentData, "results", [])
|
||||
enriched = []
|
||||
# Summarize each result briefly using AI for added context
|
||||
for item in results:
|
||||
url = str(getattr(item, "url", ""))
|
||||
content = str(getattr(item, "content", ""))
|
||||
summary = ""
|
||||
try:
|
||||
if content:
|
||||
prompt = (
|
||||
"Summarize the following webpage content in 3-5 concise bullet points. "
|
||||
"Focus on key points, figures, named entities (companies/institutions), and location context. "
|
||||
"Return only bullet points without any preface."
|
||||
)
|
||||
context = content[:4000]
|
||||
summary = await self.service.callAiTextBasic(prompt, context)
|
||||
summary = summary.strip()
|
||||
except Exception:
|
||||
summary = ""
|
||||
enriched.append({
|
||||
"url": url,
|
||||
"summary": summary,
|
||||
"snippet": content[:500]
|
||||
})
|
||||
|
||||
import json as _json
|
||||
payload = {
|
||||
"success": True,
|
||||
"total_count": len(enriched),
|
||||
"results": enriched,
|
||||
}
|
||||
json_content = _json.dumps(payload, ensure_ascii=False, indent=2)
|
||||
except Exception:
|
||||
# Fallback to original conversion
|
||||
json_content = web_interface.convert_web_result_to_json(web_crawl_result)
|
||||
|
||||
json_document = web_interface.create_json_action_document(
|
||||
json_content,
|
||||
f"web_crawl_results.json"
|
||||
)
|
||||
return ActionResult(
|
||||
success=True,
|
||||
documents=[json_document]
|
||||
json_content, f"web_crawl_results.json"
|
||||
)
|
||||
return ActionResult(success=True, documents=[json_document])
|
||||
else:
|
||||
return web_crawl_result
|
||||
|
||||
|
|
@ -241,17 +308,44 @@ class MethodWeb(MethodBase):
|
|||
|
||||
@action
|
||||
async def scrape(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||
"""Scrapes web content by searching for URLs and then extracting their content.
|
||||
|
||||
Combines search and crawl operations in one step.
|
||||
"""Search and then crawl the found URLs in one step. Use for market analysis, web research, and internet searches.
|
||||
|
||||
Parameters:
|
||||
query (str): Search query to perform
|
||||
maxResults (int, optional): Maximum number of results (default: 10)
|
||||
query (str, required): Search query.
|
||||
maxResults (int, optional): Max number of results. Default: 10.
|
||||
searchDepth ("basic"|"advanced", optional): Search depth.
|
||||
timeRange ("d"|"w"|"m"|"y", optional): Time window.
|
||||
topic ("general"|"news"|"academic", optional): Result domain preference.
|
||||
includeDomains (list[str], optional): Only include these domains.
|
||||
excludeDomains (list[str], optional): Exclude these domains.
|
||||
language (str, optional): ISO language bias.
|
||||
includeAnswer (bool, optional): Ask provider to include an answer.
|
||||
includeRawContent (bool, optional): Include raw content where possible.
|
||||
extractDepth ("basic"|"advanced", optional): Crawl extraction depth. Default: "advanced".
|
||||
format ("text"|"markdown", optional): Crawl output format. Default: "text".
|
||||
"""
|
||||
try:
|
||||
query = parameters.get("query")
|
||||
max_results = parameters.get("maxResults", 10)
|
||||
# Normalize optional enums to avoid validation errors
|
||||
allowed_search_depth = {"basic", "advanced"}
|
||||
allowed_extract_depth = {"basic", "advanced"}
|
||||
allowed_formats = {"text", "markdown"}
|
||||
|
||||
search_depth = parameters.get("searchDepth")
|
||||
if search_depth and search_depth not in allowed_search_depth:
|
||||
logger.warning(f"Invalid searchDepth '{search_depth}' provided. Falling back to None.")
|
||||
search_depth = None
|
||||
|
||||
extract_depth = parameters.get("extractDepth")
|
||||
if extract_depth and extract_depth not in allowed_extract_depth:
|
||||
logger.warning(f"Invalid extractDepth '{extract_depth}' provided. Falling back to 'advanced'.")
|
||||
extract_depth = "advanced"
|
||||
|
||||
fmt = parameters.get("format")
|
||||
if fmt and fmt not in allowed_formats:
|
||||
logger.warning(f"Invalid format '{fmt}' provided. Falling back to 'text'.")
|
||||
fmt = "text"
|
||||
|
||||
if not query:
|
||||
return ActionResult(success=False, error="Search query is required")
|
||||
|
|
@ -260,6 +354,16 @@ class MethodWeb(MethodBase):
|
|||
web_scrape_request = WebScrapeRequest(
|
||||
query=query,
|
||||
max_results=max_results,
|
||||
search_depth=search_depth,
|
||||
time_range=parameters.get("timeRange"),
|
||||
topic=parameters.get("topic"),
|
||||
include_domains=parameters.get("includeDomains"),
|
||||
exclude_domains=parameters.get("excludeDomains"),
|
||||
language=parameters.get("language"),
|
||||
include_answer=parameters.get("includeAnswer"),
|
||||
include_raw_content=parameters.get("includeRawContent"),
|
||||
extract_depth=extract_depth,
|
||||
format=fmt,
|
||||
)
|
||||
|
||||
# Perform request
|
||||
|
|
|
|||
|
|
@ -174,6 +174,19 @@ def _getUserBase(token: str = Depends(cookieAuth)) -> User:
|
|||
headers={"WWW-Authenticate": "Bearer"},
|
||||
)
|
||||
|
||||
# Guard: token may be None or malformed when cookie/header is missing or bad
|
||||
if not token or not isinstance(token, str):
|
||||
logger.warning("Missing JWT Token (no cookie/header)")
|
||||
raise credentialsException
|
||||
# Basic JWT format check (header.payload.signature)
|
||||
try:
|
||||
if token.count(".") != 2:
|
||||
logger.warning("Malformed JWT token format")
|
||||
raise credentialsException
|
||||
except Exception:
|
||||
# If anything odd happens while checking format, treat as invalid creds
|
||||
raise credentialsException
|
||||
|
||||
try:
|
||||
# Decode token
|
||||
payload = jwt.decode(token, SECRET_KEY, algorithms=[ALGORITHM])
|
||||
|
|
|
|||
|
|
@ -163,6 +163,23 @@ class TokenManager:
|
|||
logger.debug(f"refresh_token: Starting refresh for token {old_token.id}, authority: {old_token.authority}")
|
||||
logger.debug(f"refresh_token: Token details: userId={old_token.userId}, connectionId={old_token.connectionId}, hasRefreshToken={bool(old_token.tokenRefresh)}")
|
||||
|
||||
# Cooldown: avoid refreshing too frequently if a workflow triggers refresh repeatedly
|
||||
# Only allow a new refresh if at least 10 minutes have passed since the token was created/refreshed
|
||||
try:
|
||||
now_ts = get_utc_timestamp()
|
||||
created_ts = float(old_token.createdAt) if old_token.createdAt is not None else 0.0
|
||||
seconds_since_last_refresh = now_ts - created_ts
|
||||
if seconds_since_last_refresh < 10 * 60:
|
||||
logger.info(
|
||||
f"refresh_token: Skipping refresh for connection {old_token.connectionId} due to cooldown. "
|
||||
f"Last refresh {int(seconds_since_last_refresh)}s ago (< 600s)."
|
||||
)
|
||||
# Return the existing token to avoid caller errors while preventing provider rate limits
|
||||
return old_token
|
||||
except Exception:
|
||||
# If any issue reading timestamps, proceed with normal refresh to be safe
|
||||
pass
|
||||
|
||||
if not old_token.tokenRefresh:
|
||||
logger.warning(f"No refresh token available for {old_token.authority}")
|
||||
return None
|
||||
|
|
|
|||
Loading…
Reference in a new issue