From 9ba45952e451ae0fc11eece352aa88ad6e24536d Mon Sep 17 00:00:00 2001
From: ValueOn AG
Date: Mon, 22 Sep 2025 15:44:30 +0200
Subject: [PATCH] closed workflow action plan
---
modules/chat/handling/promptFactory.py | 54 +++----
modules/connectors/connectorWebTavily.py | 81 ++++++++++-
modules/interfaces/interfaceWebModel.py | 28 +++-
modules/methods/methodAi.py | 39 ++++-
modules/methods/methodDocument.py | 90 ++++++------
modules/methods/methodOutlook.py | 4 +
modules/methods/methodSharepoint.py | 11 +-
modules/methods/methodWeb.py | 176 ++++++++++++++++++-----
modules/security/auth.py | 13 ++
modules/security/tokenManager.py | 17 +++
10 files changed, 390 insertions(+), 123 deletions(-)
diff --git a/modules/chat/handling/promptFactory.py b/modules/chat/handling/promptFactory.py
index 2890e7bf..c15979e2 100644
--- a/modules/chat/handling/promptFactory.py
+++ b/modules/chat/handling/promptFactory.py
@@ -158,7 +158,7 @@ def createTaskPlanningPrompt(context: TaskContext, service) -> str:
# Get previous workflow round context for better understanding of follow-up prompts
previous_round_context = _getPreviousRoundContext(service, context.workflow)
- return f"""You are a task planning AI that analyzes user requests and creates structured task plans with user-friendly feedback messages.
+ return f"""You are a task planning AI that analyzes user requests and creates structured, self-contained task plans with user-friendly feedback messages.
USER REQUEST: {user_request}
@@ -173,8 +173,8 @@ INSTRUCTIONS:
use the PREVIOUS WORKFLOW ROUNDS CONTEXT to understand what the user wants to retry or continue
3. Group related topics and sequential steps into single, comprehensive tasks
4. Focus on business outcomes, not technical operations
-5. Each task should produce meaningful, usable outputs
-6. Ensure proper handover between tasks using result labels
+5. Make each task self-contained: clearly state what to do and what outputs are expected
+6. Ensure proper handover between tasks (later actions will use your task outputs)
7. Detect the language of the user request and include it in languageUserDetected
8. Generate user-friendly messages for each task in the user's request language
9. Return a JSON object with the exact structure shown below
@@ -201,12 +201,14 @@ SPLIT INTO MULTIPLE TASKS:
TASK PLANNING PRINCIPLES:
- Break down complex requests into logical, sequential steps
- Focus on business value and outcomes
-- Keep tasks at a meaningful level of abstraction
+- Keep tasks at a meaningful level of abstraction (not implementation details)
- Each task should produce results that can be used by subsequent tasks
- Ensure clear dependencies and handovers between tasks
- Provide clear, actionable user messages in the user's request language
- Group related activities to minimize task fragmentation
- Only create multiple tasks when dealing with truly different, independent objectives
+- Make task objectives action-oriented and specific (include scope, data sources to consider, and output intent at a high level)
+- Write success_criteria as measurable acceptance criteria focusing on outputs (what artifacts or insights will exist and how they are validated)
FOLLOW-UP PROMPT HANDLING:
- If the user request is a follow-up (e.g., "try again", "versuche es nochmals", "retry", "continue", "proceed"),
@@ -246,6 +248,12 @@ EXAMPLES OF GOOD TASK OBJECTIVES (COMBINING RELATED ACTIVITIES):
- "Execute business communication using specified channels and document outcomes"
- "Develop comprehensive business strategy with implementation roadmap and success metrics"
+EXAMPLES OF WELL-FORMED SUCCESS CRITERIA (OUTPUT-FOCUSED):
+- "Deliver a prioritized list of 10–20 candidates with justification"
+- "Provide a structured JSON with fields: company, ticker, rationale, metrics"
+- "Produce a presentation outline with 5 sections and bullet points per section"
+- "Include data sources and date stamped references for traceability"
+
EXAMPLES OF GOOD SUCCESS CRITERIA:
- "Key insights extracted and ready for business use"
- "Professional communication created with clear business value"
@@ -417,7 +425,7 @@ USAGE GUIDE:
CRITICAL DOCUMENT REFERENCE RULES:
- ONLY use the exact labels listed in AVAILABLE DOCUMENTS below, or result labels from previous actions
- When generating multiple actions, you may only use as input documents those that are already present in AVAILABLE DOCUMENTS or produced by actions that come earlier in the list. Do NOT use as input any document label that will be produced by a later action.
-- If AVAILABLE DOCUMENTS shows "NO DOCUMENTS AVAILABLE", you CANNOT create document extraction actions. Instead, create actions that generate new content or inform the user that documents are needed, if you miss something.
+- If there are no documents available, you CANNOT create document extraction actions. Instead, prefer using web actions (web.search, web.scrape, web.crawl) when external information can satisfy the request; only generate a status/information report if the task truly requires user-provided documents.
CURRENT WORKFLOW CONTEXT:
- Current Round: {current_round}
@@ -470,18 +478,30 @@ PREVIOUS TASK HANDOVER CONTEXT:
ACTION GENERATION PRINCIPLES:
- Create meaningful actions per task step
-- Use comprehensive AI prompts for document processing
- Focus on business outcomes, not technical operations
- Combine related operations into single actions when possible
-- Use the task's AI prompt if provided, or create a comprehensive one
-- Each action should produce meaningful, usable outputs
+- Select the method that best fulfills the objective based on context (do not default to any specific method).
+- Each action must be self-contained and executable with the provided parameters
- For document extraction, ensure prompts are specific and detailed
-- Include validation steps in extraction prompts
+- Include validation steps in extraction prompts where relevant
- If this is a retry, learn from previous failures and improve the approach
- Address specific issues mentioned in previous review feedback
- When specifying expectedDocumentFormats, ensure AI prompts explicitly request pure data without markdown formatting
- Generate user-friendly messages for each action in the user's language ({user_language})
+PARAMETER COMPLETENESS REQUIREMENTS:
+- Every parameter must contain all information needed to execute without implicit context
+- Use explicit, concrete values (units, languages, formats, limits, date ranges, IDs) when applicable
+- For search-like parameters (if any method requires a query), derive the query from the task objective AND ALL success criteria dimensions. Include:
+ - Key entities and domain terms from the objective
+ - All distinct facets from success_criteria (e.g., valuation AND AI potential AND know-how needs)
+ - Geography/localization (e.g., Schweiz/Suisse/Switzerland; use multilingual synonyms when helpful)
+ - Time horizon or recency if relevant
+ - Boolean operators and synonyms to increase precision (use AND/OR, quotes, parentheses)
+ - Avoid single-topic or generic queries focused only on one facet (e.g., pure valuation metrics)
+ - When facets are truly distinct, create 1–3 focused actions with precise queries rather than one vague catch-all
+- Document list parameters must reference only existing labels or prior action outputs; do not reference future outputs
+
USER LANGUAGE: {user_language} - All user messages must be generated in this language.
DOCUMENT ROUTING GUIDANCE:
@@ -494,7 +514,7 @@ DOCUMENT ROUTING GUIDANCE:
INSTRUCTIONS:
- Generate actions to accomplish this task step using available documents, connections, and previous results
- Use docItem for single documents and docList for groups of documents as shown in AVAILABLE DOCUMENTS
-- If AVAILABLE DOCUMENTS shows "NO DOCUMENTS AVAILABLE", you cannot create document extraction actions. Instead, create actions that generate new content or inform the user that documents are needed.
+- If there are no documents available, do not create document extraction actions. Select methods strictly based on the task objective; choose web actions when external information is required. Otherwise, generate a status/information report requesting needed inputs.
- Always pass documentList as a LIST of references (docItem and/or docList) - this list CANNOT be empty for document extraction actions
- For referencing documents from previous actions, use the format "round{{round_number}}_task{{task_number}}_action{{action_number}}_{{context}}"
- For resultLabel, use the format: "round{current_round}_task{{task_id}}_action{{action_number}}_{{short_label}}" where:
@@ -639,23 +659,9 @@ EXAMPLES OF GOOD ACTIONS:
]
}}
-6. When no documents are available (NO DOCUMENTS AVAILABLE scenario):
-{{
- "method": "document",
- "action": "generateReport",
- "parameters": {{
- "documentList": [],
- "title": "Workflow Status Report"
- }},
- "resultLabel": "round{current_round}_task{current_task}_action1_status_report",
- "description": "Generate a status report informing the user that no documents are available for processing and requesting document upload or alternative input.",
- "userMessage": "Ich erstelle einen Statusbericht, der Sie darüber informiert, dass keine Dokumente zur Verarbeitung verfügbar sind und um Dokumente oder alternative Eingaben bittet."
-}}
-
IMPORTANT NOTES:
- Respond with ONLY the JSON object. Do not include any explanatory text.
- Before creating any document extraction action, verify that AVAILABLE DOCUMENTS contains actual document references.
-- If AVAILABLE DOCUMENTS shows "NO DOCUMENTS AVAILABLE", use example 6 above to create a status report action instead of document extraction.
- Always include a user-friendly userMessage for each action in the user's language ({user_language}).
- The examples above show German user messages as reference - adapt the language to match the USER LANGUAGE specified above."""
diff --git a/modules/connectors/connectorWebTavily.py b/modules/connectors/connectorWebTavily.py
index 97410493..59eb1396 100644
--- a/modules/connectors/connectorWebTavily.py
+++ b/modules/connectors/connectorWebTavily.py
@@ -81,7 +81,18 @@ class ConnectorTavily(WebSearchBase, WebCrawlBase, WebScrapeBase):
"""
# Step 1: Search
try:
- search_results = await self._search(request.query, request.max_results)
+ search_results = await self._search(
+ query=request.query,
+ max_results=request.max_results,
+ search_depth=request.search_depth,
+ time_range=request.time_range,
+ topic=request.topic,
+ include_domains=request.include_domains,
+ exclude_domains=request.exclude_domains,
+ language=request.language,
+ include_answer=request.include_answer,
+ include_raw_content=request.include_raw_content,
+ )
except Exception as e:
return WebSearchActionResult(success=False, error=str(e))
@@ -113,14 +124,29 @@ class ConnectorTavily(WebSearchBase, WebCrawlBase, WebScrapeBase):
"""Turns a query in a list of urls with extracted content."""
# Step 1: Search
try:
- search_results = await self._search(request.query, request.max_results)
+ search_results = await self._search(
+ query=request.query,
+ max_results=request.max_results,
+ search_depth=request.search_depth,
+ time_range=request.time_range,
+ topic=request.topic,
+ include_domains=request.include_domains,
+ exclude_domains=request.exclude_domains,
+ language=request.language,
+ include_answer=request.include_answer,
+ include_raw_content=request.include_raw_content,
+ )
except Exception as e:
return WebScrapeActionResult(success=False, error=str(e))
# Step 2: Crawl
try:
urls = [result.url for result in search_results]
- crawl_results = await self._crawl(urls)
+ crawl_results = await self._crawl(
+ urls,
+ extract_depth=request.extract_depth,
+ format=request.format,
+ )
except Exception as e:
return WebScrapeActionResult(success=False, error=str(e))
@@ -132,7 +158,19 @@ class ConnectorTavily(WebSearchBase, WebCrawlBase, WebScrapeBase):
return result
- async def _search(self, query: str, max_results: int) -> list[TavilySearchResult]:
+ async def _search(
+ self,
+ query: str,
+ max_results: int,
+ search_depth: str | None = None,
+ time_range: str | None = None,
+ topic: str | None = None,
+ include_domains: list[str] | None = None,
+ exclude_domains: list[str] | None = None,
+ language: str | None = None,
+ include_answer: bool | None = None,
+ include_raw_content: bool | None = None,
+ ) -> list[TavilySearchResult]:
"""Calls the Tavily API to perform a web search."""
# Make sure max_results is within the allowed range
min_results = get_web_search_min_results()
@@ -141,7 +179,26 @@ class ConnectorTavily(WebSearchBase, WebCrawlBase, WebScrapeBase):
raise ValueError(f"max_results must be between {min_results} and {max_allowed_results}")
# Perform actual API call
- response = await self.client.search(query=query, max_results=max_results)
+ # Build kwargs only for provided options to avoid API rejections
+ kwargs: dict = {"query": query, "max_results": max_results}
+ if search_depth is not None:
+ kwargs["search_depth"] = search_depth
+ if time_range is not None:
+ kwargs["time_range"] = time_range
+ if topic is not None:
+ kwargs["topic"] = topic
+ if include_domains is not None:
+ kwargs["include_domains"] = include_domains
+ if exclude_domains is not None:
+ kwargs["exclude_domains"] = exclude_domains
+ if language is not None:
+ kwargs["language"] = language
+ if include_answer is not None:
+ kwargs["include_answer"] = include_answer
+ if include_raw_content is not None:
+ kwargs["include_raw_content"] = include_raw_content
+
+ response = await self.client.search(**kwargs)
return [
TavilySearchResult(title=result["title"], url=result["url"])
@@ -174,7 +231,12 @@ class ConnectorTavily(WebSearchBase, WebCrawlBase, WebScrapeBase):
success=True, documents=[document], resultLabel="web_search_results"
)
- async def _crawl(self, urls: list) -> list[TavilyCrawlResult]:
+ async def _crawl(
+ self,
+ urls: list,
+ extract_depth: str | None = None,
+ format: str | None = None,
+ ) -> list[TavilyCrawlResult]:
"""Calls the Tavily API to extract text content from URLs with retry logic."""
import asyncio
@@ -185,8 +247,13 @@ class ConnectorTavily(WebSearchBase, WebCrawlBase, WebScrapeBase):
for attempt in range(max_retries + 1):
try:
# Use asyncio.wait_for for timeout
+ # Build kwargs for extract
+ kwargs_extract: dict = {"urls": urls}
+ kwargs_extract["extract_depth"] = extract_depth or "advanced"
+ kwargs_extract["format"] = format or "text"
+
response = await asyncio.wait_for(
- self.client.extract(urls=urls, extract_depth="advanced", format="text"),
+ self.client.extract(**kwargs_extract),
timeout=timeout
)
diff --git a/modules/interfaces/interfaceWebModel.py b/modules/interfaces/interfaceWebModel.py
index 26a16560..4f030e4e 100644
--- a/modules/interfaces/interfaceWebModel.py
+++ b/modules/interfaces/interfaceWebModel.py
@@ -3,7 +3,7 @@
from abc import ABC, abstractmethod
from modules.interfaces.interfaceChatModel import ActionDocument, ActionResult
from pydantic import BaseModel, Field, HttpUrl
-from typing import List
+from typing import List, Optional, Literal
from modules.shared.configuration import APP_CONFIG
@@ -31,6 +31,17 @@ def get_web_search_min_results() -> int:
class WebSearchRequest(BaseModel):
query: str = Field(min_length=1, max_length=get_web_search_max_query_length())
max_results: int = Field(ge=get_web_search_min_results(), le=get_web_search_max_results())
+ # Tavily tuning options
+ search_depth: Optional[Literal["basic", "advanced"]] = Field(default=None)
+ time_range: Optional[Literal["d", "w", "m", "y"]] = Field(
+ default=None, description="Limit results to last day/week/month/year"
+ )
+ topic: Optional[Literal["general", "news", "academic"]] = Field(default=None)
+ include_domains: Optional[List[str]] = Field(default=None)
+ exclude_domains: Optional[List[str]] = Field(default=None)
+ language: Optional[str] = Field(default=None, description="ISO language code like 'en', 'de'")
+ include_answer: Optional[bool] = Field(default=None)
+ include_raw_content: Optional[bool] = Field(default=None)
class WebSearchResultItem(BaseModel):
@@ -68,6 +79,9 @@ class WebSearchBase(ABC):
class WebCrawlRequest(BaseModel):
urls: List[HttpUrl]
+ # Tavily extract options
+ extract_depth: Optional[Literal["basic", "advanced"]] = Field(default=None)
+ format: Optional[Literal["text", "markdown"]] = Field(default=None)
class WebCrawlResultItem(BaseModel):
@@ -108,6 +122,18 @@ class WebCrawlBase(ABC):
class WebScrapeRequest(BaseModel):
query: str = Field(min_length=1, max_length=get_web_search_max_query_length())
max_results: int = Field(ge=get_web_search_min_results(), le=get_web_search_max_results())
+ # Pass-through search options
+ search_depth: Optional[Literal["basic", "advanced"]] = Field(default=None)
+ time_range: Optional[Literal["d", "w", "m", "y"]] = Field(default=None)
+ topic: Optional[Literal["general", "news", "academic"]] = Field(default=None)
+ include_domains: Optional[List[str]] = Field(default=None)
+ exclude_domains: Optional[List[str]] = Field(default=None)
+ language: Optional[str] = Field(default=None)
+ include_answer: Optional[bool] = Field(default=None)
+ include_raw_content: Optional[bool] = Field(default=None)
+ # Extract options
+ extract_depth: Optional[Literal["basic", "advanced"]] = Field(default=None)
+ format: Optional[Literal["text", "markdown"]] = Field(default=None)
class WebScrapeResultItem(BaseModel):
diff --git a/modules/methods/methodAi.py b/modules/methods/methodAi.py
index eda36f69..f947db83 100644
--- a/modules/methods/methodAi.py
+++ b/modules/methods/methodAi.py
@@ -41,6 +41,8 @@ class MethodAi(MethodBase):
try:
aiPrompt = parameters.get("aiPrompt")
documentList = parameters.get("documentList", [])
+ if isinstance(documentList, str):
+ documentList = [documentList]
expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
processingMode = parameters.get("processingMode", "basic")
includeMetadata = parameters.get("includeMetadata", True)
@@ -171,10 +173,43 @@ class MethodAi(MethodBase):
if context:
logger.info(f"Including context from {len(documentList)} documents")
+ # Encourage longer, structured outputs with a min-length hint
+ min_tokens_hint = "\n\nPlease ensure the response is substantial and complete."
+ call_prompt = enhanced_prompt + min_tokens_hint
+
if processingMode in ["advanced", "detailed"]:
- result = await self.service.callAiTextAdvanced(enhanced_prompt, context)
+ result = await self.service.callAiTextAdvanced(call_prompt, context)
else:
- result = await self.service.callAiTextBasic(enhanced_prompt, context)
+ result = await self.service.callAiTextBasic(call_prompt, context)
+
+ # If expected JSON and too short/not JSON, retry with stricter JSON guardrails
+ if output_extension == ".json":
+ import json
+ cleaned = (result or "").strip()
+ if cleaned.startswith('```json'):
+ cleaned = cleaned[7:]
+ if cleaned.endswith('```'):
+ cleaned = cleaned[:-3]
+ cleaned = cleaned.strip()
+ needs_retry = False
+ try:
+ parsed = json.loads(cleaned)
+ # Heuristic: small dict -> possibly underfilled
+ if isinstance(parsed, dict) and len(parsed.keys()) <= 2:
+ needs_retry = True
+ except Exception:
+ needs_retry = True
+
+ if needs_retry:
+ guardrail_prompt = (
+ enhanced_prompt
+ + "\n\nCRITICAL: Return ONLY valid JSON, no markdown, no code fences. "
+ "Include all requested fields with detailed content."
+ )
+ try:
+ result = await self.service.callAiTextAdvanced(guardrail_prompt, context)
+ except Exception:
+ result = cleaned # fallback to first attempt
# Create result document
fileName = f"ai_{processingMode}_{self._format_timestamp_for_filename()}{output_extension}"
diff --git a/modules/methods/methodDocument.py b/modules/methods/methodDocument.py
index 8cd3ac1c..54f45cb9 100644
--- a/modules/methods/methodDocument.py
+++ b/modules/methods/methodDocument.py
@@ -5,7 +5,6 @@ Handles document operations using the document service.
import logging
import os
-import re
from typing import Dict, Any, List, Optional
from datetime import datetime, UTC
@@ -34,13 +33,15 @@ class MethodDocument(MethodBase):
Extract content from any document using AI prompt.
Parameters:
- documentList (str): Document list reference
+ documentList (list): Document list reference(s)
aiPrompt (str): AI prompt for extraction
expectedDocumentFormats (list, optional): Output formats
includeMetadata (bool, optional): Include metadata (default: True)
"""
try:
documentList = parameters.get("documentList")
+ if isinstance(documentList, str):
+ documentList = [documentList]
aiPrompt = parameters.get("aiPrompt")
expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
includeMetadata = parameters.get("includeMetadata", True)
@@ -188,6 +189,8 @@ class MethodDocument(MethodBase):
"""
try:
document_list = parameters.get("documentList", [])
+ if isinstance(document_list, str):
+ document_list = [document_list]
expected_document_formats = parameters.get("expectedDocumentFormats", [])
original_documents = parameters.get("originalDocuments", [])
include_metadata = parameters.get("includeMetadata", True)
@@ -606,13 +609,15 @@ class MethodDocument(MethodBase):
Generate HTML report from multiple documents using AI.
Parameters:
- documentList (str): Document list reference
+ documentList (list): Document list reference(s)
prompt (str): AI prompt for report generation
title (str, optional): Report title (default: "Summary Report")
includeMetadata (bool, optional): Include metadata (default: True)
"""
try:
documentList = parameters.get("documentList")
+ if isinstance(documentList, str):
+ documentList = [documentList]
prompt = parameters.get("prompt")
title = parameters.get("title", "Summary Report")
includeMetadata = parameters.get("includeMetadata", True)
@@ -708,13 +713,9 @@ class MethodDocument(MethodBase):
logger.info(f" Skipping document with no readable text content")
if not validDocuments:
- # If no valid documents, create a simple report
- html = ["" + title + ""]
- html.append(f"{title}
")
- html.append(f"Generated: {int(get_utc_timestamp())}
")
- html.append("No content available in the provided documents.
")
- html.append("")
- return '\n'.join(html)
+ # No readable content; return a minimal valid HTML document
+ timestamp = int(get_utc_timestamp())
+ return f"{title}{title}
Keine auswertbaren Inhalte gefunden.
Generated: {timestamp}
"
# Create AI prompt for comprehensive report generation using user's prompt
combinedContent = "\n\n".join(allContent)
@@ -723,25 +724,34 @@ class MethodDocument(MethodBase):
Report Title: {title}
-Additional Requirements:
-1. Create a professional, well-formatted HTML report
-2. Include an executive summary at the beginning
-3. Organize information logically with clear sections
-4. Highlight key findings and insights
-5. Include relevant data, statistics, and conclusions
-6. Use proper HTML formatting with headers, lists, and styling
-7. Make it readable and professional
+OUTPUT POLICY:
+- Return ONLY a complete, raw HTML document.
+- Start with:
+- Must include: , (with and ), and .
+- The response must be valid, self-contained HTML suitable for saving as .html.
-Document Content:
----START OF DOCUMENT CONTENT-----------------------------------------------
+Structure:
+- Title and short subtitle
+- Executive summary
+- Sections with clear headings
+- Use tables for structured data when helpful
+- Key findings and recommendations
+- Generation date and number of documents
+
+Quality and design requirements:
+- Use clear, professional, and accessible styling in a