From c2d3877b1e92751e4baf337b49602bd508a5f505 Mon Sep 17 00:00:00 2001
From: ValueOn AG
Date: Thu, 4 Sep 2025 16:46:56 +0200
Subject: [PATCH] Fix SharePoint search
---
modules/chat/handling/promptFactory.py | 118 ++++++++-
modules/methods/methodSharepoint.py | 319 ++++++++++++++++++++++---
modules/routes/routeSecurityGoogle.py | 29 ++-
modules/security/tokenManager.py | 3 +-
4 files changed, 423 insertions(+), 46 deletions(-)
diff --git a/modules/chat/handling/promptFactory.py b/modules/chat/handling/promptFactory.py
index 9faa06b3..640aebba 100644
--- a/modules/chat/handling/promptFactory.py
+++ b/modules/chat/handling/promptFactory.py
@@ -11,6 +11,85 @@ logger = logging.getLogger(__name__)
# Prompt creation helpers extracted from managerChat.py
+def _getPreviousRoundContext(service, workflow) -> str:
+ """Get context from previous workflow rounds to help understand follow-up prompts"""
+ try:
+ if not workflow or not hasattr(workflow, 'messages') or not workflow.messages:
+ return ""
+
+ # Get current round number
+ current_round = getattr(workflow, 'currentRound', 0)
+
+ # If this is round 0 or 1, there's no previous context
+ if current_round <= 1:
+ return ""
+
+ # Find messages from previous rounds (rounds before current)
+ previous_messages = []
+ for message in workflow.messages:
+ message_round = getattr(message, 'roundNumber', 0)
+ if message_round > 0 and message_round < current_round:
+ previous_messages.append(message)
+
+ if not previous_messages:
+ return ""
+
+ # Sort by round number and sequence to get chronological order
+ previous_messages.sort(key=lambda msg: (getattr(msg, 'roundNumber', 0), getattr(msg, 'sequenceNr', 0)))
+
+ # Group previous-round messages by round number before summarizing
+ context_parts = []
+ current_round_context = {}
+
+ for message in previous_messages:
+ round_num = getattr(message, 'roundNumber', 0)
+ if round_num not in current_round_context:
+ current_round_context[round_num] = {
+ 'user_inputs': [],
+ 'assistant_responses': [],
+ 'task_outcomes': [],
+ 'documents_processed': []
+ }
+
+ # Categorize messages
+ if message.role == 'user':
+ current_round_context[round_num]['user_inputs'].append(message.message)
+ elif message.role == 'assistant':
+ # Check if it's a task completion or error message
+ if 'task' in message.message.lower() and ('completed' in message.message.lower() or 'failed' in message.message.lower() or 'error' in message.message.lower()):
+ current_round_context[round_num]['task_outcomes'].append(message.message)
+ else:
+ current_round_context[round_num]['assistant_responses'].append(message.message)
+
+ # Check for document processing
+ if hasattr(message, 'documents') and message.documents:
+ doc_names = [doc.fileName for doc in message.documents if hasattr(doc, 'fileName')]
+ if doc_names:
+ current_round_context[round_num]['documents_processed'].extend(doc_names)
+
+ # Build context summary
+ for round_num in sorted(current_round_context.keys()):
+ round_data = current_round_context[round_num]
+ context_parts.append(f"ROUND {round_num} CONTEXT:")
+
+ if round_data['user_inputs']:
+ context_parts.append(f" User requests: {'; '.join(round_data['user_inputs'])}")
+
+ if round_data['task_outcomes']:
+ context_parts.append(f" Task outcomes: {'; '.join(round_data['task_outcomes'])}")
+
+ if round_data['documents_processed']:
+ context_parts.append(f" Documents processed: {', '.join(set(round_data['documents_processed']))}")
+
+ if context_parts:
+ return "\n".join(context_parts)
+ else:
+ return ""
+
+ except Exception as e:
+ logger.error(f"Error getting previous round context: {str(e)}")
+ return ""
+
def createTaskPlanningPrompt(context: TaskContext, service) -> str:
"""Create enhanced prompt for task planning with user-friendly message generation and language detection"""
# Get user language directly from service.user.language
@@ -22,21 +101,29 @@ def createTaskPlanningPrompt(context: TaskContext, service) -> str:
# Extract available documents from context - use Pydantic model directly
available_documents = context.available_documents or "No documents available"
+ # Get previous workflow round context for better understanding of follow-up prompts
+ previous_round_context = _getPreviousRoundContext(service, context.workflow)
+
return f"""You are a task planning AI that analyzes user requests and creates structured task plans with user-friendly feedback messages.
USER REQUEST: {user_request}
AVAILABLE DOCUMENTS: {available_documents}
+PREVIOUS WORKFLOW ROUNDS CONTEXT:
+{previous_round_context if previous_round_context else "No previous workflow rounds - this is the first round."}
+
INSTRUCTIONS:
-1. Analyze the user request and available documents
-2. Group related topics and sequential steps into single, comprehensive tasks
-3. Focus on business outcomes, not technical operations
-4. Each task should produce meaningful, usable outputs
-5. Ensure proper handover between tasks using result labels
-6. Detect the language of the user request and include it in languageUserDetected
-7. Generate user-friendly messages for each task in the user's request language
-8. Return a JSON object with the exact structure shown below
+1. Analyze the user request, available documents, and previous workflow rounds context
+2. If the user request appears to be a follow-up (like "try again", "versuche es nochmals", "retry", etc.),
+ use the PREVIOUS WORKFLOW ROUNDS CONTEXT to understand what the user wants to retry or continue
+3. Group related topics and sequential steps into single, comprehensive tasks
+4. Focus on business outcomes, not technical operations
+5. Each task should produce meaningful, usable outputs
+6. Ensure proper handover between tasks using result labels
+7. Detect the language of the user request and include it in languageUserDetected
+8. Generate user-friendly messages for each task in the user's request language
+9. Return a JSON object with the exact structure shown below
TASK GROUPING PRINCIPLES:
- COMBINE RELATED TOPICS: Group related subjects, sequential steps, or workflow-structured activities into single tasks
@@ -67,6 +154,21 @@ TASK PLANNING PRINCIPLES:
- Group related activities to minimize task fragmentation
- Only create multiple tasks when dealing with truly different, independent objectives
+FOLLOW-UP PROMPT HANDLING:
+- If the user request is a follow-up (e.g., "try again", "versuche es nochmals", "retry", "continue", "proceed"),
+ analyze the PREVIOUS WORKFLOW ROUNDS CONTEXT to understand what failed or was incomplete
+- Use the previous round's user requests and task outcomes to determine what the user wants to retry
+- If previous rounds failed due to missing documents, and documents are now available,
+ create tasks that use the newly available documents to accomplish the original request
+- Maintain the same business objective from previous rounds but adapt to current available resources
+
+SPECIFIC SCENARIO HANDLING:
+- If previous round failed with "documents missing" error and current round has documents available,
+ the user likely wants to retry the same operation with the newly provided documents
+- Example: Previous round "speichere mir die 3 dokumente im sharepoint unter xxx" failed due to missing documents,
+ current round "versuche es nochmals" with documents should retry the SharePoint save operation
+- Always check if the current request is a retry by looking for retry keywords and previous round context
+
REQUIRED JSON STRUCTURE:
{{
"overview": "Brief description of the overall plan",
diff --git a/modules/methods/methodSharepoint.py b/modules/methods/methodSharepoint.py
index a8257bc3..4312bf58 100644
--- a/modules/methods/methodSharepoint.py
+++ b/modules/methods/methodSharepoint.py
@@ -117,6 +117,52 @@ class MethodSharepoint(MethodBase):
logger.error(f"Error discovering SharePoint sites: {str(e)}")
return []
+ def _filter_sites_by_hint(self, sites: List[Dict[str, Any]], site_hint: str) -> List[Dict[str, Any]]:
+ """Filter discovered sites by a human-entered site hint (case-insensitive substring)."""
+ try:
+ if not site_hint:
+ return sites
+ hint = site_hint.strip().lower()
+ filtered: List[Dict[str, Any]] = []
+ for site in sites:
+ name = (site.get("displayName") or "").lower()
+ web_url = (site.get("webUrl") or "").lower()
+ if hint in name or hint in web_url:
+ filtered.append(site)
+ return filtered if filtered else sites
+ except Exception as e:
+ logger.error(f"Error filtering sites by hint '{site_hint}': {str(e)}")
+ return sites
+
+
+ def _parse_site_scoped_path(self, path_query: str) -> Optional[Dict[str, str]]:
+ """
+ Parse a site-scoped path of the form:
+ /site:KM LayerFinance/Documents/Work or /site:KM LayerFinance/Shared Documents/Work
+
+ Returns dict with keys: siteName, innerPath (no leading slash) on success, else None.
+ """
+ try:
+ if not path_query or not path_query.startswith('/'):
+ return None
+ # expected syntax prefix
+ prefix = '/site:'
+ if not path_query.startswith(prefix):
+ return None
+ remainder = path_query[len(prefix):]
+ # split once on the next '/'
+ if '/' not in remainder:
+ return None
+ site_name, inner = remainder.split('/', 1)
+ site_name = site_name.strip()
+ inner_path = inner.strip()
+ if not site_name or not inner_path:
+ return None
+ return {"siteName": site_name, "innerPath": inner_path}
+ except Exception as e:
+ logger.error(f"Error parsing site-scoped path '{path_query}': {str(e)}")
+ return None
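+
+ # Illustrative usage (values taken from the docstring above):
+ #   self._parse_site_scoped_path("/site:KM LayerFinance/Documents/Work")
+ #   -> {"siteName": "KM LayerFinance", "innerPath": "Documents/Work"}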
+
def _parseSearchQuery(self, searchQuery: str) -> tuple[str, str, str, dict]:
"""
Parse searchQuery to extract path, search terms, search type, and search options.
@@ -141,13 +187,48 @@ class MethodSharepoint(MethodBase):
searchQuery = searchQuery.strip()
searchOptions = {}
-
- # Check for search type specification (files:, folders:, all:)
+
+ # Check for search type specification (files:, folders:, all:) FIRST
searchType = "all" # Default
if searchQuery.startswith(("files:", "folders:", "all:")):
type_parts = searchQuery.split(':', 1)
searchType = type_parts[0].strip()
searchQuery = type_parts[1].strip()
+
+ # Extract optional site hint tokens: support "site=Name" or leading "site:Name"
+ import re  # local import, matching the existing pattern of in-function "import re" in this module
+ def _extract_site_hint(q: str) -> tuple[str, Optional[str]]:
+ try:
+ q_strip = q.strip()
+ # Leading form: site:KM LayerFinance ...
+ if q_strip.lower().startswith("site:"):
+ after = q_strip[5:].lstrip()
+ # site name until next space or end
+ if ' ' in after:
+ site_name, rest = after.split(' ', 1)
+ else:
+ site_name, rest = after, ''
+ return rest.strip(), site_name.strip()
+ # Inline key=value form anywhere
+ m = re.search(r"\bsite=([^;\s]+)", q_strip, flags=re.IGNORECASE)
+ if m:
+ site_name = m.group(1).strip()
+ # remove the token from query
+ q_new = re.sub(r"\bsite=[^;\s]+;?", "", q_strip, flags=re.IGNORECASE).strip()
+ return q_new, site_name
+ except Exception:
+ pass
+ return q, None
+
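+ # Illustrative (hypothetical input): _extract_site_hint('site=KM;name="page staten"')
+ # -> ('name="page staten"', 'KM'); the quoted name is then unwrapped by the name="..." handling below.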
+ searchQuery, extracted_site = _extract_site_hint(searchQuery)
+ if extracted_site:
+ searchOptions["site_hint"] = extracted_site
+ logger.info(f"Extracted site hint: '{extracted_site}'")
+
+ # Extract name="..." if present (for quoted multi-word names)
+ name_match = re.search(r"name=\"([^\"]+)\"", searchQuery)
+ if name_match:
+ searchQuery = name_match.group(1)
+ logger.info(f"Extracted name from quotes: '{searchQuery}'")
# Check for search mode specification (exact:, regex:, case:, and:)
if searchQuery.startswith(("exact:", "regex:", "case:", "and:")):
@@ -187,6 +268,7 @@ class MethodSharepoint(MethodBase):
else:
fileQuery = search_part
+ # fileQuery now holds the remaining search terms (quoted name="..." extraction was handled above)
return pathQuery, fileQuery, searchType, searchOptions
# No colon - check if it looks like a path
@@ -349,6 +431,7 @@ class MethodSharepoint(MethodBase):
Parameters:
connectionReference (str): Reference to the Microsoft connection
+ site (str, optional): SharePoint site name or hint to search within (e.g., "SSS", "KM XYZ"). If not provided, searches all accessible sites
searchQuery (str): [path:][type:][mode:]query - Enhanced search syntax:
- "budget", "/Documents:budget", "files:budget", "folders:DELTA", "*"
- "exact:\"Operations 2025\"" - exact phrase matching
@@ -356,7 +439,11 @@ class MethodSharepoint(MethodBase):
- "case:DELTA" - case-sensitive search
- "and:DELTA AND 2025 Mars AND Group" - all terms must be present
- "folders:and:DELTA AND 2025 Mars AND Group" - combined options
- Note: For storage locations, use "folders:" prefix. All search terms must be present by default.
+ - Site hint support: "folders:site=KM LayerFinance;name=Work" or "folders:site:KM LayerFinance Work"
+ - For quoted names: "folders:site=KM;name=\"page staten\""
+ - For folder search: words like "part1 part2" will search for folders containing BOTH terms
+ Note: For storage locations, use "folders:" prefix. When searching for folders, multiple words are treated as search terms that must all appear in the folder name or path.
+ Site hints help narrow search to specific SharePoint sites for better accuracy.
resultDocument (str, optional): JSON result document from previous findDocumentPath action to refine search
searchScope (str, optional): Search scope - options: "all" (default), "documents" (files only), "pages" (SharePoint pages only)
maxResults (int, optional): Maximum number of results to return (default: 100)
@@ -364,6 +451,7 @@ class MethodSharepoint(MethodBase):
"""
try:
connectionReference = parameters.get("connectionReference")
+ site = parameters.get("site")
searchQuery = parameters.get("searchQuery", "*")
resultDocument = parameters.get("resultDocument")
searchScope = parameters.get("searchScope", "all")
@@ -415,6 +503,13 @@ class MethodSharepoint(MethodBase):
if not sites:
return ActionResult.isFailure(error="No SharePoint sites found or accessible")
+ # Filter sites by site parameter if provided
+ if site:
+ sites = self._filter_sites_by_hint(sites, site)
+ logger.info(f"Filtered sites by site parameter: '{site}' -> {len(sites)} sites")
+ if not sites:
+ return ActionResult.isFailure(error=f"No SharePoint sites found matching '{site}'")
+
# Resolve path query into search paths
search_paths = self._resolvePathQuery(pathQuery)
@@ -423,80 +518,206 @@ class MethodSharepoint(MethodBase):
found_documents = []
all_sites_searched = []
- for site in sites:
+ # Apply site hint filtering if provided in search options
+ site_scoped_sites = sites
+ strict_folder_name: Optional[str] = None
+
+ # First check for explicit site hint in search options
+ if searchOptions.get("site_hint"):
+ site_scoped_sites = self._filter_sites_by_hint(sites, searchOptions["site_hint"])
+ logger.info(f"Filtered sites by explicit site hint: '{searchOptions['site_hint']}' -> {len(site_scoped_sites)} sites")
+
+ # Heuristic: if the user searched for folders with a "<site words> <folder>" pattern,
+ # prefer filtering sites by the first token(s) and match folder name exactly for the last token
+ elif searchType == "folders" and fileQuery and ' ' in fileQuery and not searchOptions.get("regex_match"):
+ # treat last token as folder name, preceding tokens combined as site hint
+ tokens = [t for t in fileQuery.split(' ') if t]
+ if len(tokens) >= 2:
+ strict_folder_name = tokens[-1]
+ site_hint = ' '.join(tokens[:-1])
+ site_scoped_sites = self._filter_sites_by_hint(sites, site_hint)
+ logger.info(f"Filtered sites by heuristic site hint: '{site_hint}' -> {len(site_scoped_sites)} sites")
+
+ for site in site_scoped_sites:
site_id = site["id"]
site_name = site["displayName"]
site_url = site["webUrl"]
logger.info(f"Searching in site: {site_name} ({site_url})")
- # Use Microsoft Graph search API for this specific site
+ # Use Microsoft Graph API for this specific site
# Handle empty or wildcard queries
if not fileQuery or fileQuery.strip() == "" or fileQuery.strip() == "*":
# For wildcard/empty queries, list all items in the drive
endpoint = f"sites/{site_id}/drive/root/children"
else:
- # For specific queries, use search API
- search_query = fileQuery.replace("'", "''") # Escape single quotes for OData
- endpoint = f"sites/{site_id}/drive/root/search(q='{search_query}')"
-
- # Make the search API call
- search_result = await self._makeGraphApiCall(connection["accessToken"], endpoint)
-
- if "error" in search_result:
- logger.warning(f"Search failed for site {site_name}: {search_result['error']}")
- continue
-
- # Process search results for this site
- items = search_result.get("value", [])
+ # For specific queries, use different approaches based on search type
+ if searchType == "folders":
+ # Use Microsoft Graph unified search endpoint: POST /search/query
+ # Scope by all drives in the site (e.g., Shared Documents, Documents, language variants)
+ try:
+ import json
+ # Discover drives for the site to build precise path scopes
+ drives_resp = await self._makeGraphApiCall(connection["accessToken"], f"sites/{site_id}/drives")
+ path_filters = []
+ if not ("error" in drives_resp):
+ for drv in (drives_resp.get("value", []) or []):
+ web_url = (drv.get("webUrl") or "").rstrip('/') + '/'
+ if web_url:
+ # path:"/"
+ path_filters.append(f"path:\"{web_url}\"")
+ if not path_filters:
+ # fallback to site root if no drives found
+ scoped_path = site_url.rstrip('/') + '/'
+ path_filters = [f"path:\"{scoped_path}\""]
+
+ # Use KQL syntax for folder search
+ terms = [t for t in fileQuery.split() if t.strip()]
+ if len(terms) > 1:
+ # Multiple terms: first search for folders containing ANY of the terms (OR)
+ # This broadens the search to catch all potential matches
+ name_terms = " OR ".join([f"foldername:*{t}*" for t in terms])
+ name_filter = f"({name_terms})"
+ else:
+ # Single term: search for folders containing the term
+ single_term = terms[0] if terms else fileQuery
+ name_filter = f"foldername:*{single_term}*"
+
+ # Use KQL syntax with isFolder:true
+ query_string = f"isFolder:true AND {name_filter}"
+ logger.info(f"Using KQL query: {query_string}")
+
+ payload = {
+ "requests": [
+ {
+ "entityTypes": ["driveItem"],
+ "query": {"queryString": query_string},
+ "from": 0,
+ "size": 50
+ }
+ ]
+ }
+ logger.info(f"Using unified search API for folders with queryString: {query_string}")
+ logger.info(f"Payload: {json.dumps(payload, indent=2)}")
+ unified_result = await self._makeGraphApiCall(
+ connection["accessToken"],
+ "search/query",
+ method="POST",
+ data=json.dumps(payload).encode("utf-8")
+ )
+ logger.info(f"Unified search response: {json.dumps(unified_result, indent=2)}")
+ if "error" in unified_result:
+ logger.warning(f"Unified search failed for site {site_name}: {unified_result['error']}")
+ items = []
+ else:
+ # Flatten hits -> driveItem resources
+ items = []
+ for container in (unified_result.get("value", []) or []):
+ for hits_container in (container.get("hitsContainers", []) or []):
+ for hit in (hits_container.get("hits", []) or []):
+ resource = hit.get("resource")
+ if resource:
+ items.append(resource)
+ logger.info(f"Unified search returned {len(items)} items (pre-filter)")
+
+ # Post-filter: For multiple terms, filter results to only include folders that contain ALL terms
+ if len(terms) > 1:
+ filtered_items = []
+ for item in items:
+ folder_name = item.get("name", "").lower()
+ # Check if folder name contains ALL search terms
+ if all(term.lower() in folder_name for term in terms):
+ filtered_items.append(item)
+ items = filtered_items
+ logger.info(f"Post-filtered to {len(items)} items containing all terms: {terms}")
+
+ except Exception as e:
+ logger.error(f"Error performing unified folder search: {str(e)}")
+ items = []
+ else:
+ # For files, use regular search API
+ search_query = fileQuery.replace("'", "''") # Escape single quotes for OData
+ endpoint = f"sites/{site_id}/drive/root/search(q='{search_query}')"
+ logger.info(f"Using search API for files with query: '{search_query}'")
+
+ # Make the search API call (files)
+ search_result = await self._makeGraphApiCall(connection["accessToken"], endpoint)
+ if "error" in search_result:
+ logger.warning(f"Search failed for site {site_name}: {search_result['error']}")
+ continue
+ # Process search results for this site (files)
+ items = search_result.get("value", [])
+ logger.info(f"Retrieved {len(items)} items from site {site_name}")
site_documents = []
for item in items:
+ item_name = item.get("name", "")
+ item_type = "folder" if "folder" in item else "file"
+ item_path = item.get("parentReference", {}).get("path", "")
+ logger.debug(f"Processing {item_type}: '{item_name}' at path: '{item_path}'")
+
# Filter by search scope if specified
if searchScope == "documents" and "folder" in item:
+ logger.debug(f"Skipping folder '{item_name}' due to documents scope")
continue
elif searchScope == "pages" and "file" in item and not item["file"].get("mimeType", "").startswith("text/html"):
+ logger.debug(f"Skipping file '{item_name}' due to pages scope")
continue
# Filter by search type (files, folders, all)
if searchType == "files" and "folder" in item:
+ logger.debug(f"Skipping folder '{item_name}' due to files search type")
continue
elif searchType == "folders" and "file" in item:
+ logger.debug(f"Skipping file '{item_name}' due to folders search type")
continue
# Enhanced post-filtering based on search options
- item_name = item.get("name", "")
- if fileQuery != "*" and fileQuery.strip():
+ if fileQuery != "*" and fileQuery.strip() and searchType != "folders":
+ # For non-folder searches, apply name filtering
+ # (Folder searches are already filtered by the recursive search)
+ search_target = item_name
+
# Apply different filtering based on search options
if searchOptions.get("exact_match"):
# Exact phrase matching
if searchOptions.get("case_sensitive"):
- if fileQuery not in item_name:
+ if fileQuery not in search_target:
continue
else:
- if fileQuery.lower() not in item_name.lower():
+ if fileQuery.lower() not in search_target.lower():
continue
elif searchOptions.get("regex_match"):
# Regex pattern matching
import re
flags = 0 if searchOptions.get("case_sensitive") else re.IGNORECASE
- if not re.search(fileQuery, item_name, flags):
+ if not re.search(fileQuery, search_target, flags):
continue
elif searchOptions.get("and_terms"):
# AND terms mode: Split by " AND " and ensure ALL terms are present
- search_name = item_name.lower() if not searchOptions.get("case_sensitive") else item_name
+ search_name = search_target.lower() if not searchOptions.get("case_sensitive") else search_target
and_terms = [term.strip() for term in fileQuery.split(" AND ") if term.strip()]
and_terms = [term.lower() if not searchOptions.get("case_sensitive") else term for term in and_terms]
if not all(term in search_name for term in and_terms):
continue # Skip this item if not all AND terms match
else:
# Default: ALL search terms must be present (space-separated)
- search_name = item_name.lower() if not searchOptions.get("case_sensitive") else item_name
+ search_name = search_target.lower() if not searchOptions.get("case_sensitive") else search_target
search_terms = [term.strip().lower() if not searchOptions.get("case_sensitive") else term.strip()
for term in fileQuery.split() if term.strip()]
if not all(term in search_name for term in search_terms):
continue # Skip this item if not all terms match
+ # If strict folder name requested, enforce exact (case-insensitive) match on folders
+ if strict_folder_name:
+ item_is_folder = "folder" in item
+ item_name_ci = (item.get("name") or "").strip().lower()
+ if item_is_folder and item_name_ci != strict_folder_name.lower():
+ logger.debug(f"Skipping folder '{item_name}' - doesn't match strict name '{strict_folder_name}'")
+ continue
+
+ logger.debug(f"Item '{item_name}' passed all filters - adding to results")
+
# Create minimal result with only essential reference information
doc_info = {
"id": item.get("id"),
@@ -804,15 +1025,17 @@ class MethodSharepoint(MethodBase):
Parameters:
connectionReference (str): Reference to the Microsoft connection
- pathQuery (str): Path query where to upload documents (e.g., "/Documents/Project1", "*" for default location)
+ sitePath (str): REQUIRED - Specific SharePoint path where to upload documents. Must be a valid SharePoint path format:
+ - For direct upload: "/site://" (e.g., "/site:KM XYZ/Documents/Work")
+ - If user provides words like "word1 word2", the system MUST call findDocumentPath first to locate the actual folder path, the result then to give to this parameter
documentList (str): Reference to the document list to upload
fileNames (List[str]): List of names for the uploaded files
- resultDocument (str, optional): JSON result document from findDocumentPath action (alternative to pathQuery)
+ resultDocument (str, optional): JSON result document from findDocumentPath action (alternative to sitePath)
expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description
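+ Example call (illustrative values): sitePath="/site:KM XYZ/Documents/Work", documentList="<documents reference>", fileNames=["report.docx"]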
"""
try:
connectionReference = parameters.get("connectionReference")
- pathQuery = parameters.get("pathQuery", "/Documents")
+ sitePath = parameters.get("sitePath", "/Documents")
documentList = parameters.get("documentList")
fileNames = parameters.get("fileNames")
resultDocument = parameters.get("resultDocument")
@@ -847,9 +1070,9 @@ class MethodSharepoint(MethodBase):
folder_ids.append(doc.get("id"))
if folder_ids:
- # Use the first folder ID found as pathQuery
- pathQuery = folder_ids[0]
- logger.info(f"Using folder ID from resultDocument: {pathQuery}")
+ # Use the first folder ID found as sitePath
+ sitePath = folder_ids[0]
+ logger.info(f"Using folder ID from resultDocument: {sitePath}")
else:
return ActionResult.isFailure(error="No folders found in resultDocument")
@@ -874,9 +1097,35 @@ class MethodSharepoint(MethodBase):
sites = await self._discoverSharePointSites(connection["accessToken"])
if not sites:
return ActionResult.isFailure(error="No SharePoint sites found or accessible")
-
- # Resolve path query into upload paths
- upload_paths = self._resolvePathQuery(pathQuery)
+
+ # Enforce site-scoped path usage when using sitePath directly (without resultDocument)
+ upload_site_scope = None
+ if not resultDocument:
+ if not sitePath or not sitePath.startswith('/'):
+ return ActionResult.isFailure(error="sitePath must start with '/' and include site name with syntax /site:/... e.g. /site:KM LayerFinance/Documents/Work")
+
+ # Check if sitePath contains search terms (words without proper path structure)
+ if not sitePath.startswith('/site:') and not sitePath.startswith('/Documents') and not sitePath.startswith('/Shared Documents'):
+ # This looks like search terms, not a valid path
+ return ActionResult.isFailure(error=f"Invalid sitePath '{sitePath}'. This appears to be search terms, not a valid SharePoint path. Use findDocumentPath action first to search for folders, then use the returned folder path as sitePath.")
+
+ parsed = self._parse_site_scoped_path(sitePath)
+ if not parsed:
+ return ActionResult.isFailure(error="Invalid sitePath. Use /site:/")
+ # find matching site
+ candidate_sites = self._filter_sites_by_hint(sites, parsed["siteName"]) # substring match
+ # choose exact displayName match if available
+ exact = [s for s in candidate_sites if (s.get("displayName") or "").strip().lower() == parsed["siteName"].strip().lower()]
+ selected_site = exact[0] if exact else (candidate_sites[0] if candidate_sites else None)
+ if not selected_site:
+ return ActionResult.isFailure(error=f"SharePoint site '{parsed['siteName']}' not found or not accessible")
+ upload_site_scope = selected_site
+ # Use the inner path portion as the actual upload target path
+ upload_paths = [f"/{parsed['innerPath'].lstrip('/')}"]
+ sites = [selected_site]
+ else:
+ # Resolve path query into upload paths (fallback behavior when using resultDocument)
+ upload_paths = self._resolvePathQuery(sitePath)
# Process each document upload
upload_results = []
@@ -963,7 +1212,7 @@ class MethodSharepoint(MethodBase):
# Create result data
result_data = {
"connectionReference": connectionReference,
- "pathQuery": pathQuery,
+ "sitePath": sitePath,
"documentList": documentList,
"fileNames": fileNames,
"sitesAvailable": len(sites),
diff --git a/modules/routes/routeSecurityGoogle.py b/modules/routes/routeSecurityGoogle.py
index 33f77c49..d3921b62 100644
--- a/modules/routes/routeSecurityGoogle.py
+++ b/modules/routes/routeSecurityGoogle.py
@@ -120,7 +120,7 @@ async def login(
access_type="offline",
include_granted_scopes="true",
state=state_param,
- prompt="select_account"
+ prompt="consent select_account"
)
logger.info(f"Generated Google OAuth URL using OAuth2Session: {auth_url}")
@@ -166,6 +166,33 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse
"token_type": token_data.get("token_type", "bearer"),
"expires_in": token_data.get("expires_in", 0)
}
+
+ # If Google did not return a refresh_token, try to reuse an existing one for this user/connection
+ if not token_response.get("refresh_token"):
+ try:
+ rootInterface = getRootInterface()
+ # Prefer reusing the connection's stored refresh token; fall back to the user's own token record
+ if connection_id:
+ existing_tokens = rootInterface.db.getRecordset("tokens", recordFilter={
+ "connectionId": connection_id,
+ "authority": AuthAuthority.GOOGLE
+ })
+ if existing_tokens:
+ # Use most recent by createdAt
+ existing_tokens.sort(key=lambda x: x.get("createdAt", 0), reverse=True)
+ token_response["refresh_token"] = existing_tokens[0].get("tokenRefresh", "")
+ if not token_response.get("refresh_token") and user_id:
+ existing_access_tokens = rootInterface.db.getRecordset("tokens", recordFilter={
+ "userId": user_id,
+ "connectionId": None,
+ "authority": AuthAuthority.GOOGLE
+ })
+ if existing_access_tokens:
+ existing_access_tokens.sort(key=lambda x: x.get("createdAt", 0), reverse=True)
+ token_response["refresh_token"] = existing_access_tokens[0].get("tokenRefresh", "")
+ except Exception:
+ # Non-fatal; continue without refresh token
+ pass
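+ # Illustrative shape of a "tokens" record assumed by the lookups above (field names taken from the filters/sorts):
+ #   {"userId": ..., "connectionId": ..., "authority": AuthAuthority.GOOGLE, "tokenRefresh": "...", "createdAt": <timestamp>}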
diff --git a/modules/security/tokenManager.py b/modules/security/tokenManager.py
index c7cf4cf0..ce34433a 100644
--- a/modules/security/tokenManager.py
+++ b/modules/security/tokenManager.py
@@ -98,8 +98,7 @@ class TokenManager:
"client_id": self.google_client_id,
"client_secret": self.google_client_secret,
"grant_type": "refresh_token",
- "refresh_token": refresh_token,
- "scope": "https://www.googleapis.com/auth/gmail.readonly https://www.googleapis.com/auth/userinfo.profile https://www.googleapis.com/auth/userinfo.email openid"
+ "refresh_token": refresh_token
}
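+ # "scope" is optional when refreshing; omitting it requests a token with the originally granted scopes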
# Make refresh request