From c2d3877b1e92751e4baf337b49602bd508a5f505 Mon Sep 17 00:00:00 2001
From: ValueOn AG
Date: Thu, 4 Sep 2025 16:46:56 +0200
Subject: [PATCH] Fix SharePoint search

---
 modules/chat/handling/promptFactory.py | 118 ++++++++-
 modules/methods/methodSharepoint.py    | 319 ++++++++++++++++++++++---
 modules/routes/routeSecurityGoogle.py  |  29 ++-
 modules/security/tokenManager.py       |   3 +-
 4 files changed, 423 insertions(+), 46 deletions(-)

diff --git a/modules/chat/handling/promptFactory.py b/modules/chat/handling/promptFactory.py
index 9faa06b3..640aebba 100644
--- a/modules/chat/handling/promptFactory.py
+++ b/modules/chat/handling/promptFactory.py
@@ -11,6 +11,85 @@ logger = logging.getLogger(__name__)
 
 # Prompt creation helpers extracted from managerChat.py
 
+def _getPreviousRoundContext(service, workflow) -> str:
+    """Get context from previous workflow rounds to help understand follow-up prompts"""
+    try:
+        if not workflow or not hasattr(workflow, 'messages') or not workflow.messages:
+            return ""
+
+        # Get the current round number
+        current_round = getattr(workflow, 'currentRound', 0)
+
+        # If this is round 0 or 1, there is no previous context
+        if current_round <= 1:
+            return ""
+
+        # Find messages from rounds before the current one
+        previous_messages = []
+        for message in workflow.messages:
+            message_round = getattr(message, 'roundNumber', 0)
+            if 0 < message_round < current_round:
+                previous_messages.append(message)
+
+        if not previous_messages:
+            return ""
+
+        # Sort by round number and sequence to get chronological order
+        previous_messages.sort(key=lambda msg: (getattr(msg, 'roundNumber', 0), getattr(msg, 'sequenceNr', 0)))
+
+        # Group messages by round
+        context_parts = []
+        round_context = {}
+
+        for message in previous_messages:
+            round_num = getattr(message, 'roundNumber', 0)
+            if round_num not in round_context:
+                round_context[round_num] = {
+                    'user_inputs': [],
+                    'assistant_responses': [],
+                    'task_outcomes': [],
+                    'documents_processed': []
+                }
+
+            # Categorize messages
+            if message.role == 'user':
+                round_context[round_num]['user_inputs'].append(message.message)
+            elif message.role == 'assistant':
+                # Check whether it is a task completion or error message
+                if 'task' in message.message.lower() and ('completed' in message.message.lower() or 'failed' in message.message.lower() or 'error' in message.message.lower()):
+                    round_context[round_num]['task_outcomes'].append(message.message)
+                else:
+                    round_context[round_num]['assistant_responses'].append(message.message)
+
+            # Check for document processing
+            if hasattr(message, 'documents') and message.documents:
+                doc_names = [doc.fileName for doc in message.documents if hasattr(doc, 'fileName')]
+                if doc_names:
+                    round_context[round_num]['documents_processed'].extend(doc_names)
+
+        # Build the context summary
+        for round_num in sorted(round_context.keys()):
+            round_data = round_context[round_num]
+            context_parts.append(f"ROUND {round_num} CONTEXT:")
+
+            if round_data['user_inputs']:
+                context_parts.append(f"  User requests: {'; '.join(round_data['user_inputs'])}")
+
+            if round_data['task_outcomes']:
+                context_parts.append(f"  Task outcomes: {'; '.join(round_data['task_outcomes'])}")
+
+            if round_data['documents_processed']:
+                context_parts.append(f"  Documents processed: {', '.join(set(round_data['documents_processed']))}")
+
+        if context_parts:
+            return "\n".join(context_parts)
+        else:
+            return ""
+
+    except Exception as e:
+        logger.error(f"Error getting previous round context: {str(e)}")
+        return ""
+
 def createTaskPlanningPrompt(context: TaskContext, service) -> str:
     """Create enhanced prompt for task planning with user-friendly message generation and language detection"""
     # Get user language directly from service.user.language
@@ -22,21 +101,29 @@ def createTaskPlanningPrompt(context: TaskContext, service) -> str:
 
     # Extract available documents from context - use Pydantic model directly
     available_documents = context.available_documents or "No documents available"
 
+    # Get previous workflow round context for better understanding of follow-up prompts
+    previous_round_context = _getPreviousRoundContext(service, context.workflow)
+
     return f"""You are a task planning AI that analyzes user requests and creates structured task plans with user-friendly feedback messages.
 
 USER REQUEST: {user_request}
 
 AVAILABLE DOCUMENTS: {available_documents}
 
+PREVIOUS WORKFLOW ROUNDS CONTEXT:
+{previous_round_context if previous_round_context else "No previous workflow rounds - this is the first round."}
+
 INSTRUCTIONS:
-1. Analyze the user request and available documents
-2. Group related topics and sequential steps into single, comprehensive tasks
-3. Focus on business outcomes, not technical operations
-4. Each task should produce meaningful, usable outputs
-5. Ensure proper handover between tasks using result labels
-6. Detect the language of the user request and include it in languageUserDetected
-7. Generate user-friendly messages for each task in the user's request language
-8. Return a JSON object with the exact structure shown below
+1. Analyze the user request, available documents, and previous workflow rounds context
+2. If the user request appears to be a follow-up (like "try again", "versuche es nochmals", "retry", etc.),
+   use the PREVIOUS WORKFLOW ROUNDS CONTEXT to understand what the user wants to retry or continue
+3. Group related topics and sequential steps into single, comprehensive tasks
+4. Focus on business outcomes, not technical operations
+5. Each task should produce meaningful, usable outputs
+6. Ensure proper handover between tasks using result labels
+7. Detect the language of the user request and include it in languageUserDetected
+8. Generate user-friendly messages for each task in the user's request language
+9. Return a JSON object with the exact structure shown below
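A quick way to sanity-check the new context block is to reproduce the grouping logic in isolation. The sketch below is a simplified, self-contained version of what _getPreviousRoundContext does (it omits assistant responses and processed documents); the Msg dataclass and the sample messages are hypothetical stand-ins for the workflow's message objects.

from dataclasses import dataclass, field

@dataclass
class Msg:  # hypothetical stand-in for a workflow message
    role: str
    message: str
    roundNumber: int
    sequenceNr: int = 0
    documents: list = field(default_factory=list)

def summarize_previous_rounds(messages, current_round):
    """Group messages from rounds 1..current_round-1 into the ROUND n CONTEXT format."""
    rounds = {}
    for m in sorted(messages, key=lambda m: (m.roundNumber, m.sequenceNr)):
        if not (0 < m.roundNumber < current_round):
            continue
        bucket = rounds.setdefault(m.roundNumber, {'user': [], 'outcomes': []})
        text = m.message.lower()
        if m.role == 'user':
            bucket['user'].append(m.message)
        elif m.role == 'assistant' and 'task' in text and any(w in text for w in ('completed', 'failed', 'error')):
            bucket['outcomes'].append(m.message)
    parts = []
    for n in sorted(rounds):
        parts.append(f"ROUND {n} CONTEXT:")
        if rounds[n]['user']:
            parts.append(f"  User requests: {'; '.join(rounds[n]['user'])}")
        if rounds[n]['outcomes']:
            parts.append(f"  Task outcomes: {'; '.join(rounds[n]['outcomes'])}")
    return "\n".join(parts)

msgs = [
    Msg('user', 'speichere mir die 3 dokumente im sharepoint unter xxx', 1),
    Msg('assistant', 'Task failed: documents missing', 1, 1),
]
print(summarize_previous_rounds(msgs, current_round=2))
# ROUND 1 CONTEXT:
#   User requests: speichere mir die 3 dokumente im sharepoint unter xxx
#   Task outcomes: Task failed: documents missing

This is the shape the "versuche es nochmals" follow-up handling in the prompt relies on: the planner sees the failed round-1 outcome and can retry the original SharePoint save with the newly supplied documents.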
 
 TASK GROUPING PRINCIPLES:
 - COMBINE RELATED TOPICS: Group related subjects, sequential steps, or workflow-structured activities into single tasks
@@ -67,6 +154,21 @@ TASK PLANNING PRINCIPLES:
 - Group related activities to minimize task fragmentation
 - Only create multiple tasks when dealing with truly different, independent objectives
 
+FOLLOW-UP PROMPT HANDLING:
+- If the user request is a follow-up (e.g., "try again", "versuche es nochmals", "retry", "continue", "proceed"),
+  analyze the PREVIOUS WORKFLOW ROUNDS CONTEXT to understand what failed or was incomplete
+- Use the previous round's user requests and task outcomes to determine what the user wants to retry
+- If previous rounds failed due to missing documents, and documents are now available,
+  create tasks that use the newly available documents to accomplish the original request
+- Maintain the same business objective from previous rounds but adapt to current available resources
+
+SPECIFIC SCENARIO HANDLING:
+- If the previous round failed with a "documents missing" error and the current round has documents available,
+  the user likely wants to retry the same operation with the newly provided documents
+- Example: Previous round "speichere mir die 3 dokumente im sharepoint unter xxx" failed due to missing documents,
+  current round "versuche es nochmals" with documents should retry the SharePoint save operation
+- Always check whether the current request is a retry by looking for retry keywords and previous round context
+
 REQUIRED JSON STRUCTURE:
 {{
     "overview": "Brief description of the overall plan",
+
+        # Extract name="..." if present (for quoted multi-word names)
+        name_match = re.search(r"name=\"([^\"]+)\"", searchQuery)
+        if name_match:
+            searchQuery = name_match.group(1)
+            logger.info(f"Extracted name from quotes: '{searchQuery}'")
 
         # Check for search mode specification (exact:, regex:, case:, and:)
         if searchQuery.startswith(("exact:", "regex:", "case:", "and:")):
@@ -187,6 +268,7 @@ class MethodSharepoint(MethodBase):
             else:
                 fileQuery = search_part
 
+            # Use search_part as fileQuery (name extraction already handled above)
             return pathQuery, fileQuery, searchType, searchOptions
 
         # No colon - check if it looks like a path
@@ -349,6 +431,7 @@ class MethodSharepoint(MethodBase):
 
         Parameters:
             connectionReference (str): Reference to the Microsoft connection
+            site (str, optional): SharePoint site name or hint to search within (e.g., "SSS", "KM XYZ"). If not provided, searches all accessible sites.
             searchQuery (str): [path:][type:][mode:]query - Enhanced search syntax:
                 - "budget", "/Documents:budget", "files:budget", "folders:DELTA", "*"
                 - "exact:\"Operations 2025\"" - exact phrase matching
@@ -356,7 +439,11 @@
                 - "case:DELTA" - case-sensitive search
                 - "and:DELTA AND 2025 Mars AND Group" - all terms must be present
                 - "folders:and:DELTA AND 2025 Mars AND Group" - combined options
-                Note: For storage locations, use "folders:" prefix. All search terms must be present by default.
+                - Site hint support: "folders:site=KM LayerFinance;name=Work" or "folders:site:KM LayerFinance Work"
+                - For quoted names: "folders:site=KM;name=\"page staten\""
+                - For folder search: words like "part1 part2" will search for folders containing BOTH terms
+                Note: For storage locations, use "folders:" prefix. When searching for folders, multiple words are treated as search terms that must all appear in the folder name or path.
+                Site hints help narrow the search to specific SharePoint sites for better accuracy.
             resultDocument (str, optional): JSON result document from previous findDocumentPath action to refine search
             searchScope (str, optional): Search scope - options: "all" (default), "documents" (files only), "pages" (SharePoint pages only)
             maxResults (int, optional): Maximum number of results to return (default: 100)
@@ -364,6 +451,7 @@
         """
         try:
             connectionReference = parameters.get("connectionReference")
+            site = parameters.get("site")
            searchQuery = parameters.get("searchQuery", "*")
            resultDocument = parameters.get("resultDocument")
            searchScope = parameters.get("searchScope", "all")
@@ -415,6 +503,13 @@
             if not sites:
                 return ActionResult.isFailure(error="No SharePoint sites found or accessible")
 
+            # Filter sites by site parameter if provided
+            if site:
+                sites = self._filter_sites_by_hint(sites, site)
+                logger.info(f"Filtered sites by site parameter: '{site}' -> {len(sites)} sites")
+                if not sites:
+                    return ActionResult.isFailure(error=f"No SharePoint sites found matching '{site}'")
+
             # Resolve path query into search paths
             search_paths = self._resolvePathQuery(pathQuery)
 
@@ -423,80 +518,206 @@
             found_documents = []
             all_sites_searched = []
 
-            for site in sites:
+            # Apply site hint filtering if provided in search options
+            site_scoped_sites = sites
+            strict_folder_name: Optional[str] = None
+
+            # First check for an explicit site hint in search options
+            if searchOptions.get("site_hint"):
+                site_scoped_sites = self._filter_sites_by_hint(sites, searchOptions["site_hint"])
+                logger.info(f"Filtered sites by explicit site hint: '{searchOptions['site_hint']}' -> {len(site_scoped_sites)} sites")
+
+            # Heuristic: if the user searched for folders with pattern "<site hint> <folder name>",
+            # prefer filtering sites by the first token(s) and match the folder name exactly for the last token
+            elif searchType == "folders" and fileQuery and ' ' in fileQuery and not searchOptions.get("regex_match"):
+                # treat last token as folder name, preceding tokens combined as site hint
+                tokens = [t for t in fileQuery.split(' ') if t]
+                if len(tokens) >= 2:
+                    strict_folder_name = tokens[-1]
+                    site_hint = ' '.join(tokens[:-1])
+                    site_scoped_sites = self._filter_sites_by_hint(sites, site_hint)
+                    logger.info(f"Filtered sites by heuristic site hint: '{site_hint}' -> {len(site_scoped_sites)} sites")
+
+            for site in site_scoped_sites:
                 site_id = site["id"]
                 site_name = site["displayName"]
                 site_url = site["webUrl"]
 
                 logger.info(f"Searching in site: {site_name} ({site_url})")
 
-                # Use Microsoft Graph search API for this specific site
+                # Use Microsoft Graph API for this specific site
                 # Handle empty or wildcard queries
                 if not fileQuery or fileQuery.strip() == "" or fileQuery.strip() == "*":
                     # For wildcard/empty queries, list all items in the drive
                     endpoint = f"sites/{site_id}/drive/root/children"
                 else:
-                    # For specific queries, use search API
-                    search_query = fileQuery.replace("'", "''")  # Escape single quotes for OData
-                    endpoint = f"sites/{site_id}/drive/root/search(q='{search_query}')"
-
-                # Make the search API call
-                search_result = await self._makeGraphApiCall(connection["accessToken"], endpoint)
-
-                if "error" in search_result:
-                    logger.warning(f"Search failed for site {site_name}: {search_result['error']}")
-                    continue
-
-                # Process search results for this site
-                items = search_result.get("value", [])
+                    # For specific queries, use different approaches based on search type
+                    if searchType == "folders":
+                        # Use Microsoft Graph unified search endpoint: POST /search/query
+                        # Scope by all drives in the site (e.g., Shared Documents, Documents, language variants)
+                        try:
+                            import json
+                            # Discover drives for the site to build precise path scopes
+                            drives_resp = await self._makeGraphApiCall(connection["accessToken"], f"sites/{site_id}/drives")
+                            path_filters = []
+                            if "error" not in drives_resp:
+                                for drv in (drives_resp.get("value", []) or []):
+                                    web_url = (drv.get("webUrl") or "").rstrip('/') + '/'
+                                    if web_url:
+                                        # path:"<driveWebUrl>/"
+                                        path_filters.append(f"path:\"{web_url}\"")
+                            if not path_filters:
+                                # fall back to the site root if no drives were found
+                                scoped_path = site_url.rstrip('/') + '/'
+                                path_filters = [f"path:\"{scoped_path}\""]
+
+                            # Use KQL syntax for folder search
+                            terms = [t for t in fileQuery.split() if t.strip()]
+                            if len(terms) > 1:
+                                # Multiple terms: first search for folders containing ANY of the terms (OR)
+                                # This broadens the search to catch all potential matches
+                                name_terms = " OR ".join([f"foldername:*{t}*" for t in terms])
+                                name_filter = f"({name_terms})"
+                            else:
+                                # Single term: search for folders containing the term
+                                single_term = terms[0] if terms else fileQuery
+                                name_filter = f"foldername:*{single_term}*"
+
+                            # Use KQL syntax with isFolder:true, scoped to this site's drives
+                            site_scope = " OR ".join(path_filters)
+                            query_string = f"isFolder:true AND {name_filter} AND ({site_scope})"
+                            logger.info(f"Using KQL query: {query_string}")
+
+                            payload = {
+                                "requests": [
+                                    {
+                                        "entityTypes": ["driveItem"],
+                                        "query": {"queryString": query_string},
+                                        "from": 0,
+                                        "size": 50
+                                    }
+                                ]
+                            }
+                            logger.info(f"Using unified search API for folders with queryString: {query_string}")
+                            logger.info(f"Payload: {json.dumps(payload, indent=2)}")
+                            unified_result = await self._makeGraphApiCall(
+                                connection["accessToken"],
+                                "search/query",
+                                method="POST",
+                                data=json.dumps(payload).encode("utf-8")
+                            )
+                            logger.info(f"Unified search response: {json.dumps(unified_result, indent=2)}")
+                            if "error" in unified_result:
+                                logger.warning(f"Unified search failed for site {site_name}: {unified_result['error']}")
+                                items = []
+                            else:
+                                # Flatten hits -> driveItem resources
+                                items = []
+                                for container in (unified_result.get("value", []) or []):
+                                    for hits_container in (container.get("hitsContainers", []) or []):
+                                        for hit in (hits_container.get("hits", []) or []):
+                                            resource = hit.get("resource")
+                                            if resource:
+                                                items.append(resource)
+                                logger.info(f"Unified search returned {len(items)} items (pre-filter)")
+                            # Post-filter: for multiple terms, keep only folders whose name contains ALL terms
+                            if len(terms) > 1:
+                                filtered_items = []
+                                for item in items:
+                                    folder_name = item.get("name", "").lower()
+                                    # Check if the folder name contains ALL search terms
+                                    if all(term.lower() in folder_name for term in terms):
+                                        filtered_items.append(item)
+                                items = filtered_items
+                                logger.info(f"Post-filtered to {len(items)} items containing all terms: {terms}")
+
+                        except Exception as e:
+                            logger.error(f"Error performing unified folder search: {str(e)}")
+                            items = []
+                    else:
+                        # For files, use the regular search API
+                        search_query = fileQuery.replace("'", "''")  # Escape single quotes for OData
+                        endpoint = f"sites/{site_id}/drive/root/search(q='{search_query}')"
+                        logger.info(f"Using search API for files with query: '{search_query}'")
+
+                        # Make the search API call (files)
+                        search_result = await self._makeGraphApiCall(connection["accessToken"], endpoint)
+                        if "error" in search_result:
+                            logger.warning(f"Search failed for site {site_name}: {search_result['error']}")
+                            continue
+                        # Process search results for this site (files)
+                        items = search_result.get("value", [])
 
+                logger.info(f"Retrieved {len(items)} items from site {site_name}")
                 site_documents = []
                 for item in items:
+                    item_name = item.get("name", "")
+                    item_type = "folder" if "folder" in item else "file"
+                    item_path = item.get("parentReference", {}).get("path", "")
+                    logger.debug(f"Processing {item_type}: '{item_name}' at path: '{item_path}'")
+
                     # Filter by search scope if specified
                     if searchScope == "documents" and "folder" in item:
+                        logger.debug(f"Skipping folder '{item_name}' due to documents scope")
                         continue
                     elif searchScope == "pages" and "file" in item and not item["file"].get("mimeType", "").startswith("text/html"):
+                        logger.debug(f"Skipping file '{item_name}' due to pages scope")
                         continue
 
                     # Filter by search type (files, folders, all)
                     if searchType == "files" and "folder" in item:
+                        logger.debug(f"Skipping folder '{item_name}' due to files search type")
                         continue
                     elif searchType == "folders" and "file" in item:
+                        logger.debug(f"Skipping file '{item_name}' due to folders search type")
                         continue
 
                     # Enhanced post-filtering based on search options
-                    item_name = item.get("name", "")
-                    if fileQuery != "*" and fileQuery.strip():
+                    if fileQuery != "*" and fileQuery.strip() and searchType != "folders":
+                        # For non-folder searches, apply name filtering
+                        # (folder searches are already filtered by the unified search above)
+                        search_target = item_name
+
                         # Apply different filtering based on search options
                         if searchOptions.get("exact_match"):
                             # Exact phrase matching
                             if searchOptions.get("case_sensitive"):
-                                if fileQuery not in item_name:
+                                if fileQuery not in search_target:
                                     continue
                             else:
-                                if fileQuery.lower() not in item_name.lower():
+                                if fileQuery.lower() not in search_target.lower():
                                     continue
                         elif searchOptions.get("regex_match"):
                             # Regex pattern matching
                             import re
                             flags = 0 if searchOptions.get("case_sensitive") else re.IGNORECASE
-                            if not re.search(fileQuery, item_name, flags):
+                            if not re.search(fileQuery, search_target, flags):
                                 continue
                         elif searchOptions.get("and_terms"):
                             # AND terms mode: split by " AND " and ensure ALL terms are present
-                            search_name = item_name.lower() if not searchOptions.get("case_sensitive") else item_name
+                            search_name = search_target.lower() if not searchOptions.get("case_sensitive") else search_target
                             and_terms = [term.strip() for term in fileQuery.split(" AND ") if term.strip()]
                             and_terms = [term.lower() if not searchOptions.get("case_sensitive") else term for term in and_terms]
                             if not all(term in search_name for term in and_terms):
                                 continue  # Skip this item if not all AND terms match
                         else:
                             # Default: ALL search terms must be present (space-separated)
-                            search_name = item_name.lower() if not searchOptions.get("case_sensitive") else item_name
+                            search_name = search_target.lower() if not searchOptions.get("case_sensitive") else search_target
                             search_terms = [term.strip().lower() if not searchOptions.get("case_sensitive") else term.strip() for term in fileQuery.split() if term.strip()]
                             if not all(term in search_name for term in search_terms):
                                 continue  # Skip this item if not all terms match
 
+                    # If a strict folder name was requested, enforce an exact (case-insensitive) match on folders
+                    if strict_folder_name:
+                        item_is_folder = "folder" in item
+                        item_name_ci = (item.get("name") or "").strip().lower()
+                        if item_is_folder and item_name_ci != strict_folder_name.lower():
+                            logger.debug(f"Skipping folder '{item_name}' - doesn't match strict name '{strict_folder_name}'")
+                            continue
+
+                    logger.debug(f"Item '{item_name}' passed all filters - adding to results")
+
                     # Create minimal result with only essential reference information
                     doc_info = {
                         "id": item.get("id"),
@@ -804,15 +1025,17 @@
 
         Parameters:
             connectionReference (str): Reference to the Microsoft connection
-            pathQuery (str): Path query where to upload documents (e.g., "/Documents/Project1", "*" for default location)
+            sitePath (str): REQUIRED - Specific SharePoint path where to upload documents. Must be a valid SharePoint path format:
+                - For direct upload: "/site:<siteName>/<path>" (e.g., "/site:KM XYZ/Documents/Work")
+                - If the user provides words like "word1 word2", the system MUST call findDocumentPath first to locate the actual folder path, and then pass the returned path to this parameter
             documentList (str): Reference to the document list to upload
             fileNames (List[str]): List of names for the uploaded files
-            resultDocument (str, optional): JSON result document from findDocumentPath action (alternative to pathQuery)
+            resultDocument (str, optional): JSON result document from findDocumentPath action (alternative to sitePath)
             expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description
         """
         try:
             connectionReference = parameters.get("connectionReference")
-            pathQuery = parameters.get("pathQuery", "/Documents")
+            sitePath = parameters.get("sitePath", "/Documents")
             documentList = parameters.get("documentList")
             fileNames = parameters.get("fileNames")
             resultDocument = parameters.get("resultDocument")
@@ -847,9 +1070,9 @@
                         folder_ids.append(doc.get("id"))
 
                 if folder_ids:
-                    # Use the first folder ID found as pathQuery
-                    pathQuery = folder_ids[0]
-                    logger.info(f"Using folder ID from resultDocument: {pathQuery}")
+                    # Use the first folder ID found as sitePath
+                    sitePath = folder_ids[0]
+                    logger.info(f"Using folder ID from resultDocument: {sitePath}")
                 else:
                     return ActionResult.isFailure(error="No folders found in resultDocument")
 
@@ -874,9 +1097,35 @@
             sites = await self._discoverSharePointSites(connection["accessToken"])
             if not sites:
                 return ActionResult.isFailure(error="No SharePoint sites found or accessible")
-
-            # Resolve path query into upload paths
-            upload_paths = self._resolvePathQuery(pathQuery)
+
+            # Enforce site-scoped path usage when using sitePath directly (without resultDocument)
+            upload_site_scope = None
+            if not resultDocument:
+                if not sitePath or not sitePath.startswith('/'):
+                    return ActionResult.isFailure(error="sitePath must start with '/' and include the site name, with syntax /site:<siteName>/<path>, e.g. /site:KM LayerFinance/Documents/Work")
+
+                # Check if sitePath contains search terms (words without proper path structure)
+                if not sitePath.startswith('/site:') and not sitePath.startswith('/Documents') and not sitePath.startswith('/Shared Documents'):
+                    # This looks like search terms, not a valid path
+                    return ActionResult.isFailure(error=f"Invalid sitePath '{sitePath}'. This appears to be search terms, not a valid SharePoint path. Use the findDocumentPath action first to search for folders, then use the returned folder path as sitePath.")
+
+                parsed = self._parse_site_scoped_path(sitePath)
+                if not parsed:
+                    return ActionResult.isFailure(error="Invalid sitePath. Use /site:<siteName>/<path>")
+                # find a matching site
+                candidate_sites = self._filter_sites_by_hint(sites, parsed["siteName"])  # substring match
+                # choose an exact displayName match if available
+                exact = [s for s in candidate_sites if (s.get("displayName") or "").strip().lower() == parsed["siteName"].strip().lower()]
+                selected_site = exact[0] if exact else (candidate_sites[0] if candidate_sites else None)
+                if not selected_site:
+                    return ActionResult.isFailure(error=f"SharePoint site '{parsed['siteName']}' not found or not accessible")
+                upload_site_scope = selected_site
+                # Use the inner path portion as the actual upload target path
+                upload_paths = [f"/{parsed['innerPath'].lstrip('/')}"]
+                sites = [selected_site]
+            else:
+                # Resolve path query into upload paths (fallback behavior when using resultDocument)
+                upload_paths = self._resolvePathQuery(sitePath)
 
             # Process each document upload
             upload_results = []
@@ -963,7 +1212,7 @@
             # Create result data
             result_data = {
                 "connectionReference": connectionReference,
-                "pathQuery": pathQuery,
+                "sitePath": sitePath,
                 "documentList": documentList,
                 "fileNames": fileNames,
                 "sitesAvailable": len(sites),
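The upload path contract above can be exercised without a Graph connection. The sketch below mirrors the parse-then-select flow (the /site:<siteName>/<path> parsing of _parse_site_scoped_path plus the substring filter of _filter_sites_by_hint with the exact-displayName preference); it assumes sites shaped like the output of _discoverSharePointSites, and the sample names and URLs are made up.

# Standalone sketch of the sitePath resolution used by the upload action.
def resolve_upload_target(site_path: str, sites: list[dict]) -> tuple[dict, str]:
    if not site_path.startswith('/site:'):
        raise ValueError("Use /site:<siteName>/<path>")
    site_name, _, inner = site_path[len('/site:'):].partition('/')
    if not site_name or not inner:
        raise ValueError("Use /site:<siteName>/<path>")
    # substring match, preferring an exact displayName match
    candidates = [s for s in sites if site_name.lower() in (s.get("displayName") or "").lower()]
    exact = [s for s in candidates if (s.get("displayName") or "").strip().lower() == site_name.strip().lower()]
    selected = (exact or candidates or [None])[0]
    if selected is None:
        raise ValueError(f"SharePoint site '{site_name}' not found")
    return selected, '/' + inner.lstrip('/')

sites = [{"id": "1", "displayName": "KM LayerFinance",
          "webUrl": "https://contoso.sharepoint.com/sites/km-layerfinance"}]
site, path = resolve_upload_target("/site:KM LayerFinance/Documents/Work", sites)
print(site["displayName"], path)  # KM LayerFinance /Documents/Work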
ActionResult.isFailure(error="sitePath must start with '/' and include site name with syntax /site:/... e.g. /site:KM LayerFinance/Documents/Work") + + # Check if sitePath contains search terms (words without proper path structure) + if not sitePath.startswith('/site:') and not sitePath.startswith('/Documents') and not sitePath.startswith('/Shared Documents'): + # This looks like search terms, not a valid path + return ActionResult.isFailure(error=f"Invalid sitePath '{sitePath}'. This appears to be search terms, not a valid SharePoint path. Use findDocumentPath action first to search for folders, then use the returned folder path as sitePath.") + + parsed = self._parse_site_scoped_path(sitePath) + if not parsed: + return ActionResult.isFailure(error="Invalid sitePath. Use /site:/") + # find matching site + candidate_sites = self._filter_sites_by_hint(sites, parsed["siteName"]) # substring match + # choose exact displayName match if available + exact = [s for s in candidate_sites if (s.get("displayName") or "").strip().lower() == parsed["siteName"].strip().lower()] + selected_site = exact[0] if exact else (candidate_sites[0] if candidate_sites else None) + if not selected_site: + return ActionResult.isFailure(error=f"SharePoint site '{parsed['siteName']}' not found or not accessible") + upload_site_scope = selected_site + # Use the inner path portion as the actual upload target path + upload_paths = [f"/{parsed['innerPath'].lstrip('/')}"] + sites = [selected_site] + else: + # Resolve path query into upload paths (fallback behavior when using resultDocument) + upload_paths = self._resolvePathQuery(sitePath) # Process each document upload upload_results = [] @@ -963,7 +1212,7 @@ class MethodSharepoint(MethodBase): # Create result data result_data = { "connectionReference": connectionReference, - "pathQuery": pathQuery, + "sitePath": sitePath, "documentList": documentList, "fileNames": fileNames, "sitesAvailable": len(sites), diff --git a/modules/routes/routeSecurityGoogle.py b/modules/routes/routeSecurityGoogle.py index 33f77c49..d3921b62 100644 --- a/modules/routes/routeSecurityGoogle.py +++ b/modules/routes/routeSecurityGoogle.py @@ -120,7 +120,7 @@ async def login( access_type="offline", include_granted_scopes="true", state=state_param, - prompt="select_account" + prompt="consent select_account" ) logger.info(f"Generated Google OAuth URL using OAuth2Session: {auth_url}") @@ -166,6 +166,33 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse "token_type": token_data.get("token_type", "bearer"), "expires_in": token_data.get("expires_in", 0) } + + # If Google did not return a refresh_token, try to reuse an existing one for this user/connection + if not token_response.get("refresh_token"): + try: + rootInterface = getRootInterface() + # Prefer connection flow reuse; fallback to user access token + if connection_id: + existing_tokens = rootInterface.db.getRecordset("tokens", recordFilter={ + "connectionId": connection_id, + "authority": AuthAuthority.GOOGLE + }) + if existing_tokens: + # Use most recent by createdAt + existing_tokens.sort(key=lambda x: x.get("createdAt", 0), reverse=True) + token_response["refresh_token"] = existing_tokens[0].get("tokenRefresh", "") + if not token_response.get("refresh_token") and user_id: + existing_access_tokens = rootInterface.db.getRecordset("tokens", recordFilter={ + "userId": user_id, + "connectionId": None, + "authority": AuthAuthority.GOOGLE + }) + if existing_access_tokens: + existing_access_tokens.sort(key=lambda x: 
x.get("createdAt", 0), reverse=True) + token_response["refresh_token"] = existing_access_tokens[0].get("tokenRefresh", "") + except Exception: + # Non-fatal; continue without refresh token + pass diff --git a/modules/security/tokenManager.py b/modules/security/tokenManager.py index c7cf4cf0..ce34433a 100644 --- a/modules/security/tokenManager.py +++ b/modules/security/tokenManager.py @@ -98,8 +98,7 @@ class TokenManager: "client_id": self.google_client_id, "client_secret": self.google_client_secret, "grant_type": "refresh_token", - "refresh_token": refresh_token, - "scope": "https://www.googleapis.com/auth/gmail.readonly https://www.googleapis.com/auth/userinfo.profile https://www.googleapis.com/auth/userinfo.email openid" + "refresh_token": refresh_token } # Make refresh request