fixes sharepoint search
parent 9644514e60
commit c2d3877b1e
4 changed files with 423 additions and 46 deletions

@@ -11,6 +11,85 @@ logger = logging.getLogger(__name__)

# Prompt creation helpers extracted from managerChat.py

+def _getPreviousRoundContext(service, workflow) -> str:
+    """Get context from previous workflow rounds to help understand follow-up prompts"""
+    try:
+        if not workflow or not hasattr(workflow, 'messages') or not workflow.messages:
+            return ""
+
+        # Get current round number
+        current_round = getattr(workflow, 'currentRound', 0)
+
+        # If this is round 0 or 1, there is no previous context
+        if current_round <= 1:
+            return ""
+
+        # Find messages from previous rounds (rounds before current)
+        previous_messages = []
+        for message in workflow.messages:
+            message_round = getattr(message, 'roundNumber', 0)
+            if message_round > 0 and message_round < current_round:
+                previous_messages.append(message)
+
+        if not previous_messages:
+            return ""
+
+        # Sort by round number and sequence to get chronological order
+        previous_messages.sort(key=lambda msg: (getattr(msg, 'roundNumber', 0), getattr(msg, 'sequenceNr', 0)))
+
+        # Group messages into per-round context buckets
+        context_parts = []
+        current_round_context = {}
+
+        for message in previous_messages:
+            round_num = getattr(message, 'roundNumber', 0)
+            if round_num not in current_round_context:
+                current_round_context[round_num] = {
+                    'user_inputs': [],
+                    'assistant_responses': [],
+                    'task_outcomes': [],
+                    'documents_processed': []
+                }
+
+            # Categorize messages
+            if message.role == 'user':
+                current_round_context[round_num]['user_inputs'].append(message.message)
+            elif message.role == 'assistant':
+                # Check if it's a task completion or error message
+                if 'task' in message.message.lower() and ('completed' in message.message.lower() or 'failed' in message.message.lower() or 'error' in message.message.lower()):
+                    current_round_context[round_num]['task_outcomes'].append(message.message)
+                else:
+                    current_round_context[round_num]['assistant_responses'].append(message.message)
+
+            # Check for document processing
+            if hasattr(message, 'documents') and message.documents:
+                doc_names = [doc.fileName for doc in message.documents if hasattr(doc, 'fileName')]
+                if doc_names:
+                    current_round_context[round_num]['documents_processed'].extend(doc_names)
+
+        # Build context summary
+        for round_num in sorted(current_round_context.keys()):
+            round_data = current_round_context[round_num]
+            context_parts.append(f"ROUND {round_num} CONTEXT:")
+
+            if round_data['user_inputs']:
+                context_parts.append(f"  User requests: {'; '.join(round_data['user_inputs'])}")
+
+            if round_data['task_outcomes']:
+                context_parts.append(f"  Task outcomes: {'; '.join(round_data['task_outcomes'])}")
+
+            if round_data['documents_processed']:
+                context_parts.append(f"  Documents processed: {', '.join(set(round_data['documents_processed']))}")
+
+        if context_parts:
+            return "\n".join(context_parts)
+        else:
+            return ""
+
+    except Exception as e:
+        logger.error(f"Error getting previous round context: {str(e)}")
+        return ""
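
# Illustrative only (hypothetical values, not from a real run): given the round-1
# failure scenario described later in this diff, the helper above would return a
# summary shaped like:
#   ROUND 1 CONTEXT:
#     User requests: speichere mir die 3 dokumente im sharepoint unter xxx
#     Task outcomes: Task failed: documents missing
#     Documents processed: report.docx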

def createTaskPlanningPrompt(context: TaskContext, service) -> str:
    """Create enhanced prompt for task planning with user-friendly message generation and language detection"""
    # Get user language directly from service.user.language

@@ -22,21 +101,29 @@ def createTaskPlanningPrompt(context: TaskContext, service) -> str:
    # Extract available documents from context - use Pydantic model directly
    available_documents = context.available_documents or "No documents available"

+    # Get previous workflow round context for better understanding of follow-up prompts
+    previous_round_context = _getPreviousRoundContext(service, context.workflow)
+
    return f"""You are a task planning AI that analyzes user requests and creates structured task plans with user-friendly feedback messages.

USER REQUEST: {user_request}

AVAILABLE DOCUMENTS: {available_documents}

+PREVIOUS WORKFLOW ROUNDS CONTEXT:
+{previous_round_context if previous_round_context else "No previous workflow rounds - this is the first round."}
+
INSTRUCTIONS:
-1. Analyze the user request and available documents
-2. Group related topics and sequential steps into single, comprehensive tasks
-3. Focus on business outcomes, not technical operations
-4. Each task should produce meaningful, usable outputs
-5. Ensure proper handover between tasks using result labels
-6. Detect the language of the user request and include it in languageUserDetected
-7. Generate user-friendly messages for each task in the user's request language
-8. Return a JSON object with the exact structure shown below
+1. Analyze the user request, available documents, and previous workflow rounds context
+2. If the user request appears to be a follow-up (like "try again", "versuche es nochmals", "retry", etc.),
+   use the PREVIOUS WORKFLOW ROUNDS CONTEXT to understand what the user wants to retry or continue
+3. Group related topics and sequential steps into single, comprehensive tasks
+4. Focus on business outcomes, not technical operations
+5. Each task should produce meaningful, usable outputs
+6. Ensure proper handover between tasks using result labels
+7. Detect the language of the user request and include it in languageUserDetected
+8. Generate user-friendly messages for each task in the user's request language
+9. Return a JSON object with the exact structure shown below

TASK GROUPING PRINCIPLES:
- COMBINE RELATED TOPICS: Group related subjects, sequential steps, or workflow-structured activities into single tasks

@@ -67,6 +154,21 @@ TASK PLANNING PRINCIPLES:
- Group related activities to minimize task fragmentation
- Only create multiple tasks when dealing with truly different, independent objectives

+FOLLOW-UP PROMPT HANDLING:
+- If the user request is a follow-up (e.g., "try again", "versuche es nochmals", "retry", "continue", "proceed"),
+  analyze the PREVIOUS WORKFLOW ROUNDS CONTEXT to understand what failed or was incomplete
+- Use the previous round's user requests and task outcomes to determine what the user wants to retry
+- If previous rounds failed due to missing documents, and documents are now available,
+  create tasks that use the newly available documents to accomplish the original request
+- Maintain the same business objective from previous rounds but adapt to currently available resources
+
+SPECIFIC SCENARIO HANDLING:
+- If the previous round failed with a "documents missing" error and the current round has documents available,
+  the user likely wants to retry the same operation with the newly provided documents
+- Example: Previous round "speichere mir die 3 dokumente im sharepoint unter xxx" failed due to missing documents;
+  the current round "versuche es nochmals" with documents should retry the SharePoint save operation
+- Always check whether the current request is a retry by looking for retry keywords and previous round context
+
REQUIRED JSON STRUCTURE:
{{
  "overview": "Brief description of the overall plan",

@@ -117,6 +117,52 @@ class MethodSharepoint(MethodBase):
            logger.error(f"Error discovering SharePoint sites: {str(e)}")
            return []

+    def _filter_sites_by_hint(self, sites: List[Dict[str, Any]], site_hint: str) -> List[Dict[str, Any]]:
+        """Filter discovered sites by a human-entered site hint (case-insensitive substring)."""
+        try:
+            if not site_hint:
+                return sites
+            hint = site_hint.strip().lower()
+            filtered: List[Dict[str, Any]] = []
+            for site in sites:
+                name = (site.get("displayName") or "").lower()
+                web_url = (site.get("webUrl") or "").lower()
+                if hint in name or hint in web_url:
+                    filtered.append(site)
+            return filtered if filtered else sites
+        except Exception as e:
+            logger.error(f"Error filtering sites by hint '{site_hint}': {str(e)}")
+            return sites
+
+    def _parse_site_scoped_path(self, path_query: str) -> Optional[Dict[str, str]]:
+        """
+        Parse a site-scoped path of the form:
+            /site:KM LayerFinance/Documents/Work  or  /site:KM LayerFinance/Shared Documents/Work
+
+        Returns dict with keys: siteName, innerPath (no leading slash) on success, else None.
+        """
+        try:
+            if not path_query or not path_query.startswith('/'):
+                return None
+            # expected syntax prefix
+            prefix = '/site:'
+            if not path_query.startswith(prefix):
+                return None
+            remainder = path_query[len(prefix):]
+            # split once on the next '/'
+            if '/' not in remainder:
+                return None
+            site_name, inner = remainder.split('/', 1)
+            site_name = site_name.strip()
+            inner_path = inner.strip()
+            if not site_name or not inner_path:
+                return None
+            return {"siteName": site_name, "innerPath": inner_path}
+        except Exception as e:
+            logger.error(f"Error parsing site-scoped path '{path_query}': {str(e)}")
+            return None
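
        # Illustrative example (assumed from the docstring above, not a real run):
        #   self._parse_site_scoped_path("/site:KM LayerFinance/Documents/Work")
        #   -> {"siteName": "KM LayerFinance", "innerPath": "Documents/Work"}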

    def _parseSearchQuery(self, searchQuery: str) -> tuple[str, str, str, dict]:
        """
        Parse searchQuery to extract path, search terms, search type, and search options.

@@ -141,13 +187,48 @@ class MethodSharepoint(MethodBase):

        searchQuery = searchQuery.strip()
        searchOptions = {}

-        # Check for search type specification (files:, folders:, all:)
+        # Check for search type specification (files:, folders:, all:) FIRST
        searchType = "all"  # Default
        if searchQuery.startswith(("files:", "folders:", "all:")):
            type_parts = searchQuery.split(':', 1)
            searchType = type_parts[0].strip()
            searchQuery = type_parts[1].strip()

+        # Extract optional site hint tokens: support "site=Name" or leading "site:Name"
+        def _extract_site_hint(q: str) -> tuple[str, Optional[str]]:
+            try:
+                q_strip = q.strip()
+                # Leading form: site:KM LayerFinance ...
+                if q_strip.lower().startswith("site:"):
+                    after = q_strip[5:].lstrip()
+                    # site name until next space or end
+                    if ' ' in after:
+                        site_name, rest = after.split(' ', 1)
+                    else:
+                        site_name, rest = after, ''
+                    return rest.strip(), site_name.strip()
+                # Inline key=value form anywhere
+                m = re.search(r"\bsite=([^;\s]+)", q_strip, flags=re.IGNORECASE)
+                if m:
+                    site_name = m.group(1).strip()
+                    # remove the token from query
+                    q_new = re.sub(r"\bsite=[^;\s]+;?", "", q_strip, flags=re.IGNORECASE).strip()
+                    return q_new, site_name
+            except Exception:
+                pass
+            return q, None
+
+        searchQuery, extracted_site = _extract_site_hint(searchQuery)
+        if extracted_site:
+            searchOptions["site_hint"] = extracted_site
+            logger.info(f"Extracted site hint: '{extracted_site}'")

+        # Extract name="..." if present (for quoted multi-word names)
+        name_match = re.search(r"name=\"([^\"]+)\"", searchQuery)
+        if name_match:
+            searchQuery = name_match.group(1)
+            logger.info(f"Extracted name from quotes: '{searchQuery}'")

        # Check for search mode specification (exact:, regex:, case:, and:)
        if searchQuery.startswith(("exact:", "regex:", "case:", "and:")):

@@ -187,6 +268,7 @@ class MethodSharepoint(MethodBase):
            else:
                fileQuery = search_part

+            # Use search_part as fileQuery (name extraction already handled above)
            return pathQuery, fileQuery, searchType, searchOptions

        # No colon - check if it looks like a path

@@ -349,6 +431,7 @@ class MethodSharepoint(MethodBase):

        Parameters:
            connectionReference (str): Reference to the Microsoft connection
+            site (str, optional): SharePoint site name or hint to search within (e.g., "SSS", "KM XYZ"). If not provided, searches all accessible sites
            searchQuery (str): [path:][type:][mode:]query - Enhanced search syntax:
                - "budget", "/Documents:budget", "files:budget", "folders:DELTA", "*"
                - "exact:\"Operations 2025\"" - exact phrase matching

@@ -356,7 +439,11 @@ class MethodSharepoint(MethodBase):
                - "case:DELTA" - case-sensitive search
                - "and:DELTA AND 2025 Mars AND Group" - all terms must be present
                - "folders:and:DELTA AND 2025 Mars AND Group" - combined options
-                Note: For storage locations, use "folders:" prefix. All search terms must be present by default.
+                - Site hint support: "folders:site=KM LayerFinance;name=Work" or "folders:site:KM LayerFinance Work"
+                - For quoted names: "folders:site=KM;name=\"page staten\""
+                - For folder search: words like "part1 part2" will search for folders containing BOTH terms
+                Note: For storage locations, use "folders:" prefix. When searching for folders, multiple words are treated as search terms that must all appear in the folder name or path.
+                Site hints help narrow the search to specific SharePoint sites for better accuracy.
            resultDocument (str, optional): JSON result document from previous findDocumentPath action to refine search
            searchScope (str, optional): Search scope - options: "all" (default), "documents" (files only), "pages" (SharePoint pages only)
            maxResults (int, optional): Maximum number of results to return (default: 100)
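
Illustrative examples of the searchQuery grammar documented above (parse results inferred from the parsing code earlier in this diff, not from a real run):

    "files:budget"                          -> searchType "files", fileQuery "budget"
    "folders:site=KM;name=\"page staten\""  -> searchType "folders", site_hint "KM", fileQuery "page staten"
    "and:DELTA AND 2025 Mars AND Group"     -> and_terms mode; "DELTA", "2025 Mars", and "Group" must all appear in the item name
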
@@ -364,6 +451,7 @@ class MethodSharepoint(MethodBase):
        """
        try:
            connectionReference = parameters.get("connectionReference")
+            site = parameters.get("site")
            searchQuery = parameters.get("searchQuery", "*")
            resultDocument = parameters.get("resultDocument")
            searchScope = parameters.get("searchScope", "all")

@@ -415,6 +503,13 @@ class MethodSharepoint(MethodBase):
            if not sites:
                return ActionResult.isFailure(error="No SharePoint sites found or accessible")

+            # Filter sites by site parameter if provided
+            if site:
+                sites = self._filter_sites_by_hint(sites, site)
+                logger.info(f"Filtered sites by site parameter: '{site}' -> {len(sites)} sites")
+                if not sites:
+                    return ActionResult.isFailure(error=f"No SharePoint sites found matching '{site}'")
+
            # Resolve path query into search paths
            search_paths = self._resolvePathQuery(pathQuery)

@@ -423,80 +518,206 @@ class MethodSharepoint(MethodBase):
            found_documents = []
            all_sites_searched = []

-            for site in sites:
+            # Apply site hint filtering if provided in search options
+            site_scoped_sites = sites
+            strict_folder_name: Optional[str] = None
+
+            # First check for explicit site hint in search options
+            if searchOptions.get("site_hint"):
+                site_scoped_sites = self._filter_sites_by_hint(sites, searchOptions["site_hint"])
+                logger.info(f"Filtered sites by explicit site hint: '{searchOptions['site_hint']}' -> {len(site_scoped_sites)} sites")
+
+            # Heuristic: if user searched for folders with pattern "<siteHint> <folderName>",
+            # prefer filtering sites by the first token(s) and match folder name exactly for the last token
+            elif searchType == "folders" and fileQuery and ' ' in fileQuery and not searchOptions.get("regex_match"):
+                # treat last token as folder name, preceding tokens combined as site hint
+                tokens = [t for t in fileQuery.split(' ') if t]
+                if len(tokens) >= 2:
+                    strict_folder_name = tokens[-1]
+                    site_hint = ' '.join(tokens[:-1])
+                    site_scoped_sites = self._filter_sites_by_hint(sites, site_hint)
+                    logger.info(f"Filtered sites by heuristic site hint: '{site_hint}' -> {len(site_scoped_sites)} sites")
+
+            for site in site_scoped_sites:
                site_id = site["id"]
                site_name = site["displayName"]
                site_url = site["webUrl"]

                logger.info(f"Searching in site: {site_name} ({site_url})")

-                # Use Microsoft Graph search API for this specific site
+                # Use Microsoft Graph API for this specific site
                # Handle empty or wildcard queries
                if not fileQuery or fileQuery.strip() == "" or fileQuery.strip() == "*":
                    # For wildcard/empty queries, list all items in the drive
                    endpoint = f"sites/{site_id}/drive/root/children"
                else:
-                    # For specific queries, use search API
-                    search_query = fileQuery.replace("'", "''")  # Escape single quotes for OData
-                    endpoint = f"sites/{site_id}/drive/root/search(q='{search_query}')"
-
-                # Make the search API call
-                search_result = await self._makeGraphApiCall(connection["accessToken"], endpoint)
-
-                if "error" in search_result:
-                    logger.warning(f"Search failed for site {site_name}: {search_result['error']}")
-                    continue
-
-                # Process search results for this site
-                items = search_result.get("value", [])
+                    # For specific queries, use different approaches based on search type
+                    if searchType == "folders":
+                        # Use Microsoft Graph unified search endpoint: POST /search/query
+                        # Scope by all drives in the site (e.g., Shared Documents, Documents, language variants)
+                        try:
+                            import json
+                            # Discover drives for the site to build precise path scopes
+                            drives_resp = await self._makeGraphApiCall(connection["accessToken"], f"sites/{site_id}/drives")
+                            path_filters = []
+                            if not ("error" in drives_resp):
+                                for drv in (drives_resp.get("value", []) or []):
+                                    web_url = (drv.get("webUrl") or "").rstrip('/') + '/'
+                                    if web_url:
+                                        # path:"<drive webUrl>/"
+                                        path_filters.append(f"path:\"{web_url}\"")
+                            if not path_filters:
+                                # fallback to site root if no drives found
+                                scoped_path = site_url.rstrip('/') + '/'
+                                path_filters = [f"path:\"{scoped_path}\""]
+
+                            # Use KQL syntax for folder search
+                            terms = [t for t in fileQuery.split() if t.strip()]
+                            if len(terms) > 1:
+                                # Multiple terms: first search for folders containing ANY of the terms (OR)
+                                # This broadens the search to catch all potential matches
+                                name_terms = " OR ".join([f"foldername:*{t}*" for t in terms])
+                                name_filter = f"({name_terms})"
+                            else:
+                                # Single term: search for folders containing the term
+                                single_term = terms[0] if terms else fileQuery
+                                name_filter = f"foldername:*{single_term}*"
+
+                            # Use KQL syntax with isFolder:true
+                            query_string = f"isFolder:true AND {name_filter}"
+                            logger.info(f"Using KQL query: {query_string}")
+
+                            payload = {
+                                "requests": [
+                                    {
+                                        "entityTypes": ["driveItem"],
+                                        "query": {"queryString": query_string},
+                                        "from": 0,
+                                        "size": 50
+                                    }
+                                ]
+                            }
+                            logger.info(f"Using unified search API for folders with queryString: {query_string}")
+                            logger.info(f"Payload: {json.dumps(payload, indent=2)}")
+                            unified_result = await self._makeGraphApiCall(
+                                connection["accessToken"],
+                                "search/query",
+                                method="POST",
+                                data=json.dumps(payload).encode("utf-8")
+                            )
+                            logger.info(f"Unified search response: {json.dumps(unified_result, indent=2)}")
+                            if "error" in unified_result:
+                                logger.warning(f"Unified search failed for site {site_name}: {unified_result['error']}")
+                                items = []
+                            else:
+                                # Flatten hits -> driveItem resources
+                                items = []
+                                for container in (unified_result.get("value", []) or []):
+                                    for hits_container in (container.get("hitsContainers", []) or []):
+                                        for hit in (hits_container.get("hits", []) or []):
+                                            resource = hit.get("resource")
+                                            if resource:
+                                                items.append(resource)
+                                logger.info(f"Unified search returned {len(items)} items (pre-filter)")
+
+                            # Post-filter: For multiple terms, filter results to only include folders that contain ALL terms
+                            if len(terms) > 1:
+                                filtered_items = []
+                                for item in items:
+                                    folder_name = item.get("name", "").lower()
+                                    # Check if folder name contains ALL search terms
+                                    if all(term.lower() in folder_name for term in terms):
+                                        filtered_items.append(item)
+                                items = filtered_items
+                                logger.info(f"Post-filtered to {len(items)} items containing all terms: {terms}")

+                        except Exception as e:
+                            logger.error(f"Error performing unified folder search: {str(e)}")
+                            items = []
+                    else:
+                        # For files, use regular search API
+                        search_query = fileQuery.replace("'", "''")  # Escape single quotes for OData
+                        endpoint = f"sites/{site_id}/drive/root/search(q='{search_query}')"
+                        logger.info(f"Using search API for files with query: '{search_query}'")
+
+                        # Make the search API call (files)
+                        search_result = await self._makeGraphApiCall(connection["accessToken"], endpoint)
+                        if "error" in search_result:
+                            logger.warning(f"Search failed for site {site_name}: {search_result['error']}")
+                            continue
+                        # Process search results for this site (files)
+                        items = search_result.get("value", [])
+                logger.info(f"Retrieved {len(items)} items from site {site_name}")
                site_documents = []

                for item in items:
+                    item_name = item.get("name", "")
+                    item_type = "folder" if "folder" in item else "file"
+                    item_path = item.get("parentReference", {}).get("path", "")
+                    logger.debug(f"Processing {item_type}: '{item_name}' at path: '{item_path}'")

                    # Filter by search scope if specified
                    if searchScope == "documents" and "folder" in item:
+                        logger.debug(f"Skipping folder '{item_name}' due to documents scope")
                        continue
                    elif searchScope == "pages" and "file" in item and not item["file"].get("mimeType", "").startswith("text/html"):
+                        logger.debug(f"Skipping file '{item_name}' due to pages scope")
                        continue

                    # Filter by search type (files, folders, all)
                    if searchType == "files" and "folder" in item:
+                        logger.debug(f"Skipping folder '{item_name}' due to files search type")
                        continue
                    elif searchType == "folders" and "file" in item:
+                        logger.debug(f"Skipping file '{item_name}' due to folders search type")
                        continue

                    # Enhanced post-filtering based on search options
                    item_name = item.get("name", "")
-                    if fileQuery != "*" and fileQuery.strip():
+                    if fileQuery != "*" and fileQuery.strip() and searchType != "folders":
+                        # For non-folder searches, apply name filtering
+                        # (Folder searches are already filtered by the recursive search)
+                        search_target = item_name

                        # Apply different filtering based on search options
                        if searchOptions.get("exact_match"):
                            # Exact phrase matching
                            if searchOptions.get("case_sensitive"):
-                                if fileQuery not in item_name:
+                                if fileQuery not in search_target:
                                    continue
                            else:
-                                if fileQuery.lower() not in item_name.lower():
+                                if fileQuery.lower() not in search_target.lower():
                                    continue
                        elif searchOptions.get("regex_match"):
                            # Regex pattern matching
                            import re
                            flags = 0 if searchOptions.get("case_sensitive") else re.IGNORECASE
-                            if not re.search(fileQuery, item_name, flags):
+                            if not re.search(fileQuery, search_target, flags):
                                continue
                        elif searchOptions.get("and_terms"):
                            # AND terms mode: Split by " AND " and ensure ALL terms are present
-                            search_name = item_name.lower() if not searchOptions.get("case_sensitive") else item_name
+                            search_name = search_target.lower() if not searchOptions.get("case_sensitive") else search_target
                            and_terms = [term.strip() for term in fileQuery.split(" AND ") if term.strip()]
                            and_terms = [term.lower() if not searchOptions.get("case_sensitive") else term for term in and_terms]
                            if not all(term in search_name for term in and_terms):
                                continue  # Skip this item if not all AND terms match
                        else:
                            # Default: ALL search terms must be present (space-separated)
-                            search_name = item_name.lower() if not searchOptions.get("case_sensitive") else item_name
+                            search_name = search_target.lower() if not searchOptions.get("case_sensitive") else search_target
                            search_terms = [term.strip().lower() if not searchOptions.get("case_sensitive") else term.strip()
                                            for term in fileQuery.split() if term.strip()]
                            if not all(term in search_name for term in search_terms):
                                continue  # Skip this item if not all terms match

+                    # If strict folder name requested, enforce exact (case-insensitive) match on folders
+                    if strict_folder_name:
+                        item_is_folder = "folder" in item
+                        item_name_ci = (item.get("name") or "").strip().lower()
+                        if item_is_folder and item_name_ci != strict_folder_name.lower():
+                            logger.debug(f"Skipping folder '{item_name}' - doesn't match strict name '{strict_folder_name}'")
+                            continue

+                    logger.debug(f"Item '{item_name}' passed all filters - adding to results")

                    # Create minimal result with only essential reference information
                    doc_info = {
                        "id": item.get("id"),

@@ -804,15 +1025,17 @@ class MethodSharepoint(MethodBase):

        Parameters:
            connectionReference (str): Reference to the Microsoft connection
-            pathQuery (str): Path query where to upload documents (e.g., "/Documents/Project1", "*" for default location)
+            sitePath (str): REQUIRED - Specific SharePoint path where to upload documents. Must be a valid SharePoint path format:
+                - For direct upload: "/site:<Site Name>/<Library>/<Folder Path>" (e.g., "/site:KM XYZ/Documents/Work")
+                - If the user provides words like "word1 word2", the system MUST call findDocumentPath first to locate the actual folder path, then pass that result to this parameter
            documentList (str): Reference to the document list to upload
            fileNames (List[str]): List of names for the uploaded files
-            resultDocument (str, optional): JSON result document from findDocumentPath action (alternative to pathQuery)
+            resultDocument (str, optional): JSON result document from findDocumentPath action (alternative to sitePath)
            expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description
        """
        try:
            connectionReference = parameters.get("connectionReference")
-            pathQuery = parameters.get("pathQuery", "/Documents")
+            sitePath = parameters.get("sitePath", "/Documents")
            documentList = parameters.get("documentList")
            fileNames = parameters.get("fileNames")
            resultDocument = parameters.get("resultDocument")

@@ -847,9 +1070,9 @@ class MethodSharepoint(MethodBase):
                        folder_ids.append(doc.get("id"))

                if folder_ids:
-                    # Use the first folder ID found as pathQuery
-                    pathQuery = folder_ids[0]
-                    logger.info(f"Using folder ID from resultDocument: {pathQuery}")
+                    # Use the first folder ID found as sitePath
+                    sitePath = folder_ids[0]
+                    logger.info(f"Using folder ID from resultDocument: {sitePath}")
                else:
                    return ActionResult.isFailure(error="No folders found in resultDocument")

@@ -874,9 +1097,35 @@ class MethodSharepoint(MethodBase):
            sites = await self._discoverSharePointSites(connection["accessToken"])
            if not sites:
                return ActionResult.isFailure(error="No SharePoint sites found or accessible")

-            # Resolve path query into upload paths
-            upload_paths = self._resolvePathQuery(pathQuery)
+            # Enforce site-scoped path usage when using sitePath directly (without resultDocument)
+            upload_site_scope = None
+            if not resultDocument:
+                if not sitePath or not sitePath.startswith('/'):
+                    return ActionResult.isFailure(error="sitePath must start with '/' and include the site name, using the syntax /site:<Site Display Name>/... e.g. /site:KM LayerFinance/Documents/Work")
+
+                # Check if sitePath contains search terms (words without proper path structure)
+                if not sitePath.startswith('/site:') and not sitePath.startswith('/Documents') and not sitePath.startswith('/Shared Documents'):
+                    # This looks like search terms, not a valid path
+                    return ActionResult.isFailure(error=f"Invalid sitePath '{sitePath}'. This appears to be search terms, not a valid SharePoint path. Use the findDocumentPath action first to search for folders, then use the returned folder path as sitePath.")
+
+                parsed = self._parse_site_scoped_path(sitePath)
+                if not parsed:
+                    return ActionResult.isFailure(error="Invalid sitePath. Use /site:<Site Display Name>/<Library or Folder Path>")
+                # find matching site
+                candidate_sites = self._filter_sites_by_hint(sites, parsed["siteName"])  # substring match
+                # choose exact displayName match if available
+                exact = [s for s in candidate_sites if (s.get("displayName") or "").strip().lower() == parsed["siteName"].strip().lower()]
+                selected_site = exact[0] if exact else (candidate_sites[0] if candidate_sites else None)
+                if not selected_site:
+                    return ActionResult.isFailure(error=f"SharePoint site '{parsed['siteName']}' not found or not accessible")
+                upload_site_scope = selected_site
+                # Use the inner path portion as the actual upload target path
+                upload_paths = [f"/{parsed['innerPath'].lstrip('/')}"]
+                sites = [selected_site]
+            else:
+                # Resolve path query into upload paths (fallback behavior when using resultDocument)
+                upload_paths = self._resolvePathQuery(sitePath)
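
            # Illustrative example (assumed, not from a real run): sitePath
            # "/site:KM LayerFinance/Documents/Work" parses to siteName "KM LayerFinance"
            # and innerPath "Documents/Work", so the upload is scoped to that single
            # site with upload_paths == ["/Documents/Work"].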

            # Process each document upload
            upload_results = []

@@ -963,7 +1212,7 @@ class MethodSharepoint(MethodBase):
            # Create result data
            result_data = {
                "connectionReference": connectionReference,
-                "pathQuery": pathQuery,
+                "sitePath": sitePath,
                "documentList": documentList,
                "fileNames": fileNames,
                "sitesAvailable": len(sites),

@@ -120,7 +120,7 @@ async def login(
        access_type="offline",
        include_granted_scopes="true",
        state=state_param,
-        prompt="select_account"
+        prompt="consent select_account"
    )

    logger.info(f"Generated Google OAuth URL using OAuth2Session: {auth_url}")
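
Note: with access_type="offline", Google returns a refresh_token only on the user's first consent by default; adding "consent" to the prompt forces the consent screen again so a refresh_token is re-issued. The callback change below covers the remaining case where Google still omits it, by reusing a previously stored refresh token.
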
@@ -166,6 +166,33 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse
        "token_type": token_data.get("token_type", "bearer"),
        "expires_in": token_data.get("expires_in", 0)
    }

+    # If Google did not return a refresh_token, try to reuse an existing one for this user/connection
+    if not token_response.get("refresh_token"):
+        try:
+            rootInterface = getRootInterface()
+            # Prefer connection flow reuse; fallback to user access token
+            if connection_id:
+                existing_tokens = rootInterface.db.getRecordset("tokens", recordFilter={
+                    "connectionId": connection_id,
+                    "authority": AuthAuthority.GOOGLE
+                })
+                if existing_tokens:
+                    # Use most recent by createdAt
+                    existing_tokens.sort(key=lambda x: x.get("createdAt", 0), reverse=True)
+                    token_response["refresh_token"] = existing_tokens[0].get("tokenRefresh", "")
+            if not token_response.get("refresh_token") and user_id:
+                existing_access_tokens = rootInterface.db.getRecordset("tokens", recordFilter={
+                    "userId": user_id,
+                    "connectionId": None,
+                    "authority": AuthAuthority.GOOGLE
+                })
+                if existing_access_tokens:
+                    existing_access_tokens.sort(key=lambda x: x.get("createdAt", 0), reverse=True)
+                    token_response["refresh_token"] = existing_access_tokens[0].get("tokenRefresh", "")
+        except Exception:
+            # Non-fatal; continue without refresh token
+            pass

@@ -98,8 +98,7 @@ class TokenManager:
            "client_id": self.google_client_id,
            "client_secret": self.google_client_secret,
            "grant_type": "refresh_token",
-            "refresh_token": refresh_token,
-            "scope": "https://www.googleapis.com/auth/gmail.readonly https://www.googleapis.com/auth/userinfo.profile https://www.googleapis.com/auth/userinfo.email openid"
+            "refresh_token": refresh_token
        }
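
        # Note: per OAuth 2.0 (RFC 6749, Section 6), a "scope" on a refresh request
        # can only narrow the original grant; omitting it keeps the originally
        # granted scopes.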

        # Make refresh request