fixes sharepoint search
parent 9644514e60
commit c2d3877b1e
4 changed files with 423 additions and 46 deletions

@@ -11,6 +11,85 @@ logger = logging.getLogger(__name__)

# Prompt creation helpers extracted from managerChat.py

+def _getPreviousRoundContext(service, workflow) -> str:
+    """Get context from previous workflow rounds to help understand follow-up prompts"""
+    try:
+        if not workflow or not hasattr(workflow, 'messages') or not workflow.messages:
+            return ""
+
+        # Get current round number
+        current_round = getattr(workflow, 'currentRound', 0)
+
+        # If this is round 0 or 1, there is no previous context
+        if current_round <= 1:
+            return ""
+
+        # Find messages from previous rounds (rounds before current)
+        previous_messages = []
+        for message in workflow.messages:
+            message_round = getattr(message, 'roundNumber', 0)
+            if message_round > 0 and message_round < current_round:
+                previous_messages.append(message)
+
+        if not previous_messages:
+            return ""
+
+        # Sort by round number and sequence to get chronological order
+        previous_messages.sort(key=lambda msg: (getattr(msg, 'roundNumber', 0), getattr(msg, 'sequenceNr', 0)))
+
+        # Group messages into per-round context buckets
+        context_parts = []
+        current_round_context = {}
+
+        for message in previous_messages:
+            round_num = getattr(message, 'roundNumber', 0)
+            if round_num not in current_round_context:
+                current_round_context[round_num] = {
+                    'user_inputs': [],
+                    'assistant_responses': [],
+                    'task_outcomes': [],
+                    'documents_processed': []
+                }
+
+            # Categorize messages
+            if message.role == 'user':
+                current_round_context[round_num]['user_inputs'].append(message.message)
+            elif message.role == 'assistant':
+                # Check if it's a task completion or error message
+                if 'task' in message.message.lower() and ('completed' in message.message.lower() or 'failed' in message.message.lower() or 'error' in message.message.lower()):
+                    current_round_context[round_num]['task_outcomes'].append(message.message)
+                else:
+                    current_round_context[round_num]['assistant_responses'].append(message.message)
+
+            # Check for document processing
+            if hasattr(message, 'documents') and message.documents:
+                doc_names = [doc.fileName for doc in message.documents if hasattr(doc, 'fileName')]
+                if doc_names:
+                    current_round_context[round_num]['documents_processed'].extend(doc_names)
+
+        # Build context summary
+        for round_num in sorted(current_round_context.keys()):
+            round_data = current_round_context[round_num]
+            context_parts.append(f"ROUND {round_num} CONTEXT:")
+
+            if round_data['user_inputs']:
+                context_parts.append(f"  User requests: {'; '.join(round_data['user_inputs'])}")
+
+            if round_data['task_outcomes']:
+                context_parts.append(f"  Task outcomes: {'; '.join(round_data['task_outcomes'])}")
+
+            if round_data['documents_processed']:
+                context_parts.append(f"  Documents processed: {', '.join(set(round_data['documents_processed']))}")
+
+        if context_parts:
+            return "\n".join(context_parts)
+        else:
+            return ""
+
+    except Exception as e:
+        logger.error(f"Error getting previous round context: {str(e)}")
+        return ""
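
# Illustrative only (hypothetical values, not from a real run): given the round-1
# failure scenario described later in this diff, the helper above would return a
# summary shaped like:
#   ROUND 1 CONTEXT:
#     User requests: speichere mir die 3 dokumente im sharepoint unter xxx
#     Task outcomes: Task failed: documents missing
#     Documents processed: report.docx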

def createTaskPlanningPrompt(context: TaskContext, service) -> str:
    """Create enhanced prompt for task planning with user-friendly message generation and language detection"""
    # Get user language directly from service.user.language

@@ -22,21 +101,29 @@ def createTaskPlanningPrompt(context: TaskContext, service) -> str:
    # Extract available documents from context - use Pydantic model directly
    available_documents = context.available_documents or "No documents available"

+    # Get previous workflow round context for better understanding of follow-up prompts
+    previous_round_context = _getPreviousRoundContext(service, context.workflow)
+
    return f"""You are a task planning AI that analyzes user requests and creates structured task plans with user-friendly feedback messages.

USER REQUEST: {user_request}

AVAILABLE DOCUMENTS: {available_documents}

+PREVIOUS WORKFLOW ROUNDS CONTEXT:
+{previous_round_context if previous_round_context else "No previous workflow rounds - this is the first round."}
+
INSTRUCTIONS:
-1. Analyze the user request and available documents
-2. Group related topics and sequential steps into single, comprehensive tasks
-3. Focus on business outcomes, not technical operations
-4. Each task should produce meaningful, usable outputs
-5. Ensure proper handover between tasks using result labels
-6. Detect the language of the user request and include it in languageUserDetected
-7. Generate user-friendly messages for each task in the user's request language
-8. Return a JSON object with the exact structure shown below
+1. Analyze the user request, available documents, and previous workflow rounds context
+2. If the user request appears to be a follow-up (like "try again", "versuche es nochmals", "retry", etc.),
+   use the PREVIOUS WORKFLOW ROUNDS CONTEXT to understand what the user wants to retry or continue
+3. Group related topics and sequential steps into single, comprehensive tasks
+4. Focus on business outcomes, not technical operations
+5. Each task should produce meaningful, usable outputs
+6. Ensure proper handover between tasks using result labels
+7. Detect the language of the user request and include it in languageUserDetected
+8. Generate user-friendly messages for each task in the user's request language
+9. Return a JSON object with the exact structure shown below

TASK GROUPING PRINCIPLES:
- COMBINE RELATED TOPICS: Group related subjects, sequential steps, or workflow-structured activities into single tasks

@@ -67,6 +154,21 @@ TASK PLANNING PRINCIPLES:
- Group related activities to minimize task fragmentation
- Only create multiple tasks when dealing with truly different, independent objectives

+FOLLOW-UP PROMPT HANDLING:
+- If the user request is a follow-up (e.g., "try again", "versuche es nochmals", "retry", "continue", "proceed"),
+  analyze the PREVIOUS WORKFLOW ROUNDS CONTEXT to understand what failed or was incomplete
+- Use the previous round's user requests and task outcomes to determine what the user wants to retry
+- If previous rounds failed due to missing documents, and documents are now available,
+  create tasks that use the newly available documents to accomplish the original request
+- Maintain the same business objective from previous rounds but adapt to currently available resources
+
+SPECIFIC SCENARIO HANDLING:
+- If the previous round failed with a "documents missing" error and the current round has documents available,
+  the user likely wants to retry the same operation with the newly provided documents
+- Example: Previous round "speichere mir die 3 dokumente im sharepoint unter xxx" failed due to missing documents;
+  the current round "versuche es nochmals" with documents should retry the SharePoint save operation
+- Always check whether the current request is a retry by looking for retry keywords and previous round context
+
REQUIRED JSON STRUCTURE:
{{
  "overview": "Brief description of the overall plan",

@@ -117,6 +117,52 @@ class MethodSharepoint(MethodBase):
            logger.error(f"Error discovering SharePoint sites: {str(e)}")
            return []

+    def _filter_sites_by_hint(self, sites: List[Dict[str, Any]], site_hint: str) -> List[Dict[str, Any]]:
+        """Filter discovered sites by a human-entered site hint (case-insensitive substring)."""
+        try:
+            if not site_hint:
+                return sites
+            hint = site_hint.strip().lower()
+            filtered: List[Dict[str, Any]] = []
+            for site in sites:
+                name = (site.get("displayName") or "").lower()
+                web_url = (site.get("webUrl") or "").lower()
+                if hint in name or hint in web_url:
+                    filtered.append(site)
+            return filtered if filtered else sites
+        except Exception as e:
+            logger.error(f"Error filtering sites by hint '{site_hint}': {str(e)}")
+            return sites
+
+    def _parse_site_scoped_path(self, path_query: str) -> Optional[Dict[str, str]]:
+        """
+        Parse a site-scoped path of the form:
+            /site:KM LayerFinance/Documents/Work  or  /site:KM LayerFinance/Shared Documents/Work
+
+        Returns dict with keys: siteName, innerPath (no leading slash) on success, else None.
+        """
+        try:
+            if not path_query or not path_query.startswith('/'):
+                return None
+            # expected syntax prefix
+            prefix = '/site:'
+            if not path_query.startswith(prefix):
+                return None
+            remainder = path_query[len(prefix):]
+            # split once on the next '/'
+            if '/' not in remainder:
+                return None
+            site_name, inner = remainder.split('/', 1)
+            site_name = site_name.strip()
+            inner_path = inner.strip()
+            if not site_name or not inner_path:
+                return None
+            return {"siteName": site_name, "innerPath": inner_path}
+        except Exception as e:
+            logger.error(f"Error parsing site-scoped path '{path_query}': {str(e)}")
+            return None
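
        # Illustrative example (assumed from the docstring above, not a real run):
        #   self._parse_site_scoped_path("/site:KM LayerFinance/Documents/Work")
        #   -> {"siteName": "KM LayerFinance", "innerPath": "Documents/Work"}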

    def _parseSearchQuery(self, searchQuery: str) -> tuple[str, str, str, dict]:
        """
        Parse searchQuery to extract path, search terms, search type, and search options.

@@ -141,13 +187,48 @@ class MethodSharepoint(MethodBase):

        searchQuery = searchQuery.strip()
        searchOptions = {}

-        # Check for search type specification (files:, folders:, all:)
+        # Check for search type specification (files:, folders:, all:) FIRST
        searchType = "all"  # Default
        if searchQuery.startswith(("files:", "folders:", "all:")):
            type_parts = searchQuery.split(':', 1)
            searchType = type_parts[0].strip()
            searchQuery = type_parts[1].strip()

+        # Extract optional site hint tokens: support "site=Name" or leading "site:Name"
+        def _extract_site_hint(q: str) -> tuple[str, Optional[str]]:
+            try:
+                q_strip = q.strip()
+                # Leading form: site:KM LayerFinance ...
+                if q_strip.lower().startswith("site:"):
+                    after = q_strip[5:].lstrip()
+                    # site name until next space or end
+                    if ' ' in after:
+                        site_name, rest = after.split(' ', 1)
+                    else:
+                        site_name, rest = after, ''
+                    return rest.strip(), site_name.strip()
+                # Inline key=value form anywhere
+                m = re.search(r"\bsite=([^;\s]+)", q_strip, flags=re.IGNORECASE)
+                if m:
+                    site_name = m.group(1).strip()
+                    # remove the token from query
+                    q_new = re.sub(r"\bsite=[^;\s]+;?", "", q_strip, flags=re.IGNORECASE).strip()
+                    return q_new, site_name
+            except Exception:
+                pass
+            return q, None
+
+        searchQuery, extracted_site = _extract_site_hint(searchQuery)
+        if extracted_site:
+            searchOptions["site_hint"] = extracted_site
+            logger.info(f"Extracted site hint: '{extracted_site}'")

+        # Extract name="..." if present (for quoted multi-word names)
+        name_match = re.search(r"name=\"([^\"]+)\"", searchQuery)
+        if name_match:
+            searchQuery = name_match.group(1)
+            logger.info(f"Extracted name from quotes: '{searchQuery}'")

        # Check for search mode specification (exact:, regex:, case:, and:)
        if searchQuery.startswith(("exact:", "regex:", "case:", "and:")):

@@ -187,6 +268,7 @@ class MethodSharepoint(MethodBase):
            else:
                fileQuery = search_part

+            # Use search_part as fileQuery (name extraction already handled above)
            return pathQuery, fileQuery, searchType, searchOptions

        # No colon - check if it looks like a path

@@ -349,6 +431,7 @@ class MethodSharepoint(MethodBase):

        Parameters:
            connectionReference (str): Reference to the Microsoft connection
+            site (str, optional): SharePoint site name or hint to search within (e.g., "SSS", "KM XYZ"). If not provided, searches all accessible sites
            searchQuery (str): [path:][type:][mode:]query - Enhanced search syntax:
                - "budget", "/Documents:budget", "files:budget", "folders:DELTA", "*"
                - "exact:\"Operations 2025\"" - exact phrase matching

@@ -356,7 +439,11 @@ class MethodSharepoint(MethodBase):
                - "case:DELTA" - case-sensitive search
                - "and:DELTA AND 2025 Mars AND Group" - all terms must be present
                - "folders:and:DELTA AND 2025 Mars AND Group" - combined options
-                Note: For storage locations, use "folders:" prefix. All search terms must be present by default.
+                - Site hint support: "folders:site=KM LayerFinance;name=Work" or "folders:site:KM LayerFinance Work"
+                - For quoted names: "folders:site=KM;name=\"page staten\""
+                - For folder search: words like "part1 part2" will search for folders containing BOTH terms
+                Note: For storage locations, use "folders:" prefix. When searching for folders, multiple words are treated as search terms that must all appear in the folder name or path.
+                Site hints help narrow the search to specific SharePoint sites for better accuracy.
            resultDocument (str, optional): JSON result document from previous findDocumentPath action to refine search
            searchScope (str, optional): Search scope - options: "all" (default), "documents" (files only), "pages" (SharePoint pages only)
            maxResults (int, optional): Maximum number of results to return (default: 100)
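
Illustrative examples of the searchQuery grammar documented above (parse results inferred from the parsing code earlier in this diff, not from a real run):

    "files:budget"                          -> searchType "files", fileQuery "budget"
    "folders:site=KM;name=\"page staten\""  -> searchType "folders", site_hint "KM", fileQuery "page staten"
    "and:DELTA AND 2025 Mars AND Group"     -> and_terms mode; "DELTA", "2025 Mars", and "Group" must all appear in the item name
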
@@ -364,6 +451,7 @@ class MethodSharepoint(MethodBase):
        """
        try:
            connectionReference = parameters.get("connectionReference")
+            site = parameters.get("site")
            searchQuery = parameters.get("searchQuery", "*")
            resultDocument = parameters.get("resultDocument")
            searchScope = parameters.get("searchScope", "all")

@@ -415,6 +503,13 @@ class MethodSharepoint(MethodBase):
            if not sites:
                return ActionResult.isFailure(error="No SharePoint sites found or accessible")

+            # Filter sites by site parameter if provided
+            if site:
+                sites = self._filter_sites_by_hint(sites, site)
+                logger.info(f"Filtered sites by site parameter: '{site}' -> {len(sites)} sites")
+                if not sites:
+                    return ActionResult.isFailure(error=f"No SharePoint sites found matching '{site}'")
+
            # Resolve path query into search paths
            search_paths = self._resolvePathQuery(pathQuery)

@@ -423,80 +518,206 @@ class MethodSharepoint(MethodBase):
            found_documents = []
            all_sites_searched = []

-            for site in sites:
+            # Apply site hint filtering if provided in search options
+            site_scoped_sites = sites
+            strict_folder_name: Optional[str] = None
+
+            # First check for explicit site hint in search options
+            if searchOptions.get("site_hint"):
+                site_scoped_sites = self._filter_sites_by_hint(sites, searchOptions["site_hint"])
+                logger.info(f"Filtered sites by explicit site hint: '{searchOptions['site_hint']}' -> {len(site_scoped_sites)} sites")
+
+            # Heuristic: if user searched for folders with pattern "<siteHint> <folderName>",
+            # prefer filtering sites by the first token(s) and match folder name exactly for the last token
+            elif searchType == "folders" and fileQuery and ' ' in fileQuery and not searchOptions.get("regex_match"):
+                # treat last token as folder name, preceding tokens combined as site hint
+                tokens = [t for t in fileQuery.split(' ') if t]
+                if len(tokens) >= 2:
+                    strict_folder_name = tokens[-1]
+                    site_hint = ' '.join(tokens[:-1])
+                    site_scoped_sites = self._filter_sites_by_hint(sites, site_hint)
+                    logger.info(f"Filtered sites by heuristic site hint: '{site_hint}' -> {len(site_scoped_sites)} sites")
+
+            for site in site_scoped_sites:
                site_id = site["id"]
                site_name = site["displayName"]
                site_url = site["webUrl"]

                logger.info(f"Searching in site: {site_name} ({site_url})")

-                # Use Microsoft Graph search API for this specific site
+                # Use Microsoft Graph API for this specific site
                # Handle empty or wildcard queries
                if not fileQuery or fileQuery.strip() == "" or fileQuery.strip() == "*":
                    # For wildcard/empty queries, list all items in the drive
                    endpoint = f"sites/{site_id}/drive/root/children"
                else:
-                    # For specific queries, use search API
-                    search_query = fileQuery.replace("'", "''")  # Escape single quotes for OData
-                    endpoint = f"sites/{site_id}/drive/root/search(q='{search_query}')"
-
-                # Make the search API call
-                search_result = await self._makeGraphApiCall(connection["accessToken"], endpoint)
-
-                if "error" in search_result:
-                    logger.warning(f"Search failed for site {site_name}: {search_result['error']}")
-                    continue
-
-                # Process search results for this site
-                items = search_result.get("value", [])
+                    # For specific queries, use different approaches based on search type
+                    if searchType == "folders":
+                        # Use Microsoft Graph unified search endpoint: POST /search/query
+                        # Scope by all drives in the site (e.g., Shared Documents, Documents, language variants)
+                        try:
+                            import json
+                            # Discover drives for the site to build precise path scopes
+                            drives_resp = await self._makeGraphApiCall(connection["accessToken"], f"sites/{site_id}/drives")
+                            path_filters = []
+                            if not ("error" in drives_resp):
+                                for drv in (drives_resp.get("value", []) or []):
+                                    web_url = (drv.get("webUrl") or "").rstrip('/') + '/'
+                                    if web_url:
+                                        # path:"<drive webUrl>/"
+                                        path_filters.append(f"path:\"{web_url}\"")
+                            if not path_filters:
+                                # fallback to site root if no drives found
+                                scoped_path = site_url.rstrip('/') + '/'
+                                path_filters = [f"path:\"{scoped_path}\""]
+
+                            # Use KQL syntax for folder search
+                            terms = [t for t in fileQuery.split() if t.strip()]
+                            if len(terms) > 1:
+                                # Multiple terms: first search for folders containing ANY of the terms (OR)
+                                # This broadens the search to catch all potential matches
+                                name_terms = " OR ".join([f"foldername:*{t}*" for t in terms])
+                                name_filter = f"({name_terms})"
+                            else:
+                                # Single term: search for folders containing the term
+                                single_term = terms[0] if terms else fileQuery
+                                name_filter = f"foldername:*{single_term}*"
+
+                            # Use KQL syntax with isFolder:true
+                            query_string = f"isFolder:true AND {name_filter}"
+                            logger.info(f"Using KQL query: {query_string}")
+
+                            payload = {
+                                "requests": [
+                                    {
+                                        "entityTypes": ["driveItem"],
+                                        "query": {"queryString": query_string},
+                                        "from": 0,
+                                        "size": 50
+                                    }
+                                ]
+                            }
+                            logger.info(f"Using unified search API for folders with queryString: {query_string}")
+                            logger.info(f"Payload: {json.dumps(payload, indent=2)}")
+                            unified_result = await self._makeGraphApiCall(
+                                connection["accessToken"],
+                                "search/query",
+                                method="POST",
+                                data=json.dumps(payload).encode("utf-8")
+                            )
+                            logger.info(f"Unified search response: {json.dumps(unified_result, indent=2)}")
+                            if "error" in unified_result:
+                                logger.warning(f"Unified search failed for site {site_name}: {unified_result['error']}")
+                                items = []
+                            else:
+                                # Flatten hits -> driveItem resources
+                                items = []
+                                for container in (unified_result.get("value", []) or []):
+                                    for hits_container in (container.get("hitsContainers", []) or []):
+                                        for hit in (hits_container.get("hits", []) or []):
+                                            resource = hit.get("resource")
+                                            if resource:
+                                                items.append(resource)
+                                logger.info(f"Unified search returned {len(items)} items (pre-filter)")
+
+                            # Post-filter: For multiple terms, filter results to only include folders that contain ALL terms
+                            if len(terms) > 1:
+                                filtered_items = []
+                                for item in items:
+                                    folder_name = item.get("name", "").lower()
+                                    # Check if folder name contains ALL search terms
+                                    if all(term.lower() in folder_name for term in terms):
+                                        filtered_items.append(item)
+                                items = filtered_items
+                                logger.info(f"Post-filtered to {len(items)} items containing all terms: {terms}")

+                        except Exception as e:
+                            logger.error(f"Error performing unified folder search: {str(e)}")
+                            items = []
+                    else:
+                        # For files, use regular search API
+                        search_query = fileQuery.replace("'", "''")  # Escape single quotes for OData
+                        endpoint = f"sites/{site_id}/drive/root/search(q='{search_query}')"
+                        logger.info(f"Using search API for files with query: '{search_query}'")
+
+                        # Make the search API call (files)
+                        search_result = await self._makeGraphApiCall(connection["accessToken"], endpoint)
+                        if "error" in search_result:
+                            logger.warning(f"Search failed for site {site_name}: {search_result['error']}")
+                            continue
+                        # Process search results for this site (files)
+                        items = search_result.get("value", [])
+                logger.info(f"Retrieved {len(items)} items from site {site_name}")
                site_documents = []

                for item in items:
+                    item_name = item.get("name", "")
+                    item_type = "folder" if "folder" in item else "file"
+                    item_path = item.get("parentReference", {}).get("path", "")
+                    logger.debug(f"Processing {item_type}: '{item_name}' at path: '{item_path}'")

                    # Filter by search scope if specified
                    if searchScope == "documents" and "folder" in item:
+                        logger.debug(f"Skipping folder '{item_name}' due to documents scope")
                        continue
                    elif searchScope == "pages" and "file" in item and not item["file"].get("mimeType", "").startswith("text/html"):
+                        logger.debug(f"Skipping file '{item_name}' due to pages scope")
                        continue

                    # Filter by search type (files, folders, all)
                    if searchType == "files" and "folder" in item:
+                        logger.debug(f"Skipping folder '{item_name}' due to files search type")
                        continue
                    elif searchType == "folders" and "file" in item:
+                        logger.debug(f"Skipping file '{item_name}' due to folders search type")
                        continue

                    # Enhanced post-filtering based on search options
                    item_name = item.get("name", "")
-                    if fileQuery != "*" and fileQuery.strip():
+                    if fileQuery != "*" and fileQuery.strip() and searchType != "folders":
+                        # For non-folder searches, apply name filtering
+                        # (Folder searches are already filtered by the recursive search)
+                        search_target = item_name

                        # Apply different filtering based on search options
                        if searchOptions.get("exact_match"):
                            # Exact phrase matching
                            if searchOptions.get("case_sensitive"):
-                                if fileQuery not in item_name:
+                                if fileQuery not in search_target:
                                    continue
                            else:
-                                if fileQuery.lower() not in item_name.lower():
+                                if fileQuery.lower() not in search_target.lower():
                                    continue
                        elif searchOptions.get("regex_match"):
                            # Regex pattern matching
                            import re
                            flags = 0 if searchOptions.get("case_sensitive") else re.IGNORECASE
-                            if not re.search(fileQuery, item_name, flags):
+                            if not re.search(fileQuery, search_target, flags):
                                continue
                        elif searchOptions.get("and_terms"):
                            # AND terms mode: Split by " AND " and ensure ALL terms are present
-                            search_name = item_name.lower() if not searchOptions.get("case_sensitive") else item_name
+                            search_name = search_target.lower() if not searchOptions.get("case_sensitive") else search_target
                            and_terms = [term.strip() for term in fileQuery.split(" AND ") if term.strip()]
                            and_terms = [term.lower() if not searchOptions.get("case_sensitive") else term for term in and_terms]
                            if not all(term in search_name for term in and_terms):
                                continue  # Skip this item if not all AND terms match
                        else:
                            # Default: ALL search terms must be present (space-separated)
-                            search_name = item_name.lower() if not searchOptions.get("case_sensitive") else item_name
+                            search_name = search_target.lower() if not searchOptions.get("case_sensitive") else search_target
                            search_terms = [term.strip().lower() if not searchOptions.get("case_sensitive") else term.strip()
                                            for term in fileQuery.split() if term.strip()]
                            if not all(term in search_name for term in search_terms):
                                continue  # Skip this item if not all terms match

+                    # If strict folder name requested, enforce exact (case-insensitive) match on folders
+                    if strict_folder_name:
+                        item_is_folder = "folder" in item
+                        item_name_ci = (item.get("name") or "").strip().lower()
+                        if item_is_folder and item_name_ci != strict_folder_name.lower():
+                            logger.debug(f"Skipping folder '{item_name}' - doesn't match strict name '{strict_folder_name}'")
+                            continue

+                    logger.debug(f"Item '{item_name}' passed all filters - adding to results")

                    # Create minimal result with only essential reference information
                    doc_info = {
                        "id": item.get("id"),

@@ -804,15 +1025,17 @@ class MethodSharepoint(MethodBase):

        Parameters:
            connectionReference (str): Reference to the Microsoft connection
-            pathQuery (str): Path query where to upload documents (e.g., "/Documents/Project1", "*" for default location)
+            sitePath (str): REQUIRED - Specific SharePoint path where to upload documents. Must be a valid SharePoint path format:
+                - For direct upload: "/site:<Site Name>/<Library>/<Folder Path>" (e.g., "/site:KM XYZ/Documents/Work")
+                - If the user provides words like "word1 word2", the system MUST call findDocumentPath first to locate the actual folder path, then pass that result to this parameter
            documentList (str): Reference to the document list to upload
            fileNames (List[str]): List of names for the uploaded files
-            resultDocument (str, optional): JSON result document from findDocumentPath action (alternative to pathQuery)
+            resultDocument (str, optional): JSON result document from findDocumentPath action (alternative to sitePath)
            expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description
        """
        try:
            connectionReference = parameters.get("connectionReference")
-            pathQuery = parameters.get("pathQuery", "/Documents")
+            sitePath = parameters.get("sitePath", "/Documents")
            documentList = parameters.get("documentList")
            fileNames = parameters.get("fileNames")
            resultDocument = parameters.get("resultDocument")

@@ -847,9 +1070,9 @@ class MethodSharepoint(MethodBase):
                        folder_ids.append(doc.get("id"))

                if folder_ids:
-                    # Use the first folder ID found as pathQuery
-                    pathQuery = folder_ids[0]
-                    logger.info(f"Using folder ID from resultDocument: {pathQuery}")
+                    # Use the first folder ID found as sitePath
+                    sitePath = folder_ids[0]
+                    logger.info(f"Using folder ID from resultDocument: {sitePath}")
                else:
                    return ActionResult.isFailure(error="No folders found in resultDocument")

@@ -874,9 +1097,35 @@ class MethodSharepoint(MethodBase):
            sites = await self._discoverSharePointSites(connection["accessToken"])
            if not sites:
                return ActionResult.isFailure(error="No SharePoint sites found or accessible")

-            # Resolve path query into upload paths
-            upload_paths = self._resolvePathQuery(pathQuery)
+            # Enforce site-scoped path usage when using sitePath directly (without resultDocument)
+            upload_site_scope = None
+            if not resultDocument:
+                if not sitePath or not sitePath.startswith('/'):
+                    return ActionResult.isFailure(error="sitePath must start with '/' and include the site name, using the syntax /site:<Site Display Name>/... e.g. /site:KM LayerFinance/Documents/Work")
+
+                # Check if sitePath contains search terms (words without proper path structure)
+                if not sitePath.startswith('/site:') and not sitePath.startswith('/Documents') and not sitePath.startswith('/Shared Documents'):
+                    # This looks like search terms, not a valid path
+                    return ActionResult.isFailure(error=f"Invalid sitePath '{sitePath}'. This appears to be search terms, not a valid SharePoint path. Use the findDocumentPath action first to search for folders, then use the returned folder path as sitePath.")
+
+                parsed = self._parse_site_scoped_path(sitePath)
+                if not parsed:
+                    return ActionResult.isFailure(error="Invalid sitePath. Use /site:<Site Display Name>/<Library or Folder Path>")
+                # find matching site
+                candidate_sites = self._filter_sites_by_hint(sites, parsed["siteName"])  # substring match
+                # choose exact displayName match if available
+                exact = [s for s in candidate_sites if (s.get("displayName") or "").strip().lower() == parsed["siteName"].strip().lower()]
+                selected_site = exact[0] if exact else (candidate_sites[0] if candidate_sites else None)
+                if not selected_site:
+                    return ActionResult.isFailure(error=f"SharePoint site '{parsed['siteName']}' not found or not accessible")
+                upload_site_scope = selected_site
+                # Use the inner path portion as the actual upload target path
+                upload_paths = [f"/{parsed['innerPath'].lstrip('/')}"]
+                sites = [selected_site]
+            else:
+                # Resolve path query into upload paths (fallback behavior when using resultDocument)
+                upload_paths = self._resolvePathQuery(sitePath)
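
            # Illustrative example (assumed, not from a real run): sitePath
            # "/site:KM LayerFinance/Documents/Work" parses to siteName "KM LayerFinance"
            # and innerPath "Documents/Work", so the upload is scoped to that single
            # site with upload_paths == ["/Documents/Work"].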

            # Process each document upload
            upload_results = []

@@ -963,7 +1212,7 @@ class MethodSharepoint(MethodBase):
            # Create result data
            result_data = {
                "connectionReference": connectionReference,
-                "pathQuery": pathQuery,
+                "sitePath": sitePath,
                "documentList": documentList,
                "fileNames": fileNames,
                "sitesAvailable": len(sites),

@@ -120,7 +120,7 @@ async def login(
        access_type="offline",
        include_granted_scopes="true",
        state=state_param,
-        prompt="select_account"
+        prompt="consent select_account"
    )

    logger.info(f"Generated Google OAuth URL using OAuth2Session: {auth_url}")
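
Note: with access_type="offline", Google returns a refresh_token only on the user's first consent by default; adding "consent" to the prompt forces the consent screen again so a refresh_token is re-issued. The callback change below covers the remaining case where Google still omits it, by reusing a previously stored refresh token.
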
@@ -166,6 +166,33 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse
        "token_type": token_data.get("token_type", "bearer"),
        "expires_in": token_data.get("expires_in", 0)
    }

+    # If Google did not return a refresh_token, try to reuse an existing one for this user/connection
+    if not token_response.get("refresh_token"):
+        try:
+            rootInterface = getRootInterface()
+            # Prefer connection flow reuse; fallback to user access token
+            if connection_id:
+                existing_tokens = rootInterface.db.getRecordset("tokens", recordFilter={
+                    "connectionId": connection_id,
+                    "authority": AuthAuthority.GOOGLE
+                })
+                if existing_tokens:
+                    # Use most recent by createdAt
+                    existing_tokens.sort(key=lambda x: x.get("createdAt", 0), reverse=True)
+                    token_response["refresh_token"] = existing_tokens[0].get("tokenRefresh", "")
+            if not token_response.get("refresh_token") and user_id:
+                existing_access_tokens = rootInterface.db.getRecordset("tokens", recordFilter={
+                    "userId": user_id,
+                    "connectionId": None,
+                    "authority": AuthAuthority.GOOGLE
+                })
+                if existing_access_tokens:
+                    existing_access_tokens.sort(key=lambda x: x.get("createdAt", 0), reverse=True)
+                    token_response["refresh_token"] = existing_access_tokens[0].get("tokenRefresh", "")
+        except Exception:
+            # Non-fatal; continue without refresh token
+            pass

@@ -98,8 +98,7 @@ class TokenManager:
            "client_id": self.google_client_id,
            "client_secret": self.google_client_secret,
            "grant_type": "refresh_token",
-            "refresh_token": refresh_token,
-            "scope": "https://www.googleapis.com/auth/gmail.readonly https://www.googleapis.com/auth/userinfo.profile https://www.googleapis.com/auth/userinfo.email openid"
+            "refresh_token": refresh_token
        }
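
        # Note: per OAuth 2.0 (RFC 6749, Section 6), a "scope" on a refresh request
        # can only narrow the original grant; omitting it keeps the originally
        # granted scopes.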

        # Make refresh request