fixes sharepoint search
This commit is contained in:
parent
9644514e60
commit
c2d3877b1e
4 changed files with 423 additions and 46 deletions
|
|
@ -11,6 +11,85 @@ logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
# Prompt creation helpers extracted from managerChat.py
|
# Prompt creation helpers extracted from managerChat.py
|
||||||
|
|
||||||
|
def _getPreviousRoundContext(service, workflow) -> str:
|
||||||
|
"""Get context from previous workflow rounds to help understand follow-up prompts"""
|
||||||
|
try:
|
||||||
|
if not workflow or not hasattr(workflow, 'messages') or not workflow.messages:
|
||||||
|
return ""
|
||||||
|
|
||||||
|
# Get current round number
|
||||||
|
current_round = getattr(workflow, 'currentRound', 0)
|
||||||
|
|
||||||
|
# If this is round 0 or 1, there's no previous context
|
||||||
|
if current_round <= 1:
|
||||||
|
return ""
|
||||||
|
|
||||||
|
# Find messages from previous rounds (rounds before current)
|
||||||
|
previous_messages = []
|
||||||
|
for message in workflow.messages:
|
||||||
|
message_round = getattr(message, 'roundNumber', 0)
|
||||||
|
if message_round > 0 and message_round < current_round:
|
||||||
|
previous_messages.append(message)
|
||||||
|
|
||||||
|
if not previous_messages:
|
||||||
|
return ""
|
||||||
|
|
||||||
|
# Sort by round number and sequence to get chronological order
|
||||||
|
previous_messages.sort(key=lambda msg: (getattr(msg, 'roundNumber', 0), getattr(msg, 'sequenceNr', 0)))
|
||||||
|
|
||||||
|
# Build context summary
|
||||||
|
context_parts = []
|
||||||
|
current_round_context = {}
|
||||||
|
|
||||||
|
for message in previous_messages:
|
||||||
|
round_num = getattr(message, 'roundNumber', 0)
|
||||||
|
if round_num not in current_round_context:
|
||||||
|
current_round_context[round_num] = {
|
||||||
|
'user_inputs': [],
|
||||||
|
'assistant_responses': [],
|
||||||
|
'task_outcomes': [],
|
||||||
|
'documents_processed': []
|
||||||
|
}
|
||||||
|
|
||||||
|
# Categorize messages
|
||||||
|
if message.role == 'user':
|
||||||
|
current_round_context[round_num]['user_inputs'].append(message.message)
|
||||||
|
elif message.role == 'assistant':
|
||||||
|
# Check if it's a task completion or error message
|
||||||
|
if 'task' in message.message.lower() and ('completed' in message.message.lower() or 'failed' in message.message.lower() or 'error' in message.message.lower()):
|
||||||
|
current_round_context[round_num]['task_outcomes'].append(message.message)
|
||||||
|
else:
|
||||||
|
current_round_context[round_num]['assistant_responses'].append(message.message)
|
||||||
|
|
||||||
|
# Check for document processing
|
||||||
|
if hasattr(message, 'documents') and message.documents:
|
||||||
|
doc_names = [doc.fileName for doc in message.documents if hasattr(doc, 'fileName')]
|
||||||
|
if doc_names:
|
||||||
|
current_round_context[round_num]['documents_processed'].extend(doc_names)
|
||||||
|
|
||||||
|
# Build context summary
|
||||||
|
for round_num in sorted(current_round_context.keys()):
|
||||||
|
round_data = current_round_context[round_num]
|
||||||
|
context_parts.append(f"ROUND {round_num} CONTEXT:")
|
||||||
|
|
||||||
|
if round_data['user_inputs']:
|
||||||
|
context_parts.append(f" User requests: {'; '.join(round_data['user_inputs'])}")
|
||||||
|
|
||||||
|
if round_data['task_outcomes']:
|
||||||
|
context_parts.append(f" Task outcomes: {'; '.join(round_data['task_outcomes'])}")
|
||||||
|
|
||||||
|
if round_data['documents_processed']:
|
||||||
|
context_parts.append(f" Documents processed: {', '.join(set(round_data['documents_processed']))}")
|
||||||
|
|
||||||
|
if context_parts:
|
||||||
|
return "\n".join(context_parts)
|
||||||
|
else:
|
||||||
|
return ""
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error getting previous round context: {str(e)}")
|
||||||
|
return ""
|
||||||
|
|
||||||
def createTaskPlanningPrompt(context: TaskContext, service) -> str:
|
def createTaskPlanningPrompt(context: TaskContext, service) -> str:
|
||||||
"""Create enhanced prompt for task planning with user-friendly message generation and language detection"""
|
"""Create enhanced prompt for task planning with user-friendly message generation and language detection"""
|
||||||
# Get user language directly from service.user.language
|
# Get user language directly from service.user.language
|
||||||
|
|
@ -22,21 +101,29 @@ def createTaskPlanningPrompt(context: TaskContext, service) -> str:
|
||||||
# Extract available documents from context - use Pydantic model directly
|
# Extract available documents from context - use Pydantic model directly
|
||||||
available_documents = context.available_documents or "No documents available"
|
available_documents = context.available_documents or "No documents available"
|
||||||
|
|
||||||
|
# Get previous workflow round context for better understanding of follow-up prompts
|
||||||
|
previous_round_context = _getPreviousRoundContext(service, context.workflow)
|
||||||
|
|
||||||
return f"""You are a task planning AI that analyzes user requests and creates structured task plans with user-friendly feedback messages.
|
return f"""You are a task planning AI that analyzes user requests and creates structured task plans with user-friendly feedback messages.
|
||||||
|
|
||||||
USER REQUEST: {user_request}
|
USER REQUEST: {user_request}
|
||||||
|
|
||||||
AVAILABLE DOCUMENTS: {available_documents}
|
AVAILABLE DOCUMENTS: {available_documents}
|
||||||
|
|
||||||
|
PREVIOUS WORKFLOW ROUNDS CONTEXT:
|
||||||
|
{previous_round_context if previous_round_context else "No previous workflow rounds - this is the first round."}
|
||||||
|
|
||||||
INSTRUCTIONS:
|
INSTRUCTIONS:
|
||||||
1. Analyze the user request and available documents
|
1. Analyze the user request, available documents, and previous workflow rounds context
|
||||||
2. Group related topics and sequential steps into single, comprehensive tasks
|
2. If the user request appears to be a follow-up (like "try again", "versuche es nochmals", "retry", etc.),
|
||||||
3. Focus on business outcomes, not technical operations
|
use the PREVIOUS WORKFLOW ROUNDS CONTEXT to understand what the user wants to retry or continue
|
||||||
4. Each task should produce meaningful, usable outputs
|
3. Group related topics and sequential steps into single, comprehensive tasks
|
||||||
5. Ensure proper handover between tasks using result labels
|
4. Focus on business outcomes, not technical operations
|
||||||
6. Detect the language of the user request and include it in languageUserDetected
|
5. Each task should produce meaningful, usable outputs
|
||||||
7. Generate user-friendly messages for each task in the user's request language
|
6. Ensure proper handover between tasks using result labels
|
||||||
8. Return a JSON object with the exact structure shown below
|
7. Detect the language of the user request and include it in languageUserDetected
|
||||||
|
8. Generate user-friendly messages for each task in the user's request language
|
||||||
|
9. Return a JSON object with the exact structure shown below
|
||||||
|
|
||||||
TASK GROUPING PRINCIPLES:
|
TASK GROUPING PRINCIPLES:
|
||||||
- COMBINE RELATED TOPICS: Group related subjects, sequential steps, or workflow-structured activities into single tasks
|
- COMBINE RELATED TOPICS: Group related subjects, sequential steps, or workflow-structured activities into single tasks
|
||||||
|
|
@ -67,6 +154,21 @@ TASK PLANNING PRINCIPLES:
|
||||||
- Group related activities to minimize task fragmentation
|
- Group related activities to minimize task fragmentation
|
||||||
- Only create multiple tasks when dealing with truly different, independent objectives
|
- Only create multiple tasks when dealing with truly different, independent objectives
|
||||||
|
|
||||||
|
FOLLOW-UP PROMPT HANDLING:
|
||||||
|
- If the user request is a follow-up (e.g., "try again", "versuche es nochmals", "retry", "continue", "proceed"),
|
||||||
|
analyze the PREVIOUS WORKFLOW ROUNDS CONTEXT to understand what failed or was incomplete
|
||||||
|
- Use the previous round's user requests and task outcomes to determine what the user wants to retry
|
||||||
|
- If previous rounds failed due to missing documents, and documents are now available,
|
||||||
|
create tasks that use the newly available documents to accomplish the original request
|
||||||
|
- Maintain the same business objective from previous rounds but adapt to current available resources
|
||||||
|
|
||||||
|
SPECIFIC SCENARIO HANDLING:
|
||||||
|
- If previous round failed with "documents missing" error and current round has documents available,
|
||||||
|
the user likely wants to retry the same operation with the newly provided documents
|
||||||
|
- Example: Previous round "speichere mir die 3 dokumente im sharepoint unter xxx" failed due to missing documents,
|
||||||
|
current round "versuche es nochmals" with documents should retry the SharePoint save operation
|
||||||
|
- Always check if the current request is a retry by looking for retry keywords and previous round context
|
||||||
|
|
||||||
REQUIRED JSON STRUCTURE:
|
REQUIRED JSON STRUCTURE:
|
||||||
{{
|
{{
|
||||||
"overview": "Brief description of the overall plan",
|
"overview": "Brief description of the overall plan",
|
||||||
|
|
|
||||||
|
|
@ -117,6 +117,52 @@ class MethodSharepoint(MethodBase):
|
||||||
logger.error(f"Error discovering SharePoint sites: {str(e)}")
|
logger.error(f"Error discovering SharePoint sites: {str(e)}")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
def _filter_sites_by_hint(self, sites: List[Dict[str, Any]], site_hint: str) -> List[Dict[str, Any]]:
|
||||||
|
"""Filter discovered sites by a human-entered site hint (case-insensitive substring)."""
|
||||||
|
try:
|
||||||
|
if not site_hint:
|
||||||
|
return sites
|
||||||
|
hint = site_hint.strip().lower()
|
||||||
|
filtered: List[Dict[str, Any]] = []
|
||||||
|
for site in sites:
|
||||||
|
name = (site.get("displayName") or "").lower()
|
||||||
|
web_url = (site.get("webUrl") or "").lower()
|
||||||
|
if hint in name or hint in web_url:
|
||||||
|
filtered.append(site)
|
||||||
|
return filtered if filtered else sites
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error filtering sites by hint '{site_hint}': {str(e)}")
|
||||||
|
return sites
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_site_scoped_path(self, path_query: str) -> Optional[Dict[str, str]]:
|
||||||
|
"""
|
||||||
|
Parse a site-scoped path of the form:
|
||||||
|
/site:KM LayerFinance/Documents/Work or /site:KM LayerFinance/Shared Documents/Work
|
||||||
|
|
||||||
|
Returns dict with keys: siteName, innerPath (no leading slash) on success, else None.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
if not path_query or not path_query.startswith('/'):
|
||||||
|
return None
|
||||||
|
# expected syntax prefix
|
||||||
|
prefix = '/site:'
|
||||||
|
if not path_query.startswith(prefix):
|
||||||
|
return None
|
||||||
|
remainder = path_query[len(prefix):]
|
||||||
|
# split once on the next '/'
|
||||||
|
if '/' not in remainder:
|
||||||
|
return None
|
||||||
|
site_name, inner = remainder.split('/', 1)
|
||||||
|
site_name = site_name.strip()
|
||||||
|
inner_path = inner.strip()
|
||||||
|
if not site_name or not inner_path:
|
||||||
|
return None
|
||||||
|
return {"siteName": site_name, "innerPath": inner_path}
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error parsing site-scoped path '{path_query}': {str(e)}")
|
||||||
|
return None
|
||||||
|
|
||||||
def _parseSearchQuery(self, searchQuery: str) -> tuple[str, str, str, dict]:
|
def _parseSearchQuery(self, searchQuery: str) -> tuple[str, str, str, dict]:
|
||||||
"""
|
"""
|
||||||
Parse searchQuery to extract path, search terms, search type, and search options.
|
Parse searchQuery to extract path, search terms, search type, and search options.
|
||||||
|
|
@ -141,13 +187,48 @@ class MethodSharepoint(MethodBase):
|
||||||
|
|
||||||
searchQuery = searchQuery.strip()
|
searchQuery = searchQuery.strip()
|
||||||
searchOptions = {}
|
searchOptions = {}
|
||||||
|
|
||||||
# Check for search type specification (files:, folders:, all:)
|
# Check for search type specification (files:, folders:, all:) FIRST
|
||||||
searchType = "all" # Default
|
searchType = "all" # Default
|
||||||
if searchQuery.startswith(("files:", "folders:", "all:")):
|
if searchQuery.startswith(("files:", "folders:", "all:")):
|
||||||
type_parts = searchQuery.split(':', 1)
|
type_parts = searchQuery.split(':', 1)
|
||||||
searchType = type_parts[0].strip()
|
searchType = type_parts[0].strip()
|
||||||
searchQuery = type_parts[1].strip()
|
searchQuery = type_parts[1].strip()
|
||||||
|
|
||||||
|
# Extract optional site hint tokens: support "site=Name" or leading "site:Name"
|
||||||
|
def _extract_site_hint(q: str) -> tuple[str, Optional[str]]:
|
||||||
|
try:
|
||||||
|
q_strip = q.strip()
|
||||||
|
# Leading form: site:KM LayerFinance ...
|
||||||
|
if q_strip.lower().startswith("site:"):
|
||||||
|
after = q_strip[5:].lstrip()
|
||||||
|
# site name until next space or end
|
||||||
|
if ' ' in after:
|
||||||
|
site_name, rest = after.split(' ', 1)
|
||||||
|
else:
|
||||||
|
site_name, rest = after, ''
|
||||||
|
return rest.strip(), site_name.strip()
|
||||||
|
# Inline key=value form anywhere
|
||||||
|
m = re.search(r"\bsite=([^;\s]+)", q_strip, flags=re.IGNORECASE)
|
||||||
|
if m:
|
||||||
|
site_name = m.group(1).strip()
|
||||||
|
# remove the token from query
|
||||||
|
q_new = re.sub(r"\bsite=[^;\s]+;?", "", q_strip, flags=re.IGNORECASE).strip()
|
||||||
|
return q_new, site_name
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
return q, None
|
||||||
|
|
||||||
|
searchQuery, extracted_site = _extract_site_hint(searchQuery)
|
||||||
|
if extracted_site:
|
||||||
|
searchOptions["site_hint"] = extracted_site
|
||||||
|
logger.info(f"Extracted site hint: '{extracted_site}'")
|
||||||
|
|
||||||
|
# Extract name="..." if present (for quoted multi-word names)
|
||||||
|
name_match = re.search(r"name=\"([^\"]+)\"", searchQuery)
|
||||||
|
if name_match:
|
||||||
|
searchQuery = name_match.group(1)
|
||||||
|
logger.info(f"Extracted name from quotes: '{searchQuery}'")
|
||||||
|
|
||||||
# Check for search mode specification (exact:, regex:, case:, and:)
|
# Check for search mode specification (exact:, regex:, case:, and:)
|
||||||
if searchQuery.startswith(("exact:", "regex:", "case:", "and:")):
|
if searchQuery.startswith(("exact:", "regex:", "case:", "and:")):
|
||||||
|
|
@ -187,6 +268,7 @@ class MethodSharepoint(MethodBase):
|
||||||
else:
|
else:
|
||||||
fileQuery = search_part
|
fileQuery = search_part
|
||||||
|
|
||||||
|
# Use search_part as fileQuery (name extraction already handled above)
|
||||||
return pathQuery, fileQuery, searchType, searchOptions
|
return pathQuery, fileQuery, searchType, searchOptions
|
||||||
|
|
||||||
# No colon - check if it looks like a path
|
# No colon - check if it looks like a path
|
||||||
|
|
@ -349,6 +431,7 @@ class MethodSharepoint(MethodBase):
|
||||||
|
|
||||||
Parameters:
|
Parameters:
|
||||||
connectionReference (str): Reference to the Microsoft connection
|
connectionReference (str): Reference to the Microsoft connection
|
||||||
|
site (str, optional): SharePoint site name or hint to search within (e.g., "SSS", "KM XYZ"). If not provided, searches all accessible sites
|
||||||
searchQuery (str): [path:][type:][mode:]query - Enhanced search syntax:
|
searchQuery (str): [path:][type:][mode:]query - Enhanced search syntax:
|
||||||
- "budget", "/Documents:budget", "files:budget", "folders:DELTA", "*"
|
- "budget", "/Documents:budget", "files:budget", "folders:DELTA", "*"
|
||||||
- "exact:\"Operations 2025\"" - exact phrase matching
|
- "exact:\"Operations 2025\"" - exact phrase matching
|
||||||
|
|
@ -356,7 +439,11 @@ class MethodSharepoint(MethodBase):
|
||||||
- "case:DELTA" - case-sensitive search
|
- "case:DELTA" - case-sensitive search
|
||||||
- "and:DELTA AND 2025 Mars AND Group" - all terms must be present
|
- "and:DELTA AND 2025 Mars AND Group" - all terms must be present
|
||||||
- "folders:and:DELTA AND 2025 Mars AND Group" - combined options
|
- "folders:and:DELTA AND 2025 Mars AND Group" - combined options
|
||||||
Note: For storage locations, use "folders:" prefix. All search terms must be present by default.
|
- Site hint support: "folders:site=KM LayerFinance;name=Work" or "folders:site:KM LayerFinance Work"
|
||||||
|
- For quoted names: "folders:site=KM;name=\"page staten\""
|
||||||
|
- For folder search: words like "part1 part2" will search for folders containing BOTH terms
|
||||||
|
Note: For storage locations, use "folders:" prefix. When searching for folders, multiple words are treated as search terms that must all appear in the folder name or path.
|
||||||
|
Site hints help narrow search to specific SharePoint sites for better accuracy.
|
||||||
resultDocument (str, optional): JSON result document from previous findDocumentPath action to refine search
|
resultDocument (str, optional): JSON result document from previous findDocumentPath action to refine search
|
||||||
searchScope (str, optional): Search scope - options: "all" (default), "documents" (files only), "pages" (SharePoint pages only)
|
searchScope (str, optional): Search scope - options: "all" (default), "documents" (files only), "pages" (SharePoint pages only)
|
||||||
maxResults (int, optional): Maximum number of results to return (default: 100)
|
maxResults (int, optional): Maximum number of results to return (default: 100)
|
||||||
|
|
@ -364,6 +451,7 @@ class MethodSharepoint(MethodBase):
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
connectionReference = parameters.get("connectionReference")
|
connectionReference = parameters.get("connectionReference")
|
||||||
|
site = parameters.get("site")
|
||||||
searchQuery = parameters.get("searchQuery", "*")
|
searchQuery = parameters.get("searchQuery", "*")
|
||||||
resultDocument = parameters.get("resultDocument")
|
resultDocument = parameters.get("resultDocument")
|
||||||
searchScope = parameters.get("searchScope", "all")
|
searchScope = parameters.get("searchScope", "all")
|
||||||
|
|
@ -415,6 +503,13 @@ class MethodSharepoint(MethodBase):
|
||||||
if not sites:
|
if not sites:
|
||||||
return ActionResult.isFailure(error="No SharePoint sites found or accessible")
|
return ActionResult.isFailure(error="No SharePoint sites found or accessible")
|
||||||
|
|
||||||
|
# Filter sites by site parameter if provided
|
||||||
|
if site:
|
||||||
|
sites = self._filter_sites_by_hint(sites, site)
|
||||||
|
logger.info(f"Filtered sites by site parameter: '{site}' -> {len(sites)} sites")
|
||||||
|
if not sites:
|
||||||
|
return ActionResult.isFailure(error=f"No SharePoint sites found matching '{site}'")
|
||||||
|
|
||||||
# Resolve path query into search paths
|
# Resolve path query into search paths
|
||||||
search_paths = self._resolvePathQuery(pathQuery)
|
search_paths = self._resolvePathQuery(pathQuery)
|
||||||
|
|
||||||
|
|
@ -423,80 +518,206 @@ class MethodSharepoint(MethodBase):
|
||||||
found_documents = []
|
found_documents = []
|
||||||
all_sites_searched = []
|
all_sites_searched = []
|
||||||
|
|
||||||
for site in sites:
|
# Apply site hint filtering if provided in search options
|
||||||
|
site_scoped_sites = sites
|
||||||
|
strict_folder_name: Optional[str] = None
|
||||||
|
|
||||||
|
# First check for explicit site hint in search options
|
||||||
|
if searchOptions.get("site_hint"):
|
||||||
|
site_scoped_sites = self._filter_sites_by_hint(sites, searchOptions["site_hint"])
|
||||||
|
logger.info(f"Filtered sites by explicit site hint: '{searchOptions['site_hint']}' -> {len(site_scoped_sites)} sites")
|
||||||
|
|
||||||
|
# Heuristic: if user searched for folders with pattern "<siteHint> <folderName>",
|
||||||
|
# prefer filtering sites by the first token(s) and match folder name exactly for the last token
|
||||||
|
elif searchType == "folders" and fileQuery and ' ' in fileQuery and not searchOptions.get("regex_match"):
|
||||||
|
# treat last token as folder name, preceding tokens combined as site hint
|
||||||
|
tokens = [t for t in fileQuery.split(' ') if t]
|
||||||
|
if len(tokens) >= 2:
|
||||||
|
strict_folder_name = tokens[-1]
|
||||||
|
site_hint = ' '.join(tokens[:-1])
|
||||||
|
site_scoped_sites = self._filter_sites_by_hint(sites, site_hint)
|
||||||
|
logger.info(f"Filtered sites by heuristic site hint: '{site_hint}' -> {len(site_scoped_sites)} sites")
|
||||||
|
|
||||||
|
for site in site_scoped_sites:
|
||||||
site_id = site["id"]
|
site_id = site["id"]
|
||||||
site_name = site["displayName"]
|
site_name = site["displayName"]
|
||||||
site_url = site["webUrl"]
|
site_url = site["webUrl"]
|
||||||
|
|
||||||
logger.info(f"Searching in site: {site_name} ({site_url})")
|
logger.info(f"Searching in site: {site_name} ({site_url})")
|
||||||
|
|
||||||
# Use Microsoft Graph search API for this specific site
|
# Use Microsoft Graph API for this specific site
|
||||||
# Handle empty or wildcard queries
|
# Handle empty or wildcard queries
|
||||||
if not fileQuery or fileQuery.strip() == "" or fileQuery.strip() == "*":
|
if not fileQuery or fileQuery.strip() == "" or fileQuery.strip() == "*":
|
||||||
# For wildcard/empty queries, list all items in the drive
|
# For wildcard/empty queries, list all items in the drive
|
||||||
endpoint = f"sites/{site_id}/drive/root/children"
|
endpoint = f"sites/{site_id}/drive/root/children"
|
||||||
else:
|
else:
|
||||||
# For specific queries, use search API
|
# For specific queries, use different approaches based on search type
|
||||||
search_query = fileQuery.replace("'", "''") # Escape single quotes for OData
|
if searchType == "folders":
|
||||||
endpoint = f"sites/{site_id}/drive/root/search(q='{search_query}')"
|
# Use Microsoft Graph unified search endpoint: POST /search/query
|
||||||
|
# Scope by all drives in the site (e.g., Shared Documents, Documents, language variants)
|
||||||
# Make the search API call
|
try:
|
||||||
search_result = await self._makeGraphApiCall(connection["accessToken"], endpoint)
|
import json
|
||||||
|
# Discover drives for the site to build precise path scopes
|
||||||
if "error" in search_result:
|
drives_resp = await self._makeGraphApiCall(connection["accessToken"], f"sites/{site_id}/drives")
|
||||||
logger.warning(f"Search failed for site {site_name}: {search_result['error']}")
|
path_filters = []
|
||||||
continue
|
if not ("error" in drives_resp):
|
||||||
|
for drv in (drives_resp.get("value", []) or []):
|
||||||
# Process search results for this site
|
web_url = (drv.get("webUrl") or "").rstrip('/') + '/'
|
||||||
items = search_result.get("value", [])
|
if web_url:
|
||||||
|
# path:"<drive webUrl>/"
|
||||||
|
path_filters.append(f"path:\"{web_url}\"")
|
||||||
|
if not path_filters:
|
||||||
|
# fallback to site root if no drives found
|
||||||
|
scoped_path = site_url.rstrip('/') + '/'
|
||||||
|
path_filters = [f"path:\"{scoped_path}\""]
|
||||||
|
|
||||||
|
# Use KQL syntax for folder search
|
||||||
|
terms = [t for t in fileQuery.split() if t.strip()]
|
||||||
|
if len(terms) > 1:
|
||||||
|
# Multiple terms: first search for folders containing ANY of the terms (OR)
|
||||||
|
# This broadens the search to catch all potential matches
|
||||||
|
name_terms = " OR ".join([f"foldername:*{t}*" for t in terms])
|
||||||
|
name_filter = f"({name_terms})"
|
||||||
|
else:
|
||||||
|
# Single term: search for folders containing the term
|
||||||
|
single_term = terms[0] if terms else fileQuery
|
||||||
|
name_filter = f"foldername:*{single_term}*"
|
||||||
|
|
||||||
|
# Use KQL syntax with isFolder:true
|
||||||
|
query_string = f"isFolder:true AND {name_filter}"
|
||||||
|
logger.info(f"Using KQL query: {query_string}")
|
||||||
|
|
||||||
|
payload = {
|
||||||
|
"requests": [
|
||||||
|
{
|
||||||
|
"entityTypes": ["driveItem"],
|
||||||
|
"query": {"queryString": query_string},
|
||||||
|
"from": 0,
|
||||||
|
"size": 50
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
logger.info(f"Using unified search API for folders with queryString: {query_string}")
|
||||||
|
logger.info(f"Payload: {json.dumps(payload, indent=2)}")
|
||||||
|
unified_result = await self._makeGraphApiCall(
|
||||||
|
connection["accessToken"],
|
||||||
|
"search/query",
|
||||||
|
method="POST",
|
||||||
|
data=json.dumps(payload).encode("utf-8")
|
||||||
|
)
|
||||||
|
logger.info(f"Unified search response: {json.dumps(unified_result, indent=2)}")
|
||||||
|
if "error" in unified_result:
|
||||||
|
logger.warning(f"Unified search failed for site {site_name}: {unified_result['error']}")
|
||||||
|
items = []
|
||||||
|
else:
|
||||||
|
# Flatten hits -> driveItem resources
|
||||||
|
items = []
|
||||||
|
for container in (unified_result.get("value", []) or []):
|
||||||
|
for hits_container in (container.get("hitsContainers", []) or []):
|
||||||
|
for hit in (hits_container.get("hits", []) or []):
|
||||||
|
resource = hit.get("resource")
|
||||||
|
if resource:
|
||||||
|
items.append(resource)
|
||||||
|
logger.info(f"Unified search returned {len(items)} items (pre-filter)")
|
||||||
|
|
||||||
|
# Post-filter: For multiple terms, filter results to only include folders that contain ALL terms
|
||||||
|
if len(terms) > 1:
|
||||||
|
filtered_items = []
|
||||||
|
for item in items:
|
||||||
|
folder_name = item.get("name", "").lower()
|
||||||
|
# Check if folder name contains ALL search terms
|
||||||
|
if all(term.lower() in folder_name for term in terms):
|
||||||
|
filtered_items.append(item)
|
||||||
|
items = filtered_items
|
||||||
|
logger.info(f"Post-filtered to {len(items)} items containing all terms: {terms}")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error performing unified folder search: {str(e)}")
|
||||||
|
items = []
|
||||||
|
else:
|
||||||
|
# For files, use regular search API
|
||||||
|
search_query = fileQuery.replace("'", "''") # Escape single quotes for OData
|
||||||
|
endpoint = f"sites/{site_id}/drive/root/search(q='{search_query}')"
|
||||||
|
logger.info(f"Using search API for files with query: '{search_query}'")
|
||||||
|
|
||||||
|
# Make the search API call (files)
|
||||||
|
search_result = await self._makeGraphApiCall(connection["accessToken"], endpoint)
|
||||||
|
if "error" in search_result:
|
||||||
|
logger.warning(f"Search failed for site {site_name}: {search_result['error']}")
|
||||||
|
continue
|
||||||
|
# Process search results for this site (files)
|
||||||
|
items = search_result.get("value", [])
|
||||||
|
logger.info(f"Retrieved {len(items)} items from site {site_name}")
|
||||||
site_documents = []
|
site_documents = []
|
||||||
|
|
||||||
for item in items:
|
for item in items:
|
||||||
|
item_name = item.get("name", "")
|
||||||
|
item_type = "folder" if "folder" in item else "file"
|
||||||
|
item_path = item.get("parentReference", {}).get("path", "")
|
||||||
|
logger.debug(f"Processing {item_type}: '{item_name}' at path: '{item_path}'")
|
||||||
|
|
||||||
# Filter by search scope if specified
|
# Filter by search scope if specified
|
||||||
if searchScope == "documents" and "folder" in item:
|
if searchScope == "documents" and "folder" in item:
|
||||||
|
logger.debug(f"Skipping folder '{item_name}' due to documents scope")
|
||||||
continue
|
continue
|
||||||
elif searchScope == "pages" and "file" in item and not item["file"].get("mimeType", "").startswith("text/html"):
|
elif searchScope == "pages" and "file" in item and not item["file"].get("mimeType", "").startswith("text/html"):
|
||||||
|
logger.debug(f"Skipping file '{item_name}' due to pages scope")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Filter by search type (files, folders, all)
|
# Filter by search type (files, folders, all)
|
||||||
if searchType == "files" and "folder" in item:
|
if searchType == "files" and "folder" in item:
|
||||||
|
logger.debug(f"Skipping folder '{item_name}' due to files search type")
|
||||||
continue
|
continue
|
||||||
elif searchType == "folders" and "file" in item:
|
elif searchType == "folders" and "file" in item:
|
||||||
|
logger.debug(f"Skipping file '{item_name}' due to folders search type")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Enhanced post-filtering based on search options
|
# Enhanced post-filtering based on search options
|
||||||
item_name = item.get("name", "")
|
if fileQuery != "*" and fileQuery.strip() and searchType != "folders":
|
||||||
if fileQuery != "*" and fileQuery.strip():
|
# For non-folder searches, apply name filtering
|
||||||
|
# (Folder searches are already filtered by the recursive search)
|
||||||
|
search_target = item_name
|
||||||
|
|
||||||
# Apply different filtering based on search options
|
# Apply different filtering based on search options
|
||||||
if searchOptions.get("exact_match"):
|
if searchOptions.get("exact_match"):
|
||||||
# Exact phrase matching
|
# Exact phrase matching
|
||||||
if searchOptions.get("case_sensitive"):
|
if searchOptions.get("case_sensitive"):
|
||||||
if fileQuery not in item_name:
|
if fileQuery not in search_target:
|
||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
if fileQuery.lower() not in item_name.lower():
|
if fileQuery.lower() not in search_target.lower():
|
||||||
continue
|
continue
|
||||||
elif searchOptions.get("regex_match"):
|
elif searchOptions.get("regex_match"):
|
||||||
# Regex pattern matching
|
# Regex pattern matching
|
||||||
import re
|
import re
|
||||||
flags = 0 if searchOptions.get("case_sensitive") else re.IGNORECASE
|
flags = 0 if searchOptions.get("case_sensitive") else re.IGNORECASE
|
||||||
if not re.search(fileQuery, item_name, flags):
|
if not re.search(fileQuery, search_target, flags):
|
||||||
continue
|
continue
|
||||||
elif searchOptions.get("and_terms"):
|
elif searchOptions.get("and_terms"):
|
||||||
# AND terms mode: Split by " AND " and ensure ALL terms are present
|
# AND terms mode: Split by " AND " and ensure ALL terms are present
|
||||||
search_name = item_name.lower() if not searchOptions.get("case_sensitive") else item_name
|
search_name = search_target.lower() if not searchOptions.get("case_sensitive") else search_target
|
||||||
and_terms = [term.strip() for term in fileQuery.split(" AND ") if term.strip()]
|
and_terms = [term.strip() for term in fileQuery.split(" AND ") if term.strip()]
|
||||||
and_terms = [term.lower() if not searchOptions.get("case_sensitive") else term for term in and_terms]
|
and_terms = [term.lower() if not searchOptions.get("case_sensitive") else term for term in and_terms]
|
||||||
if not all(term in search_name for term in and_terms):
|
if not all(term in search_name for term in and_terms):
|
||||||
continue # Skip this item if not all AND terms match
|
continue # Skip this item if not all AND terms match
|
||||||
else:
|
else:
|
||||||
# Default: ALL search terms must be present (space-separated)
|
# Default: ALL search terms must be present (space-separated)
|
||||||
search_name = item_name.lower() if not searchOptions.get("case_sensitive") else item_name
|
search_name = search_target.lower() if not searchOptions.get("case_sensitive") else search_target
|
||||||
search_terms = [term.strip().lower() if not searchOptions.get("case_sensitive") else term.strip()
|
search_terms = [term.strip().lower() if not searchOptions.get("case_sensitive") else term.strip()
|
||||||
for term in fileQuery.split() if term.strip()]
|
for term in fileQuery.split() if term.strip()]
|
||||||
if not all(term in search_name for term in search_terms):
|
if not all(term in search_name for term in search_terms):
|
||||||
continue # Skip this item if not all terms match
|
continue # Skip this item if not all terms match
|
||||||
|
|
||||||
|
# If strict folder name requested, enforce exact (case-insensitive) match on folders
|
||||||
|
if strict_folder_name:
|
||||||
|
item_is_folder = "folder" in item
|
||||||
|
item_name_ci = (item.get("name") or "").strip().lower()
|
||||||
|
if item_is_folder and item_name_ci != strict_folder_name.lower():
|
||||||
|
logger.debug(f"Skipping folder '{item_name}' - doesn't match strict name '{strict_folder_name}'")
|
||||||
|
continue
|
||||||
|
|
||||||
|
logger.debug(f"Item '{item_name}' passed all filters - adding to results")
|
||||||
|
|
||||||
# Create minimal result with only essential reference information
|
# Create minimal result with only essential reference information
|
||||||
doc_info = {
|
doc_info = {
|
||||||
"id": item.get("id"),
|
"id": item.get("id"),
|
||||||
|
|
@ -804,15 +1025,17 @@ class MethodSharepoint(MethodBase):
|
||||||
|
|
||||||
Parameters:
|
Parameters:
|
||||||
connectionReference (str): Reference to the Microsoft connection
|
connectionReference (str): Reference to the Microsoft connection
|
||||||
pathQuery (str): Path query where to upload documents (e.g., "/Documents/Project1", "*" for default location)
|
sitePath (str): REQUIRED - Specific SharePoint path to upload the documents to. Must be in a valid SharePoint path format:
|
||||||
|
- For direct upload: "/site:<Site Name>/<Library>/<Folder Path>" (e.g., "/site:KM XYZ/Documents/Work")
|
||||||
|
- If the user provides search words like "word1 word2" instead of a path, the system MUST call findDocumentPath first to locate the actual folder path, then pass the resulting path to this parameter
|
||||||
documentList (str): Reference to the document list to upload
|
documentList (str): Reference to the document list to upload
|
||||||
fileNames (List[str]): List of names for the uploaded files
|
fileNames (List[str]): List of names for the uploaded files
|
||||||
resultDocument (str, optional): JSON result document from findDocumentPath action (alternative to pathQuery)
|
resultDocument (str, optional): JSON result document from findDocumentPath action (alternative to sitePath)
|
||||||
expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description
|
expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
connectionReference = parameters.get("connectionReference")
|
connectionReference = parameters.get("connectionReference")
|
||||||
pathQuery = parameters.get("pathQuery", "/Documents")
|
sitePath = parameters.get("sitePath", "/Documents")
|
||||||
documentList = parameters.get("documentList")
|
documentList = parameters.get("documentList")
|
||||||
fileNames = parameters.get("fileNames")
|
fileNames = parameters.get("fileNames")
|
||||||
resultDocument = parameters.get("resultDocument")
|
resultDocument = parameters.get("resultDocument")
|
||||||
|
|
@ -847,9 +1070,9 @@ class MethodSharepoint(MethodBase):
|
||||||
folder_ids.append(doc.get("id"))
|
folder_ids.append(doc.get("id"))
|
||||||
|
|
||||||
if folder_ids:
|
if folder_ids:
|
||||||
# Use the first folder ID found as pathQuery
|
# Use the first folder ID found as sitePath
|
||||||
pathQuery = folder_ids[0]
|
sitePath = folder_ids[0]
|
||||||
logger.info(f"Using folder ID from resultDocument: {pathQuery}")
|
logger.info(f"Using folder ID from resultDocument: {sitePath}")
|
||||||
else:
|
else:
|
||||||
return ActionResult.isFailure(error="No folders found in resultDocument")
|
return ActionResult.isFailure(error="No folders found in resultDocument")
|
||||||
|
|
||||||
|
|
@ -874,9 +1097,35 @@ class MethodSharepoint(MethodBase):
|
||||||
sites = await self._discoverSharePointSites(connection["accessToken"])
|
sites = await self._discoverSharePointSites(connection["accessToken"])
|
||||||
if not sites:
|
if not sites:
|
||||||
return ActionResult.isFailure(error="No SharePoint sites found or accessible")
|
return ActionResult.isFailure(error="No SharePoint sites found or accessible")
|
||||||
|
|
||||||
# Resolve path query into upload paths
|
# Enforce site-scoped path usage when using sitePath directly (without resultDocument)
|
||||||
upload_paths = self._resolvePathQuery(pathQuery)
|
upload_site_scope = None
|
||||||
|
if not resultDocument:
|
||||||
|
if not sitePath or not sitePath.startswith('/'):
|
||||||
|
return ActionResult.isFailure(error="sitePath must start with '/' and include site name with syntax /site:<Site Display Name>/... e.g. /site:KM LayerFinance/Documents/Work")
|
||||||
|
|
||||||
|
# Check if sitePath contains search terms (words without proper path structure)
|
||||||
|
if not sitePath.startswith('/site:') and not sitePath.startswith('/Documents') and not sitePath.startswith('/Shared Documents'):
|
||||||
|
# This looks like search terms, not a valid path
|
||||||
|
return ActionResult.isFailure(error=f"Invalid sitePath '{sitePath}'. This appears to be search terms, not a valid SharePoint path. Use findDocumentPath action first to search for folders, then use the returned folder path as sitePath.")
|
||||||
|
|
||||||
|
parsed = self._parse_site_scoped_path(sitePath)
|
||||||
|
if not parsed:
|
||||||
|
return ActionResult.isFailure(error="Invalid sitePath. Use /site:<Site Display Name>/<Library or Folder Path>")
|
||||||
|
# find matching site
|
||||||
|
candidate_sites = self._filter_sites_by_hint(sites, parsed["siteName"]) # substring match
|
||||||
|
# choose exact displayName match if available
|
||||||
|
exact = [s for s in candidate_sites if (s.get("displayName") or "").strip().lower() == parsed["siteName"].strip().lower()]
|
||||||
|
selected_site = exact[0] if exact else (candidate_sites[0] if candidate_sites else None)
|
||||||
|
if not selected_site:
|
||||||
|
return ActionResult.isFailure(error=f"SharePoint site '{parsed['siteName']}' not found or not accessible")
|
||||||
|
upload_site_scope = selected_site
|
||||||
|
# Use the inner path portion as the actual upload target path
|
||||||
|
upload_paths = [f"/{parsed['innerPath'].lstrip('/')}"]
|
||||||
|
sites = [selected_site]
|
||||||
|
else:
|
||||||
|
# Resolve path query into upload paths (fallback behavior when using resultDocument)
|
||||||
|
upload_paths = self._resolvePathQuery(sitePath)
|
||||||
|
|
||||||
# Process each document upload
|
# Process each document upload
|
||||||
upload_results = []
|
upload_results = []
|
||||||
|
|
@ -963,7 +1212,7 @@ class MethodSharepoint(MethodBase):
|
||||||
# Create result data
|
# Create result data
|
||||||
result_data = {
|
result_data = {
|
||||||
"connectionReference": connectionReference,
|
"connectionReference": connectionReference,
|
||||||
"pathQuery": pathQuery,
|
"sitePath": sitePath,
|
||||||
"documentList": documentList,
|
"documentList": documentList,
|
||||||
"fileNames": fileNames,
|
"fileNames": fileNames,
|
||||||
"sitesAvailable": len(sites),
|
"sitesAvailable": len(sites),
|
||||||
|
|
|
||||||
|
|
@ -120,7 +120,7 @@ async def login(
|
||||||
access_type="offline",
|
access_type="offline",
|
||||||
include_granted_scopes="true",
|
include_granted_scopes="true",
|
||||||
state=state_param,
|
state=state_param,
|
||||||
prompt="select_account"
|
prompt="consent select_account"
|
||||||
)
|
)
|
||||||
|
|
||||||
logger.info(f"Generated Google OAuth URL using OAuth2Session: {auth_url}")
|
logger.info(f"Generated Google OAuth URL using OAuth2Session: {auth_url}")
|
||||||
|
|
@ -166,6 +166,33 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse
|
||||||
"token_type": token_data.get("token_type", "bearer"),
|
"token_type": token_data.get("token_type", "bearer"),
|
||||||
"expires_in": token_data.get("expires_in", 0)
|
"expires_in": token_data.get("expires_in", 0)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# If Google did not return a refresh_token, try to reuse an existing one for this user/connection
|
||||||
|
if not token_response.get("refresh_token"):
|
||||||
|
try:
|
||||||
|
rootInterface = getRootInterface()
|
||||||
|
# Prefer reusing the refresh token stored for this connection; otherwise fall back to the user's connection-less token
|
||||||
|
if connection_id:
|
||||||
|
existing_tokens = rootInterface.db.getRecordset("tokens", recordFilter={
|
||||||
|
"connectionId": connection_id,
|
||||||
|
"authority": AuthAuthority.GOOGLE
|
||||||
|
})
|
||||||
|
if existing_tokens:
|
||||||
|
# Use most recent by createdAt
|
||||||
|
existing_tokens.sort(key=lambda x: x.get("createdAt", 0), reverse=True)
|
||||||
|
token_response["refresh_token"] = existing_tokens[0].get("tokenRefresh", "")
|
||||||
|
if not token_response.get("refresh_token") and user_id:
|
||||||
|
existing_access_tokens = rootInterface.db.getRecordset("tokens", recordFilter={
|
||||||
|
"userId": user_id,
|
||||||
|
"connectionId": None,
|
||||||
|
"authority": AuthAuthority.GOOGLE
|
||||||
|
})
|
||||||
|
if existing_access_tokens:
|
||||||
|
existing_access_tokens.sort(key=lambda x: x.get("createdAt", 0), reverse=True)
|
||||||
|
token_response["refresh_token"] = existing_access_tokens[0].get("tokenRefresh", "")
|
||||||
|
except Exception:
|
||||||
|
# Non-fatal; continue without refresh token
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -98,8 +98,7 @@ class TokenManager:
|
||||||
"client_id": self.google_client_id,
|
"client_id": self.google_client_id,
|
||||||
"client_secret": self.google_client_secret,
|
"client_secret": self.google_client_secret,
|
||||||
"grant_type": "refresh_token",
|
"grant_type": "refresh_token",
|
||||||
"refresh_token": refresh_token,
|
"refresh_token": refresh_token
|
||||||
"scope": "https://www.googleapis.com/auth/gmail.readonly https://www.googleapis.com/auth/userinfo.profile https://www.googleapis.com/auth/userinfo.email openid"
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# Make refresh request
|
# Make refresh request
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue