commit
08f0e34a7c
1 changed file with 254 additions and 153 deletions
@@ -503,102 +503,202 @@ class MethodSharepoint(MethodBase):
found_documents = []
all_sites_searched = []

# Use simple approach like test file - no complex filtering
site_scoped_sites = sites

# Handle different search approaches based on search type
if searchType == "folders" and fileQuery and fileQuery.strip() != "" and fileQuery.strip() != "*":
    # Use unified search for folders - this is global and searches all sites
    try:
        import json

        # Use Microsoft Graph Search API syntax (simple term search only)
        terms = [t for t in fileQuery.split() if t.strip()]

        if len(terms) > 1:
            # Multiple terms: search for ALL terms (AND) - more specific results
            query_string = " AND ".join(terms)
        else:
            # Single term: search for the term
            query_string = terms[0] if terms else fileQuery
        logger.info(f"Using unified search for folders: {query_string}")

        payload = {
            "requests": [
                {
                    "entityTypes": ["driveItem"],
                    "query": {"queryString": query_string},
                    "from": 0,
                    "size": 50
                }
            ]
        }
        logger.info(f"Using unified search API for folders with queryString: {query_string}")

        # Use global search endpoint (site-specific search not available)
        unified_result = await self._makeGraphApiCall(
            connection["accessToken"],
            "search/query",
            method="POST",
            data=json.dumps(payload).encode("utf-8")
        )

        if "error" in unified_result:
            logger.warning(f"Unified search failed: {unified_result['error']}")
            items = []
        else:
            # Flatten hits -> driveItem resources
            items = []
            for container in (unified_result.get("value", []) or []):
                for hits_container in (container.get("hitsContainers", []) or []):
                    for hit in (hits_container.get("hits", []) or []):
                        resource = hit.get("resource")
                        if resource:
                            items.append(resource)

            logger.info(f"Unified search returned {len(items)} items (pre-filter)")

            # Apply our improved folder detection logic
            folder_items = []
            for item in items:
                resource = item

                # Use the same detection logic as our test
                is_folder = False
                if 'folder' in resource:
                    is_folder = True
                else:
                    # Try to detect by URL pattern or other indicators
                    web_url = resource.get('webUrl', '')
                    name = resource.get('name', '')

                    # Check if URL has no file extension and looks like a folder path
                    if '.' not in name and ('/' in web_url or '\\' in web_url):
                        is_folder = True

                if is_folder:
                    folder_items.append(item)

            items = folder_items
            logger.info(f"Filtered to {len(items)} folders using improved detection logic")

        # Process unified search results - extract site information from webUrl
        for item in items:
            item_name = item.get("name", "")
            web_url = item.get("webUrl", "")

            # Extract site information from webUrl
            site_name = "Unknown Site"
            site_id = "unknown"

            if web_url and '/sites/' in web_url:
                try:
                    # Extract site name from URL: https://pcuster.sharepoint.com/sites/SiteName/...
                    url_parts = web_url.split('/sites/')
                    if len(url_parts) > 1:
                        site_path = url_parts[1].split('/')[0]
                        # Find matching site from discovered sites
                        # First try to match by site name (URL path)
                        for site in sites:
                            if site.get("name") == site_path:
                                site_name = site.get("displayName", site_path)
                                site_id = site.get("id", "unknown")
                                break
                        else:
                            # If no match by name, try to match by displayName
                            for site in sites:
                                if site.get("displayName") == site_path:
                                    site_name = site.get("displayName", site_path)
                                    site_id = site.get("id", "unknown")
                                    break
                            else:
                                # If no exact match, use the site path as site name
                                site_name = site_path
                                # Try to find a site with similar name
                                for site in sites:
                                    if site_path.lower() in site.get("name", "").lower() or site_path.lower() in site.get("displayName", "").lower():
                                        site_name = site.get("displayName", site_path)
                                        site_id = site.get("id", "unknown")
                                        break
                except Exception as e:
                    logger.warning(f"Error extracting site info from URL {web_url}: {e}")

            # Use improved folder detection logic
            is_folder = False
            if 'folder' in item:
                is_folder = True
            else:
                # Try to detect by URL pattern or other indicators
                name = item.get('name', '')

                # Check if URL has no file extension and looks like a folder path
                if '.' not in name and ('/' in web_url or '\\' in web_url):
                    is_folder = True

            item_type = "folder" if is_folder else "file"
            item_path = item.get("parentReference", {}).get("path", "")
            logger.debug(f"Processing {item_type}: '{item_name}' at path: '{item_path}'")

            # Simple filtering like test file - just check search type
            if searchType == "files" and is_folder:
                continue  # Skip folders when searching for files
            elif searchType == "folders" and not is_folder:
                continue  # Skip files when searching for folders

            # Simple approach like test file - no complex filtering
            logger.debug(f"Item '{item_name}' found - adding to results")

            # Create result with full path information for proper action chaining
            parent_path = item.get("parentReference", {}).get("path", "")

            # Extract the full SharePoint path from webUrl or parentReference
            full_path = ""
            if web_url:
                # Extract path from webUrl: https://pcuster.sharepoint.com/sites/SSSRESYNachfolge/Freigegebene%20Dokumente/General/Eskalation%20LogObject/Druckersteuerung
                if '/sites/' in web_url:
                    path_part = web_url.split('/sites/')[1]
                    # Decode URL encoding and convert to backslash format
                    import urllib.parse
                    decoded_path = urllib.parse.unquote(path_part)
                    full_path = "\\" + decoded_path.replace('/', '\\')
            elif parent_path:
                # Use parentReference path if available
                full_path = parent_path.replace('/', '\\')

            doc_info = {
                "id": item.get("id"),
                "name": item.get("name"),
                "type": "folder" if is_folder else "file",
                "siteName": site_name,
                "siteId": site_id,
                "webUrl": web_url,
                "fullPath": full_path,
                "parentPath": parent_path
            }

            found_documents.append(doc_info)

        logger.info(f"Found {len(found_documents)} documents from unified search")

    except Exception as e:
        logger.error(f"Error performing unified folder search: {str(e)}")
        # Fallback to site-by-site search
        pass

# If no unified search was performed or it failed, fall back to site-by-site search
if not found_documents:
    # Use simple approach like test file - no complex filtering
    site_scoped_sites = sites

    for site in site_scoped_sites:
        site_id = site["id"]
        site_name = site["displayName"]
        site_url = site["webUrl"]

        logger.info(f"Searching in site: {site_name} ({site_url})")

        # Use Microsoft Graph API for this specific site
        # Handle empty or wildcard queries
        if not fileQuery or fileQuery.strip() == "" or fileQuery.strip() == "*":
            # For wildcard/empty queries, list all items in the drive
            endpoint = f"sites/{site_id}/drive/root/children"
        else:
            # For files, use regular search API
            search_query = fileQuery.replace("'", "''")  # Escape single quotes for OData
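
The folder branch above leans entirely on Microsoft Graph's unified search endpoint (POST /search/query), wrapped here by self._makeGraphApiCall. For reference, a minimal standalone sketch of the same request, assuming the requests library and a valid Graph access token; ACCESS_TOKEN and the sample query string are illustrative placeholders, not values from this commit:

    # Minimal sketch: call Microsoft Graph unified search directly.
    # ACCESS_TOKEN is a placeholder; the method above takes the token
    # from connection["accessToken"] instead.
    import requests

    ACCESS_TOKEN = "..."  # placeholder

    payload = {
        "requests": [
            {
                "entityTypes": ["driveItem"],
                "query": {"queryString": "Eskalation AND LogObject"},  # sample terms
                "from": 0,
                "size": 50,
            }
        ]
    }

    resp = requests.post(
        "https://graph.microsoft.com/v1.0/search/query",
        headers={"Authorization": f"Bearer {ACCESS_TOKEN}"},
        json=payload,
        timeout=30,
    )
    resp.raise_for_status()

    # Flatten value[].hitsContainers[].hits[].resource into driveItems,
    # mirroring the triple loop in the hunk above.
    items = [
        hit["resource"]
        for container in resp.json().get("value", [])
        for hits_container in container.get("hitsContainers", [])
        for hit in hits_container.get("hits", [])
        if hit.get("resource")
    ]
    print(f"{len(items)} driveItem results")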
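
The site-matching cascade in the unified-search branch relies on Python's for/else: the else block runs only when the loop finishes without hitting break, which is how the code falls through from exact name match to displayName match to substring match. A toy illustration with made-up data:

    # for/else: "no break happened" branch.
    sites = [{"name": "SiteA"}, {"name": "SiteB"}]  # toy data

    for site in sites:
        if site["name"] == "SiteC":
            print("matched", site)
            break
    else:
        print("no exact match - falling through to the next strategy")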
@@ -613,77 +713,78 @@ class MethodSharepoint(MethodBase):
        # Process search results for this site (files)
        items = search_result.get("value", [])
        logger.info(f"Retrieved {len(items)} items from site {site_name}")
        site_documents = []

        for item in items:
            item_name = item.get("name", "")

            # Use improved folder detection logic
            is_folder = False
            if 'folder' in item:
                is_folder = True
            else:
                # Try to detect by URL pattern or other indicators
                web_url = item.get('webUrl', '')
                name = item.get('name', '')

                # Check if URL has no file extension and looks like a folder path
                if '.' not in name and ('/' in web_url or '\\' in web_url):
                    is_folder = True

            item_type = "folder" if is_folder else "file"
            item_path = item.get("parentReference", {}).get("path", "")
            logger.debug(f"Processing {item_type}: '{item_name}' at path: '{item_path}'")

            # Simple filtering like test file - just check search type
            if searchType == "files" and is_folder:
                continue  # Skip folders when searching for files
            elif searchType == "folders" and not is_folder:
                continue  # Skip files when searching for folders

            # Simple approach like test file - no complex filtering
            logger.debug(f"Item '{item_name}' found - adding to results")

            # Create result with full path information for proper action chaining
            web_url = item.get("webUrl", "")
            parent_path = item.get("parentReference", {}).get("path", "")

            # Extract the full SharePoint path from webUrl or parentReference
            full_path = ""
            if web_url:
                # Extract path from webUrl: https://pcuster.sharepoint.com/sites/SSSRESYNachfolge/Freigegebene%20Dokumente/General/Eskalation%20LogObject/Druckersteuerung
                if '/sites/' in web_url:
                    path_part = web_url.split('/sites/')[1]
                    # Decode URL encoding and convert to backslash format
                    import urllib.parse
                    decoded_path = urllib.parse.unquote(path_part)
                    full_path = "\\" + decoded_path.replace('/', '\\')
            elif parent_path:
                # Use parentReference path if available
                full_path = parent_path.replace('/', '\\')

            doc_info = {
                "id": item.get("id"),
                "name": item.get("name"),
                "type": "folder" if is_folder else "file",
                "siteName": site_name,
                "siteId": site_id,
                "webUrl": web_url,
                "fullPath": full_path,
                "parentPath": parent_path
            }

            site_documents.append(doc_info)

        found_documents.extend(site_documents)
        all_sites_searched.append({
            "siteName": site_name,
            "siteUrl": site_url,
            "siteId": site_id,
            "documentsFound": len(site_documents)
        })

        logger.info(f"Found {len(site_documents)} documents in site {site_name}")

    # Limit total results to maxResults
    if len(found_documents) > maxResults:
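
Both hunks share the webUrl-to-fullPath conversion and the extension-based folder heuristic. A small sketch of both, using the example URL from the code comments; note the heuristic's known trade-off, that an extensionless file (e.g. "README") is classified as a folder:

    # Sketch of the webUrl -> fullPath conversion used in both hunks.
    import urllib.parse

    web_url = ("https://pcuster.sharepoint.com/sites/SSSRESYNachfolge/"
               "Freigegebene%20Dokumente/General/Eskalation%20LogObject/Druckersteuerung")

    full_path = ""
    if '/sites/' in web_url:
        path_part = web_url.split('/sites/')[1]
        # Decode percent-encoding, then switch to backslash separators
        decoded_path = urllib.parse.unquote(path_part)
        full_path = "\\" + decoded_path.replace('/', '\\')

    print(full_path)
    # \SSSRESYNachfolge\Freigegebene Dokumente\General\Eskalation LogObject\Druckersteuerung

    # The folder heuristic: an item without a 'folder' facet still counts
    # as a folder when its name has no dot and its webUrl contains a
    # path separator.
    def looks_like_folder(item: dict) -> bool:
        if 'folder' in item:
            return True
        name = item.get('name', '')
        url = item.get('webUrl', '')
        return '.' not in name and ('/' in url or '\\' in url)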