# Copyright (c) 2025 Patrick Motsch
|
|
# All rights reserved.
|
|
|
|
import logging
|
|
import time
|
|
import json
|
|
import urllib.parse
|
|
from typing import Dict, Any
|
|
from modules.aichat.datamodelFeatureAiChat import ActionResult, ActionDocument
|
|
|
|
# Module-level logger named after this module, per standard logging practice.
logger = logging.getLogger(__name__)
|
|
|
|
def _extractSiteInfoFromUrl(webUrl: str, sites: list) -> tuple:
    """Best-effort mapping of a driveItem's webUrl back to one of the discovered sites.

    Matching order: exact match on the site's URL name, then exact match on
    its displayName, then case-insensitive substring match. If nothing matches,
    the raw URL path segment is used as the site name with an "unknown" id.

    Returns:
        Tuple of (siteName, siteId).
    """
    siteName = "Unknown Site"
    siteId = "unknown"
    if webUrl and '/sites/' in webUrl:
        try:
            # Extract site name from URL: https://pcuster.sharepoint.com/sites/SiteName/...
            urlParts = webUrl.split('/sites/')
            if len(urlParts) > 1:
                sitePath = urlParts[1].split('/')[0]
                # 1) Exact match on the site name (URL path segment).
                for candidateSite in sites:
                    if candidateSite.get("name") == sitePath:
                        return candidateSite.get("displayName", sitePath), candidateSite.get("id", "unknown")
                # 2) Exact match on the display name.
                for candidateSite in sites:
                    if candidateSite.get("displayName") == sitePath:
                        return candidateSite.get("displayName", sitePath), candidateSite.get("id", "unknown")
                # 3) Fuzzy fallback: substring match; keep raw segment as name otherwise.
                siteName = sitePath
                for candidateSite in sites:
                    if sitePath.lower() in candidateSite.get("name", "").lower() or sitePath.lower() in candidateSite.get("displayName", "").lower():
                        return candidateSite.get("displayName", sitePath), candidateSite.get("id", "unknown")
        except Exception as e:
            logger.warning(f"Error extracting site info from URL {webUrl}: {e}")
    return siteName, siteId


def _buildFullPath(webUrl: str, parentPath: str) -> str:
    """Derive a backslash-separated SharePoint path for action chaining.

    Prefers the webUrl (decoding URL escapes); falls back to the
    parentReference path when no webUrl is present.
    """
    if webUrl:
        # Extract path from webUrl: https://pcuster.sharepoint.com/sites/SiteName/Library/Folder/...
        if '/sites/' in webUrl:
            pathPart = webUrl.split('/sites/')[1]
            # Decode URL encoding and convert to backslash format.
            decodedPath = urllib.parse.unquote(pathPart)
            return "\\" + decodedPath.replace('/', '\\')
        return ""
    if parentPath:
        # Use parentReference path if available.
        return parentPath.replace('/', '\\')
    return ""


def _buildDocInfo(item: Dict[str, Any], isFolder: bool, siteName: str, siteId: str,
                  webUrl: str, fullPath: str, parentPath: str) -> Dict[str, Any]:
    """Assemble the per-item result record returned to the caller."""
    return {
        "id": item.get("id"),
        "name": item.get("name"),
        "type": "folder" if isFolder else "file",
        "siteName": siteName,
        "siteId": siteId,
        "webUrl": webUrl,
        "fullPath": fullPath,
        "parentPath": parentPath
    }


async def findDocumentPath(self, parameters: Dict[str, Any]) -> ActionResult:
    """Find SharePoint documents/folders matching a search query.

    Resolves the target site(s) from the query path or a site hint, then
    searches via the Microsoft Graph unified search API (folders) or the
    per-site drive search/list endpoints, and returns the matches as a JSON
    ActionDocument.

    Args:
        parameters: Action parameters. Recognized keys:
            connectionReference (required), site, searchQuery (default "*"),
            maxResults (default 1000), parentOperationId.

    Returns:
        ActionResult with one JSON document of found items on success, or an
        ActionResult failure with an error message.
    """
    operationId = None
    try:
        # Init progress logger; fall back to a timestamp id when no workflow is active.
        workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
        operationId = f"sharepoint_find_{workflowId}_{int(time.time())}"

        # Start progress tracking.
        parentOperationId = parameters.get('parentOperationId')
        self.services.chat.progressLogStart(
            operationId,
            "Find Document Path",
            "SharePoint Search",
            f"Query: {parameters.get('searchQuery', '*')}",
            parentOperationId=parentOperationId
        )

        connectionReference = parameters.get("connectionReference")
        site = parameters.get("site")
        searchQuery = parameters.get("searchQuery", "*")
        maxResults = parameters.get("maxResults", 1000)

        if not connectionReference:
            if operationId:
                self.services.chat.progressLogFinish(operationId, False)
            return ActionResult.isFailure(error="Connection reference is required")

        # Parse searchQuery to extract path, search terms, search type, and options.
        pathQuery, fileQuery, searchType, searchOptions = self.pathProcessing.parseSearchQuery(searchQuery)
        logger.debug(f"Parsed searchQuery '{searchQuery}' -> pathQuery='{pathQuery}', fileQuery='{fileQuery}', searchType='{searchType}'")

        self.services.chat.progressLogUpdate(operationId, 0.2, "Getting Microsoft connection")
        connection = self.connection.getMicrosoftConnection(connectionReference)
        if not connection:
            if operationId:
                self.services.chat.progressLogFinish(operationId, False)
            return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")

        # Extract site name from pathQuery if it contains a Microsoft-standard path (/sites/SiteName/...).
        siteFromPath = None
        directSite = None
        if pathQuery and pathQuery.startswith('/sites/'):
            parsedPath = self.siteDiscovery.extractSiteFromStandardPath(pathQuery)
            if parsedPath:
                siteFromPath = parsedPath.get("siteName")
                logger.info(f"Extracted site from Microsoft-standard pathQuery '{pathQuery}': '{siteFromPath}'")
                # Try to get site directly by path (optimization - no need to load all sites).
                directSite = await self.siteDiscovery.getSiteByStandardPath(siteFromPath)
                if directSite:
                    logger.info(f"Got site directly by standard path - no need to discover all sites")
                    sites = [directSite]
                else:
                    logger.warning(f"Could not get site directly, falling back to site discovery")
                    directSite = None
            else:
                logger.warning(f"Failed to parse site from standard pathQuery '{pathQuery}'")

        # If we didn't get the site directly, use discovery and filtering.
        if not directSite:
            # Site hint priority: site parameter > site from pathQuery > site_hint from searchOptions.
            siteHintToUse = site or siteFromPath or searchOptions.get("site_hint")

            self.services.chat.progressLogUpdate(operationId, 0.3, "Discovering SharePoint sites")
            if siteHintToUse:
                # With a hint: discover all sites first, then filter down.
                allSites = await self.siteDiscovery.discoverSharePointSites()
                if not allSites:
                    if operationId:
                        self.services.chat.progressLogFinish(operationId, False)
                    return ActionResult.isFailure(error="No SharePoint sites found or accessible")

                sites = self.siteDiscovery.filterSitesByHint(allSites, siteHintToUse)
                logger.info(f"Filtered sites by site hint '{siteHintToUse}' -> {len(sites)} sites")
                if not sites:
                    if operationId:
                        self.services.chat.progressLogFinish(operationId, False)
                    return ActionResult.isFailure(error=f"No SharePoint sites found matching '{siteHintToUse}'")
            else:
                # No site hint - discover all sites.
                sites = await self.siteDiscovery.discoverSharePointSites()
                if not sites:
                    if operationId:
                        self.services.chat.progressLogFinish(operationId, False)
                    return ActionResult.isFailure(error="No SharePoint sites found or accessible")

        # Resolve path query into search paths.
        # NOTE(review): the resolved value is not consumed below — call kept in
        # case resolvePathQuery has side effects; confirm and drop if not.
        searchPaths = self.pathProcessing.resolvePathQuery(pathQuery)

        self.services.chat.progressLogUpdate(operationId, 0.5, f"Searching across {len(sites)} site(s)")

        try:
            foundDocuments = []
            # Per-site stats; currently collected but not included in the result payload.
            allSitesSearched = []

            # Folder searches with a concrete term use the global unified search API.
            if searchType == "folders" and fileQuery and fileQuery.strip() != "" and fileQuery.strip() != "*":
                try:
                    # Use Microsoft Graph Search API syntax (simple term search only).
                    terms = [t for t in fileQuery.split() if t.strip()]
                    if len(terms) > 1:
                        # Multiple terms: search for ALL terms (AND) - more specific results.
                        queryString = " AND ".join(terms)
                    else:
                        # Single term: search for the term.
                        queryString = terms[0] if terms else fileQuery
                    logger.info(f"Using unified search for folders: {queryString}")

                    payload = {
                        "requests": [
                            {
                                "entityTypes": ["driveItem"],
                                "query": {"queryString": queryString},
                                "from": 0,
                                "size": 50
                            }
                        ]
                    }
                    logger.info(f"Using unified search API for folders with queryString: {queryString}")

                    # Use global search endpoint (site-specific search not available).
                    unifiedResult = await self.apiClient.makeGraphApiCall(
                        "search/query",
                        method="POST",
                        data=json.dumps(payload).encode("utf-8")
                    )

                    if "error" in unifiedResult:
                        logger.warning(f"Unified search failed: {unifiedResult['error']}")
                        items = []
                    else:
                        # Flatten hits -> driveItem resources.
                        items = []
                        for container in (unifiedResult.get("value", []) or []):
                            for hitsContainer in (container.get("hitsContainers", []) or []):
                                for hit in (hitsContainer.get("hits", []) or []):
                                    resource = hit.get("resource")
                                    if resource:
                                        items.append(resource)

                    logger.info(f"Unified search returned {len(items)} items (pre-filter)")

                    # Keep only folders, using the shared detection logic.
                    items = [item for item in items if self.services.sharepoint.detectFolderType(item)]
                    logger.info(f"Filtered to {len(items)} folders using improved detection logic")

                    # Process unified search results - extract site information from webUrl.
                    for item in items:
                        itemName = item.get("name", "")
                        webUrl = item.get("webUrl", "")
                        siteName, siteId = _extractSiteInfoFromUrl(webUrl, sites)

                        isFolder = self.services.sharepoint.detectFolderType(item)
                        itemType = "folder" if isFolder else "file"
                        itemPath = item.get("parentReference", {}).get("path", "")
                        logger.debug(f"Processing {itemType}: '{itemName}' at path: '{itemPath}'")

                        # Simple filtering - just check search type.
                        if searchType == "files" and isFolder:
                            continue  # Skip folders when searching for files
                        elif searchType == "folders" and not isFolder:
                            continue  # Skip files when searching for folders

                        logger.debug(f"Item '{itemName}' found - adding to results")

                        parentPath = item.get("parentReference", {}).get("path", "")
                        fullPath = _buildFullPath(webUrl, parentPath)
                        foundDocuments.append(_buildDocInfo(item, isFolder, siteName, siteId, webUrl, fullPath, parentPath))

                    logger.info(f"Found {len(foundDocuments)} documents from unified search")

                except Exception as e:
                    # Best-effort: fall through to the site-by-site search below.
                    logger.error(f"Error performing unified folder search: {str(e)}")

            # If no unified search was performed or it failed, fall back to site-by-site search.
            if not foundDocuments:
                for currentSite in sites:
                    siteId = currentSite["id"]
                    siteName = currentSite["displayName"]
                    siteUrl = currentSite["webUrl"]

                    logger.info(f"Searching in site: {siteName} ({siteUrl})")

                    # Check if pathQuery contains a specific folder path (not just /sites/SiteName).
                    folderPath = None
                    if pathQuery and pathQuery.startswith('/sites/'):
                        parsedPath = self.siteDiscovery.extractSiteFromStandardPath(pathQuery)
                        if parsedPath:
                            innerPath = parsedPath.get("innerPath", "")
                            if innerPath and innerPath.strip():
                                folderPath = innerPath.lstrip('/')

                                # Generic approach: try the folder without its first segment.
                                # In the Graph API, /drive/root already points to the default
                                # document library, so a leading library name must be removed.
                                # Testing the actual API response works for all languages.
                                pathSegments = [s for s in folderPath.split('/') if s.strip()]
                                if len(pathSegments) > 1:
                                    # Try with first segment removed (likely the document library).
                                    testPath = '/'.join(pathSegments[1:])
                                    # Quick existence probe - fast, no full search needed.
                                    testEndpoint = f"sites/{siteId}/drive/root:/{urllib.parse.quote(testPath, safe='')}:"
                                    testResult = await self.apiClient.makeGraphApiCall(testEndpoint)
                                    if testResult and "error" not in testResult:
                                        folderPath = testPath
                                        logger.info(f"Removed document library name '{pathSegments[0]}' from folder path (tested via API)")
                                    else:
                                        logger.info(f"Keeping original folder path '{folderPath}' (first segment is not a document library)")
                                elif len(pathSegments) == 1:
                                    # Single segment - likely the document library itself; use root.
                                    folderPath = None
                                    logger.info(f"Only one segment '{pathSegments[0]}' found, likely document library - using root")

                                if folderPath:
                                    logger.info(f"Extracted folder path from pathQuery: '{folderPath}'")
                                else:
                                    logger.info(f"Folder path resolved to root (only document library in path)")

                    # Use Microsoft Graph API for this specific site.
                    if not fileQuery or fileQuery.strip() == "" or fileQuery.strip() == "*":
                        # For wildcard/empty queries, list all items.
                        if folderPath:
                            # List items in specific folder.
                            encodedPath = urllib.parse.quote(folderPath, safe='')
                            endpoint = f"sites/{siteId}/drive/root:/{encodedPath}:/children"
                            logger.info(f"Listing items in folder: '{folderPath}'")
                        else:
                            # List all items in the drive root.
                            endpoint = f"sites/{siteId}/drive/root/children"

                        listResult = await self.apiClient.makeGraphApiCall(endpoint)
                        if "error" in listResult:
                            logger.warning(f"List failed for site {siteName}: {listResult['error']}")
                            continue
                        items = listResult.get("value", [])
                        logger.info(f"Retrieved {len(items)} items from site {siteName}")
                    else:
                        # For files, use the regular search API.
                        # Clean the query: remove path-like syntax and invalid KQL syntax.
                        searchQueryCleaned = self.pathProcessing.cleanSearchQuery(fileQuery)
                        encodedQuery = urllib.parse.quote(searchQueryCleaned, safe='')

                        if folderPath:
                            # Search in specific folder.
                            encodedPath = urllib.parse.quote(folderPath, safe='')
                            endpoint = f"sites/{siteId}/drive/root:/{encodedPath}:/search(q='{encodedQuery}')"
                            logger.info(f"Searching in folder '{folderPath}' with query: '{searchQueryCleaned}' (encoded: '{encodedQuery}')")
                        else:
                            # Search in drive root.
                            endpoint = f"sites/{siteId}/drive/root/search(q='{encodedQuery}')"
                            logger.info(f"Using search API for files with query: '{searchQueryCleaned}' (encoded: '{encodedQuery}')")

                        searchResult = await self.apiClient.makeGraphApiCall(endpoint)
                        if "error" in searchResult:
                            logger.warning(f"Search failed for site {siteName}: {searchResult['error']}")
                            continue
                        items = searchResult.get("value", [])
                        logger.info(f"Retrieved {len(items)} items from site {siteName}")

                    siteDocuments = []
                    for item in items:
                        itemName = item.get("name", "")
                        isFolder = self.services.sharepoint.detectFolderType(item)
                        itemType = "folder" if isFolder else "file"
                        itemPath = item.get("parentReference", {}).get("path", "")
                        logger.debug(f"Processing {itemType}: '{itemName}' at path: '{itemPath}'")

                        # Simple filtering - just check search type.
                        if searchType == "files" and isFolder:
                            continue  # Skip folders when searching for files
                        elif searchType == "folders" and not isFolder:
                            continue  # Skip files when searching for folders

                        logger.debug(f"Item '{itemName}' found - adding to results")

                        webUrl = item.get("webUrl", "")
                        parentPath = item.get("parentReference", {}).get("path", "")
                        fullPath = _buildFullPath(webUrl, parentPath)
                        siteDocuments.append(_buildDocInfo(item, isFolder, siteName, siteId, webUrl, fullPath, parentPath))

                    foundDocuments.extend(siteDocuments)
                    allSitesSearched.append({
                        "siteName": siteName,
                        "siteUrl": siteUrl,
                        "siteId": siteId,
                        "documentsFound": len(siteDocuments)
                    })
                    logger.info(f"Found {len(siteDocuments)} documents in site {siteName}")

            # Limit total results to maxResults.
            if len(foundDocuments) > maxResults:
                foundDocuments = foundDocuments[:maxResults]
                logger.info(f"Limited results to {maxResults} items")

            self.services.chat.progressLogUpdate(operationId, 0.9, f"Found {len(foundDocuments)} document(s)")

            resultData = {
                "searchQuery": searchQuery,
                "totalResults": len(foundDocuments),
                "maxResults": maxResults,
                "foundDocuments": foundDocuments,
                "timestamp": self.services.utils.timestampGetUtc()
            }

        except Exception as e:
            logger.error(f"Error searching SharePoint: {str(e)}")
            if operationId:
                self.services.chat.progressLogFinish(operationId, False)
            return ActionResult.isFailure(error=str(e))

        # Output is always JSON.
        outputMimeType = "application/json"

        validationMetadata = {
            "actionType": "sharepoint.findDocumentPath",
            "searchQuery": searchQuery,
            "maxResults": maxResults,
            "totalResults": len(foundDocuments),
            "hasResults": len(foundDocuments) > 0
        }

        self.services.chat.progressLogFinish(operationId, True)
        return ActionResult(
            success=True,
            documents=[
                ActionDocument(
                    documentName=self._generateMeaningfulFileName("sharepoint_find_path", "json", None, "findDocumentPath"),
                    documentData=json.dumps(resultData, indent=2),
                    mimeType=outputMimeType,
                    validationMetadata=validationMetadata
                )
            ]
        )

    except Exception as e:
        logger.error(f"Error finding document path: {str(e)}")
        if operationId:
            try:
                self.services.chat.progressLogFinish(operationId, False)
            except Exception:
                # Progress logging is best-effort during failure cleanup.
                pass
        return ActionResult.isFailure(error=str(e))
|
|
|