From 236a85a99bdbe8efa907e1b4e06c279dc6832601 Mon Sep 17 00:00:00 2001
From: ValueOn AG
Date: Tue, 4 Nov 2025 23:59:29 +0100
Subject: [PATCH] Fix SharePoint calling chain
---
.../20251029-223021-api_sent_message_0.txt | 72 -
.../20251029-223033-api_received_response.txt | 200 --
modules/interfaces/interfaceDbChatObjects.py | 8 +-
modules/workflows/methods/methodSharepoint.py | 1902 ++++++++++-------
.../workflows/processing/core/taskPlanner.py | 29 +-
.../processing/modes/modeAutomation.py | 81 +-
modules/workflows/workflowManager.py | 205 +-
7 files changed, 1332 insertions(+), 1165 deletions(-)
delete mode 100644 local/logs/debug/prompts/20251029-223021-api_sent_message_0.txt
delete mode 100644 local/logs/debug/prompts/20251029-223033-api_received_response.txt
diff --git a/local/logs/debug/prompts/20251029-223021-api_sent_message_0.txt b/local/logs/debug/prompts/20251029-223021-api_sent_message_0.txt
deleted file mode 100644
index 7a71526d..00000000
--- a/local/logs/debug/prompts/20251029-223021-api_sent_message_0.txt
+++ /dev/null
@@ -1,72 +0,0 @@
-Message 0 (user)
-Length: 2015 chars
-================================================================================
-User request: "Generate the first 1000 prime numbers."
-
-Generate a NEW, COMPLETE JSON response. The template below shows ONLY the structure pattern - it is NOT existing content. Start from the beginning.
-
-JSON structure template (reference only - shows the pattern):
-{
- "metadata": {
- "split_strategy": "single_document",
- "source_documents": [],
- "extraction_method": "ai_generation"
- },
- "documents": [
- {
- "id": "doc_1",
- "title": "Generated Document",
- "filename": "document.json",
- "sections": [
- {
- "id": "section_heading_example",
- "content_type": "heading",
- "elements": [
- {"level": 1, "text": "Heading Text"}
- ],
- "order": 0
- },
- {
- "id": "section_paragraph_example",
- "content_type": "paragraph",
- "elements": [
- {"text": "Paragraph text content"}
- ],
- "order": 0
- },
- {
- "id": "section_list_example",
- "content_type": "list",
- "elements": [
- {
- "items": [
- {"text": "Item 1"},
- {"text": "Item 2"}
- ],
- "list_type": "numbered"
- }
- ],
- "order": 0
- },
- {
- "id": "section_table_example",
- "content_type": "table",
- "elements": [
- {
- "headers": ["Column 1", "Column 2"],
- "rows": [
- ["Row 1 Col 1", "Row 1 Col 2"],
- ["Row 2 Col 1", "Row 2 Col 2"]
- ],
- "caption": "Table caption"
- }
- ],
- "order": 0
- },
- {
- "id": "section_code_example",
- "content_type": "code",
- "elements": [
- {
- "code... [truncated]
-================================================================================
diff --git a/local/logs/debug/prompts/20251029-223033-api_received_response.txt b/local/logs/debug/prompts/20251029-223033-api_received_response.txt
deleted file mode 100644
index 31f8964c..00000000
--- a/local/logs/debug/prompts/20251029-223033-api_received_response.txt
+++ /dev/null
@@ -1,200 +0,0 @@
-API Response
-Length: 6082 chars
-================================================================================
-{
- "metadata": {
- "split_strategy": "single_document",
- "source_documents": [],
- "extraction_method": "ai_generation"
- },
- "documents": [
- {
- "id": "doc_1",
- "title": "List of First 1000 Prime Numbers",
- "filename": "prime_numbers.json",
- "sections": [
- {
- "id": "section_prime_numbers",
- "content_type": "list",
- "elements": [
- {
- "items": [
- {"number": 2},
- {"number": 3},
- {"number": 5},
- {"number": 7},
- {"number": 11},
- {"number": 13},
- {"number": 17},
- {"number": 19},
- {"number": 23},
- {"number": 29},
- {"number": 31},
- {"number": 37},
- {"number": 41},
- {"number": 43},
- {"number": 47},
- {"number": 53},
- {"number": 59},
- {"number": 61},
- {"number": 67},
- {"number": 71},
- {"number": 73},
- {"number": 79},
- {"number": 83},
- {"number": 89},
- {"number": 97},
- {"number": 101},
- {"number": 103},
- {"number": 107},
- {"number": 109},
- {"number": 113},
- {"number": 127},
- {"number": 131},
- {"number": 137},
- {"number": 139},
- {"number": 149},
- {"number": 151},
- {"number": 157},
- {"number": 163},
- {"number": 167},
- {"number": 173},
- {"number": 179},
- {"number": 181},
- {"number": 191},
- {"number": 193},
- {"number": 197},
- {"number": 199},
- {"number": 211},
- {"number": 223},
- {"number": 227},
- {"number": 229},
- {"number": 233},
- {"number": 239},
- {"number": 241},
- {"number": 251},
- {"number": 257},
- {"number": 263},
- {"number": 269},
- {"number": 271},
- {"number": 277},
- {"number": 281},
- {"number": 283},
- {"number": 293},
- {"number": 307},
- {"number": 311},
- {"number": 313},
- {"number": 317},
- {"number": 331},
- {"number": 337},
- {"number": 347},
- {"number": 349},
- {"number": 353},
- {"number": 359},
- {"number": 367},
- {"number": 373},
- {"number": 379},
- {"number": 383},
- {"number": 389},
- {"number": 397},
- {"number": 401},
- {"number": 409},
- {"number": 419},
- {"number": 421},
- {"number": 431},
- {"number": 433},
- {"number": 439},
- {"number": 443},
- {"number": 449},
- {"number": 457},
- {"number": 461},
- {"number": 463},
- {"number": 467},
- {"number": 479},
- {"number": 487},
- {"number": 491},
- {"number": 499},
- {"number": 503},
- {"number": 509},
- {"number": 521},
- {"number": 523},
- {"number": 541},
- {"number": 547},
- {"number": 557},
- {"number": 563},
- {"number": 569},
- {"number": 571},
- {"number": 577},
- {"number": 587},
- {"number": 593},
- {"number": 599},
- {"number": 601},
- {"number": 607},
- {"number": 613},
- {"number": 617},
- {"number": 619},
- {"number": 631},
- {"number": 641},
- {"number": 643},
- {"number": 647},
- {"number": 653},
- {"number": 659},
- {"number": 661},
- {"number": 673},
- {"number": 677},
- {"number": 683},
- {"number": 691},
- {"number": 701},
- {"number": 709},
- {"number": 719},
- {"number": 727},
- {"number": 733},
- {"number": 739},
- {"number": 743},
- {"number": 751},
- {"number": 757},
- {"number": 761},
- {"number": 769},
- {"number": 773},
- {"number": 787},
- {"number": 797},
- {"number": 809},
- {"number": 811},
- {"number": 821},
- {"number": 823},
- {"number": 827},
- {"number": 829},
- {"number": 839},
- {"number": 853},
- {"number": 857},
- {"number": 859},
- {"number": 863},
- {"number": 877},
- {"number": 881},
- {"number": 883},
- {"number": 887},
- {"number": 907},
- {"number": 911},
- {"number": 919},
- {"number": 929},
- {"number": 937},
- {"number": 941},
- {"number": 947},
- {"number": 953},
- {"number": 967},
- {"number": 971},
- {"number": 977},
- {"number": 983},
- {"number": 991},
- {"number": 997}
- ],
- "list_type": "numbered"
- }
- ],
- "order": 0
- }
- ]
- }
- ]
-}
-================================================================================
diff --git a/modules/interfaces/interfaceDbChatObjects.py b/modules/interfaces/interfaceDbChatObjects.py
index 35ed2f32..94cbe974 100644
--- a/modules/interfaces/interfaceDbChatObjects.py
+++ b/modules/interfaces/interfaceDbChatObjects.py
@@ -1573,13 +1573,7 @@ class ChatObjects:
executionLog["workflowId"] = workflow.id
executionLog["status"] = "completed"
executionLog["messages"].append(f"Workflow {workflow.id} started successfully")
-
- # Also store plan in module-level cache as backup (keyed by workflow ID)
- from modules.workflows.processing.modes import modeAutomation
- if not hasattr(modeAutomation, '_templatePlanCache'):
- modeAutomation._templatePlanCache = {}
- modeAutomation._templatePlanCache[workflow.id] = plan
- logger.info(f"Stored template plan for workflow {workflow.id} (cache + prompt) with {len(plan.get('tasks', []))} tasks")
+ logger.info(f"Started workflow {workflow.id} with plan containing {len(plan.get('tasks', []))} tasks (plan embedded in userInput)")
# Update automation with execution log
executionLogs = automation.get("executionLogs", [])
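
Context for this hunk: the removed `_templatePlanCache` was a process-local dict on the modeAutomation module, so a cached plan was lost on restart and invisible to other workers. The plan now travels inside the workflow's userInput instead. A minimal sketch of that idea, assuming a JSON envelope (the key names here are hypothetical, not the actual workflowManager payload):

    import json

    def embedPlan(userInput: str, plan: dict) -> str:
        # Keep the plan with the workflow record instead of a module-level cache
        return json.dumps({"userInput": userInput, "templatePlan": plan})

    def extractPlan(embedded: str) -> dict:
        return json.loads(embedded).get("templatePlan", {})
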
diff --git a/modules/workflows/methods/methodSharepoint.py b/modules/workflows/methods/methodSharepoint.py
index e53b43a5..6dabdaf5 100644
--- a/modules/workflows/methods/methodSharepoint.py
+++ b/modules/workflows/methods/methodSharepoint.py
@@ -62,16 +62,22 @@ class MethodSharepoint(MethodBase):
logger.error(f"Error getting Microsoft connection: {str(e)}")
return None
- async def _discoverSharePointSites(self) -> List[Dict[str, Any]]:
+ async def _discoverSharePointSites(self, limit: Optional[int] = None) -> List[Dict[str, Any]]:
"""
- Discover all SharePoint sites accessible to the user via Microsoft Graph API
+ Discover SharePoint sites accessible to the user via Microsoft Graph API
+
+ Parameters:
+            limit (Optional[int]): Maximum number of sites to return (an optimization when only the hostname is needed)
Returns:
List[Dict[str, Any]]: List of SharePoint site information
"""
try:
- # Query Microsoft Graph to get all sites the user has access to
+ # Query Microsoft Graph to get sites the user has access to
endpoint = "sites?search=*"
+ if limit:
+ endpoint += f"&$top={limit}"
+
result = await self._makeGraphApiCall(endpoint)
if "error" in result:
@@ -79,12 +85,15 @@ class MethodSharepoint(MethodBase):
return []
sites = result.get("value", [])
- logger.info(f"Discovered {len(sites)} SharePoint sites")
+ if limit:
+ sites = sites[:limit]
+
+ logger.info(f"Discovered {len(sites)} SharePoint sites" + (f" (limited to {limit})" if limit else ""))
# Process and return site information
- processed_sites = []
+ processedSites = []
for site in sites:
- site_info = {
+ siteInfo = {
"id": site.get("id"),
"displayName": site.get("displayName"),
"name": site.get("name"),
@@ -93,59 +102,122 @@ class MethodSharepoint(MethodBase):
"createdDateTime": site.get("createdDateTime"),
"lastModifiedDateTime": site.get("lastModifiedDateTime")
}
- processed_sites.append(site_info)
- logger.debug(f"Site: {site_info['displayName']} - {site_info['webUrl']}")
+ processedSites.append(siteInfo)
+ logger.debug(f"Site: {siteInfo['displayName']} - {siteInfo['webUrl']}")
- return processed_sites
+ return processedSites
except Exception as e:
logger.error(f"Error discovering SharePoint sites: {str(e)}")
return []
- def _filter_sites_by_hint(self, sites: List[Dict[str, Any]], site_hint: str) -> List[Dict[str, Any]]:
+ def _extractHostnameFromWebUrl(self, webUrl: str) -> Optional[str]:
+ """Extract hostname from SharePoint webUrl (e.g., https://pcuster.sharepoint.com)"""
+ try:
+ if not webUrl:
+ return None
+ parsed = urllib.parse.urlparse(webUrl)
+ return parsed.hostname
+ except Exception as e:
+ logger.error(f"Error extracting hostname from webUrl '{webUrl}': {str(e)}")
+ return None
+
+ async def _getSiteByStandardPath(self, sitePath: str) -> Optional[Dict[str, Any]]:
+ """
+ Get SharePoint site directly by Microsoft-standard path (/sites/SiteName)
+ without loading all sites. Uses hostname from first available site.
+
+ Parameters:
+ sitePath (str): Site path like 'company-share' (without /sites/ prefix)
+
+ Returns:
+ Optional[Dict[str, Any]]: Site information if found, None otherwise
+ """
+ try:
+ # Get hostname from first available site (minimal load - only 1 site)
+ minimalSites = await self._discoverSharePointSites(limit=1)
+ if not minimalSites:
+ logger.warning("No sites available to extract hostname")
+ return None
+
+ hostname = self._extractHostnameFromWebUrl(minimalSites[0].get("webUrl"))
+ if not hostname:
+ logger.warning("Could not extract hostname from site")
+ return None
+
+ logger.info(f"Extracted hostname '{hostname}' from first site, now getting site by path: {sitePath}")
+
+ # Get site directly using hostname + path
+ endpoint = f"sites/{hostname}:/sites/{sitePath}"
+ result = await self._makeGraphApiCall(endpoint)
+
+ if "error" in result:
+ logger.warning(f"Could not get site directly by path '{sitePath}': {result['error']}")
+ return None
+
+ siteInfo = {
+ "id": result.get("id"),
+ "displayName": result.get("displayName"),
+ "name": result.get("name"),
+ "webUrl": result.get("webUrl"),
+ "description": result.get("description"),
+ "createdDateTime": result.get("createdDateTime"),
+ "lastModifiedDateTime": result.get("lastModifiedDateTime")
+ }
+
+ logger.info(f"Successfully got site by standard path: {siteInfo['displayName']} (ID: {siteInfo['id']})")
+ return siteInfo
+
+ except Exception as e:
+ logger.error(f"Error getting site by standard path '{sitePath}': {str(e)}")
+ return None
+
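
For orientation, the two Graph calls `_getSiteByStandardPath('company-share')` ends up making (the hostname is the example value from the docstring above):

    # 1. GET sites?search=*&$top=1
    #    -> read webUrl of the first site, parse out its hostname
    # 2. GET sites/pcuster.sharepoint.com:/sites/company-share
    #    -> site object (id/displayName/webUrl), or an error payload if the path does not exist
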
+ def _filterSitesByHint(self, sites: List[Dict[str, Any]], siteHint: str) -> List[Dict[str, Any]]:
"""Filter discovered sites by a human-entered site hint (case-insensitive substring)."""
try:
- if not site_hint:
+ if not siteHint:
return sites
- hint = site_hint.strip().lower()
+ hint = siteHint.strip().lower()
filtered: List[Dict[str, Any]] = []
for site in sites:
name = (site.get("displayName") or "").lower()
- web_url = (site.get("webUrl") or "").lower()
- if hint in name or hint in web_url:
+ webUrl = (site.get("webUrl") or "").lower()
+ if hint in name or hint in webUrl:
filtered.append(site)
return filtered if filtered else sites
except Exception as e:
- logger.error(f"Error filtering sites by hint '{site_hint}': {str(e)}")
+ logger.error(f"Error filtering sites by hint '{siteHint}': {str(e)}")
return sites
-
- def _parse_site_scoped_path(self, path_query: str) -> Optional[Dict[str, str]]:
+ def _extractSiteFromStandardPath(self, pathQuery: str) -> Optional[Dict[str, str]]:
"""
- Parse a site-scoped path of the form:
- /site:KM LayerFinance/Documents/Work or /site:KM LayerFinance/Shared Documents/Work
-
+ Extract site name from Microsoft-standard server-relative path:
+ /sites/company-share/Freigegebene Dokumente/...
+
Returns dict with keys: siteName, innerPath (no leading slash) on success, else None.
"""
try:
- if not path_query or not path_query.startswith('/'):
+ if not pathQuery or not pathQuery.startswith('/sites/'):
return None
- # expected syntax prefix
- prefix = '/site:'
- if not path_query.startswith(prefix):
- return None
- remainder = path_query[len(prefix):]
- # split once on the next '/'
+
+ # Remove leading /sites/ prefix
+ remainder = pathQuery[7:] # len('/sites/') = 7
+
+ # Split on first '/' to get site name
if '/' not in remainder:
+ # Only site name, no inner path
+ return {"siteName": remainder, "innerPath": ""}
+
+ siteName, inner = remainder.split('/', 1)
+ siteName = siteName.strip()
+ innerPath = inner.strip()
+
+ if not siteName:
return None
- site_name, inner = remainder.split('/', 1)
- site_name = site_name.strip()
- inner_path = inner.strip()
- if not site_name or not inner_path:
- return None
- return {"siteName": site_name, "innerPath": inner_path}
+
+ return {"siteName": siteName, "innerPath": innerPath}
except Exception as e:
- logger.error(f"Error parsing site-scoped path '{path_query}': {str(e)}")
+ logger.error(f"Error extracting site from standard path '{pathQuery}': {str(e)}")
return None
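
Expected behaviour of `_extractSiteFromStandardPath` on typical inputs (a reference trace, not part of the patch):

    # '/sites/company-share/Freigegebene Dokumente/Reports'
    #     -> {"siteName": "company-share", "innerPath": "Freigegebene Dokumente/Reports"}
    # '/sites/company-share'
    #     -> {"siteName": "company-share", "innerPath": ""}
    # '/Documents/Reports'   (no /sites/ prefix)
    #     -> None
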
def _parseSearchQuery(self, searchQuery: str) -> tuple[str, str, str, dict]:
@@ -187,50 +259,50 @@ class MethodSharepoint(MethodBase):
# Check for search type specification (files:, folders:, all:) FIRST
searchType = "all" # Default
if searchQuery.startswith(("files:", "folders:", "all:")):
- type_parts = searchQuery.split(':', 1)
- searchType = type_parts[0].strip()
- searchQuery = type_parts[1].strip()
+ typeParts = searchQuery.split(':', 1)
+ searchType = typeParts[0].strip()
+ searchQuery = typeParts[1].strip()
# Extract optional site hint tokens: support "site=Name" or leading "site:Name"
- def _extract_site_hint(q: str) -> tuple[str, Optional[str]]:
+ def _extractSiteHint(q: str) -> tuple[str, Optional[str]]:
try:
- q_strip = q.strip()
+ qStrip = q.strip()
# Leading form: site:KM LayerFinance ...
- if q_strip.lower().startswith("site:"):
- after = q_strip[5:].lstrip()
+ if qStrip.lower().startswith("site:"):
+ after = qStrip[5:].lstrip()
# site name until next space or end
if ' ' in after:
- site_name, rest = after.split(' ', 1)
+ siteName, rest = after.split(' ', 1)
else:
- site_name, rest = after, ''
- return rest.strip(), site_name.strip()
+ siteName, rest = after, ''
+ return rest.strip(), siteName.strip()
# Inline key=value form anywhere
- m = re.search(r"\bsite=([^;\s]+)", q_strip, flags=re.IGNORECASE)
+ m = re.search(r"\bsite=([^;\s]+)", qStrip, flags=re.IGNORECASE)
if m:
- site_name = m.group(1).strip()
+ siteName = m.group(1).strip()
# remove the token from query
- q_new = re.sub(r"\bsite=[^;\s]+;?", "", q_strip, flags=re.IGNORECASE).strip()
- return q_new, site_name
+ qNew = re.sub(r"\bsite=[^;\s]+;?", "", qStrip, flags=re.IGNORECASE).strip()
+ return qNew, siteName
except Exception:
pass
return q, None
- searchQuery, extracted_site = _extract_site_hint(searchQuery)
- if extracted_site:
- searchOptions["site_hint"] = extracted_site
- logger.info(f"Extracted site hint: '{extracted_site}'")
+ searchQuery, extractedSite = _extractSiteHint(searchQuery)
+ if extractedSite:
+ searchOptions["site_hint"] = extractedSite
+ logger.info(f"Extracted site hint: '{extractedSite}'")
# Extract name="..." if present (for quoted multi-word names)
- name_match = re.search(r"name=\"([^\"]+)\"", searchQuery)
- if name_match:
- searchQuery = name_match.group(1)
+ nameMatch = re.search(r"name=\"([^\"]+)\"", searchQuery)
+ if nameMatch:
+ searchQuery = nameMatch.group(1)
logger.info(f"Extracted name from quotes: '{searchQuery}'")
# Check for search mode specification (exact:, regex:, case:, and:)
if searchQuery.startswith(("exact:", "regex:", "case:", "and:")):
- mode_parts = searchQuery.split(':', 1)
- mode = mode_parts[0].strip()
- searchQuery = mode_parts[1].strip()
+ modeParts = searchQuery.split(':', 1)
+ mode = modeParts[0].strip()
+ searchQuery = modeParts[1].strip()
if mode == "exact":
searchOptions["exact_match"] = True
@@ -245,26 +317,66 @@ class MethodSharepoint(MethodBase):
searchOptions["and_terms"] = True
# Check if it contains path:search format
+ # Microsoft-standard paths: /sites/SiteName/Path:files:.pdf
if ':' in searchQuery:
- parts = searchQuery.split(':', 1) # Split only on first colon
- path_part = parts[0].strip()
- search_part = parts[1].strip()
+ # For Microsoft-standard paths (/sites/...), find the colon that separates path from search
+ if searchQuery.startswith('/sites/'):
+ # Find the colon that separates path from search (after the full path)
+ # Look for pattern: /sites/SiteName/Path/...:files:.pdf
+ # We need to find the colon that's followed by search type or file extension
+ colonPositions = []
+ for i, char in enumerate(searchQuery):
+ if char == ':':
+ colonPositions.append(i)
+
+ # If we have colons, find the one that's followed by search type or file extension
+ splitPos = None
+ if colonPositions:
+ for pos in colonPositions:
+ afterColon = searchQuery[pos+1:pos+10].strip().lower()
+ # Check if this colon is followed by search type or looks like a file extension
+ if afterColon.startswith(('files:', 'folders:', 'all:', '.')) or afterColon == '':
+ splitPos = pos
+ break
+
+ # If no clear split found, use the last colon
+ if splitPos is None and colonPositions:
+ splitPos = colonPositions[-1]
+
+                if splitPos is not None:
+ pathPart = searchQuery[:splitPos].strip()
+ searchPart = searchQuery[splitPos+1:].strip()
+ else:
+ # Fallback: split on first colon
+ parts = searchQuery.split(':', 1)
+ pathPart = parts[0].strip()
+ searchPart = parts[1].strip()
+ else:
+ # Regular path:search format - split on first colon
+ parts = searchQuery.split(':', 1)
+ pathPart = parts[0].strip()
+ searchPart = parts[1].strip()
+
+ # Check if searchPart starts with search type (files:, folders:, all:)
+ if searchPart.startswith(("files:", "folders:", "all:")):
+ typeParts = searchPart.split(':', 1)
+ searchType = typeParts[0].strip() # Update searchType
+ searchPart = typeParts[1].strip() if len(typeParts) > 1 else ""
# Handle path part
- if not path_part or path_part == "*":
+ if not pathPart or pathPart == "*":
pathQuery = "*"
- elif path_part.startswith('/'):
- pathQuery = path_part
+ elif pathPart.startswith('/'):
+ pathQuery = pathPart
else:
- pathQuery = f"/Documents/{path_part}"
+ pathQuery = f"/Documents/{pathPart}"
# Handle search part
- if not search_part or search_part == "*":
+ if not searchPart or searchPart == "*":
fileQuery = "*"
else:
- fileQuery = search_part
+ fileQuery = searchPart
- # Use search_part as fileQuery (name extraction already handled above)
return pathQuery, fileQuery, searchType, searchOptions
# No colon - check if it looks like a path
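
A worked trace of the colon-splitting logic above for a Microsoft-standard path:

    # '/sites/company-share/Freigegebene Dokumente/expenses:files:.pdf'
    #   the first colon is followed by 'files:'  -> splitPos lands there
    #   pathPart   = '/sites/company-share/Freigegebene Dokumente/expenses'
    #   searchPart = 'files:.pdf'                -> searchType 'files', searchPart '.pdf'
    #   result: pathQuery = pathPart, fileQuery = '.pdf'
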
@@ -303,42 +415,42 @@ class MethodSharepoint(MethodBase):
return ["*"] # Global search across all sites
# Split by semicolon to handle multiple paths
- raw_paths = [path.strip() for path in pathQuery.split(';') if path.strip()]
- resolved_paths = []
+ rawPaths = [path.strip() for path in pathQuery.split(';') if path.strip()]
+ resolvedPaths = []
- for raw_path in raw_paths:
+ for rawPath in rawPaths:
# Handle wildcards - return as-is
- if '*' in raw_path:
- resolved_paths.append(raw_path)
+ if '*' in rawPath:
+ resolvedPaths.append(rawPath)
# Handle absolute paths
- elif raw_path.startswith('/'):
- resolved_paths.append(raw_path)
+ elif rawPath.startswith('/'):
+ resolvedPaths.append(rawPath)
# Handle single word relative paths - prepend default folder
# BUT NOT space-separated words (those are search terms, not paths)
- elif ' ' not in raw_path:
- resolved_paths.append(f"/Documents/{raw_path}")
+ elif ' ' not in rawPath:
+ resolvedPaths.append(f"/Documents/{rawPath}")
else:
# Check if this looks like a path (has path separators) or search terms
- if '\\' in raw_path or '/' in raw_path:
+ if '\\' in rawPath or '/' in rawPath:
# This looks like a path with spaces in folder names - treat as valid path
- resolved_paths.append(raw_path)
- logger.info(f"Path with spaces '{raw_path}' treated as valid folder path")
+ resolvedPaths.append(rawPath)
+ logger.info(f"Path with spaces '{rawPath}' treated as valid folder path")
else:
# Space-separated words without path separators are search terms
# Return as "*" to search globally
- logger.info(f"Space-separated words '{raw_path}' treated as search terms, not folder path")
- resolved_paths.append("*")
+ logger.info(f"Space-separated words '{rawPath}' treated as search terms, not folder path")
+ resolvedPaths.append("*")
# Remove duplicates while preserving order
seen = set()
- unique_paths = []
- for path in resolved_paths:
+ uniquePaths = []
+ for path in resolvedPaths:
if path not in seen:
seen.add(path)
- unique_paths.append(path)
+ uniquePaths.append(path)
- logger.info(f"Resolved pathQuery '{pathQuery}' to {len(unique_paths)} paths: {unique_paths}")
- return unique_paths
+ logger.info(f"Resolved pathQuery '{pathQuery}' to {len(uniquePaths)} paths: {uniquePaths}")
+ return uniquePaths
except Exception as e:
logger.error(f"Error resolving pathQuery '{pathQuery}': {str(e)}")
@@ -359,6 +471,81 @@ class MethodSharepoint(MethodBase):
logger.error(f"Error parsing site URL {siteUrl}: {str(e)}")
return {"hostname": "", "sitePath": ""}
+ def _cleanSearchQuery(self, query: str) -> str:
+ """
+ Clean search query to make it compatible with Graph API KQL syntax.
+ Removes path-like syntax and invalid KQL constructs.
+
+ Parameters:
+ query (str): Raw search query that may contain paths and invalid syntax
+
+ Returns:
+ str: Cleaned query suitable for Graph API search endpoint
+ """
+ if not query or not query.strip():
+ return ""
+
+ query = query.strip()
+
+ # Handle patterns like: "Company Share/Freigegebene Dokumente/.../expenses:files:.pdf"
+ # Extract the search term and file extension
+
+ # First, extract file extension if present (format: :files:.pdf or just .pdf at the end)
+ fileExtension = ""
+ if ':files:' in query.lower() or ':folders:' in query.lower():
+ # Extract extension after the type filter
+ extMatch = re.search(r':(?:files|folders):(\.\w+)', query, re.IGNORECASE)
+ if extMatch:
+ fileExtension = extMatch.group(1)
+ # Remove the type filter part
+ query = re.sub(r':(?:files|folders):\.?\w*', '', query, flags=re.IGNORECASE)
+ elif query.endswith(('.pdf', '.doc', '.docx', '.xls', '.xlsx', '.txt', '.csv', '.ppt', '.pptx')):
+ # Extract extension from end
+ extMatch = re.search(r'(\.\w+)$', query)
+ if extMatch:
+ fileExtension = extMatch.group(1)
+ query = query[:-len(fileExtension)]
+
+ # Extract search term: get the last segment after the last slash (filename part)
+ queryNormalized = query.replace('\\', '/')
+ if '/' in queryNormalized:
+ # Extract the last segment (usually the filename/search term)
+ lastSegment = queryNormalized.split('/')[-1]
+ # Remove any remaining colons or type filters
+ if ':' in lastSegment:
+ lastSegment = lastSegment.split(':')[0]
+ searchTerm = lastSegment.strip()
+ else:
+ # No path separators, use the query as-is but remove type filters
+ if ':' in query:
+ searchTerm = query.split(':')[0].strip()
+ else:
+ searchTerm = query.strip()
+
+ # Remove any remaining type filters or invalid syntax
+ searchTerm = re.sub(r':(?:files|folders|all):?', '', searchTerm, flags=re.IGNORECASE)
+ searchTerm = searchTerm.strip()
+
+ # If we have a file extension, include it in the search term
+ # Note: Graph API search endpoint may not support filetype: syntax
+ # So we include the extension as part of the search term or filter results after
+ if fileExtension:
+ extWithoutDot = fileExtension.lstrip('.')
+ # Try simple approach: add extension as search term
+ # If this doesn't work, we'll filter results after search
+ if searchTerm:
+ # Include extension in search - Graph API will search in filename
+ searchTerm = f"{searchTerm} {extWithoutDot}"
+ else:
+ searchTerm = extWithoutDot
+
+ # Final cleanup: remove any remaining invalid characters for KQL
+ # Keep alphanumeric, spaces, hyphens, underscores, dots, and common search operators
+ searchTerm = re.sub(r'[^\w\s\-\.\*]', ' ', searchTerm)
+ searchTerm = ' '.join(searchTerm.split()) # Normalize whitespace
+
+ return searchTerm if searchTerm else "*"
+
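
What `_cleanSearchQuery` produces for the query shapes it is written to handle (reference trace):

    # 'Company Share/Freigegebene Dokumente/expenses:files:.pdf'
    #     -> extension '.pdf' captured, type filter stripped,
    #        last path segment kept as the term  -> 'expenses pdf'
    # 'budget.xlsx'                              -> 'budget xlsx'
    # ''                                         -> ''
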
async def _makeGraphApiCall(self, endpoint: str, method: str = "GET", data: bytes = None) -> Dict[str, Any]:
"""Make a Microsoft Graph API call with timeout and detailed logging"""
try:
@@ -386,9 +573,9 @@ class MethodSharepoint(MethodBase):
logger.debug(f"Graph API success: {len(str(result))} characters response")
return result
else:
- error_text = await response.text()
- logger.error(f"Graph API call failed: {response.status} - {error_text}")
- return {"error": f"API call failed: {response.status} - {error_text}"}
+ errorText = await response.text()
+ logger.error(f"Graph API call failed: {response.status} - {errorText}")
+ return {"error": f"API call failed: {response.status} - {errorText}"}
elif method == "PUT":
logger.debug(f"Starting PUT request to {url}")
@@ -399,9 +586,9 @@ class MethodSharepoint(MethodBase):
logger.debug(f"Graph API success: {len(str(result))} characters response")
return result
else:
- error_text = await response.text()
- logger.error(f"Graph API call failed: {response.status} - {error_text}")
- return {"error": f"API call failed: {response.status} - {error_text}"}
+ errorText = await response.text()
+ logger.error(f"Graph API call failed: {response.status} - {errorText}")
+ return {"error": f"API call failed: {response.status} - {errorText}"}
elif method == "POST":
logger.debug(f"Starting POST request to {url}")
@@ -412,9 +599,9 @@ class MethodSharepoint(MethodBase):
logger.debug(f"Graph API success: {len(str(result))} characters response")
return result
else:
- error_text = await response.text()
- logger.error(f"Graph API call failed: {response.status} - {error_text}")
- return {"error": f"API call failed: {response.status} - {error_text}"}
+ errorText = await response.text()
+ logger.error(f"Graph API call failed: {response.status} - {errorText}")
+ return {"error": f"API call failed: {response.status} - {errorText}"}
except asyncio.TimeoutError:
logger.error(f"Graph API call timed out after 30 seconds: {endpoint}")
@@ -423,10 +610,10 @@ class MethodSharepoint(MethodBase):
logger.error(f"Error making Graph API call: {str(e)}")
return {"error": f"Error making Graph API call: {str(e)}"}
- async def _getSiteId(self, hostname: str, site_path: str) -> str:
+ async def _getSiteId(self, hostname: str, sitePath: str) -> str:
"""Get SharePoint site ID from hostname and site path"""
try:
- endpoint = f"sites/{hostname}:/{site_path}"
+ endpoint = f"sites/{hostname}:/{sitePath}"
result = await self._makeGraphApiCall(endpoint)
if "error" in result:
@@ -464,35 +651,61 @@ class MethodSharepoint(MethodBase):
# Parse searchQuery to extract path, search terms, search type, and options
pathQuery, fileQuery, searchType, searchOptions = self._parseSearchQuery(searchQuery)
+ logger.debug(f"Parsed searchQuery '{searchQuery}' -> pathQuery='{pathQuery}', fileQuery='{fileQuery}', searchType='{searchType}'")
connection = self._getMicrosoftConnection(connectionReference)
if not connection:
return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
- # Discover SharePoint sites - use targeted approach when site parameter is provided
- if site:
- # When site parameter is provided, discover all sites first, then filter
- all_sites = await self._discoverSharePointSites()
- if not all_sites:
- return ActionResult.isFailure(error="No SharePoint sites found or accessible")
+ # Extract site name from pathQuery if it contains Microsoft-standard path (/sites/SiteName/...)
+ siteFromPath = None
+ directSite = None
+ if pathQuery and pathQuery.startswith('/sites/'):
+ parsedPath = self._extractSiteFromStandardPath(pathQuery)
+ if parsedPath:
+ siteFromPath = parsedPath.get("siteName")
+ logger.info(f"Extracted site from Microsoft-standard pathQuery '{pathQuery}': '{siteFromPath}'")
+
+                # Try to get the site directly by path (optimization: avoids loading every site)
+ directSite = await self._getSiteByStandardPath(siteFromPath)
+ if directSite:
+ logger.info(f"Got site directly by standard path - no need to discover all sites")
+ sites = [directSite]
+ else:
+ logger.warning(f"Could not get site directly, falling back to site discovery")
+ directSite = None
+ else:
+ logger.warning(f"Failed to parse site from standard pathQuery '{pathQuery}'")
+
+ # If we didn't get the site directly, use discovery and filtering
+ if not directSite:
+ # Determine which site hint to use (priority: site parameter > site from pathQuery > site_hint from searchOptions)
+ siteHintToUse = site or siteFromPath or searchOptions.get("site_hint")
- sites = self._filter_sites_by_hint(all_sites, site)
- logger.info(f"Filtered sites by site parameter: '{site}' -> {len(sites)} sites")
- if not sites:
- return ActionResult.isFailure(error=f"No SharePoint sites found matching '{site}'")
- else:
- # No site parameter - discover all sites
- sites = await self._discoverSharePointSites()
- if not sites:
- return ActionResult.isFailure(error="No SharePoint sites found or accessible")
+ # Discover SharePoint sites - use targeted approach when site hint is available
+ if siteHintToUse:
+ # When site hint is available, discover all sites first, then filter
+ allSites = await self._discoverSharePointSites()
+ if not allSites:
+ return ActionResult.isFailure(error="No SharePoint sites found or accessible")
+
+ sites = self._filterSitesByHint(allSites, siteHintToUse)
+ logger.info(f"Filtered sites by site hint '{siteHintToUse}' -> {len(sites)} sites")
+ if not sites:
+ return ActionResult.isFailure(error=f"No SharePoint sites found matching '{siteHintToUse}'")
+ else:
+ # No site hint - discover all sites
+ sites = await self._discoverSharePointSites()
+ if not sites:
+ return ActionResult.isFailure(error="No SharePoint sites found or accessible")
# Resolve path query into search paths
- search_paths = self._resolvePathQuery(pathQuery)
+ searchPaths = self._resolvePathQuery(pathQuery)
try:
# Search across all discovered sites
- found_documents = []
- all_sites_searched = []
+ foundDocuments = []
+ allSitesSearched = []
# Handle different search approaches based on search type
if searchType == "folders" and fileQuery and fileQuery.strip() != "" and fileQuery.strip() != "*":
@@ -504,40 +717,40 @@ class MethodSharepoint(MethodBase):
if len(terms) > 1:
# Multiple terms: search for ALL terms (AND) - more specific results
- query_string = " AND ".join(terms)
+ queryString = " AND ".join(terms)
else:
# Single term: search for the term
- query_string = terms[0] if terms else fileQuery
- logger.info(f"Using unified search for folders: {query_string}")
+ queryString = terms[0] if terms else fileQuery
+ logger.info(f"Using unified search for folders: {queryString}")
payload = {
"requests": [
{
"entityTypes": ["driveItem"],
- "query": {"queryString": query_string},
+ "query": {"queryString": queryString},
"from": 0,
"size": 50
}
]
}
- logger.info(f"Using unified search API for folders with queryString: {query_string}")
+ logger.info(f"Using unified search API for folders with queryString: {queryString}")
# Use global search endpoint (site-specific search not available)
- unified_result = await self._makeGraphApiCall(
+ unifiedResult = await self._makeGraphApiCall(
"search/query",
method="POST",
data=json.dumps(payload).encode("utf-8")
)
- if "error" in unified_result:
- logger.warning(f"Unified search failed: {unified_result['error']}")
+ if "error" in unifiedResult:
+ logger.warning(f"Unified search failed: {unifiedResult['error']}")
items = []
else:
# Flatten hits -> driveItem resources
items = []
- for container in (unified_result.get("value", []) or []):
- for hits_container in (container.get("hitsContainers", []) or []):
- for hit in (hits_container.get("hits", []) or []):
+ for container in (unifiedResult.get("value", []) or []):
+ for hitsContainer in (container.get("hitsContainers", []) or []):
+ for hit in (hitsContainer.get("hits", []) or []):
resource = hit.get("resource")
if resource:
items.append(resource)
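
The flattening loop above assumes the documented response shape of POST search/query (abridged):

    # {
    #   "value": [
    #     { "hitsContainers": [
    #         { "hits": [
    #             { "resource": { "id": "...", "name": "...", "webUrl": "..." } }
    #         ] }
    #     ] }
    #   ]
    # }
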
@@ -545,125 +758,125 @@ class MethodSharepoint(MethodBase):
logger.info(f"Unified search returned {len(items)} items (pre-filter)")
# Apply our improved folder detection logic
- folder_items = []
+ folderItems = []
for item in items:
resource = item
# Use the same detection logic as our test
- is_folder = False
+ isFolder = False
if 'folder' in resource:
- is_folder = True
+ isFolder = True
else:
# Try to detect by URL pattern or other indicators
- web_url = resource.get('webUrl', '')
+ webUrl = resource.get('webUrl', '')
name = resource.get('name', '')
# Check if URL has no file extension and looks like a folder path
- if '.' not in name and ('/' in web_url or '\\' in web_url):
- is_folder = True
+ if '.' not in name and ('/' in webUrl or '\\' in webUrl):
+ isFolder = True
- if is_folder:
- folder_items.append(item)
+ if isFolder:
+ folderItems.append(item)
- items = folder_items
+ items = folderItems
logger.info(f"Filtered to {len(items)} folders using improved detection logic")
# Process unified search results - extract site information from webUrl
for item in items:
- item_name = item.get("name", "")
- web_url = item.get("webUrl", "")
+ itemName = item.get("name", "")
+ webUrl = item.get("webUrl", "")
# Extract site information from webUrl
- site_name = "Unknown Site"
- site_id = "unknown"
+ siteName = "Unknown Site"
+ siteId = "unknown"
- if web_url and '/sites/' in web_url:
+ if webUrl and '/sites/' in webUrl:
try:
# Extract site name from URL: https://pcuster.sharepoint.com/sites/SiteName/...
- url_parts = web_url.split('/sites/')
- if len(url_parts) > 1:
- site_path = url_parts[1].split('/')[0]
+ urlParts = webUrl.split('/sites/')
+ if len(urlParts) > 1:
+ sitePath = urlParts[1].split('/')[0]
# Find matching site from discovered sites
# First try to match by site name (URL path)
for site in sites:
- if site.get("name") == site_path:
- site_name = site.get("displayName", site_path)
- site_id = site.get("id", "unknown")
+ if site.get("name") == sitePath:
+ siteName = site.get("displayName", sitePath)
+ siteId = site.get("id", "unknown")
break
else:
# If no match by name, try to match by displayName
for site in sites:
- if site.get("displayName") == site_path:
- site_name = site.get("displayName", site_path)
- site_id = site.get("id", "unknown")
+ if site.get("displayName") == sitePath:
+ siteName = site.get("displayName", sitePath)
+ siteId = site.get("id", "unknown")
break
else:
# If no exact match, use the site path as site name
- site_name = site_path
+ siteName = sitePath
# Try to find a site with similar name
for site in sites:
- if site_path.lower() in site.get("name", "").lower() or site_path.lower() in site.get("displayName", "").lower():
- site_name = site.get("displayName", site_path)
- site_id = site.get("id", "unknown")
+ if sitePath.lower() in site.get("name", "").lower() or sitePath.lower() in site.get("displayName", "").lower():
+ siteName = site.get("displayName", sitePath)
+ siteId = site.get("id", "unknown")
break
except Exception as e:
- logger.warning(f"Error extracting site info from URL {web_url}: {e}")
+ logger.warning(f"Error extracting site info from URL {webUrl}: {e}")
# Use improved folder detection logic
- is_folder = False
+ isFolder = False
if 'folder' in item:
- is_folder = True
+ isFolder = True
else:
# Try to detect by URL pattern or other indicators
name = item.get('name', '')
# Check if URL has no file extension and looks like a folder path
- if '.' not in name and ('/' in web_url or '\\' in web_url):
- is_folder = True
+ if '.' not in name and ('/' in webUrl or '\\' in webUrl):
+ isFolder = True
- item_type = "folder" if is_folder else "file"
- item_path = item.get("parentReference", {}).get("path", "")
- logger.debug(f"Processing {item_type}: '{item_name}' at path: '{item_path}'")
+ itemType = "folder" if isFolder else "file"
+ itemPath = item.get("parentReference", {}).get("path", "")
+ logger.debug(f"Processing {itemType}: '{itemName}' at path: '{itemPath}'")
# Simple filtering like test file - just check search type
- if searchType == "files" and is_folder:
+ if searchType == "files" and isFolder:
continue # Skip folders when searching for files
- elif searchType == "folders" and not is_folder:
+ elif searchType == "folders" and not isFolder:
continue # Skip files when searching for folders
# Simple approach like test file - no complex filtering
- logger.debug(f"Item '{item_name}' found - adding to results")
+ logger.debug(f"Item '{itemName}' found - adding to results")
# Create result with full path information for proper action chaining
- parent_path = item.get("parentReference", {}).get("path", "")
+ parentPath = item.get("parentReference", {}).get("path", "")
# Extract the full SharePoint path from webUrl or parentReference
- full_path = ""
- if web_url:
+ fullPath = ""
+ if webUrl:
# Extract path from webUrl: https://pcuster.sharepoint.com/sites/SSSRESYNachfolge/Freigegebene%20Dokumente/General/Eskalation%20LogObject/Druckersteuerung
- if '/sites/' in web_url:
- path_part = web_url.split('/sites/')[1]
+ if '/sites/' in webUrl:
+ pathPart = webUrl.split('/sites/')[1]
# Decode URL encoding and convert to backslash format
- decoded_path = urllib.parse.unquote(path_part)
- full_path = "\\" + decoded_path.replace('/', '\\')
- elif parent_path:
+ decodedPath = urllib.parse.unquote(pathPart)
+ fullPath = "\\" + decodedPath.replace('/', '\\')
+ elif parentPath:
# Use parentReference path if available
- full_path = parent_path.replace('/', '\\')
+ fullPath = parentPath.replace('/', '\\')
- doc_info = {
+ docInfo = {
"id": item.get("id"),
"name": item.get("name"),
- "type": "folder" if is_folder else "file",
- "siteName": site_name,
- "siteId": site_id,
- "webUrl": web_url,
- "fullPath": full_path,
- "parentPath": parent_path
+ "type": "folder" if isFolder else "file",
+ "siteName": siteName,
+ "siteId": siteId,
+ "webUrl": webUrl,
+ "fullPath": fullPath,
+ "parentPath": parentPath
}
- found_documents.append(doc_info)
+ foundDocuments.append(docInfo)
- logger.info(f"Found {len(found_documents)} documents from unified search")
+ logger.info(f"Found {len(foundDocuments)} documents from unified search")
except Exception as e:
logger.error(f"Error performing unified folder search: {str(e)}")
@@ -671,118 +884,174 @@ class MethodSharepoint(MethodBase):
pass
# If no unified search was performed or it failed, fall back to site-by-site search
- if not found_documents:
+ if not foundDocuments:
# Use simple approach like test file - no complex filtering
- site_scoped_sites = sites
+ siteScopedSites = sites
- for site in site_scoped_sites:
- site_id = site["id"]
- site_name = site["displayName"]
- site_url = site["webUrl"]
+ for site in siteScopedSites:
+ siteId = site["id"]
+ siteName = site["displayName"]
+ siteUrl = site["webUrl"]
- logger.info(f"Searching in site: {site_name} ({site_url})")
+ logger.info(f"Searching in site: {siteName} ({siteUrl})")
+
+ # Check if pathQuery contains a specific folder path (not just /sites/SiteName)
+ folderPath = None
+ if pathQuery and pathQuery.startswith('/sites/'):
+ parsedPath = self._extractSiteFromStandardPath(pathQuery)
+ if parsedPath:
+ innerPath = parsedPath.get("innerPath", "")
+ if innerPath and innerPath.strip():
+ # Remove leading slash if present
+ folderPath = innerPath.lstrip('/')
+
+                        # Probe with the first segment removed: in the Graph API, /drive/root
+                        # already points at the default document library, so the (localized)
+                        # library name, e.g. 'Freigegebene Dokumente', usually has to be stripped.
+                        # We verify against the actual API response, so this works in any tenant
+                        # language; if the probe fails, the original path is kept.
+ pathSegments = [s for s in folderPath.split('/') if s.strip()]
+ if len(pathSegments) > 1:
+ # Try with first segment removed (first segment is likely the document library)
+ testPath = '/'.join(pathSegments[1:])
+ # Quick test: try to get folder info (this is fast and doesn't require full search)
+                            testEndpoint = f"sites/{siteId}/drive/root:/{urllib.parse.quote(testPath, safe='/')}:"
+ testResult = await self._makeGraphApiCall(testEndpoint)
+ if testResult and "error" not in testResult:
+ # Path without first segment works - first segment was likely the document library
+ folderPath = testPath
+ logger.info(f"Removed document library name '{pathSegments[0]}' from folder path (tested via API)")
+ else:
+ # Keep original path - first segment is not a document library
+ logger.info(f"Keeping original folder path '{folderPath}' (first segment is not a document library)")
+ elif len(pathSegments) == 1:
+ # Only one segment - likely the document library itself, use root
+ folderPath = None
+ logger.info(f"Only one segment '{pathSegments[0]}' found, likely document library - using root")
+
+ if folderPath:
+ logger.info(f"Extracted folder path from pathQuery: '{folderPath}'")
+ else:
+ logger.info(f"Folder path resolved to root (only document library in path)")
# Use Microsoft Graph API for this specific site
# Handle empty or wildcard queries
if not fileQuery or fileQuery.strip() == "" or fileQuery.strip() == "*":
- # For wildcard/empty queries, list all items in the drive
- endpoint = f"sites/{site_id}/drive/root/children"
+ # For wildcard/empty queries, list all items
+ if folderPath:
+                        # List items in a specific folder (keep '/' unencoded so nested paths resolve)
+                        encodedPath = urllib.parse.quote(folderPath, safe='/')
+ endpoint = f"sites/{siteId}/drive/root:/{encodedPath}:/children"
+ logger.info(f"Listing items in folder: '{folderPath}'")
+ else:
+ # List all items in the drive root
+ endpoint = f"sites/{siteId}/drive/root/children"
else:
# For files, use regular search API
- search_query = fileQuery.replace("'", "''") # Escape single quotes for OData
- endpoint = f"sites/{site_id}/drive/root/search(q='{search_query}')"
- logger.info(f"Using search API for files with query: '{search_query}'")
+                        # Clean the query (strip path-like syntax and invalid KQL). Use a
+                        # separate name so the searchQuery parameter, echoed in the result
+                        # payload, is not clobbered.
+                        cleanedQuery = self._cleanSearchQuery(fileQuery)
+                        # URL-encode the query parameter
+                        encodedQuery = urllib.parse.quote(cleanedQuery, safe='')
+
+                        if folderPath:
+                            # Search in a specific folder (keep '/' unencoded so nested paths resolve)
+                            encodedPath = urllib.parse.quote(folderPath, safe='/')
+                            endpoint = f"sites/{siteId}/drive/root:/{encodedPath}:/search(q='{encodedQuery}')"
+                            logger.info(f"Searching in folder '{folderPath}' with query: '{cleanedQuery}' (encoded: '{encodedQuery}')")
+                        else:
+                            # Search in drive root
+                            endpoint = f"sites/{siteId}/drive/root/search(q='{encodedQuery}')"
+                            logger.info(f"Using search API for files with query: '{cleanedQuery}' (encoded: '{encodedQuery}')")
# Make the search API call (files)
- search_result = await self._makeGraphApiCall(endpoint)
- if "error" in search_result:
- logger.warning(f"Search failed for site {site_name}: {search_result['error']}")
+ searchResult = await self._makeGraphApiCall(endpoint)
+ if "error" in searchResult:
+ logger.warning(f"Search failed for site {siteName}: {searchResult['error']}")
continue
# Process search results for this site (files)
- items = search_result.get("value", [])
- logger.info(f"Retrieved {len(items)} items from site {site_name}")
+ items = searchResult.get("value", [])
+ logger.info(f"Retrieved {len(items)} items from site {siteName}")
- site_documents = []
+ siteDocuments = []
for item in items:
- item_name = item.get("name", "")
+ itemName = item.get("name", "")
# Use improved folder detection logic
- is_folder = False
+ isFolder = False
if 'folder' in item:
- is_folder = True
+ isFolder = True
else:
# Try to detect by URL pattern or other indicators
- web_url = item.get('webUrl', '')
+ webUrl = item.get('webUrl', '')
name = item.get('name', '')
# Check if URL has no file extension and looks like a folder path
- if '.' not in name and ('/' in web_url or '\\' in web_url):
- is_folder = True
+ if '.' not in name and ('/' in webUrl or '\\' in webUrl):
+ isFolder = True
- item_type = "folder" if is_folder else "file"
- item_path = item.get("parentReference", {}).get("path", "")
- logger.debug(f"Processing {item_type}: '{item_name}' at path: '{item_path}'")
+ itemType = "folder" if isFolder else "file"
+ itemPath = item.get("parentReference", {}).get("path", "")
+ logger.debug(f"Processing {itemType}: '{itemName}' at path: '{itemPath}'")
# Simple filtering like test file - just check search type
- if searchType == "files" and is_folder:
+ if searchType == "files" and isFolder:
continue # Skip folders when searching for files
- elif searchType == "folders" and not is_folder:
+ elif searchType == "folders" and not isFolder:
continue # Skip files when searching for folders
# Simple approach like test file - no complex filtering
- logger.debug(f"Item '{item_name}' found - adding to results")
+ logger.debug(f"Item '{itemName}' found - adding to results")
# Create result with full path information for proper action chaining
- web_url = item.get("webUrl", "")
- parent_path = item.get("parentReference", {}).get("path", "")
+ webUrl = item.get("webUrl", "")
+ parentPath = item.get("parentReference", {}).get("path", "")
# Extract the full SharePoint path from webUrl or parentReference
- full_path = ""
- if web_url:
+ fullPath = ""
+ if webUrl:
# Extract path from webUrl: https://pcuster.sharepoint.com/sites/SSSRESYNachfolge/Freigegebene%20Dokumente/General/Eskalation%20LogObject/Druckersteuerung
- if '/sites/' in web_url:
- path_part = web_url.split('/sites/')[1]
+ if '/sites/' in webUrl:
+ pathPart = webUrl.split('/sites/')[1]
# Decode URL encoding and convert to backslash format
- decoded_path = urllib.parse.unquote(path_part)
- full_path = "\\" + decoded_path.replace('/', '\\')
- elif parent_path:
+ decodedPath = urllib.parse.unquote(pathPart)
+ fullPath = "\\" + decodedPath.replace('/', '\\')
+ elif parentPath:
# Use parentReference path if available
- full_path = parent_path.replace('/', '\\')
+ fullPath = parentPath.replace('/', '\\')
- doc_info = {
+ docInfo = {
"id": item.get("id"),
"name": item.get("name"),
- "type": "folder" if is_folder else "file",
- "siteName": site_name,
- "siteId": site_id,
- "webUrl": web_url,
- "fullPath": full_path,
- "parentPath": parent_path
+ "type": "folder" if isFolder else "file",
+ "siteName": siteName,
+ "siteId": siteId,
+ "webUrl": webUrl,
+ "fullPath": fullPath,
+ "parentPath": parentPath
}
- site_documents.append(doc_info)
+ siteDocuments.append(docInfo)
- found_documents.extend(site_documents)
- all_sites_searched.append({
- "siteName": site_name,
- "siteUrl": site_url,
- "siteId": site_id,
- "documentsFound": len(site_documents)
+ foundDocuments.extend(siteDocuments)
+ allSitesSearched.append({
+ "siteName": siteName,
+ "siteUrl": siteUrl,
+ "siteId": siteId,
+ "documentsFound": len(siteDocuments)
})
- logger.info(f"Found {len(site_documents)} documents in site {site_name}")
+ logger.info(f"Found {len(siteDocuments)} documents in site {siteName}")
# Limit total results to maxResults
- if len(found_documents) > maxResults:
- found_documents = found_documents[:maxResults]
+ if len(foundDocuments) > maxResults:
+ foundDocuments = foundDocuments[:maxResults]
logger.info(f"Limited results to {maxResults} items")
- result_data = {
+ resultData = {
"searchQuery": searchQuery,
- "totalResults": len(found_documents),
+ "totalResults": len(foundDocuments),
"maxResults": maxResults,
- "foundDocuments": found_documents,
+ "foundDocuments": foundDocuments,
"timestamp": self.services.utils.timestampGetUtc()
}
@@ -791,17 +1060,17 @@ class MethodSharepoint(MethodBase):
return ActionResult.isFailure(error=str(e))
# Use default JSON format for output
- output_extension = ".json" # Default
- output_mime_type = "application/json" # Default
+ outputExtension = ".json" # Default
+ outputMimeType = "application/json" # Default
return ActionResult(
success=True,
documents=[
ActionDocument(
- documentName=f"sharepoint_find_path_{self._format_timestamp_for_filename()}{output_extension}",
- documentData=json.dumps(result_data, indent=2),
- mimeType=output_mime_type
+ documentName=f"sharepoint_find_path_{self._format_timestamp_for_filename()}{outputExtension}",
+ documentData=json.dumps(resultData, indent=2),
+ mimeType=outputMimeType
)
]
)
@@ -815,15 +1084,24 @@ class MethodSharepoint(MethodBase):
"""
GENERAL:
- Purpose: Read documents from SharePoint and extract content/metadata.
- - Input requirements: connectionReference (required); documentList (required); optional pathObject or pathQuery; includeMetadata.
- - Output format: JSON with read results per document.
+ - Input requirements: connectionReference (required); optional documentList, pathObject, or pathQuery; includeMetadata.
+ - Output format: Standardized ActionDocument format (documentName, documentData, mimeType).
+ - Binary files (PDFs, etc.) are Base64-encoded in documentData.
+ - Text files are stored as plain text in documentData.
+ - Returns ActionResult with documents list for template processing.
Parameters:
- - documentList (list, required): Document list reference(s) to read.
- connectionReference (str, required): Microsoft connection label.
- - pathObject (str, optional): Reference to a previous path result.
- - pathQuery (str, optional): Path query if no pathObject.
+ - pathObject (str, optional): Reference to a previous path result (from findDocumentPath).
+ - documentList (list, optional): Document list reference(s) to read (backward compatibility).
+ - pathQuery (str, optional): Path query if no pathObject (backward compatibility).
- includeMetadata (bool, optional): Include metadata. Default: True.
+
+ Returns:
+ - ActionResult with documents: List[ActionDocument] where each ActionDocument contains:
+ - documentName: File name
+ - documentData: Base64-encoded content (binary files) or plain text (text files)
+ - mimeType: MIME type (e.g., application/pdf, text/plain)
"""
try:
documentList = parameters.get("documentList")
@@ -834,48 +1112,196 @@ class MethodSharepoint(MethodBase):
pathObject = parameters.get("pathObject")
includeMetadata = parameters.get("includeMetadata", True)
- if not documentList or not connectionReference:
- return ActionResult.isFailure(error="Document list reference and connection reference are required")
+ # Validate connection reference
+ if not connectionReference:
+ return ActionResult.isFailure(error="Connection reference is required")
- # If pathObject is provided, extract folder IDs from it
- # Note: pathObject takes precedence over pathQuery when both are provided
+ # Get connection first - needed for both pathObject and documentList approaches
+ connection = self._getMicrosoftConnection(connectionReference)
+ if not connection:
+ return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
+
+ # If pathObject is provided, extract SharePoint file IDs and read them directly
+ # pathObject contains the result from findDocumentPath with foundDocuments array
+ sharePointFileIds = None
+ sites = None
if pathObject:
if pathQuery and pathQuery != "*":
logger.debug(f"Both pathObject and pathQuery provided - using pathObject (pathQuery '{pathQuery}' will be ignored)")
try:
# Resolve the reference label to get the actual document list
- document_list = self.services.chat.getChatDocumentsFromDocumentList([pathObject])
- if not document_list or len(document_list) == 0:
+ pathObjectDocuments = self.services.chat.getChatDocumentsFromDocumentList([pathObject])
+ if not pathObjectDocuments or len(pathObjectDocuments) == 0:
return ActionResult.isFailure(error=f"No document list found for reference: {pathObject}")
- # Get the first document's content (which should be the JSON)
- first_document = document_list[0]
- file_data = self.services.chat.getFileData(first_document.fileId)
- if not file_data:
+ # Get the first document's content (which should be the JSON from findDocumentPath)
+ firstDocument = pathObjectDocuments[0]
+ fileData = self.services.chat.getFileData(firstDocument.fileId)
+ if not fileData:
return ActionResult.isFailure(error=f"No file data found for document: {pathObject}")
# Parse the JSON content
- result_data = json.loads(file_data)
- found_documents = result_data.get("foundDocuments", [])
+ resultData = json.loads(fileData)
+ foundDocuments = resultData.get("foundDocuments", [])
- # Extract folder IDs from the result
- folder_ids = []
- for doc in found_documents:
- if doc.get("type") == "folder":
- folder_ids.append(doc.get("id"))
-
- if folder_ids:
- # Use the first folder ID found as pathQuery
- pathQuery = folder_ids[0]
- logger.info(f"Using folder ID from pathObject: {pathQuery}")
+ if foundDocuments:
+ # Extract SharePoint file IDs from foundDocuments
+ sharePointFileIds = [doc.get("id") for doc in foundDocuments if doc.get("type") == "file"]
+ if not sharePointFileIds:
+ return ActionResult.isFailure(error=f"No files found in pathObject '{pathObject}'")
+ logger.info(f"Extracted {len(sharePointFileIds)} SharePoint file IDs from pathObject '{pathObject}'")
+
+ # Extract site information from foundDocuments
+ if foundDocuments:
+ firstDoc = foundDocuments[0]
+ siteName = firstDoc.get("siteName")
+ siteId = firstDoc.get("siteId")
+
+ if siteName and siteId:
+ sites = [{
+ "id": siteId,
+ "displayName": siteName,
+ "webUrl": firstDoc.get("webUrl", "")
+ }]
+ logger.info(f"Using specific site from pathObject: {siteName} (ID: {siteId})")
+ else:
+ return ActionResult.isFailure(error="Site information missing from pathObject. Cannot determine target site for read operation.")
else:
- return ActionResult.isFailure(error="No folders found in pathObject")
+ return ActionResult.isFailure(error=f"No documents found in pathObject '{pathObject}'")
except json.JSONDecodeError as e:
return ActionResult.isFailure(error=f"Invalid JSON in pathObject: {str(e)}")
except Exception as e:
return ActionResult.isFailure(error=f"Error resolving pathObject reference: {str(e)}")
+ # If we have SharePoint file IDs from pathObject, read them directly
+ if sharePointFileIds and sites:
+ # Read SharePoint files directly using their IDs
+ readResults = []
+ siteId = sites[0]['id']
+
+ for fileId in sharePointFileIds:
+ try:
+ # Get file info from SharePoint
+ endpoint = f"sites/{siteId}/drive/items/{fileId}"
+ fileInfo = await self._makeGraphApiCall(endpoint)
+
+ if "error" in fileInfo:
+ logger.warning(f"Failed to get file info for {fileId}: {fileInfo['error']}")
+ continue
+
+ # Get file content using SharePoint service (handles binary data correctly)
+ fileName = fileInfo.get("name", f"file_{fileId}")
+ fileContent = await self.services.sharepoint.downloadFile(siteId, fileId)
+
+ # Create result document
+ resultItem = {
+ "fileId": fileId,
+ "fileName": fileName,
+ "sharepointFileId": fileId,
+ "siteName": sites[0]['displayName'],
+ "siteUrl": sites[0]['webUrl'],
+ "size": fileInfo.get("size", 0),
+ "createdDateTime": fileInfo.get("createdDateTime"),
+ "lastModifiedDateTime": fileInfo.get("lastModifiedDateTime"),
+ "webUrl": fileInfo.get("webUrl")
+ }
+
+ # Add content if available
+ if fileContent:
+ resultItem["content"] = fileContent
+
+ # Add metadata if requested
+ if includeMetadata:
+ resultItem["metadata"] = {
+ "mimeType": fileInfo.get("file", {}).get("mimeType"),
+ "downloadUrl": fileInfo.get("@microsoft.graph.downloadUrl"),
+ "createdBy": fileInfo.get("createdBy", {}),
+ "lastModifiedBy": fileInfo.get("lastModifiedBy", {}),
+ "parentReference": fileInfo.get("parentReference", {})
+ }
+
+ readResults.append(resultItem)
+ except Exception as e:
+ logger.error(f"Error reading file {fileId}: {str(e)}")
+ continue
+
+ if not readResults:
+ return ActionResult.isFailure(error="No files could be read from pathObject")
+
+ # Convert read results to ActionDocument objects
+ # IMPORTANT: For binary files (PDFs), store Base64-encoded content directly in documentData
+ # The system will create FileData and ChatDocument automatically
+ from modules.datamodels.datamodelChat import ActionDocument
+ import base64
+
+ actionDocuments = []
+ for resultItem in readResults:
+ fileContent = resultItem.get("content")
+ fileName = resultItem.get("fileName", f"file_{resultItem.get('fileId')}")
+
+ # Determine MIME type from metadata or file extension
+ mimeType = "application/octet-stream"
+ if resultItem.get("metadata", {}).get("mimeType"):
+ mimeType = resultItem["metadata"]["mimeType"]
+ elif fileName:
+ if fileName.endswith('.pdf'):
+ mimeType = "application/pdf"
+ elif fileName.endswith('.txt'):
+ mimeType = "text/plain"
+ elif fileName.endswith('.json'):
+ mimeType = "application/json"
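+ # (Sketch of a more general alternative: the standard library's
+ # mimetypes.guess_type(fileName)[0] would cover these and other extensions.)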
+
+ # For binary files (PDFs, etc.), store Base64-encoded content directly
+ # The GenerationService will detect PDF mimeType and handle base64 decoding
+ if fileContent and isinstance(fileContent, bytes):
+ # Encode binary content as Base64 string
+ base64Content = base64.b64encode(fileContent).decode('utf-8')
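+ # e.g. a PDF whose bytes start with b'%PDF-1.7' encodes to a string starting
+ # 'JVBERi0x', which embeds safely in JSON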
+ actionDoc = ActionDocument(
+ documentName=fileName,
+ documentData=base64Content, # Base64 string for binary files
+ mimeType=mimeType
+ )
+ actionDocuments.append(actionDoc)
+ logger.info(f"Stored binary file {fileName} ({len(fileContent)} bytes) as Base64 in ActionDocument")
+ elif fileContent:
+ # Text content - store directly in documentData
+ actionDoc = ActionDocument(
+ documentName=fileName,
+ documentData=fileContent if isinstance(fileContent, str) else str(fileContent),
+ mimeType=mimeType
+ )
+ actionDocuments.append(actionDoc)
+ else:
+ # No content - store metadata only
+ docData = {
+ "fileName": fileName,
+ "sharepointFileId": resultItem.get("sharepointFileId"),
+ "siteName": resultItem.get("siteName"),
+ "siteUrl": resultItem.get("siteUrl"),
+ "size": resultItem.get("size"),
+ "createdDateTime": resultItem.get("createdDateTime"),
+ "lastModifiedDateTime": resultItem.get("lastModifiedDateTime"),
+ "webUrl": resultItem.get("webUrl")
+ }
+ if resultItem.get("metadata"):
+ docData["metadata"] = resultItem["metadata"]
+
+ actionDoc = ActionDocument(
+ documentName=fileName,
+ documentData=json.dumps(docData, indent=2),
+ mimeType=mimeType
+ )
+ actionDocuments.append(actionDoc)
+
+ # Return success with action documents
+ return ActionResult.isSuccess(documents=actionDocuments)
+
+ # Fallback: Use documentList parameter (for backward compatibility)
+ # Validate documentList
+ if not documentList:
+ return ActionResult.isFailure(error="Document list reference is required. Either provide documentList parameter or use pathObject that contains files.")
+
# Get documents from reference - ensure documentList is a list, not a string
# documentList is already normalized above
chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(documentList)
@@ -883,86 +1309,66 @@ class MethodSharepoint(MethodBase):
if not chatDocuments:
return ActionResult.isFailure(error="No documents found for the provided reference")
- connection = self._getMicrosoftConnection(connectionReference)
- if not connection:
- return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
-
# Determine sites to use - strict validation: pathObject → pathQuery → ERROR
- sites = None
-
- # Step 1: Check pathObject first
- if pathObject:
- # When pathObject is provided, we should have specific site information
- # Extract site information from the pathObject result
- try:
- # Get the site information from the first folder in pathObject
- if 'found_documents' in locals() and found_documents:
- first_folder = found_documents[0]
- site_name = first_folder.get("siteName")
- site_id = first_folder.get("siteId")
-
- if site_name and site_id:
- # Use the specific site from pathObject instead of discovering all sites
- sites = [{
- "id": site_id,
- "displayName": site_name,
- "webUrl": first_folder.get("webUrl", "")
- }]
- logger.info(f"Using specific site from pathObject: {site_name} (ID: {site_id})")
- else:
- # Site info missing from pathObject - this is an error
- return ActionResult.isFailure(error="Site information missing from pathObject. Cannot determine target site for read operation.")
- else:
- # No documents found in pathObject - this is an error
- return ActionResult.isFailure(error="No valid folder information found in pathObject. Cannot determine target site for read operation.")
- except Exception as e:
- # Error processing pathObject - this is an error
- return ActionResult.isFailure(error=f"Error processing pathObject: {str(e)}. Cannot determine target site for read operation.")
-
- # Step 2: If no pathObject, check pathQuery
- elif pathQuery and pathQuery.strip() != "" and pathQuery.strip() != "*":
- # Validate pathQuery format
- if not pathQuery.startswith('/'):
- return ActionResult.isFailure(error="pathQuery must start with '/' and include site name with syntax /site:/... e.g. /site:KM LayerFinance/Documents/Work")
-
- # Check if pathQuery contains search terms (words without proper path structure)
- valid_path_prefixes = ['/site:', '/Documents', '/documents', '/Shared Documents', '/shared documents']
- if not any(pathQuery.startswith(prefix) for prefix in valid_path_prefixes):
- return ActionResult.isFailure(error=f"Invalid pathQuery '{pathQuery}'. This appears to be search terms, not a valid SharePoint path. Use findDocumentPath action first to search for folders, then use the returned folder path as pathQuery.")
-
- # For pathQuery, we need to discover sites to find the specific one
- all_sites = await self._discoverSharePointSites()
- if not all_sites:
- return ActionResult.isFailure(error="No SharePoint sites found or accessible")
-
- # If pathQuery starts with /site:, extract site name and filter
- if pathQuery.startswith('/site:'):
- # Extract site name from /site:Company Share/... format
- site_path_part = pathQuery[6:] # Remove '/site:'
- if '/' in site_path_part:
- site_name = site_path_part.split('/', 1)[0]
- else:
- site_name = site_path_part
+ if not sites:
+ # Step 2: If no pathObject, check pathQuery
+ if pathQuery and pathQuery.strip() != "" and pathQuery.strip() != "*":
+ # Validate pathQuery format
+ if not pathQuery.startswith('/'):
+ return ActionResult.isFailure(error="pathQuery must start with '/' and include site name with Microsoft-standard syntax /sites//... e.g. /sites/company-share/Freigegebene Dokumente/Work")
- # Filter sites by name (case-insensitive substring match)
- sites = self._filter_sites_by_hint(all_sites, site_name)
- if not sites:
- return ActionResult.isFailure(error=f"No SharePoint site found matching '{site_name}'")
- logger.info(f"Filtered to site(s) matching '{site_name}': {[s['displayName'] for s in sites]}")
+ # Check if pathQuery contains search terms (words without proper path structure)
+ validPathPrefixes = ['/sites/', '/Documents', '/documents', '/Shared Documents', '/shared documents']
+ if not any(pathQuery.startswith(prefix) for prefix in validPathPrefixes):
+ return ActionResult.isFailure(error=f"Invalid pathQuery '{pathQuery}'. This appears to be search terms, not a valid SharePoint path. Use findDocumentPath action first to search for folders, then use the returned folder path as pathQuery.")
+
+ # If pathQuery starts with Microsoft-standard /sites/, try to get site directly
+ directSite = None
+ if pathQuery.startswith('/sites/'):
+ parsedPath = self._extractSiteFromStandardPath(pathQuery)
+ if parsedPath:
+ siteName = parsedPath.get("siteName")
+ # Try to get site directly by path (optimization - avoids discovering every site in the tenant)
+ directSite = await self._getSiteByStandardPath(siteName)
+ if directSite:
+ logger.info(f"Got site directly by standard path - no need to discover all sites")
+ sites = [directSite]
+ else:
+ logger.warning(f"Could not get site directly, falling back to site discovery")
+
+ # If we didn't get the site directly, use discovery and filtering
+ if not directSite:
+ # For pathQuery, we need to discover sites to find the specific one
+ allSites = await self._discoverSharePointSites()
+ if not allSites:
+ return ActionResult.isFailure(error="No SharePoint sites found or accessible")
+
+ # If pathQuery starts with Microsoft-standard /sites/, extract site name and filter
+ if pathQuery.startswith('/sites/'):
+ parsedPath = self._extractSiteFromStandardPath(pathQuery)
+ if parsedPath:
+ siteName = parsedPath.get("siteName")
+ # Filter sites by name (case-insensitive substring match)
+ sites = self._filterSitesByHint(allSites, siteName)
+ if not sites:
+ return ActionResult.isFailure(error=f"No SharePoint site found matching '{siteName}'")
+ logger.info(f"Filtered to site(s) matching '{siteName}': {[s['displayName'] for s in sites]}")
+ else:
+ sites = allSites
+ else:
+ sites = allSites
else:
- sites = all_sites
- else:
- # Step 3: Both pathObject and pathQuery failed - ERROR, NO FALLBACK
- return ActionResult.isFailure(error="No valid upload path provided. Either provide pathObject (from findDocumentPath) or a valid pathQuery with specific site information.")
+ # Step 3: Both pathObject and pathQuery failed - ERROR, NO FALLBACK
+ return ActionResult.isFailure(error="No valid read path provided. Either provide pathObject (from findDocumentPath) or a valid pathQuery with specific site information.")
if not sites:
return ActionResult.isFailure(error="No valid target site determined for read operation")
# Resolve path query into search paths
- search_paths = self._resolvePathQuery(pathQuery)
+ searchPaths = self._resolvePathQuery(pathQuery)
# Process each chat document across all sites
- read_results = []
+ readResults = []
for i, chatDocument in enumerate(chatDocuments):
try:
@@ -970,90 +1376,90 @@ class MethodSharepoint(MethodBase):
fileName = chatDocument.fileName
# Search for this file across all sites
- file_found = False
+ fileFound = False
for site in sites:
- site_id = site["id"]
- site_name = site["displayName"]
- site_url = site["webUrl"]
+ siteId = site["id"]
+ siteName = site["displayName"]
+ siteUrl = site["webUrl"]
# Try to find the file by name in this site
- search_query = fileName.replace("'", "''") # Escape single quotes for OData
- endpoint = f"sites/{site_id}/drive/root/search(q='{search_query}')"
+ searchQuery = fileName.replace("'", "''") # Escape single quotes for OData
+ endpoint = f"sites/{siteId}/drive/root/search(q='{searchQuery}')"
- search_result = await self._makeGraphApiCall(endpoint)
+ searchResult = await self._makeGraphApiCall(endpoint)
- if "error" in search_result:
+ if "error" in searchResult:
continue
- items = search_result.get("value", [])
+ items = searchResult.get("value", [])
for item in items:
if item.get("name") == fileName:
# Found the file, get its details
- file_id = item.get("id")
- file_endpoint = f"sites/{site_id}/drive/items/{file_id}"
+ fileId = item.get("id")
+ fileEndpoint = f"sites/{siteId}/drive/items/{fileId}"
# Get file metadata
- file_info_result = await self._makeGraphApiCall(file_endpoint)
+ fileInfoResult = await self._makeGraphApiCall(fileEndpoint)
- if "error" in file_info_result:
+ if "error" in fileInfoResult:
continue
# Build result with metadata
- result_item = {
+ resultItem = {
"fileId": fileId,
"fileName": fileName,
- "sharepointFileId": file_id,
- "siteName": site_name,
- "siteUrl": site_url,
- "size": file_info_result.get("size", 0),
- "createdDateTime": file_info_result.get("createdDateTime"),
- "lastModifiedDateTime": file_info_result.get("lastModifiedDateTime"),
- "webUrl": file_info_result.get("webUrl")
+ "sharepointFileId": fileId,
+ "siteName": siteName,
+ "siteUrl": siteUrl,
+ "size": fileInfoResult.get("size", 0),
+ "createdDateTime": fileInfoResult.get("createdDateTime"),
+ "lastModifiedDateTime": fileInfoResult.get("lastModifiedDateTime"),
+ "webUrl": fileInfoResult.get("webUrl")
}
# Add metadata if requested
if includeMetadata:
- result_item["metadata"] = {
- "mimeType": file_info_result.get("file", {}).get("mimeType"),
- "downloadUrl": file_info_result.get("@microsoft.graph.downloadUrl"),
- "createdBy": file_info_result.get("createdBy", {}),
- "lastModifiedBy": file_info_result.get("lastModifiedBy", {}),
- "parentReference": file_info_result.get("parentReference", {})
+ resultItem["metadata"] = {
+ "mimeType": fileInfoResult.get("file", {}).get("mimeType"),
+ "downloadUrl": fileInfoResult.get("@microsoft.graph.downloadUrl"),
+ "createdBy": fileInfoResult.get("createdBy", {}),
+ "lastModifiedBy": fileInfoResult.get("lastModifiedBy", {}),
+ "parentReference": fileInfoResult.get("parentReference", {})
}
# Get file content if it's a readable format
- mime_type = file_info_result.get("file", {}).get("mimeType", "")
- if mime_type.startswith("text/") or mime_type in [
+ mimeType = fileInfoResult.get("file", {}).get("mimeType", "")
+ if mimeType.startswith("text/") or mimeType in [
"application/json", "application/xml", "application/javascript"
]:
# Download the file content
- content_endpoint = f"sites/{site_id}/drive/items/{file_id}/content"
+ contentEndpoint = f"sites/{siteId}/drive/items/{fileId}/content"
# For content download, we need to handle binary data
try:
async with aiohttp.ClientSession() as session:
headers = {"Authorization": f"Bearer {self.services.sharepoint._target.accessToken}"}
- async with session.get(f"https://graph.microsoft.com/v1.0/{content_endpoint}", headers=headers) as response:
+ async with session.get(f"https://graph.microsoft.com/v1.0/{contentEndpoint}", headers=headers) as response:
if response.status == 200:
content = await response.text()
- result_item["content"] = content
+ resultItem["content"] = content
else:
- result_item["content"] = f"Could not download content: HTTP {response.status}"
+ resultItem["content"] = f"Could not download content: HTTP {response.status}"
except Exception as e:
- result_item["content"] = f"Error downloading content: {str(e)}"
+ resultItem["content"] = f"Error downloading content: {str(e)}"
else:
- result_item["content"] = f"Binary file type ({mime_type}) - content not retrieved"
+ resultItem["content"] = f"Binary file type ({mimeType}) - content not retrieved"
- read_results.append(result_item)
- file_found = True
+ readResults.append(resultItem)
+ fileFound = True
break
- if file_found:
+ if fileFound:
break
- if not file_found:
- read_results.append({
+ if not fileFound:
+ readResults.append({
"fileId": fileId,
"fileName": fileName,
"error": "File not found in any accessible SharePoint site",
@@ -1062,20 +1468,20 @@ class MethodSharepoint(MethodBase):
except Exception as e:
logger.error(f"Error reading document {chatDocument.fileName}: {str(e)}")
- read_results.append({
+ readResults.append({
"fileId": chatDocument.fileId,
"fileName": chatDocument.fileName,
"error": str(e),
"content": None
})
- result_data = {
+ resultData = {
"connectionReference": connectionReference,
"pathQuery": pathQuery,
"documentList": documentList,
"includeMetadata": includeMetadata,
"sitesSearched": len(sites),
- "readResults": read_results,
+ "readResults": readResults,
"connection": {
"id": connection["id"],
"authority": "microsoft",
@@ -1085,17 +1491,17 @@ class MethodSharepoint(MethodBase):
}
# Use default JSON format for output
- output_extension = ".json" # Default
- output_mime_type = "application/json" # Default
+ outputExtension = ".json" # Default
+ outputMimeType = "application/json" # Default
return ActionResult(
success=True,
documents=[
ActionDocument(
- documentName=f"sharepoint_documents_{self._format_timestamp_for_filename()}{output_extension}",
- documentData=json.dumps(result_data, indent=2),
- mimeType=output_mime_type
+ documentName=f"sharepoint_documents_{self._format_timestamp_for_filename()}{outputExtension}",
+ documentData=json.dumps(resultData, indent=2),
+ mimeType=outputMimeType
)
]
)
@@ -1128,7 +1534,7 @@ class MethodSharepoint(MethodBase):
documentList = [documentList]
pathObject = parameters.get("pathObject")
- upload_path = pathQuery
+ uploadPath = pathQuery
logger.debug(f"Using pathQuery: {pathQuery}")
if not connectionReference or not documentList:
@@ -1138,63 +1544,63 @@ class MethodSharepoint(MethodBase):
if pathObject:
try:
# Resolve the reference label to get the actual document list
- document_list = self.services.chat.getChatDocumentsFromDocumentList([pathObject])
- if not document_list or len(document_list) == 0:
+ pathObjectDocuments = self.services.chat.getChatDocumentsFromDocumentList([pathObject])  # distinct name: must not clobber the documentList upload parameter
+ if not pathObjectDocuments or len(pathObjectDocuments) == 0:
return ActionResult.isFailure(error=f"No document list found for reference: {pathObject}")
# Get the first document's content (which should be the JSON)
- first_document = document_list[0]
- file_data = self.services.chat.getFileData(first_document.fileId)
- if not file_data:
+ firstDocument = pathObjectDocuments[0]
+ fileData = self.services.chat.getFileData(firstDocument.fileId)
+ if not fileData:
return ActionResult.isFailure(error=f"No file data found for document: {pathObject}")
# Parse the JSON content
- result_data = json.loads(file_data)
+ resultData = json.loads(fileData)
# Debug: Log the structure of the result document
- logger.info(f"Result document keys: {list(result_data.keys())}")
+ logger.info(f"Result document keys: {list(resultData.keys())}")
# Handle different result document formats
- found_documents = []
+ foundDocuments = []
# Check if it's a direct SharePoint result (has foundDocuments)
- if "foundDocuments" in result_data:
- found_documents = result_data.get("foundDocuments", [])
- logger.info(f"Found {len(found_documents)} documents in foundDocuments array")
+ if "foundDocuments" in resultData:
+ foundDocuments = resultData.get("foundDocuments", [])
+ logger.info(f"Found {len(foundDocuments)} documents in foundDocuments array")
# Check if it's an AI validation result (has result string with validationReport)
- elif "result" in result_data and "validationReport" in result_data["result"]:
+ elif "result" in resultData and "validationReport" in resultData["result"]:
try:
# Parse the nested JSON in the result field
- nested_result = json.loads(result_data["result"])
- validation_report = nested_result.get("validationReport", {})
- document_details = validation_report.get("documentDetails", {})
+ nestedResult = json.loads(resultData["result"])
+ validationReport = nestedResult.get("validationReport", {})
+ documentDetails = validationReport.get("documentDetails", {})
- if document_details:
+ if documentDetails:
# Convert the single document details to the expected format
doc = {
- "id": document_details.get("id"),
- "name": document_details.get("name"),
- "type": document_details.get("type", "").lower(), # Convert "Folder" to "folder"
- "siteName": document_details.get("siteName"),
- "siteId": document_details.get("siteId"),
- "fullPath": document_details.get("fullPath"),
- "webUrl": document_details.get("webUrl", ""),
- "parentPath": document_details.get("parentPath", "")
+ "id": documentDetails.get("id"),
+ "name": documentDetails.get("name"),
+ "type": documentDetails.get("type", "").lower(), # Convert "Folder" to "folder"
+ "siteName": documentDetails.get("siteName"),
+ "siteId": documentDetails.get("siteId"),
+ "fullPath": documentDetails.get("fullPath"),
+ "webUrl": documentDetails.get("webUrl", ""),
+ "parentPath": documentDetails.get("parentPath", "")
}
- found_documents = [doc]
+ foundDocuments = [doc]
logger.info(f"Extracted 1 document from validation report")
except json.JSONDecodeError as e:
logger.error(f"Failed to parse nested JSON in result field: {e}")
return ActionResult.isFailure(error=f"Invalid nested JSON in pathObject: {str(e)}")
# Debug: Log what we found in the result document
- logger.info(f"Result document contains {len(found_documents)} documents")
- for i, doc in enumerate(found_documents):
+ logger.info(f"Result document contains {len(foundDocuments)} documents")
+ for i, doc in enumerate(foundDocuments):
logger.info(f" Document {i+1}: name='{doc.get('name')}', type='{doc.get('type')}', id='{doc.get('id')}'")
# Extract folder information from the result
folders = []
- for doc in found_documents:
+ for doc in foundDocuments:
if doc.get("type") == "folder":
folders.append(doc)
@@ -1202,25 +1608,25 @@ class MethodSharepoint(MethodBase):
if folders:
# Use the first folder found - prefer folder ID for direct API calls
- first_folder = folders[0]
- if first_folder.get("id"):
+ firstFolder = folders[0]
+ if firstFolder.get("id"):
# Use folder ID directly for most reliable API calls
- upload_path = first_folder.get("id")
- logger.info(f"Using folder ID from pathObject: {upload_path}")
- elif first_folder.get("fullPath"):
+ uploadPath = firstFolder.get("id")
+ logger.info(f"Using folder ID from pathObject: {uploadPath}")
+ elif firstFolder.get("fullPath"):
# Extract the correct path portion from fullPath by removing site name
- full_path = first_folder.get("fullPath")
+ fullPath = firstFolder.get("fullPath")
# fullPath format: \\SiteName\\Library\\Folder\\SubFolder
# We need to remove the first two parts (\\SiteName\\) to get the actual folder path
- path_parts = full_path.lstrip('\\').split('\\')
- if len(path_parts) > 1:
+ pathParts = fullPath.lstrip('\\').split('\\')
+ if len(pathParts) > 1:
# Remove the first part (site name) and reconstruct the path
- actual_path = '\\'.join(path_parts[1:])
- upload_path = actual_path
- logger.info(f"Extracted path from fullPath: {upload_path}")
+ actualPath = '\\'.join(pathParts[1:])
+ uploadPath = actualPath
+ logger.info(f"Extracted path from fullPath: {uploadPath}")
else:
- upload_path = full_path
- logger.info(f"Using full path from pathObject (no site name to remove): {upload_path}")
+ uploadPath = fullPath
+ logger.info(f"Using full path from pathObject (no site name to remove): {uploadPath}")
else:
return ActionResult.isFailure(error="No valid folder information found in pathObject")
else:
@@ -1250,19 +1656,19 @@ class MethodSharepoint(MethodBase):
# Extract site information from the pathObject result
try:
# Get the site information from the first folder in pathObject
- if 'found_documents' in locals() and found_documents:
- first_folder = found_documents[0]
- site_name = first_folder.get("siteName")
- site_id = first_folder.get("siteId")
+ if 'foundDocuments' in locals() and foundDocuments:
+ firstFolder = foundDocuments[0]
+ siteName = firstFolder.get("siteName")
+ siteId = firstFolder.get("siteId")
- if site_name and site_id:
+ if siteName and siteId:
# Use the specific site from pathObject instead of discovering all sites
sites = [{
- "id": site_id,
- "displayName": site_name,
- "webUrl": first_folder.get("webUrl", "")
+ "id": siteId,
+ "displayName": siteName,
+ "webUrl": firstFolder.get("webUrl", "")
}]
- logger.info(f"Using specific site from pathObject: {site_name} (ID: {site_id})")
+ logger.info(f"Using specific site from pathObject: {siteName} (ID: {siteId})")
else:
# Site info missing from pathObject - this is an error, not a fallback
return ActionResult.isFailure(error="Site information missing from pathObject. Cannot determine target site for upload.")
@@ -1274,73 +1680,106 @@ class MethodSharepoint(MethodBase):
return ActionResult.isFailure(error=f"Error processing pathObject: {str(e)}. Cannot determine target site for upload.")
else:
# No pathObject provided - check if pathQuery is valid
- if not upload_path or upload_path.strip() == "" or upload_path.strip() == "*":
+ if not uploadPath or uploadPath.strip() == "" or uploadPath.strip() == "*":
return ActionResult.isFailure(error="No valid upload path provided. Either provide pathObject (from findDocumentPath) or a valid pathQuery with specific site information.")
# Validate pathQuery format
- if not upload_path.startswith('/'):
- return ActionResult.isFailure(error="pathQuery must start with '/' and include site name with syntax /site:/... e.g. /site:KM LayerFinance/Documents/Work")
+ if not uploadPath.startswith('/'):
+ return ActionResult.isFailure(error="pathQuery must start with '/' and include site name with Microsoft-standard syntax /sites//... e.g. /sites/company-share/Freigegebene Dokumente/Work")
- # Check if upload_path contains search terms (words without proper path structure)
- if not upload_path.startswith('/site:') and not upload_path.startswith('/Documents') and not upload_path.startswith('/Shared Documents'):
- return ActionResult.isFailure(error=f"Invalid pathQuery '{upload_path}'. This appears to be search terms, not a valid SharePoint path. Use findDocumentPath action first to search for folders, then use the returned folder path as pathQuery.")
+ # Check if uploadPath contains search terms (words without proper path structure)
+ validPathPrefixes = ['/sites/', '/Documents', '/documents', '/Shared Documents', '/shared documents']
+ if not any(uploadPath.startswith(prefix) for prefix in validPathPrefixes):
+ return ActionResult.isFailure(error=f"Invalid pathQuery '{uploadPath}'. This appears to be search terms, not a valid SharePoint path. Use findDocumentPath action first to search for folders, then use the returned folder path as pathQuery.")
- # For pathQuery, we need to discover sites to find the specific one
- sites = await self._discoverSharePointSites()
- if not sites:
- return ActionResult.isFailure(error="No SharePoint sites found or accessible")
+ # If uploadPath starts with Microsoft-standard /sites/, try to get site directly
+ directSite = None
+ if uploadPath.startswith('/sites/'):
+ parsedPath = self._extractSiteFromStandardPath(uploadPath)
+ if parsedPath:
+ siteName = parsedPath.get("siteName")
+ # Try to get site directly by path (optimization - avoids discovering every site in the tenant)
+ directSite = await self._getSiteByStandardPath(siteName)
+ if directSite:
+ logger.info(f"Got site directly by standard path - no need to discover all sites")
+ sites = [directSite]
+ else:
+ logger.warning(f"Could not get site directly, falling back to site discovery")
+
+ # If we didn't get the site directly, use discovery and filtering
+ if not directSite:
+ # For pathQuery, we need to discover sites to find the specific one
+ allSites = await self._discoverSharePointSites()
+ if not allSites:
+ return ActionResult.isFailure(error="No SharePoint sites found or accessible")
+
+ # If uploadPath starts with Microsoft-standard /sites/, extract site name and filter
+ if uploadPath.startswith('/sites/'):
+ parsedPath = self._extractSiteFromStandardPath(uploadPath)
+ if parsedPath:
+ siteName = parsedPath.get("siteName")
+ # Filter sites by name (case-insensitive substring match)
+ sites = self._filterSitesByHint(allSites, siteName)
+ if not sites:
+ return ActionResult.isFailure(error=f"No SharePoint site found matching '{siteName}'")
+ logger.info(f"Filtered to site(s) matching '{siteName}': {[s['displayName'] for s in sites]}")
+ else:
+ sites = allSites
+ else:
+ sites = allSites
if not sites:
return ActionResult.isFailure(error="No valid target site determined for upload")
# Process upload paths based on whether pathObject was provided
- upload_site_scope = None
+ uploadSiteScope = None
if not pathObject:
# Parse the validated pathQuery to extract site and path information
- parsed = self._parse_site_scoped_path(upload_path)
- if not parsed:
- return ActionResult.isFailure(error="Invalid upload_path. Use /site:/")
+ parsed = self._extractSiteFromStandardPath(uploadPath)
- # Find matching site
- candidate_sites = self._filter_sites_by_hint(sites, parsed["siteName"]) # substring match
+ if not parsed:
+ return ActionResult.isFailure(error="Invalid uploadPath. Use Microsoft-standard /sites//")
+
+ # Find matching site (already filtered above, but ensure we have the right one)
+ candidateSites = self._filterSitesByHint(sites, parsed["siteName"]) # substring match
# Choose exact displayName match if available
- exact = [s for s in candidate_sites if (s.get("displayName") or "").strip().lower() == parsed["siteName"].strip().lower()]
- selected_site = exact[0] if exact else (candidate_sites[0] if candidate_sites else None)
- if not selected_site:
+ exact = [s for s in candidateSites if (s.get("displayName") or "").strip().lower() == parsed["siteName"].strip().lower()]
+ selectedSite = exact[0] if exact else (candidateSites[0] if candidateSites else None)
+ if not selectedSite:
return ActionResult.isFailure(error=f"SharePoint site '{parsed['siteName']}' not found or not accessible")
- upload_site_scope = selected_site
+ uploadSiteScope = selectedSite
# Use the inner path portion as the actual upload target path
# Remove document library name from path (same logic as listDocuments)
- inner_path = parsed['innerPath'].lstrip('/')
- path_segments = [s for s in inner_path.split('/') if s.strip()]
- if len(path_segments) > 1:
+ innerPath = parsed.get('innerPath', '').lstrip('/')
+ pathSegments = [s for s in innerPath.split('/') if s.strip()]
+ if len(pathSegments) > 1:
# Path has multiple segments - first might be a library name
# Try without first segment (assuming it's a library name)
- inner_path = '/'.join(path_segments[1:])
- logger.info(f"Removed first path segment (potential library name), path changed from '{parsed['innerPath']}' to '{inner_path}'")
- elif len(path_segments) == 1:
+ innerPath = '/'.join(pathSegments[1:])
+ logger.info(f"Removed first path segment (potential library name), path changed from '{parsed['innerPath']}' to '{innerPath}'")
+ elif len(pathSegments) == 1:
# Only one segment - if it's a common library-like name, use empty path (root)
- first_segment_lower = path_segments[0].lower()
- library_indicators = ['document', 'dokument', 'shared', 'freigegeben', 'library', 'bibliothek']
- if any(indicator in first_segment_lower for indicator in library_indicators):
- inner_path = ''
- logger.info(f"First segment '{path_segments[0]}' appears to be a library name, using root")
+ firstSegmentLower = pathSegments[0].lower()
+ libraryIndicators = ['document', 'dokument', 'shared', 'freigegeben', 'library', 'bibliothek']
+ if any(indicator in firstSegmentLower for indicator in libraryIndicators):
+ innerPath = ''
+ logger.info(f"First segment '{pathSegments[0]}' appears to be a library name, using root")
- upload_paths = [f"/{inner_path}" if inner_path else "/"]
- sites = [selected_site]
+ uploadPaths = [f"/{innerPath}" if innerPath else "/"]
+ sites = [selectedSite]
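+ # e.g. pathQuery '/sites/company-share/Shared Documents/Work' ends up here
+ # with innerPath 'Work', since 'Shared Documents' is treated as the library
+ # name (illustrative values only)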
else:
- # When using pathObject, check if upload_path is a folder ID or a path
- if upload_path.startswith('01PPXICCB') or upload_path.startswith('01'):
+ # When using pathObject, check if uploadPath is a folder ID or a path
+ if uploadPath.startswith('01'):  # driveItem IDs begin with '01' (e.g. '01PPXICCB...'); paths begin with '/'
# It's a folder ID - use it directly
- upload_paths = [upload_path]
- logger.info(f"Using folder ID directly for upload: {upload_path}")
+ uploadPaths = [uploadPath]
+ logger.info(f"Using folder ID directly for upload: {uploadPath}")
else:
# It's a path - resolve it normally
- upload_paths = self._resolvePathQuery(upload_path)
+ uploadPaths = self._resolvePathQuery(uploadPath)
# Process each document upload
- upload_results = []
+ uploadResults = []
# Extract file names from documents
fileNames = [doc.fileName for doc in chatDocuments]
@@ -1349,11 +1788,11 @@ class MethodSharepoint(MethodBase):
for i, (chatDocument, fileName) in enumerate(zip(chatDocuments, fileNames)):
try:
fileId = chatDocument.fileId
- file_data = self.services.chat.getFileData(fileId)
+ fileData = self.services.chat.getFileData(fileId)
- if not file_data:
+ if not fileData:
logger.warning(f"File data not found for fileId: {fileId}")
- upload_results.append({
+ uploadResults.append({
"fileName": fileName,
"fileId": fileId,
"error": "File data not found",
@@ -1362,77 +1801,77 @@ class MethodSharepoint(MethodBase):
continue
# Upload to the first available site (or could be made configurable)
- upload_successful = False
+ uploadSuccessful = False
for site in sites:
- site_id = site["id"]
- site_name = site["displayName"]
- site_url = site["webUrl"]
+ siteId = site["id"]
+ siteName = site["displayName"]
+ siteUrl = site["webUrl"]
# Use the first upload path or default to Documents
- upload_path = upload_paths[0] if upload_paths else "/Documents"
+ uploadPath = uploadPaths[0] if uploadPaths else "/Documents"
# Handle wildcard paths - replace with default Documents folder
- if upload_path == "*":
- upload_path = "/Documents"
+ if uploadPath == "*":
+ uploadPath = "/Documents"
logger.warning(f"Wildcard path '*' detected, using default '/Documents' folder for upload")
- # Check if upload_path is a folder ID or a regular path
- if upload_path.startswith('01PPXICCB') or upload_path.startswith('01'):
+ # Check if uploadPath is a folder ID or a regular path
+ if uploadPath.startswith('01'):  # driveItem IDs begin with '01' (e.g. '01PPXICCB...')
# It's a folder ID - use the folder-specific upload endpoint
- upload_endpoint = f"sites/{site_id}/drive/items/{upload_path}:/{fileName}:/content"
- logger.info(f"Using folder ID upload endpoint: {upload_endpoint}")
+ uploadEndpoint = f"sites/{siteId}/drive/items/{uploadPath}:/{fileName}:/content"
+ logger.info(f"Using folder ID upload endpoint: {uploadEndpoint}")
else:
# It's a regular path - use the root-based upload endpoint
- upload_path = upload_path.rstrip('/') + '/' + fileName
- upload_path_clean = upload_path.lstrip('/')
- upload_endpoint = f"sites/{site_id}/drive/root:/{upload_path_clean}:/content"
- logger.info(f"Using path-based upload endpoint: {upload_endpoint}")
+ uploadPath = uploadPath.rstrip('/') + '/' + fileName
+ uploadPathClean = uploadPath.lstrip('/')
+ uploadEndpoint = f"sites/{siteId}/drive/root:/{uploadPathClean}:/content"
+ logger.info(f"Using path-based upload endpoint: {uploadEndpoint}")
# Upload endpoint for small files (< 4MB)
- if len(file_data) < 4 * 1024 * 1024: # 4MB
+ if len(fileData) < 4 * 1024 * 1024: # 4MB
# Upload the file
- upload_result = await self._makeGraphApiCall(
- upload_endpoint,
+ uploadResult = await self._makeGraphApiCall(
+ uploadEndpoint,
method="PUT",
- data=file_data
+ data=fileData
)
- if "error" not in upload_result:
- upload_results.append({
+ if "error" not in uploadResult:
+ uploadResults.append({
"fileName": fileName,
"fileId": fileId,
"uploadStatus": "success",
- "siteName": site_name,
- "siteUrl": site_url,
- "uploadPath": upload_path,
- "uploadEndpoint": upload_endpoint,
- "sharepointFileId": upload_result.get("id"),
- "webUrl": upload_result.get("webUrl"),
- "size": upload_result.get("size"),
- "createdDateTime": upload_result.get("createdDateTime")
+ "siteName": siteName,
+ "siteUrl": siteUrl,
+ "uploadPath": uploadPath,
+ "uploadEndpoint": uploadEndpoint,
+ "sharepointFileId": uploadResult.get("id"),
+ "webUrl": uploadResult.get("webUrl"),
+ "size": uploadResult.get("size"),
+ "createdDateTime": uploadResult.get("createdDateTime")
})
- upload_successful = True
+ uploadSuccessful = True
break
else:
- logger.warning(f"Upload failed to site {site_name}: {upload_result['error']}")
+ logger.warning(f"Upload failed to site {siteName}: {uploadResult['error']}")
else:
# For large files, we would need to implement resumable upload
- logger.warning(f"File too large ({len(file_data)} bytes) for site {site_name}")
+ logger.warning(f"File too large ({len(fileData)} bytes) for site {siteName}")
continue
- if not upload_successful:
- upload_results.append({
+ if not uploadSuccessful:
+ uploadResults.append({
"fileName": fileName,
"fileId": fileId,
- "error": f"File too large ({len(file_data)} bytes) or upload failed to all sites. Files larger than 4MB require resumable upload (not implemented).",
+ "error": f"File too large ({len(fileData)} bytes) or upload failed to all sites. Files larger than 4MB require resumable upload (not implemented).",
"uploadStatus": "failed"
})
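+ # A possible follow-up for the >4MB case - a sketch of the documented Graph
+ # upload-session flow, not wired in here (names reuse locals from the path
+ # branch above):
+ #   session = await self._makeGraphApiCall(
+ #       f"sites/{siteId}/drive/root:/{uploadPathClean}:/createUploadSession",
+ #       method="POST")
+ #   # then PUT the file in chunks (multiples of 320 KiB) to
+ #   # session["uploadUrl"] with Content-Range headers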
except Exception as e:
logger.error(f"Error uploading document {fileName}: {str(e)}")
- upload_results.append({
+ uploadResults.append({
"fileName": fileName,
"fileId": fileId,
"error": str(e),
@@ -1440,13 +1879,13 @@ class MethodSharepoint(MethodBase):
})
# Create result data
- result_data = {
+ resultData = {
"connectionReference": connectionReference,
- "pathQuery": upload_path,
+ "pathQuery": uploadPath,
"documentList": documentList,
"fileNames": fileNames,
"sitesAvailable": len(sites),
- "uploadResults": upload_results,
+ "uploadResults": uploadResults,
"connection": {
"id": connection["id"],
"authority": "microsoft",
@@ -1456,17 +1895,17 @@ class MethodSharepoint(MethodBase):
}
# Use default JSON format for output
- output_extension = ".json" # Default
- output_mime_type = "application/json" # Default
+ outputExtension = ".json" # Default
+ outputMimeType = "application/json" # Default
return ActionResult(
success=True,
documents=[
ActionDocument(
- documentName=f"sharepoint_upload_{self._format_timestamp_for_filename()}{output_extension}",
- documentData=json.dumps(result_data, indent=2),
- mimeType=output_mime_type
+ documentName=f"sharepoint_upload_{self._format_timestamp_for_filename()}{outputExtension}",
+ documentData=json.dumps(resultData, indent=2),
+ mimeType=outputMimeType
)
]
)
@@ -1498,7 +1937,7 @@ class MethodSharepoint(MethodBase):
pathQuery = parameters.get("pathQuery")
includeSubfolders = parameters.get("includeSubfolders", False) # Default to False for better UX
- list_query = pathQuery
+ listQuery = pathQuery
logger.info(f"Using pathQuery: {pathQuery}")
if not connectionReference:
@@ -1511,65 +1950,65 @@ class MethodSharepoint(MethodBase):
logger.debug(f"Both pathObject and pathQuery provided - using pathObject (pathQuery '{pathQuery}' will be ignored)")
try:
# Resolve the reference label to get the actual document list
- document_list = self.services.chat.getChatDocumentsFromDocumentList([pathObject])
- if not document_list or len(document_list) == 0:
+ pathObjectDocuments = self.services.chat.getChatDocumentsFromDocumentList([pathObject])  # distinct name to avoid shadowing any documentList parameter
+ if not pathObjectDocuments or len(pathObjectDocuments) == 0:
return ActionResult.isFailure(error=f"No document list found for reference: {pathObject}")
# Get the first document's content (which should be the JSON)
- first_document = document_list[0]
- logger.info(f"Document fileId: {first_document.fileId}, fileName: {first_document.fileName}")
- file_data = self.services.chat.getFileData(first_document.fileId)
- if not file_data:
- return ActionResult.isFailure(error=f"No file data found for document: {pathObject} (fileId: {first_document.fileId})")
- logger.info(f"File data length: {len(file_data) if file_data else 0}")
+ firstDocument = pathObjectDocuments[0]
+ logger.info(f"Document fileId: {firstDocument.fileId}, fileName: {firstDocument.fileName}")
+ fileData = self.services.chat.getFileData(firstDocument.fileId)
+ if not fileData:
+ return ActionResult.isFailure(error=f"No file data found for document: {pathObject} (fileId: {firstDocument.fileId})")
+ logger.info(f"File data length: {len(fileData) if fileData else 0}")
# Parse the JSON content
- result_data = json.loads(file_data)
+ resultData = json.loads(fileData)
# Debug: Log the structure of the result document
- logger.info(f"Result document keys: {list(result_data.keys())}")
+ logger.info(f"Result document keys: {list(resultData.keys())}")
# Handle different result document formats
- found_documents = []
+ foundDocuments = []
# Check if it's a direct SharePoint result (has foundDocuments)
- if "foundDocuments" in result_data:
- found_documents = result_data.get("foundDocuments", [])
- logger.info(f"Found {len(found_documents)} documents in foundDocuments array")
+ if "foundDocuments" in resultData:
+ foundDocuments = resultData.get("foundDocuments", [])
+ logger.info(f"Found {len(foundDocuments)} documents in foundDocuments array")
# Check if it's an AI validation result (has result string with validationReport)
- elif "result" in result_data and "validationReport" in result_data["result"]:
+ elif "result" in resultData and "validationReport" in resultData["result"]:
try:
# Parse the nested JSON in the result field
- nested_result = json.loads(result_data["result"])
- validation_report = nested_result.get("validationReport", {})
- document_details = validation_report.get("documentDetails", {})
+ nestedResult = json.loads(resultData["result"])
+ validationReport = nestedResult.get("validationReport", {})
+ documentDetails = validationReport.get("documentDetails", {})
- if document_details:
+ if documentDetails:
# Convert the single document details to the expected format
doc = {
- "id": document_details.get("id"),
- "name": document_details.get("name"),
- "type": document_details.get("type", "").lower(), # Convert "Folder" to "folder"
- "siteName": document_details.get("siteName"),
- "siteId": document_details.get("siteId"),
- "fullPath": document_details.get("fullPath"),
- "webUrl": document_details.get("webUrl", ""),
- "parentPath": document_details.get("parentPath", "")
+ "id": documentDetails.get("id"),
+ "name": documentDetails.get("name"),
+ "type": documentDetails.get("type", "").lower(), # Convert "Folder" to "folder"
+ "siteName": documentDetails.get("siteName"),
+ "siteId": documentDetails.get("siteId"),
+ "fullPath": documentDetails.get("fullPath"),
+ "webUrl": documentDetails.get("webUrl", ""),
+ "parentPath": documentDetails.get("parentPath", "")
}
- found_documents = [doc]
+ foundDocuments = [doc]
logger.info(f"Extracted 1 document from validation report")
except ValueError as e:
logger.error(f"Failed to parse nested JSON in result field: {e}")
return ActionResult.isFailure(error=f"Invalid nested JSON in pathObject: {str(e)}")
# Debug: Log what we found in the result document
- logger.info(f"Result document contains {len(found_documents)} documents")
- for i, doc in enumerate(found_documents):
+ logger.info(f"Result document contains {len(foundDocuments)} documents")
+ for i, doc in enumerate(foundDocuments):
logger.info(f" Document {i+1}: name='{doc.get('name')}', type='{doc.get('type')}', id='{doc.get('id')}'")
# Extract folder information from the result
folders = []
- for doc in found_documents:
+ for doc in foundDocuments:
if doc.get("type") == "folder":
folders.append(doc)
@@ -1577,25 +2016,25 @@ class MethodSharepoint(MethodBase):
if folders:
# Use the first folder found - prefer folder ID for direct API calls
- first_folder = folders[0]
- if first_folder.get("id"):
+ firstFolder = folders[0]
+ if firstFolder.get("id"):
# Use folder ID directly for most reliable API calls
- list_query = first_folder.get("id")
- logger.info(f"Using folder ID from pathObject: {list_query}")
- elif first_folder.get("fullPath"):
+ listQuery = firstFolder.get("id")
+ logger.info(f"Using folder ID from pathObject: {listQuery}")
+ elif firstFolder.get("fullPath"):
# Extract the correct path portion from fullPath by removing site name
- full_path = first_folder.get("fullPath")
+ fullPath = firstFolder.get("fullPath")
# fullPath format: \\SiteName\\Library\\Folder\\SubFolder
# We need to remove the first two parts (\\SiteName\\) to get the actual folder path
- path_parts = full_path.lstrip('\\').split('\\')
- if len(path_parts) > 1:
+ pathParts = fullPath.lstrip('\\').split('\\')
+ if len(pathParts) > 1:
# Remove the first part (site name) and reconstruct the path
- actual_path = '\\'.join(path_parts[1:])
- list_query = actual_path
- logger.info(f"Extracted path from fullPath: {list_query}")
+ actualPath = '\\'.join(pathParts[1:])
+ listQuery = actualPath
+ logger.info(f"Extracted path from fullPath: {listQuery}")
else:
- list_query = full_path
- logger.info(f"Using full path from pathObject (no site name to remove): {list_query}")
+ listQuery = fullPath
+ logger.info(f"Using full path from pathObject (no site name to remove): {listQuery}")
else:
return ActionResult.isFailure(error="No valid folder information found in pathObject")
else:
@@ -1611,19 +2050,11 @@ class MethodSharepoint(MethodBase):
if not connection:
return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
- logger.info(f"Starting SharePoint listDocuments for list_query: {list_query}")
+ logger.info(f"Starting SharePoint listDocuments for listQuery: {listQuery}")
logger.debug(f"Connection ID: {connection['id']}")
- # For listDocuments, if pathQuery starts with /site:, use it directly without parsing
- # (parsing would split on the colon and break the site name)
- if list_query and list_query.strip().startswith('/site:'):
- pathQuery = list_query.strip()
- fileQuery = "*"
- searchType = "all"
- searchOptions = {}
- else:
- # Parse list_query to extract path, search terms, search type, and options
- pathQuery, fileQuery, searchType, searchOptions = self._parseSearchQuery(list_query)
+ # Parse listQuery to extract path, search terms, search type, and options
+ pathQuery, fileQuery, searchType, searchOptions = self._parseSearchQuery(listQuery)
# Determine sites to use - strict validation: pathObject → pathQuery → ERROR
sites = None
@@ -1634,19 +2065,19 @@ class MethodSharepoint(MethodBase):
# Extract site information from the pathObject result
try:
# Get the site information from the first folder in pathObject
- if 'found_documents' in locals() and found_documents:
- first_folder = found_documents[0]
- site_name = first_folder.get("siteName")
- site_id = first_folder.get("siteId")
+ if 'foundDocuments' in locals() and foundDocuments:
+ firstFolder = foundDocuments[0]
+ siteName = firstFolder.get("siteName")
+ siteId = firstFolder.get("siteId")
- if site_name and site_id:
+ if siteName and siteId:
# Use the specific site from pathObject instead of discovering all sites
sites = [{
- "id": site_id,
- "displayName": site_name,
- "webUrl": first_folder.get("webUrl", "")
+ "id": siteId,
+ "displayName": siteName,
+ "webUrl": firstFolder.get("webUrl", "")
}]
- logger.info(f"Using specific site from pathObject: {site_name} (ID: {site_id})")
+ logger.info(f"Using specific site from pathObject: {siteName} (ID: {siteId})")
else:
# Site info missing from pathObject - this is an error
return ActionResult.isFailure(error="Site information missing from pathObject. Cannot determine target site for list operation.")
@@ -1661,34 +2092,48 @@ class MethodSharepoint(MethodBase):
elif pathQuery and pathQuery.strip() != "" and pathQuery.strip() != "*":
# Validate pathQuery format
if not pathQuery.startswith('/'):
- return ActionResult.isFailure(error="pathQuery must start with '/' and include site name with syntax /site:/... e.g. /site:KM LayerFinance/Documents/Work")
+ return ActionResult.isFailure(error="pathQuery must start with '/' and include site name with Microsoft-standard syntax /sites//... e.g. /sites/company-share/Freigegebene Dokumente/Work")
# Check if pathQuery contains search terms (words without proper path structure)
- valid_path_prefixes = ['/site:', '/Documents', '/documents', '/Shared Documents', '/shared documents']
- if not any(pathQuery.startswith(prefix) for prefix in valid_path_prefixes):
+ validPathPrefixes = ['/sites/', '/Documents', '/documents', '/Shared Documents', '/shared documents']
+ if not any(pathQuery.startswith(prefix) for prefix in validPathPrefixes):
return ActionResult.isFailure(error=f"Invalid pathQuery '{pathQuery}'. This appears to be search terms, not a valid SharePoint path. Use findDocumentPath action first to search for folders, then use the returned folder path as pathQuery.")
- # For pathQuery, we need to discover sites to find the specific one
- all_sites = await self._discoverSharePointSites()
- if not all_sites:
- return ActionResult.isFailure(error="No SharePoint sites found or accessible")
+ # If pathQuery starts with Microsoft-standard /sites/, try to get site directly
+ directSite = None
+ if pathQuery.startswith('/sites/'):
+ parsedPath = self._extractSiteFromStandardPath(pathQuery)
+ if parsedPath:
+ siteName = parsedPath.get("siteName")
+ # Try to get site directly by path (optimization - avoids discovering every site in the tenant)
+ directSite = await self._getSiteByStandardPath(siteName)
+ if directSite:
+ logger.info(f"Got site directly by standard path - no need to discover all sites")
+ sites = [directSite]
+ else:
+ logger.warning(f"Could not get site directly, falling back to site discovery")
- # If pathQuery starts with /site:, extract site name and filter
- if pathQuery.startswith('/site:'):
- # Extract site name from /site:Company Share/... format
- site_path_part = pathQuery[6:] # Remove '/site:'
- if '/' in site_path_part:
- site_name = site_path_part.split('/', 1)[0]
- else:
- site_name = site_path_part
+ # If we didn't get the site directly, use discovery and filtering
+ if not directSite:
+ # For pathQuery, we need to discover sites to find the specific one
+ allSites = await self._discoverSharePointSites()
+ if not allSites:
+ return ActionResult.isFailure(error="No SharePoint sites found or accessible")
- # Filter sites by name (case-insensitive substring match)
- sites = self._filter_sites_by_hint(all_sites, site_name)
- if not sites:
- return ActionResult.isFailure(error=f"No SharePoint site found matching '{site_name}'")
- logger.info(f"Filtered to site(s) matching '{site_name}': {[s['displayName'] for s in sites]}")
- else:
- sites = all_sites
+ # If pathQuery starts with Microsoft-standard /sites/, extract site name and filter
+ if pathQuery.startswith('/sites/'):
+ parsedPath = self._extractSiteFromStandardPath(pathQuery)
+ if parsedPath:
+ siteName = parsedPath.get("siteName")
+ # Filter sites by name (case-insensitive substring match)
+ sites = self._filterSitesByHint(allSites, siteName)
+ if not sites:
+ return ActionResult.isFailure(error=f"No SharePoint site found matching '{siteName}'")
+ logger.info(f"Filtered to site(s) matching '{siteName}': {[s['displayName'] for s in sites]}")
+ else:
+ sites = allSites
+ else:
+ sites = allSites
else:
# Step 3: Both pathObject and pathQuery failed - ERROR, NO FALLBACK
return ActionResult.isFailure(error="No valid list path provided. Either provide pathObject (from findDocumentPath) or a valid pathQuery with specific site information.")
@@ -1696,232 +2141,231 @@ class MethodSharepoint(MethodBase):
if not sites:
return ActionResult.isFailure(error="No valid target site determined for list operation")
- # Check if list_query is a folder ID (starts with 01PPXICCB...)
- if list_query.startswith('01PPXICCB') or list_query.startswith('01'):
+ # Check if listQuery is a folder ID (driveItem IDs begin with '01', e.g. '01PPXICCB...')
+ if listQuery.startswith('01'):
# Direct folder ID - use it directly
- folder_paths = [list_query]
- logger.info(f"Using direct folder ID: {list_query}")
+ folderPaths = [listQuery]
+ logger.info(f"Using direct folder ID: {listQuery}")
else:
- # Remove /site:SiteName prefix from pathQuery before resolving (it's only for site filtering)
+ # Remove site prefix from pathQuery before resolving (it's only for site filtering)
pathQueryForResolve = pathQuery
- if pathQuery.startswith('/site:'):
- # Remove /site:SiteName/ and keep the rest
- site_path_part = pathQuery[6:] # Remove '/site:'
- if '/' in site_path_part:
- # Remove the site name part, keep the folder path
- pathQueryForResolve = '/' + site_path_part.split('/', 1)[1]
+ # Microsoft-standard path: /sites/SiteName/Path -> /Path
+ if pathQuery.startswith('/sites/'):
+ parsedPath = self._extractSiteFromStandardPath(pathQuery)
+ if parsedPath:
+ innerPath = parsedPath.get("innerPath", "")
+ pathQueryForResolve = '/' + innerPath if innerPath else '/'
else:
- # Only site name, no path - use root
pathQueryForResolve = '/'
# Remove first path segment if it looks like a document library name
# In SharePoint Graph API, /drive/root already points to the default document library,
# so library names in paths should be removed
# Generic approach: if path has multiple segments, store original for fallback
- path_segments = [s for s in pathQueryForResolve.split('/') if s.strip()]
- if len(path_segments) > 1:
+ pathSegments = [s for s in pathQueryForResolve.split('/') if s.strip()]
+ if len(pathSegments) > 1:
# Path has multiple segments - first might be a library name
# Store original for potential fallback
- original_path = pathQueryForResolve
+ originalPath = pathQueryForResolve
# Try without first segment (assuming it's a library name)
- pathQueryForResolve = '/' + '/'.join(path_segments[1:])
- logger.info(f"Removed first path segment (potential library name), path changed from '{original_path}' to '{pathQueryForResolve}'")
- elif len(path_segments) == 1:
+ pathQueryForResolve = '/' + '/'.join(pathSegments[1:])
+ logger.info(f"Removed first path segment (potential library name), path changed from '{originalPath}' to '{pathQueryForResolve}'")
+ elif len(pathSegments) == 1:
# Only one segment - if it's a common library-like name, use root
- first_segment_lower = path_segments[0].lower()
- library_indicators = ['document', 'dokument', 'shared', 'freigegeben', 'library', 'bibliothek']
- if any(indicator in first_segment_lower for indicator in library_indicators):
+ firstSegmentLower = pathSegments[0].lower()
+ libraryIndicators = ['document', 'dokument', 'shared', 'freigegeben', 'library', 'bibliothek']
+ if any(indicator in firstSegmentLower for indicator in libraryIndicators):
pathQueryForResolve = '/'
- logger.info(f"First segment '{path_segments[0]}' appears to be a library name, using root")
+ logger.info(f"First segment '{pathSegments[0]}' appears to be a library name, using root")
# Resolve path query into folder paths
- folder_paths = self._resolvePathQuery(pathQueryForResolve)
- logger.info(f"Resolved folder paths: {folder_paths}")
+ folderPaths = self._resolvePathQuery(pathQueryForResolve)
+ logger.info(f"Resolved folder paths: {folderPaths}")
# Process each folder path across all sites
- list_results = []
+ listResults = []
- for folderPath in folder_paths:
+ for folderPath in folderPaths:
try:
- folder_results = []
+ folderResults = []
for site in sites:
- site_id = site["id"]
- site_name = site["displayName"]
- site_url = site["webUrl"]
+ siteId = site["id"]
+ siteName = site["displayName"]
+ siteUrl = site["webUrl"]
- logger.info(f"Listing folder {folderPath} in site: {site_name}")
+ logger.info(f"Listing folder {folderPath} in site: {siteName}")
# Determine the endpoint based on folder path
if folderPath in ["/", ""] or folderPath == "*":
# Root folder
- endpoint = f"sites/{site_id}/drive/root/children"
+ endpoint = f"sites/{siteId}/drive/root/children"
elif folderPath.startswith('01PPXICCB') or folderPath.startswith('01'):
# Direct folder ID
- endpoint = f"sites/{site_id}/drive/items/{folderPath}/children"
+ endpoint = f"sites/{siteId}/drive/items/{folderPath}/children"
else:
# Specific folder path - remove leading slash if present and URL encode
- folder_path_clean = folderPath.lstrip('/')
+ folderPathClean = folderPath.lstrip('/')
# URL encode the path for Graph API (spaces and special characters need encoding)
- folder_path_encoded = urllib.parse.quote(folder_path_clean, safe='/')
- endpoint = f"sites/{site_id}/drive/root:/{folder_path_encoded}:/children"
+ folderPathEncoded = urllib.parse.quote(folderPathClean, safe='/')
+ endpoint = f"sites/{siteId}/drive/root:/{folderPathEncoded}:/children"
# Make the API call to list folder contents
- api_result = await self._makeGraphApiCall(endpoint)
+ apiResult = await self._makeGraphApiCall(endpoint)
- if "error" in api_result:
- logger.warning(f"Failed to list folder {folderPath} in site {site_name}: {api_result['error']}")
+ if "error" in apiResult:
+ logger.warning(f"Failed to list folder {folderPath} in site {siteName}: {apiResult['error']}")
continue
# Process the results
- items = api_result.get("value", [])
- processed_items = []
+ items = apiResult.get("value", [])
+ processedItems = []
for item in items:
# Use improved folder detection logic
- is_folder = False
+ isFolder = False
if 'folder' in item:
- is_folder = True
+ isFolder = True
else:
# Try to detect by URL pattern or other indicators
- web_url = item.get('webUrl', '')
+ webUrl = item.get('webUrl', '')
name = item.get('name', '')
# Check if URL has no file extension and looks like a folder path
- if '.' not in name and ('/' in web_url or '\\' in web_url):
- is_folder = True
+ if '.' not in name and ('/' in webUrl or '\\' in webUrl):
+ isFolder = True
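+ # NB: this fallback is a heuristic - extensionless files would be
+ # misclassified as folders; only the 'folder' facet check above is exact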
- item_info = {
+ itemInfo = {
"id": item.get("id"),
"name": item.get("name"),
"size": item.get("size", 0),
"createdDateTime": item.get("createdDateTime"),
"lastModifiedDateTime": item.get("lastModifiedDateTime"),
"webUrl": item.get("webUrl"),
- "type": "folder" if is_folder else "file",
- "siteName": site_name,
- "siteUrl": site_url
+ "type": "folder" if isFolder else "file",
+ "siteName": siteName,
+ "siteUrl": siteUrl
}
# Add file-specific information
if "file" in item:
- item_info.update({
+ itemInfo.update({
"mimeType": item["file"].get("mimeType"),
"downloadUrl": item.get("@microsoft.graph.downloadUrl")
})
# Add folder-specific information
if "folder" in item:
- item_info.update({
+ itemInfo.update({
"childCount": item["folder"].get("childCount", 0)
})
- processed_items.append(item_info)
+ processedItems.append(itemInfo)
# If includeSubfolders is enabled, fetch direct subfolder contents only (one level deep, no recursion)
if includeSubfolders:
- folder_items = [item for item in processed_items if item['type'] == 'folder']
- logger.info(f"Including subfolders - processing {len(folder_items)} folders")
- subfolder_count = 0
- max_subfolders = 10 # Limit to prevent infinite loops
+ folderItems = [item for item in processedItems if item['type'] == 'folder']
+ logger.info(f"Including subfolders - processing {len(folderItems)} folders")
+ subfolderCount = 0
+ maxSubfolders = 10 # Cap the number of subfolder listings to bound Graph API calls
- for item in processed_items[:]: # Use slice to avoid modifying list during iteration
- if item["type"] == "folder" and subfolder_count < max_subfolders:
- subfolder_count += 1
- subfolder_path = f"{folderPath.rstrip('/')}/{item['name']}"
- subfolder_endpoint = f"sites/{site_id}/drive/items/{item['id']}/children"
+ for item in processedItems[:]: # Use slice to avoid modifying list during iteration
+ if item["type"] == "folder" and subfolderCount < maxSubfolders:
+ subfolderCount += 1
+ subfolderPath = f"{folderPath.rstrip('/')}/{item['name']}"
+ subfolderEndpoint = f"sites/{siteId}/drive/items/{item['id']}/children"
logger.debug(f"Getting contents of subfolder: {item['name']}")
- subfolder_result = await self._makeGraphApiCall(subfolder_endpoint)
- if "error" not in subfolder_result:
- subfolder_items = subfolder_result.get("value", [])
- logger.debug(f"Found {len(subfolder_items)} items in subfolder {item['name']}")
+ subfolderResult = await self._makeGraphApiCall(subfolderEndpoint)
+ if "error" not in subfolderResult:
+ subfolderItems = subfolderResult.get("value", [])
+ logger.debug(f"Found {len(subfolderItems)} items in subfolder {item['name']}")
- for subfolder_item in subfolder_items:
+ for subfolderItem in subfolderItems:
# Use improved folder detection logic for subfolder items
- subfolder_is_folder = False
- if 'folder' in subfolder_item:
- subfolder_is_folder = True
+ subfolderIsFolder = False
+ if 'folder' in subfolderItem:
+ subfolderIsFolder = True
else:
# Try to detect by URL pattern or other indicators
- subfolder_web_url = subfolder_item.get('webUrl', '')
- subfolder_name = subfolder_item.get('name', '')
+ subfolderWebUrl = subfolderItem.get('webUrl', '')
+ subfolderName = subfolderItem.get('name', '')
# Treat as a folder when the name has no extension and the URL looks like a folder path
- if '.' not in subfolder_name and ('/' in subfolder_web_url or '\\' in subfolder_web_url):
- subfolder_is_folder = True
+ if '.' not in subfolderName and ('/' in subfolderWebUrl or '\\' in subfolderWebUrl):
+ subfolderIsFolder = True
# Only add files and direct subfolders, NO RECURSION
- subfolder_item_info = {
- "id": subfolder_item.get("id"),
- "name": subfolder_item.get("name"),
- "size": subfolder_item.get("size", 0),
- "createdDateTime": subfolder_item.get("createdDateTime"),
- "lastModifiedDateTime": subfolder_item.get("lastModifiedDateTime"),
- "webUrl": subfolder_item.get("webUrl"),
- "type": "folder" if subfolder_is_folder else "file",
- "parentPath": subfolder_path,
- "siteName": site_name,
- "siteUrl": site_url
+ subfolderItemInfo = {
+ "id": subfolderItem.get("id"),
+ "name": subfolderItem.get("name"),
+ "size": subfolderItem.get("size", 0),
+ "createdDateTime": subfolderItem.get("createdDateTime"),
+ "lastModifiedDateTime": subfolderItem.get("lastModifiedDateTime"),
+ "webUrl": subfolderItem.get("webUrl"),
+ "type": "folder" if subfolderIsFolder else "file",
+ "parentPath": subfolderPath,
+ "siteName": siteName,
+ "siteUrl": siteUrl
}
- if "file" in subfolder_item:
- subfolder_item_info.update({
- "mimeType": subfolder_item["file"].get("mimeType"),
- "downloadUrl": subfolder_item.get("@microsoft.graph.downloadUrl")
+ if "file" in subfolderItem:
+ subfolderItemInfo.update({
+ "mimeType": subfolderItem["file"].get("mimeType"),
+ "downloadUrl": subfolderItem.get("@microsoft.graph.downloadUrl")
})
- processed_items.append(subfolder_item_info)
+ processedItems.append(subfolderItemInfo)
else:
- logger.warning(f"Failed to get contents of subfolder {item['name']}: {subfolder_result.get('error')}")
- elif subfolder_count >= max_subfolders:
- logger.warning(f"Reached maximum subfolder limit ({max_subfolders}), skipping remaining folders")
+ logger.warning(f"Failed to get contents of subfolder {item['name']}: {subfolderResult.get('error')}")
+ elif subfolderCount >= maxSubfolders:
+ logger.warning(f"Reached maximum subfolder limit ({maxSubfolders}), skipping remaining folders")
break
- logger.info(f"Processed {subfolder_count} subfolders, total items: {len(processed_items)}")
+ logger.info(f"Processed {subfolderCount} subfolders, total items: {len(processedItems)}")
- folder_results.append({
- "siteName": site_name,
- "siteUrl": site_url,
- "itemCount": len(processed_items),
- "items": processed_items
+ folderResults.append({
+ "siteName": siteName,
+ "siteUrl": siteUrl,
+ "itemCount": len(processedItems),
+ "items": processedItems
})
- list_results.append({
+ listResults.append({
"folderPath": folderPath,
- "sitesProcessed": len(folder_results),
- "siteResults": folder_results
+ "sitesProcessed": len(folderResults),
+ "siteResults": folderResults
})
except Exception as e:
logger.error(f"Error listing folder {folderPath}: {str(e)}")
- list_results.append({
+ listResults.append({
"folderPath": folderPath,
"error": str(e),
"siteResults": []
})
# Create result data
- result_data = {
- "pathQuery": list_query,
+ resultData = {
+ "pathQuery": listQuery,
"includeSubfolders": includeSubfolders,
"sitesSearched": len(sites),
- "listResults": list_results,
+ "listResults": listResults,
"timestamp": self.services.utils.timestampGetUtc()
}
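+ # Illustrative payload of the JSON document emitted below:
+ #   {"pathQuery": "/Reports", "includeSubfolders": false, "sitesSearched": 2,
+ #    "listResults": [...], "timestamp": "2025-11-04T22:59:29Z"}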
# Use default JSON format for output
- output_extension = ".json" # Default
- output_mime_type = "application/json" # Default
+ outputExtension = ".json" # Default
+ outputMimeType = "application/json" # Default
return ActionResult(
success=True,
documents=[
ActionDocument(
- documentName=f"sharepoint_document_list_{self._format_timestamp_for_filename()}{output_extension}",
- documentData=json.dumps(result_data, indent=2),
- mimeType=output_mime_type
+ documentName=f"sharepoint_document_list_{self._format_timestamp_for_filename()}{outputExtension}",
+ documentData=json.dumps(resultData, indent=2),
+ mimeType=outputMimeType
)
]
)
diff --git a/modules/workflows/processing/core/taskPlanner.py b/modules/workflows/processing/core/taskPlanner.py
index 6a73d971..ec6d3bb7 100644
--- a/modules/workflows/processing/core/taskPlanner.py
+++ b/modules/workflows/processing/core/taskPlanner.py
@@ -37,16 +37,27 @@ class TaskPlanner:
# Check workflow status before calling AI service
checkWorkflowStopped(self.services)
-
+
# Analyze user intent to obtain cleaned user objective for planning
- # This intent will be reused for workflow-level validation in executeTask
- from modules.workflows.processing.adaptive import IntentAnalyzer
- intentAnalyzer = IntentAnalyzer(self.services)
- workflowIntent = await intentAnalyzer.analyzeUserIntent(actualUserPrompt, None)
- # Store workflow intent for reuse in executeTask (avoid redundant analysis)
- if not hasattr(workflow, '_workflowIntent'):
- workflow._workflowIntent = workflowIntent
- cleanedObjective = workflowIntent.get('primaryGoal', actualUserPrompt) if isinstance(workflowIntent, dict) else actualUserPrompt
+ # SKIP intent analysis for AUTOMATION mode - it uses predefined JSON plans
+ from modules.datamodels.datamodelChat import WorkflowModeEnum
+ workflowMode = getattr(workflow, 'workflowMode', None)
+ skipIntentionAnalysis = (workflowMode == WorkflowModeEnum.WORKFLOW_AUTOMATION)
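+ # Example (assumed): an AUTOMATION workflow whose prompt already embeds a JSON
+ # plan needs no intent pass - the embedded plan is the objective.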
+
+ if skipIntentionAnalysis:
+ logger.info("Skipping intent analysis for AUTOMATION mode - using direct user input")
+ # For automation mode, use user input directly without intent analysis
+ cleanedObjective = actualUserPrompt
+ workflowIntent = None
+ else:
+ # This intent will be reused for workflow-level validation in executeTask
+ from modules.workflows.processing.adaptive import IntentAnalyzer
+ intentAnalyzer = IntentAnalyzer(self.services)
+ workflowIntent = await intentAnalyzer.analyzeUserIntent(actualUserPrompt, None)
+ # Store workflow intent for reuse in executeTask (avoid redundant analysis)
+ if not hasattr(workflow, '_workflowIntent'):
+ workflow._workflowIntent = workflowIntent
+ cleanedObjective = workflowIntent.get('primaryGoal', actualUserPrompt) if isinstance(workflowIntent, dict) else actualUserPrompt
# Create proper context object for task planning using cleaned intent
# For task planning, we need to create a minimal TaskStep since TaskContext requires it
diff --git a/modules/workflows/processing/modes/modeAutomation.py b/modules/workflows/processing/modes/modeAutomation.py
index a4609e15..96e379db 100644
--- a/modules/workflows/processing/modes/modeAutomation.py
+++ b/modules/workflows/processing/modes/modeAutomation.py
@@ -26,51 +26,39 @@ class AutomationMode(BaseMode):
async def generateTaskPlan(self, userInput: str, workflow: ChatWorkflow) -> TaskPlan:
"""
- Generate task plan from stored template plan (no AI planning needed).
- The plan is stored in module-level cache by executeAutomation.
+ Generate task plan from JSON plan in userInput (no AI planning needed).
+ AUTOMATION mode ALWAYS requires a JSON plan to be provided in the user input.
+ The plan can be:
+ - Embedded between the startMarker/endMarker tags defined below
+ - Or as direct JSON in userInput
"""
try:
- # Get plan from module-level cache (stored by executeAutomation)
+ # AUTOMATION mode ALWAYS requires a JSON plan to be provided in userInput
+ # Try to extract plan from userInput (embedded JSON or direct JSON)
templatePlan = None
- if hasattr(self, '_templatePlanCache') and self._templatePlanCache:
- templatePlan = self._templatePlanCache.get(workflow.id)
-
- # Try module-level cache
- if not templatePlan:
- try:
- from modules.workflows.processing.modes import modeAutomation
- if hasattr(modeAutomation, '_templatePlanCache'):
- templatePlan = modeAutomation._templatePlanCache.get(workflow.id)
- if templatePlan:
- logger.info(f"Retrieved template plan from module cache for workflow {workflow.id}")
- except Exception as e:
- logger.warning(f"Could not access module cache: {str(e)}")
-
- if not templatePlan:
- # Fallback: Extract from prompt (embedded as JSON comment)
- try:
- # Look for embedded plan in prompt (between <PLAN_JSON> and </PLAN_JSON>)
- startMarker = "<PLAN_JSON>"
- endMarker = "</PLAN_JSON>"
- startIdx = userInput.find(startMarker)
- endIdx = userInput.find(endMarker)
-
- if startIdx >= 0 and endIdx > startIdx:
- planJson = userInput[startIdx + len(startMarker):endIdx].strip()
- templatePlan = json.loads(planJson)
- logger.info("Extracted template plan from embedded JSON in prompt")
- elif '{' in userInput and '"tasks"' in userInput:
- # Try parsing entire userInput as JSON (fallback)
- jsonStart = userInput.find('{')
- jsonEnd = userInput.rfind('}') + 1
- if jsonStart >= 0 and jsonEnd > jsonStart:
- templatePlan = json.loads(userInput[jsonStart:jsonEnd])
- logger.info("Parsed template plan from userInput JSON (fallback)")
- else:
- raise ValueError("No template plan found in cache or prompt")
- except (json.JSONDecodeError, ValueError) as e:
- logger.error(f"Could not parse template plan: {str(e)}")
- raise ValueError(f"Template mode requires a predefined plan, but none was found: {str(e)}")
+ try:
+ # Look for embedded plan in prompt (between <PLAN_JSON> and </PLAN_JSON>)
+ startMarker = "<PLAN_JSON>"  # assumed marker text
+ endMarker = "</PLAN_JSON>"  # assumed marker text
+ startIdx = userInput.find(startMarker)
+ endIdx = userInput.find(endMarker)
+
+ if startIdx >= 0 and endIdx > startIdx:
+ planJson = userInput[startIdx + len(startMarker):endIdx].strip()
+ templatePlan = json.loads(planJson)
+ logger.info("Extracted template plan from embedded JSON in prompt")
+ elif '{' in userInput and '"tasks"' in userInput:
+ # Try parsing entire userInput as JSON (fallback)
+ jsonStart = userInput.find('{')
+ jsonEnd = userInput.rfind('}') + 1
+ if jsonStart >= 0 and jsonEnd > jsonStart:
+ templatePlan = json.loads(userInput[jsonStart:jsonEnd])
+ logger.info("Parsed template plan from userInput JSON (fallback)")
+ else:
+ raise ValueError("Found '{' in userInput but no closing '}'; cannot parse template plan.")
+ else:
+ raise ValueError("No template plan found in userInput. AUTOMATION mode requires a JSON plan to be provided in the user input.")
+ except (json.JSONDecodeError, ValueError) as e:
+ logger.error(f"Could not parse template plan: {str(e)}")
+ raise ValueError(f"AUTOMATION mode requires a predefined JSON plan with 'tasks' array, but none was found. Please provide the plan in the user input (embedded between and or as direct JSON). Error: {str(e)}")
logger.info(f"Using template plan with {len(templatePlan.get('tasks', []))} tasks")
@@ -109,15 +97,6 @@ class AutomationMode(BaseMode):
logger.info(f"Generated task plan from template with {len(tasks)} tasks")
- # Clean up cache after retrieving plan (prevent memory leaks)
- try:
- from modules.workflows.processing.modes import modeAutomation
- if hasattr(modeAutomation, '_templatePlanCache') and workflow.id in modeAutomation._templatePlanCache:
- del modeAutomation._templatePlanCache[workflow.id]
- logger.debug(f"Cleaned up template plan cache for workflow {workflow.id}")
- except Exception as e:
- logger.warning(f"Could not clean up template plan cache: {str(e)}")
-
return taskPlan
except Exception as e:
diff --git a/modules/workflows/workflowManager.py b/modules/workflows/workflowManager.py
index 363a42e6..6751bde6 100644
--- a/modules/workflows/workflowManager.py
+++ b/modules/workflows/workflowManager.py
@@ -204,118 +204,129 @@ class WorkflowManager:
}
# Analyze the user's input to detect language, normalize request, extract intent, and offload bulky context into documents
+ # SKIP user intention analysis for AUTOMATION mode - it uses predefined JSON plans
createdDocs = []
+ workflowMode = getattr(workflow, 'workflowMode', None)
+ skipIntentionAnalysis = (workflowMode == WorkflowModeEnum.WORKFLOW_AUTOMATION)
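+ # Example (assumed): in AUTOMATION mode the prompt typically carries the JSON plan
+ # itself, so language detection and normalization would add latency without benefit.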
- try:
- analyzerPrompt = (
- "You are an input analyzer. From the user's message, perform ALL of the following in one pass:\n"
- "1) detectedLanguage: detect ISO 639-1 language code (e.g., de, en).\n"
- "2) normalizedRequest: full, explicit restatement of the user's request in the detected language; do NOT summarize; preserve ALL constraints and details.\n"
- "3) intent: concise single-paragraph core request in the detected language for high-level routing.\n"
- "4) contextItems: supportive data blocks to attach as separate documents if significantly larger than the intent (large literal content, long lists/tables, code/JSON blocks, transcripts, CSV fragments, detailed specs). Keep URLs in the intent unless they embed large pasted content.\n\n"
- "Rules:\n"
- "- If total content (intent + data) is < 10% of model max tokens, do not extract; return empty contextItems and keep intent compact and self-contained.\n"
- "- If content exceeds that threshold, move bulky parts into contextItems; keep intent short and clear.\n"
- "- Preserve critical references (URLs, filenames) in intent.\n"
- "- Normalize to the primary detected language if mixed-language.\n\n"
- "Return ONLY JSON (no markdown) with this shape:\n"
- "{\n"
- " \"detectedLanguage\": \"de|en|fr|it|...\",\n"
- " \"normalizedRequest\": \"Full explicit instruction in detected language\",\n"
- " \"intent\": \"Concise normalized request...\",\n"
- " \"contextItems\": [\n"
- " {\n"
- " \"title\": \"User context 1\",\n"
- " \"mimeType\": \"text/plain\",\n"
- " \"content\": \"Full extracted content block here\"\n"
- " }\n"
- " ]\n"
- "}\n\n"
- f"User message:\n{self.services.utils.sanitizePromptContent(userInput.prompt, 'userinput')}"
- )
-
- # Call AI analyzer (planning call - will use static parameters)
- aiResponse = await self.services.ai.callAiPlanning(
- prompt=analyzerPrompt,
- placeholders=None,
- debugType="userintention"
- )
-
+ if skipIntentionAnalysis:
+ logger.info("Skipping user intention analysis for AUTOMATION mode - using direct user input")
+ # For automation mode, use user input directly without AI analysis
+ self.services.currentUserPrompt = userInput.prompt
detectedLanguage = None
normalizedRequest = None
intentText = userInput.prompt
contextItems = []
-
- # Parse analyzer response (JSON expected)
+ else:
try:
- jsonStart = aiResponse.find('{') if aiResponse else -1
- jsonEnd = aiResponse.rfind('}') + 1 if aiResponse else 0
- if jsonStart != -1 and jsonEnd > jsonStart:
- parsed = json.loads(aiResponse[jsonStart:jsonEnd])
- detectedLanguage = parsed.get('detectedLanguage') or None
- normalizedRequest = parsed.get('normalizedRequest') or None
- if parsed.get('intent'):
- intentText = parsed.get('intent')
- contextItems = parsed.get('contextItems') or []
- except Exception:
+ analyzerPrompt = (
+ "You are an input analyzer. From the user's message, perform ALL of the following in one pass:\n"
+ "1) detectedLanguage: detect ISO 639-1 language code (e.g., de, en).\n"
+ "2) normalizedRequest: full, explicit restatement of the user's request in the detected language; do NOT summarize; preserve ALL constraints and details.\n"
+ "3) intent: concise single-paragraph core request in the detected language for high-level routing.\n"
+ "4) contextItems: supportive data blocks to attach as separate documents if significantly larger than the intent (large literal content, long lists/tables, code/JSON blocks, transcripts, CSV fragments, detailed specs). Keep URLs in the intent unless they embed large pasted content.\n\n"
+ "Rules:\n"
+ "- If total content (intent + data) is < 10% of model max tokens, do not extract; return empty contextItems and keep intent compact and self-contained.\n"
+ "- If content exceeds that threshold, move bulky parts into contextItems; keep intent short and clear.\n"
+ "- Preserve critical references (URLs, filenames) in intent.\n"
+ "- Normalize to the primary detected language if mixed-language.\n\n"
+ "Return ONLY JSON (no markdown) with this shape:\n"
+ "{\n"
+ " \"detectedLanguage\": \"de|en|fr|it|...\",\n"
+ " \"normalizedRequest\": \"Full explicit instruction in detected language\",\n"
+ " \"intent\": \"Concise normalized request...\",\n"
+ " \"contextItems\": [\n"
+ " {\n"
+ " \"title\": \"User context 1\",\n"
+ " \"mimeType\": \"text/plain\",\n"
+ " \"content\": \"Full extracted content block here\"\n"
+ " }\n"
+ " ]\n"
+ "}\n\n"
+ f"User message:\n{self.services.utils.sanitizePromptContent(userInput.prompt, 'userinput')}"
+ )
+
+ # Call AI analyzer (planning call - will use static parameters)
+ aiResponse = await self.services.ai.callAiPlanning(
+ prompt=analyzerPrompt,
+ placeholders=None,
+ debugType="userintention"
+ )
+
+ detectedLanguage = None
+ normalizedRequest = None
+ intentText = userInput.prompt
contextItems = []
- # Update services state
- if detectedLanguage and isinstance(detectedLanguage, str):
- self._setUserLanguage(detectedLanguage)
+ # Parse analyzer response (JSON expected)
try:
- setattr(self.services, 'currentUserLanguage', detectedLanguage)
+ jsonStart = aiResponse.find('{') if aiResponse else -1
+ jsonEnd = aiResponse.rfind('}') + 1 if aiResponse else 0
+ if jsonStart != -1 and jsonEnd > jsonStart:
+ parsed = json.loads(aiResponse[jsonStart:jsonEnd])
+ detectedLanguage = parsed.get('detectedLanguage') or None
+ normalizedRequest = parsed.get('normalizedRequest') or None
+ if parsed.get('intent'):
+ intentText = parsed.get('intent')
+ contextItems = parsed.get('contextItems') or []
+ except Exception:
+ contextItems = []
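+ # Fail open: a malformed analyzer response leaves the raw prompt in use
+ # with no extracted context documents.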
+
+ # Update services state
+ if detectedLanguage and isinstance(detectedLanguage, str):
+ self._setUserLanguage(detectedLanguage)
+ try:
+ setattr(self.services, 'currentUserLanguage', detectedLanguage)
+ except Exception:
+ pass
+ self.services.currentUserPrompt = intentText or userInput.prompt
+ try:
+ if normalizedRequest:
+ setattr(self.services, 'currentUserPromptNormalized', normalizedRequest)
+ if contextItems is not None:
+ setattr(self.services, 'currentUserContextItems', contextItems)
except Exception:
pass
- self.services.currentUserPrompt = intentText or userInput.prompt
- try:
- if normalizedRequest:
- setattr(self.services, 'currentUserPromptNormalized', normalizedRequest)
- if contextItems is not None:
- setattr(self.services, 'currentUserContextItems', contextItems)
- except Exception:
- pass
+ # Create documents for context items
+ if contextItems and isinstance(contextItems, list):
+ for idx, item in enumerate(contextItems):
+ try:
+ title = item.get('title') if isinstance(item, dict) else None
+ mime = item.get('mimeType') if isinstance(item, dict) else None
+ content = item.get('content') if isinstance(item, dict) else None
+ if not content:
+ continue
+ fileName = (title or f"user_context_{idx+1}.txt").strip()
+ mimeType = (mime or "text/plain").strip()
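+ # e.g. item == {"title": "User context 1", "mimeType": "text/plain",
+ #               "content": "<large pasted block>"} per the analyzer contract above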
- # Create documents for context items
- if contextItems and isinstance(contextItems, list):
- for idx, item in enumerate(contextItems):
- try:
- title = item.get('title') if isinstance(item, dict) else None
- mime = item.get('mimeType') if isinstance(item, dict) else None
- content = item.get('content') if isinstance(item, dict) else None
- if not content:
+ # Neutralize content before storing if neutralization is enabled
+ contentBytes = content.encode('utf-8')
+ contentBytes = await self._neutralizeContentIfEnabled(contentBytes, mimeType)
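+ # _neutralizeContentIfEnabled is assumed to return the bytes unchanged
+ # when neutralization is disabled.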
+
+ # Create file in component storage
+ fileItem = self.services.interfaceDbComponent.createFile(
+ name=fileName,
+ mimeType=mimeType,
+ content=contentBytes
+ )
+ # Persist file data
+ self.services.interfaceDbComponent.createFileData(fileItem.id, contentBytes)
+
+ # Collect file info
+ fileInfo = self.services.chat.getFileInfo(fileItem.id)
+ from modules.datamodels.datamodelChat import ChatDocument
+ doc = ChatDocument(
+ fileId=fileItem.id,
+ fileName=fileInfo.get("fileName", fileName) if fileInfo else fileName,
+ fileSize=fileInfo.get("size", len(contentBytes)) if fileInfo else len(contentBytes),
+ mimeType=fileInfo.get("mimeType", mimeType) if fileInfo else mimeType
+ )
+ createdDocs.append(doc)
+ except Exception:
continue
- fileName = (title or f"user_context_{idx+1}.txt").strip()
- mimeType = (mime or "text/plain").strip()
-
- # Neutralize content before storing if neutralization is enabled
- contentBytes = content.encode('utf-8')
- contentBytes = await self._neutralizeContentIfEnabled(contentBytes, mimeType)
-
- # Create file in component storage
- fileItem = self.services.interfaceDbComponent.createFile(
- name=fileName,
- mimeType=mimeType,
- content=contentBytes
- )
- # Persist file data
- self.services.interfaceDbComponent.createFileData(fileItem.id, contentBytes)
-
- # Collect file info
- fileInfo = self.services.chat.getFileInfo(fileItem.id)
- from modules.datamodels.datamodelChat import ChatDocument
- doc = ChatDocument(
- fileId=fileItem.id,
- fileName=fileInfo.get("fileName", fileName) if fileInfo else fileName,
- fileSize=fileInfo.get("size", len(contentBytes)) if fileInfo else len(contentBytes),
- mimeType=fileInfo.get("mimeType", mimeType) if fileInfo else mimeType
- )
- createdDocs.append(doc)
- except Exception:
- continue
- except Exception as e:
- logger.warning(f"Prompt analysis failed or skipped: {str(e)}")
+ except Exception as e:
+ logger.warning(f"Prompt analysis failed or skipped: {str(e)}")
# Process user-uploaded documents (fileIds) and combine with context documents
if userInput.listFileId: