From a1ebcac5887464c19b830355c43048b47a534c34 Mon Sep 17 00:00:00 2001
From: ValueOn AG
Date: Thu, 4 Sep 2025 23:40:07 +0200
Subject: [PATCH] Sharepoint integration completed
---
modules/chat/handling/handlingTasks.py | 2 +-
modules/chat/handling/promptFactory.py | 93 ++-
modules/interfaces/interfaceChatModel.py | 36 +
modules/methods/methodDocument.py | 35 +-
modules/methods/methodOutlook.py | 5 +-
modules/methods/methodSharepoint.py | 873 ++++++++++++++---------
test_graph_search.py | 311 ++++++++
7 files changed, 992 insertions(+), 363 deletions(-)
create mode 100644 test_graph_search.py
diff --git a/modules/chat/handling/handlingTasks.py b/modules/chat/handling/handlingTasks.py
index 88465b0e..feb7b335 100644
--- a/modules/chat/handling/handlingTasks.py
+++ b/modules/chat/handling/handlingTasks.py
@@ -792,7 +792,7 @@ class HandlingTasks:
# Add specific error details if available
if retry_context and hasattr(retry_context, 'previous_review_result') and retry_context.previous_review_result:
- reason = retry_context.previous_review_result.get('reason', '')
+ reason = retry_context.previous_review_result.reason or ''
if reason and reason != "Task failed after all retries.":
error_message += f"{reason}\n\n"
diff --git a/modules/chat/handling/promptFactory.py b/modules/chat/handling/promptFactory.py
index 640aebba..ada386ba 100644
--- a/modules/chat/handling/promptFactory.py
+++ b/modules/chat/handling/promptFactory.py
@@ -228,20 +228,81 @@ async def createActionDefinitionPrompt(context: TaskContext, service) -> str:
connRefs = service.getConnectionReferenceList()
- # Debug logging for connections
- logging.debug(f"Connection references retrieved: {connRefs}")
- logging.debug(f"Connection references type: {type(connRefs)}")
- logging.debug(f"Connection references length: {len(connRefs) if connRefs else 0}")
-
- # Log document availability for debugging
- logging.debug(f"Enhanced document context length: {len(available_documents_str)}")
-
- available_methods_str = ''
+ # Create a structured JSON format for better AI parsing
+ # This replaces the old hard-to-read format with a clean JSON structure
+ # that the AI can easily parse and understand
+ available_methods_json = {}
for method, actions in method_actions.items():
- available_methods_str += f"- {method}:\n"
+ available_methods_json[method] = {}
+ # Get the method instance for accessing docstrings
+ method_instance = service.methods.get(method, {}).get('instance') if hasattr(service, 'methods') else None
+
for action, sig in actions:
- available_methods_str += f" - {action}: {sig}\n"
-
+ # Parse the signature to extract parameters
+ if '(' in sig and ')' in sig:
+ # Extract parameters from signature
+ params_start = sig.find('(')
+ params_end = sig.find(')')
+ params_str = sig[params_start+1:params_end]
+
+                # Parse parameters directly from the docstring, which is simpler and more reliable than parsing the signature string
+ parameters = []
+
+ # Get the actual function's docstring
+ if method_instance and hasattr(method_instance, action):
+ func = getattr(method_instance, action)
+ if hasattr(func, '__doc__') and func.__doc__:
+ docstring = func.__doc__
+
+ # Parse Parameters section from docstring
+ lines = docstring.split('\n')
+ in_parameters = False
+ for i, line in enumerate(lines):
+ original_line = line
+ line = line.strip()
+
+ if line == 'Parameters:':
+ in_parameters = True
+ continue
+ elif in_parameters and line and not original_line.startswith(' ') and not original_line.startswith('\t'):
+ # End of parameters section
+ break
+ elif in_parameters and (original_line.startswith(' ') or original_line.startswith('\t')):
+ # This is a parameter line - already stripped
+ # Format: "paramName (type): description"
+ if ':' in line:
+ # Find the colon that separates param from description
+ colon_pos = line.find(':')
+ param_part = line[:colon_pos].strip()
+ description = line[colon_pos+1:].strip()
+
+ # Parse parameter name and type
+ if '(' in param_part and ')' in param_part:
+ param_name = param_part.split('(')[0].strip()
+ type_part = param_part[param_part.find('(')+1:param_part.find(')')].strip()
+
+ # Check if optional
+ is_optional = 'optional' in type_part
+ param_type = type_part.replace('optional', '').strip().rstrip(',').strip()
+
+ parameters.append({
+ "name": param_name,
+ "type": param_type,
+ "description": description,
+ "required": not is_optional
+ })
+
+ available_methods_json[method][action] = {
+ "signature": sig,
+ "parameters": parameters,
+ "description": f"{method}.{action} action"
+ }
+
+ # Convert to a compact, AI-friendly format
+ available_methods_str = f"""
+AVAILABLE ACTIONS (JSON format for better AI parsing):
+{json.dumps(available_methods_json, indent=1, separators=(',', ':'))}
+"""
retry_context = ""
if context.retry_count and context.retry_count > 0:
retry_context = f"""
@@ -264,10 +325,10 @@ Previous action results that failed or were incomplete:
if context.previous_review_result:
retry_context += f"""
Previous review feedback:
-- Status: {context.previous_review_result.get('status', 'unknown') or 'unknown'}
-- Reason: {context.previous_review_result.get('reason', 'No reason provided') or 'No reason provided'}
-- Quality Score: {context.previous_review_result.get('quality_score', 0) or 0}/10
-- Unmet Criteria: {', '.join(context.previous_review_result.get('unmet_criteria', []) or [])}
+- Status: {context.previous_review_result.status or 'unknown'}
+- Reason: {context.previous_review_result.reason or 'No reason provided'}
+- Quality Score: {context.previous_review_result.quality_score or 0}/10
+- Unmet Criteria: {', '.join(context.previous_review_result.unmet_criteria or [])}
"""
# Use Pydantic model directly - no need for getattr
diff --git a/modules/interfaces/interfaceChatModel.py b/modules/interfaces/interfaceChatModel.py
index 769e94c9..629b59ee 100644
--- a/modules/interfaces/interfaceChatModel.py
+++ b/modules/interfaces/interfaceChatModel.py
@@ -845,4 +845,40 @@ register_model_labels(
}
)
+# ===== Centralized AI Call Response Models =====
+
+class AiResult(BaseModel, ModelMixin):
+ """Document result from centralized AI call"""
+ filename: str = Field(description="Name of the result document")
+ mimetype: str = Field(description="MIME type of the result document")
+ content: str = Field(description="Content of the result document")
+
+# Register labels for AiResult
+register_model_labels(
+ "AiResult",
+ {"en": "Result Document", "fr": "Document de résultat"},
+ {
+ "filename": {"en": "Filename", "fr": "Nom de fichier"},
+ "mimetype": {"en": "MIME Type", "fr": "Type MIME"},
+ "content": {"en": "Content", "fr": "Contenu"}
+ }
+)
+
+class CentralizedAiResponse(BaseModel, ModelMixin):
+ """Standardized response format from centralized AI calls"""
+ aiResults: List[AiResult] = Field(default_factory=list, description="List of result documents")
+ success: bool = Field(description="Whether the AI call was successful")
+ error: Optional[str] = Field(None, description="Error message if the call failed")
+
+# Register labels for CentralizedAiResponse
+register_model_labels(
+ "CentralizedAiResponse",
+ {"en": "Centralized AI Response", "fr": "Réponse IA centralisée"},
+ {
+ "aiResults": {"en": "Result Documents", "fr": "Documents de résultat"},
+ "success": {"en": "Success", "fr": "Succès"},
+ "error": {"en": "Error", "fr": "Erreur"}
+ }
+)
+
diff --git a/modules/methods/methodDocument.py b/modules/methods/methodDocument.py
index 72d6d81b..d7dae427 100644
--- a/modules/methods/methodDocument.py
+++ b/modules/methods/methodDocument.py
@@ -618,23 +618,24 @@ class MethodDocument(MethodBase):
# Create AI prompt for comprehensive report generation using user's prompt
combinedContent = "\n\n".join(allContent)
aiPrompt = f"""
- {prompt}
-
- Report Title: {title}
-
- Additional Requirements:
- 1. Create a professional, well-formatted HTML report
- 2. Include an executive summary at the beginning
- 3. Organize information logically with clear sections
- 4. Highlight key findings and insights
- 5. Include relevant data, statistics, and conclusions
- 6. Use proper HTML formatting with headers, lists, and styling
- 7. Make it readable and professional
-
- Document Content:
- {combinedContent}
-
- Generate a complete HTML report that addresses the user's specific requirements and integrates all the information into a cohesive, professional document.
+{prompt}
+
+Report Title: {title}
+
+Additional Requirements:
+1. Create a professional, well-formatted HTML report
+2. Include an executive summary at the beginning
+3. Organize information logically with clear sections
+4. Highlight key findings and insights
+5. Include relevant data, statistics, and conclusions
+6. Use proper HTML formatting with headers, lists, and styling
+7. Make it readable and professional
+
+Document Content:
+---START OF DOCUMENT CONTENT-----------------------------------------------
+{combinedContent}
+---END OF DOCUMENT CONTENT-----------------------------------------------
+Generate a complete HTML report that addresses the user's specific requirements and integrates all the information into a cohesive, professional document.
"""
# Call AI to generate the report
diff --git a/modules/methods/methodOutlook.py b/modules/methods/methodOutlook.py
index 2c2c90ce..8a2b5b7d 100644
--- a/modules/methods/methodOutlook.py
+++ b/modules/methods/methodOutlook.py
@@ -379,10 +379,7 @@ class MethodOutlook(MethodBase):
connectionReference (str): Reference to the Microsoft connection
folder (str, optional): Email folder to read from (default: "Inbox")
limit (int, optional): Maximum number of emails to read (default: 10)
- filter (str, optional): Filter criteria for emails. Supports:
- - Email address (e.g., "user@domain.com") - filters by sender
- - Search queries (e.g., "from:user@domain.com", "subject:meeting")
- - Text content (e.g., "project update") - searches in subject
+ filter (str, optional): Filter criteria for emails. Supports: Email address (e.g., "user@domain.com") - filters by sender, Search queries (e.g., "from:user@domain.com", "subject:meeting"), Text content (e.g., "project update") - searches in subject
expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description
"""
try:
diff --git a/modules/methods/methodSharepoint.py b/modules/methods/methodSharepoint.py
index 4312bf58..19af4c1e 100644
--- a/modules/methods/methodSharepoint.py
+++ b/modules/methods/methodSharepoint.py
@@ -167,9 +167,16 @@ class MethodSharepoint(MethodBase):
"""
Parse searchQuery to extract path, search terms, search type, and search options.
+ CRITICAL: NEVER convert words to paths! Words stay as search terms.
+ - "root document lesson" → fileQuery="root document lesson" (NOT "/root/document/lesson")
+ - "root, gose" → fileQuery="root, gose" (NOT "/root/gose")
+ - "druckersteuerung eskalation logobject" → fileQuery="druckersteuerung eskalation logobject"
+
Parameters:
searchQuery (str): Enhanced search query with options:
- "budget" -> pathQuery="*", fileQuery="budget", searchType="all", options={}
+ - "root document lesson" -> pathQuery="*", fileQuery="root document lesson", searchType="all", options={}
+ - "root, gose" -> pathQuery="*", fileQuery="root, gose", searchType="all", options={}
- "/Documents:budget" -> pathQuery="/Documents", fileQuery="budget", searchType="all", options={}
- "files:budget" -> pathQuery="*", fileQuery="budget", searchType="files", options={}
- "folders:DELTA" -> pathQuery="*", fileQuery="DELTA", searchType="folders", options={}
@@ -187,6 +194,10 @@ class MethodSharepoint(MethodBase):
searchQuery = searchQuery.strip()
searchOptions = {}
+
+ # CRITICAL: Do NOT convert space-separated or comma-separated words to paths!
+ # "root document lesson" should stay as "root document lesson", NOT "/root/document/lesson"
+ # "root, gose" should stay as "root, gose", NOT "/root/gose"
# Check for search type specification (files:, folders:, all:) FIRST
searchType = "all" # Default
@@ -277,12 +288,14 @@ class MethodSharepoint(MethodBase):
return searchQuery, "*", searchType, searchOptions
else:
- # It's a search term only
+ # It's a search term only - keep words as-is, do NOT convert to paths
+ # "root document lesson" stays as "root document lesson"
+ # "root, gose" stays as "root, gose"
return "*", searchQuery, searchType, searchOptions
except Exception as e:
logger.error(f"Error parsing searchQuery '{searchQuery}': {str(e)}")
- return "*", "*", "all", {}
+ raise ValueError(f"Failed to parse searchQuery '{searchQuery}': {str(e)}")
def _resolvePathQuery(self, pathQuery: str) -> List[str]:
"""
@@ -293,8 +306,9 @@ class MethodSharepoint(MethodBase):
- Direct paths (e.g., "/Documents/Project1")
- Wildcards (e.g., "/Documents/*")
- Multiple paths separated by semicolons (e.g., "/Docs; /Files")
- - Relative paths (e.g., "Project1" -> resolved to default folder)
+ - Single word relative paths (e.g., "Project1" -> resolved to default folder)
- Empty string or "*" for global search
+ - Space-separated words are treated as search terms, NOT folder paths
Returns:
List[str]: List of resolved paths
@@ -314,9 +328,21 @@ class MethodSharepoint(MethodBase):
# Handle absolute paths
elif raw_path.startswith('/'):
resolved_paths.append(raw_path)
- # Handle relative paths - prepend default folder
- else:
+ # Handle single word relative paths - prepend default folder
+ # BUT NOT space-separated words (those are search terms, not paths)
+ elif ' ' not in raw_path:
resolved_paths.append(f"/Documents/{raw_path}")
+ else:
+ # Check if this looks like a path (has path separators) or search terms
+ if '\\' in raw_path or '/' in raw_path:
+ # This looks like a path with spaces in folder names - treat as valid path
+ resolved_paths.append(raw_path)
+ logger.info(f"Path with spaces '{raw_path}' treated as valid folder path")
+ else:
+ # Space-separated words without path separators are search terms
+ # Return as "*" to search globally
+ logger.info(f"Space-separated words '{raw_path}' treated as search terms, not folder path")
+ resolved_paths.append("*")
# Remove duplicates while preserving order
seen = set()
@@ -331,7 +357,7 @@ class MethodSharepoint(MethodBase):
except Exception as e:
logger.error(f"Error resolving pathQuery '{pathQuery}': {str(e)}")
- return ["*"] # Fallback to global search
+ raise ValueError(f"Failed to resolve pathQuery '{pathQuery}': {str(e)}")
def _parseSiteUrl(self, siteUrl: str) -> Dict[str, str]:
"""Parse SharePoint site URL to extract hostname and site path"""
@@ -423,74 +449,28 @@ class MethodSharepoint(MethodBase):
except Exception as e:
logger.error(f"Error getting site ID: {str(e)}")
return ""
-
+
+
@action
async def findDocumentPath(self, parameters: Dict[str, Any]) -> ActionResult:
"""
- Find documents by searching their content, names, or metadata across all accessible SharePoint sites
+ Find documents/folders by searching their NAMES across SharePoint sites.
Parameters:
- connectionReference (str): Reference to the Microsoft connection
- site (str, optional): SharePoint site name or hint to search within (e.g., "SSS", "KM XYZ"). If not provided, searches all accessible sites
- searchQuery (str): [path:][type:][mode:]query - Enhanced search syntax:
- - "budget", "/Documents:budget", "files:budget", "folders:DELTA", "*"
- - "exact:\"Operations 2025\"" - exact phrase matching
- - "regex:^Operations.*2025$" - regex pattern matching
- - "case:DELTA" - case-sensitive search
- - "and:DELTA AND 2025 Mars AND Group" - all terms must be present
- - "folders:and:DELTA AND 2025 Mars AND Group" - combined options
- - Site hint support: "folders:site=KM LayerFinance;name=Work" or "folders:site:KM LayerFinance Work"
- - For quoted names: "folders:site=KM;name=\"page staten\""
- - For folder search: words like "part1 part2" will search for folders containing BOTH terms
- Note: For storage locations, use "folders:" prefix. When searching for folders, multiple words are treated as search terms that must all appear in the folder name or path.
- Site hints help narrow search to specific SharePoint sites for better accuracy.
- resultDocument (str, optional): JSON result document from previous findDocumentPath action to refine search
- searchScope (str, optional): Search scope - options: "all" (default), "documents" (files only), "pages" (SharePoint pages only)
- maxResults (int, optional): Maximum number of results to return (default: 100)
- expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description
+ connectionReference (str): Microsoft connection reference
+ site (str, optional): Site hint (e.g., "SSS", "KM XYZ")
+ searchQuery (str): Search query - "budget", "folders:alpha", "files:budget", "/Documents/Project1", "namepart1 namepart2 namepart3". Use "folders:" prefix when user wants to store files or find folders
+ maxResults (int, optional): Max results (default: 100)
"""
try:
connectionReference = parameters.get("connectionReference")
site = parameters.get("site")
searchQuery = parameters.get("searchQuery", "*")
- resultDocument = parameters.get("resultDocument")
- searchScope = parameters.get("searchScope", "all")
maxResults = parameters.get("maxResults", 100)
- expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
if not connectionReference:
return ActionResult.isFailure(error="Connection reference is required")
- # If resultDocument is provided, extract site information to refine search
- if resultDocument:
- try:
- import json
- # Resolve the reference label to get the actual document list
- document_list = self.service.getChatDocumentsFromDocumentList([resultDocument])
- if not document_list or len(document_list) == 0:
- return ActionResult.isFailure(error=f"No document list found for reference: {resultDocument}")
-
- # Get the first document's content (which should be the JSON)
- first_document = document_list[0]
- file_data = self.service.getFileData(first_document.fileId)
- if not file_data:
- return ActionResult.isFailure(error=f"No file data found for document: {resultDocument}")
-
- # Parse the JSON content
- result_data = json.loads(file_data)
- found_documents = result_data.get("foundDocuments", [])
-
- # Extract site information from the result for context
- if found_documents:
- # Use the site information from the previous search to refine current search
- # This could be used to limit search to specific sites or add context
- logger.info(f"Refining search using {len(found_documents)} documents from previous result")
-
- except json.JSONDecodeError as e:
- return ActionResult.isFailure(error=f"Invalid JSON in resultDocument: {str(e)}")
- except Exception as e:
- return ActionResult.isFailure(error=f"Error resolving resultDocument reference: {str(e)}")
-
# Parse searchQuery to extract path, search terms, search type, and options
pathQuery, fileQuery, searchType, searchOptions = self._parseSearchQuery(searchQuery)
@@ -498,17 +478,22 @@ class MethodSharepoint(MethodBase):
if not connection:
return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
- # Discover all SharePoint sites accessible to the user
- sites = await self._discoverSharePointSites(connection["accessToken"])
- if not sites:
- return ActionResult.isFailure(error="No SharePoint sites found or accessible")
-
- # Filter sites by site parameter if provided
+ # Discover SharePoint sites - use targeted approach when site parameter is provided
if site:
- sites = self._filter_sites_by_hint(sites, site)
+ # When site parameter is provided, discover all sites first, then filter
+ all_sites = await self._discoverSharePointSites(connection["accessToken"])
+ if not all_sites:
+ return ActionResult.isFailure(error="No SharePoint sites found or accessible")
+
+ sites = self._filter_sites_by_hint(all_sites, site)
logger.info(f"Filtered sites by site parameter: '{site}' -> {len(sites)} sites")
if not sites:
return ActionResult.isFailure(error=f"No SharePoint sites found matching '{site}'")
+ else:
+ # No site parameter - discover all sites
+ sites = await self._discoverSharePointSites(connection["accessToken"])
+ if not sites:
+ return ActionResult.isFailure(error="No SharePoint sites found or accessible")
# Resolve path query into search paths
search_paths = self._resolvePathQuery(pathQuery)
@@ -518,25 +503,8 @@ class MethodSharepoint(MethodBase):
found_documents = []
all_sites_searched = []
- # Apply site hint filtering if provided in search options
+        # Use the discovered site list directly (mirrors the approach validated in test_graph_search.py) - no complex filtering
site_scoped_sites = sites
- strict_folder_name: Optional[str] = None
-
- # First check for explicit site hint in search options
- if searchOptions.get("site_hint"):
- site_scoped_sites = self._filter_sites_by_hint(sites, searchOptions["site_hint"])
- logger.info(f"Filtered sites by explicit site hint: '{searchOptions['site_hint']}' -> {len(site_scoped_sites)} sites")
-
- # Heuristic: if user searched for folders with pattern " ",
- # prefer filtering sites by the first token(s) and match folder name exactly for the last token
- elif searchType == "folders" and fileQuery and ' ' in fileQuery and not searchOptions.get("regex_match"):
- # treat last token as folder name, preceding tokens combined as site hint
- tokens = [t for t in fileQuery.split(' ') if t]
- if len(tokens) >= 2:
- strict_folder_name = tokens[-1]
- site_hint = ' '.join(tokens[:-1])
- site_scoped_sites = self._filter_sites_by_hint(sites, site_hint)
- logger.info(f"Filtered sites by heuristic site hint: '{site_hint}' -> {len(site_scoped_sites)} sites")
for site in site_scoped_sites:
site_id = site["id"]
@@ -554,38 +522,20 @@ class MethodSharepoint(MethodBase):
# For specific queries, use different approaches based on search type
if searchType == "folders":
# Use Microsoft Graph unified search endpoint: POST /search/query
- # Scope by all drives in the site (e.g., Shared Documents, Documents, language variants)
+ # This approach works reliably for finding folders
try:
import json
- # Discover drives for the site to build precise path scopes
- drives_resp = await self._makeGraphApiCall(connection["accessToken"], f"sites/{site_id}/drives")
- path_filters = []
- if not ("error" in drives_resp):
- for drv in (drives_resp.get("value", []) or []):
- web_url = (drv.get("webUrl") or "").rstrip('/') + '/'
- if web_url:
- # path:"/"
- path_filters.append(f"path:\"{web_url}\"")
- if not path_filters:
- # fallback to site root if no drives found
- scoped_path = site_url.rstrip('/') + '/'
- path_filters = [f"path:\"{scoped_path}\""]
-
- # Use KQL syntax for folder search
+
+ # Use Microsoft Graph Search API syntax (simple term search only)
terms = [t for t in fileQuery.split() if t.strip()]
+
if len(terms) > 1:
- # Multiple terms: first search for folders containing ANY of the terms (OR)
- # This broadens the search to catch all potential matches
- name_terms = " OR ".join([f"foldername:*{t}*" for t in terms])
- name_filter = f"({name_terms})"
+ # Multiple terms: search for ALL terms (AND) - more specific results
+ query_string = " AND ".join(terms)
else:
- # Single term: search for folders containing the term
- single_term = terms[0] if terms else fileQuery
- name_filter = f"foldername:*{single_term}*"
-
- # Use KQL syntax with isFolder:true
- query_string = f"isFolder:true AND {name_filter}"
- logger.info(f"Using KQL query: {query_string}")
+ # Single term: search for the term
+ query_string = terms[0] if terms else fileQuery
+ logger.info(f"Using search query for folders: {query_string}")
payload = {
"requests": [
@@ -598,14 +548,15 @@ class MethodSharepoint(MethodBase):
]
}
logger.info(f"Using unified search API for folders with queryString: {query_string}")
- logger.info(f"Payload: {json.dumps(payload, indent=2)}")
+
+ # Use global search endpoint (site-specific search not available)
unified_result = await self._makeGraphApiCall(
connection["accessToken"],
"search/query",
method="POST",
data=json.dumps(payload).encode("utf-8")
)
- logger.info(f"Unified search response: {json.dumps(unified_result, indent=2)}")
+
if "error" in unified_result:
logger.warning(f"Unified search failed for site {site_name}: {unified_result['error']}")
items = []
@@ -618,18 +569,32 @@ class MethodSharepoint(MethodBase):
resource = hit.get("resource")
if resource:
items.append(resource)
- logger.info(f"Unified search returned {len(items)} items (pre-filter)")
-
- # Post-filter: For multiple terms, filter results to only include folders that contain ALL terms
- if len(terms) > 1:
- filtered_items = []
+
+ logger.info(f"Unified search returned {len(items)} items (pre-filter)")
+
+ # Apply our improved folder detection logic
+ folder_items = []
for item in items:
- folder_name = item.get("name", "").lower()
- # Check if folder name contains ALL search terms
- if all(term.lower() in folder_name for term in terms):
- filtered_items.append(item)
- items = filtered_items
- logger.info(f"Post-filtered to {len(items)} items containing all terms: {terms}")
+ resource = item
+
+                        # Use the same folder-detection logic validated in test_graph_search.py
+ is_folder = False
+ if 'folder' in resource:
+ is_folder = True
+ else:
+ # Try to detect by URL pattern or other indicators
+ web_url = resource.get('webUrl', '')
+ name = resource.get('name', '')
+
+ # Check if URL has no file extension and looks like a folder path
+ if '.' not in name and ('/' in web_url or '\\' in web_url):
+ is_folder = True
+
+ if is_folder:
+ folder_items.append(item)
+
+ items = folder_items
+ logger.info(f"Filtered to {len(items)} folders using improved detection logic")
except Exception as e:
logger.error(f"Error performing unified folder search: {str(e)}")
@@ -652,79 +617,60 @@ class MethodSharepoint(MethodBase):
for item in items:
item_name = item.get("name", "")
- item_type = "folder" if "folder" in item else "file"
+
+ # Use improved folder detection logic
+ is_folder = False
+ if 'folder' in item:
+ is_folder = True
+ else:
+ # Try to detect by URL pattern or other indicators
+ web_url = item.get('webUrl', '')
+ name = item.get('name', '')
+
+ # Check if URL has no file extension and looks like a folder path
+ if '.' not in name and ('/' in web_url or '\\' in web_url):
+ is_folder = True
+
+ item_type = "folder" if is_folder else "file"
item_path = item.get("parentReference", {}).get("path", "")
logger.debug(f"Processing {item_type}: '{item_name}' at path: '{item_path}'")
- # Filter by search scope if specified
- if searchScope == "documents" and "folder" in item:
- logger.debug(f"Skipping folder '{item_name}' due to documents scope")
- continue
- elif searchScope == "pages" and "file" in item and not item["file"].get("mimeType", "").startswith("text/html"):
- logger.debug(f"Skipping file '{item_name}' due to pages scope")
- continue
+                    # Simple filtering (as validated in test_graph_search.py): only check the requested search type
+ if searchType == "files" and is_folder:
+ continue # Skip folders when searching for files
+ elif searchType == "folders" and not is_folder:
+ continue # Skip files when searching for folders
- # Filter by search type (files, folders, all)
- if searchType == "files" and "folder" in item:
- logger.debug(f"Skipping folder '{item_name}' due to files search type")
- continue
- elif searchType == "folders" and "file" in item:
- logger.debug(f"Skipping file '{item_name}' due to folders search type")
- continue
-
- # Enhanced post-filtering based on search options
- if fileQuery != "*" and fileQuery.strip() and searchType != "folders":
- # For non-folder searches, apply name filtering
- # (Folder searches are already filtered by the recursive search)
- search_target = item_name
-
- # Apply different filtering based on search options
- if searchOptions.get("exact_match"):
- # Exact phrase matching
- if searchOptions.get("case_sensitive"):
- if fileQuery not in search_target:
- continue
- else:
- if fileQuery.lower() not in search_target.lower():
- continue
- elif searchOptions.get("regex_match"):
- # Regex pattern matching
- import re
- flags = 0 if searchOptions.get("case_sensitive") else re.IGNORECASE
- if not re.search(fileQuery, search_target, flags):
- continue
- elif searchOptions.get("and_terms"):
- # AND terms mode: Split by " AND " and ensure ALL terms are present
- search_name = search_target.lower() if not searchOptions.get("case_sensitive") else search_target
- and_terms = [term.strip() for term in fileQuery.split(" AND ") if term.strip()]
- and_terms = [term.lower() if not searchOptions.get("case_sensitive") else term for term in and_terms]
- if not all(term in search_name for term in and_terms):
- continue # Skip this item if not all AND terms match
- else:
- # Default: ALL search terms must be present (space-separated)
- search_name = search_target.lower() if not searchOptions.get("case_sensitive") else search_target
- search_terms = [term.strip().lower() if not searchOptions.get("case_sensitive") else term.strip()
- for term in fileQuery.split() if term.strip()]
- if not all(term in search_name for term in search_terms):
- continue # Skip this item if not all terms match
-
- # If strict folder name requested, enforce exact (case-insensitive) match on folders
- if strict_folder_name:
- item_is_folder = "folder" in item
- item_name_ci = (item.get("name") or "").strip().lower()
- if item_is_folder and item_name_ci != strict_folder_name.lower():
- logger.debug(f"Skipping folder '{item_name}' - doesn't match strict name '{strict_folder_name}'")
- continue
-
- logger.debug(f"Item '{item_name}' passed all filters - adding to results")
+ # Simple approach like test file - no complex filtering
+ logger.debug(f"Item '{item_name}' found - adding to results")
- # Create minimal result with only essential reference information
+ # Create result with full path information for proper action chaining
+ web_url = item.get("webUrl", "")
+ parent_path = item.get("parentReference", {}).get("path", "")
+
+ # Extract the full SharePoint path from webUrl or parentReference
+ full_path = ""
+ if web_url:
+ # Extract path from webUrl: https://pcuster.sharepoint.com/sites/SSSRESYNachfolge/Freigegebene%20Dokumente/General/Eskalation%20LogObject/Druckersteuerung
+ if '/sites/' in web_url:
+ path_part = web_url.split('/sites/')[1]
+ # Decode URL encoding and convert to backslash format
+ import urllib.parse
+ decoded_path = urllib.parse.unquote(path_part)
+ full_path = "\\" + decoded_path.replace('/', '\\')
+ elif parent_path:
+ # Use parentReference path if available
+ full_path = parent_path.replace('/', '\\')
+
doc_info = {
"id": item.get("id"),
"name": item.get("name"),
- "type": "folder" if "folder" in item else "file",
+ "type": "folder" if is_folder else "file",
"siteName": site_name,
- "siteId": site_id
+ "siteId": site_id,
+ "webUrl": web_url,
+ "fullPath": full_path,
+ "parentPath": parent_path
}
site_documents.append(doc_info)
@@ -756,18 +702,10 @@ class MethodSharepoint(MethodBase):
logger.error(f"Error searching SharePoint: {str(e)}")
return ActionResult.isFailure(error=str(e))
- # Determine output format based on expected formats
+ # Use default JSON format for output
output_extension = ".json" # Default
output_mime_type = "application/json" # Default
- if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
- # Use the first expected format
- expected_format = expectedDocumentFormats[0]
- output_extension = expected_format.get("extension", ".json")
- output_mime_type = expected_format.get("mimeType", "application/json")
- logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
- else:
- logger.info("No expected format specified, using default .json format")
return ActionResult(
success=True,
@@ -792,36 +730,37 @@ class MethodSharepoint(MethodBase):
Parameters:
documentList (str): Reference to the document list to read
connectionReference (str): Reference to the Microsoft connection
- pathQuery (str): Path query to locate documents (e.g., "/Documents/Project1", "*" for all sites)
- resultDocument (str, optional): JSON result document from findDocumentPath action (alternative to pathQuery)
+ pathObject (str, optional): Path object to locate documents. This can ONLY be a reference to a result from sharepoint.findDocumentPath action
+ pathQuery (str, optional): Path query to locate documents, only if no pathObject is provided (e.g., "/Documents/Project1", "*" for all sites)
includeMetadata (bool, optional): Whether to include metadata (default: True)
- expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description
"""
try:
documentList = parameters.get("documentList")
connectionReference = parameters.get("connectionReference")
pathQuery = parameters.get("pathQuery", "*")
- resultDocument = parameters.get("resultDocument")
+ pathObject = parameters.get("pathObject")
includeMetadata = parameters.get("includeMetadata", True)
- expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
if not documentList or not connectionReference:
return ActionResult.isFailure(error="Document list reference and connection reference are required")
- # If resultDocument is provided, extract folder IDs from it
- if resultDocument:
+ # If pathObject is provided, extract folder IDs from it
+ # Note: pathObject takes precedence over pathQuery when both are provided
+ if pathObject:
+ if pathQuery and pathQuery != "*":
+ logger.debug(f"Both pathObject and pathQuery provided - using pathObject (pathQuery '{pathQuery}' will be ignored)")
try:
import json
# Resolve the reference label to get the actual document list
- document_list = self.service.getChatDocumentsFromDocumentList([resultDocument])
+ document_list = self.service.getChatDocumentsFromDocumentList([pathObject])
if not document_list or len(document_list) == 0:
- return ActionResult.isFailure(error=f"No document list found for reference: {resultDocument}")
+ return ActionResult.isFailure(error=f"No document list found for reference: {pathObject}")
# Get the first document's content (which should be the JSON)
first_document = document_list[0]
file_data = self.service.getFileData(first_document.fileId)
if not file_data:
- return ActionResult.isFailure(error=f"No file data found for document: {resultDocument}")
+ return ActionResult.isFailure(error=f"No file data found for document: {pathObject}")
# Parse the JSON content
result_data = json.loads(file_data)
@@ -836,14 +775,14 @@ class MethodSharepoint(MethodBase):
if folder_ids:
# Use the first folder ID found as pathQuery
pathQuery = folder_ids[0]
- logger.info(f"Using folder ID from resultDocument: {pathQuery}")
+ logger.info(f"Using folder ID from pathObject: {pathQuery}")
else:
- return ActionResult.isFailure(error="No folders found in resultDocument")
+ return ActionResult.isFailure(error="No folders found in pathObject")
except json.JSONDecodeError as e:
- return ActionResult.isFailure(error=f"Invalid JSON in resultDocument: {str(e)}")
+ return ActionResult.isFailure(error=f"Invalid JSON in pathObject: {str(e)}")
except Exception as e:
- return ActionResult.isFailure(error=f"Error resolving resultDocument reference: {str(e)}")
+ return ActionResult.isFailure(error=f"Error resolving pathObject reference: {str(e)}")
# Get documents from reference - ensure documentList is a list, not a string
if isinstance(documentList, str):
@@ -857,10 +796,58 @@ class MethodSharepoint(MethodBase):
if not connection:
return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
- # Discover all SharePoint sites accessible to the user
- sites = await self._discoverSharePointSites(connection["accessToken"])
+ # Determine sites to use - strict validation: pathObject → pathQuery → ERROR
+ sites = None
+
+ # Step 1: Check pathObject first
+ if pathObject:
+ # When pathObject is provided, we should have specific site information
+ # Extract site information from the pathObject result
+ try:
+ # Get the site information from the first folder in pathObject
+ if 'found_documents' in locals() and found_documents:
+ first_folder = found_documents[0]
+ site_name = first_folder.get("siteName")
+ site_id = first_folder.get("siteId")
+
+ if site_name and site_id:
+ # Use the specific site from pathObject instead of discovering all sites
+ sites = [{
+ "id": site_id,
+ "displayName": site_name,
+ "webUrl": first_folder.get("webUrl", "")
+ }]
+ logger.info(f"Using specific site from pathObject: {site_name} (ID: {site_id})")
+ else:
+ # Site info missing from pathObject - this is an error
+ return ActionResult.isFailure(error="Site information missing from pathObject. Cannot determine target site for read operation.")
+ else:
+ # No documents found in pathObject - this is an error
+ return ActionResult.isFailure(error="No valid folder information found in pathObject. Cannot determine target site for read operation.")
+ except Exception as e:
+ # Error processing pathObject - this is an error
+ return ActionResult.isFailure(error=f"Error processing pathObject: {str(e)}. Cannot determine target site for read operation.")
+
+ # Step 2: If no pathObject, check pathQuery
+ elif pathQuery and pathQuery.strip() != "" and pathQuery.strip() != "*":
+ # Validate pathQuery format
+ if not pathQuery.startswith('/'):
+ return ActionResult.isFailure(error="pathQuery must start with '/' and include site name with syntax /site:/... e.g. /site:KM LayerFinance/Documents/Work")
+
+ # Check if pathQuery contains search terms (words without proper path structure)
+ if not pathQuery.startswith('/site:') and not pathQuery.startswith('/Documents') and not pathQuery.startswith('/Shared Documents'):
+ return ActionResult.isFailure(error=f"Invalid pathQuery '{pathQuery}'. This appears to be search terms, not a valid SharePoint path. Use findDocumentPath action first to search for folders, then use the returned folder path as pathQuery.")
+
+ # For pathQuery, we need to discover sites to find the specific one
+ sites = await self._discoverSharePointSites(connection["accessToken"])
+ if not sites:
+ return ActionResult.isFailure(error="No SharePoint sites found or accessible")
+ else:
+ # Step 3: Both pathObject and pathQuery failed - ERROR, NO FALLBACK
+ return ActionResult.isFailure(error="No valid upload path provided. Either provide pathObject (from findDocumentPath) or a valid pathQuery with specific site information.")
+
if not sites:
- return ActionResult.isFailure(error="No SharePoint sites found or accessible")
+ return ActionResult.isFailure(error="No valid target site determined for read operation")
# Resolve path query into search paths
search_paths = self._resolvePathQuery(pathQuery)
@@ -988,18 +975,10 @@ class MethodSharepoint(MethodBase):
"timestamp": get_utc_timestamp()
}
- # Determine output format based on expected formats
+ # Use default JSON format for output
output_extension = ".json" # Default
output_mime_type = "application/json" # Default
- if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
- # Use the first expected format
- expected_format = expectedDocumentFormats[0]
- output_extension = expected_format.get("extension", ".json")
- output_mime_type = expected_format.get("mimeType", "application/json")
- logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
- else:
- logger.info("No expected format specified, using default .json format")
return ActionResult(
success=True,
@@ -1025,61 +1004,121 @@ class MethodSharepoint(MethodBase):
Parameters:
connectionReference (str): Reference to the Microsoft connection
- sitePath (str): REQUIRED - Specific SharePoint path where to upload documents. Must be a valid SharePoint path format:
- - For direct upload: "/site://" (e.g., "/site:KM XYZ/Documents/Work")
- - If user provides words like "word1 word2", the system MUST call findDocumentPath first to locate the actual folder path, the result then to give to this parameter
+ pathObject (str, optional): Path object to locate documents. This can ONLY be a reference to a result from sharepoint.findDocumentPath action
+ pathQuery (str, optional): Path query to locate documents, only if no pathObject is provided (e.g., "/Documents/Project1", "*" for all sites)
documentList (str): Reference to the document list to upload
fileNames (List[str]): List of names for the uploaded files
- resultDocument (str, optional): JSON result document from findDocumentPath action (alternative to sitePath)
- expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description
"""
try:
connectionReference = parameters.get("connectionReference")
- sitePath = parameters.get("sitePath", "/Documents")
+ pathQuery = parameters.get("pathQuery")
documentList = parameters.get("documentList")
fileNames = parameters.get("fileNames")
- resultDocument = parameters.get("resultDocument")
- expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
+ pathObject = parameters.get("pathObject")
+
+ upload_path = pathQuery
+ logger.debug(f"Using pathQuery: {pathQuery}")
if not connectionReference or not documentList or not fileNames:
return ActionResult.isFailure(error="Connection reference, document list, and file names are required")
- # If resultDocument is provided, extract folder IDs from it
- if resultDocument:
+ # If pathObject is provided, extract folder IDs from it
+ if pathObject:
try:
import json
# Resolve the reference label to get the actual document list
- document_list = self.service.getChatDocumentsFromDocumentList([resultDocument])
+ document_list = self.service.getChatDocumentsFromDocumentList([pathObject])
if not document_list or len(document_list) == 0:
- return ActionResult.isFailure(error=f"No document list found for reference: {resultDocument}")
+ return ActionResult.isFailure(error=f"No document list found for reference: {pathObject}")
# Get the first document's content (which should be the JSON)
first_document = document_list[0]
file_data = self.service.getFileData(first_document.fileId)
if not file_data:
- return ActionResult.isFailure(error=f"No file data found for document: {resultDocument}")
+ return ActionResult.isFailure(error=f"No file data found for document: {pathObject}")
# Parse the JSON content
result_data = json.loads(file_data)
- found_documents = result_data.get("foundDocuments", [])
- # Extract folder IDs from the result
- folder_ids = []
+ # Debug: Log the structure of the result document
+ logger.info(f"Result document keys: {list(result_data.keys())}")
+
+ # Handle different result document formats
+ found_documents = []
+
+ # Check if it's a direct SharePoint result (has foundDocuments)
+ if "foundDocuments" in result_data:
+ found_documents = result_data.get("foundDocuments", [])
+ logger.info(f"Found {len(found_documents)} documents in foundDocuments array")
+ # Check if it's an AI validation result (has result string with validationReport)
+ elif "result" in result_data and "validationReport" in result_data["result"]:
+ try:
+ # Parse the nested JSON in the result field
+ nested_result = json.loads(result_data["result"])
+ validation_report = nested_result.get("validationReport", {})
+ document_details = validation_report.get("documentDetails", {})
+
+ if document_details:
+ # Convert the single document details to the expected format
+ doc = {
+ "id": document_details.get("id"),
+ "name": document_details.get("name"),
+ "type": document_details.get("type", "").lower(), # Convert "Folder" to "folder"
+ "siteName": document_details.get("siteName"),
+ "siteId": document_details.get("siteId"),
+ "fullPath": document_details.get("fullPath"),
+ "webUrl": document_details.get("webUrl", ""),
+ "parentPath": document_details.get("parentPath", "")
+ }
+ found_documents = [doc]
+ logger.info(f"Extracted 1 document from validation report")
+ except json.JSONDecodeError as e:
+ logger.error(f"Failed to parse nested JSON in result field: {e}")
+ return ActionResult.isFailure(error=f"Invalid nested JSON in pathObject: {str(e)}")
+
+ # Debug: Log what we found in the result document
+ logger.info(f"Result document contains {len(found_documents)} documents")
+ for i, doc in enumerate(found_documents):
+ logger.info(f" Document {i+1}: name='{doc.get('name')}', type='{doc.get('type')}', id='{doc.get('id')}'")
+
+ # Extract folder information from the result
+ folders = []
for doc in found_documents:
if doc.get("type") == "folder":
- folder_ids.append(doc.get("id"))
+ folders.append(doc)
- if folder_ids:
- # Use the first folder ID found as sitePath
- sitePath = folder_ids[0]
- logger.info(f"Using folder ID from resultDocument: {sitePath}")
+ logger.info(f"Found {len(folders)} folders in result document")
+
+ if folders:
+ # Use the first folder found - prefer folder ID for direct API calls
+ first_folder = folders[0]
+ if first_folder.get("id"):
+ # Use folder ID directly for most reliable API calls
+ upload_path = first_folder.get("id")
+ logger.info(f"Using folder ID from pathObject: {upload_path}")
+ elif first_folder.get("fullPath"):
+ # Extract the correct path portion from fullPath by removing site name
+ full_path = first_folder.get("fullPath")
+ # fullPath format: \\SiteName\\Library\\Folder\\SubFolder
+ # We need to remove the first two parts (\\SiteName\\) to get the actual folder path
+ path_parts = full_path.lstrip('\\').split('\\')
+ if len(path_parts) > 1:
+ # Remove the first part (site name) and reconstruct the path
+ actual_path = '\\'.join(path_parts[1:])
+ upload_path = actual_path
+ logger.info(f"Extracted path from fullPath: {upload_path}")
+ else:
+ upload_path = full_path
+ logger.info(f"Using full path from pathObject (no site name to remove): {upload_path}")
+ else:
+ return ActionResult.isFailure(error="No valid folder information found in pathObject")
else:
- return ActionResult.isFailure(error="No folders found in resultDocument")
+ return ActionResult.isFailure(error="No folders found in pathObject")
except json.JSONDecodeError as e:
- return ActionResult.isFailure(error=f"Invalid JSON in resultDocument: {str(e)}")
+ return ActionResult.isFailure(error=f"Invalid JSON in pathObject: {str(e)}")
except Exception as e:
- return ActionResult.isFailure(error=f"Error resolving resultDocument reference: {str(e)}")
+ return ActionResult.isFailure(error=f"Error resolving pathObject reference: {str(e)}")
# Get Microsoft connection
connection = self._getMicrosoftConnection(connectionReference)
@@ -1093,39 +1132,85 @@ class MethodSharepoint(MethodBase):
if not chatDocuments:
return ActionResult.isFailure(error="No documents found for the provided reference")
- # Discover all SharePoint sites accessible to the user
- sites = await self._discoverSharePointSites(connection["accessToken"])
+ # Determine sites to use based on whether pathObject was provided
+ sites = None
+ if pathObject:
+ # When pathObject is provided, we should have specific site information
+ # Extract site information from the pathObject result
+ try:
+ # Get the site information from the first folder in pathObject
+ if 'found_documents' in locals() and found_documents:
+ first_folder = found_documents[0]
+ site_name = first_folder.get("siteName")
+ site_id = first_folder.get("siteId")
+
+ if site_name and site_id:
+ # Use the specific site from pathObject instead of discovering all sites
+ sites = [{
+ "id": site_id,
+ "displayName": site_name,
+ "webUrl": first_folder.get("webUrl", "")
+ }]
+ logger.info(f"Using specific site from pathObject: {site_name} (ID: {site_id})")
+ else:
+ # Site info missing from pathObject - this is an error, not a fallback
+ return ActionResult.isFailure(error="Site information missing from pathObject. Cannot determine target site for upload.")
+ else:
+ # No documents found in pathObject - this is an error
+ return ActionResult.isFailure(error="No valid folder information found in pathObject. Cannot determine target site for upload.")
+ except Exception as e:
+ # Error processing pathObject - this is an error, not a fallback
+ return ActionResult.isFailure(error=f"Error processing pathObject: {str(e)}. Cannot determine target site for upload.")
+ else:
+ # No pathObject provided - check if pathQuery is valid
+ if not upload_path or upload_path.strip() == "" or upload_path.strip() == "*":
+ return ActionResult.isFailure(error="No valid upload path provided. Either provide pathObject (from findDocumentPath) or a valid pathQuery with specific site information.")
+
+ # Validate pathQuery format
+ if not upload_path.startswith('/'):
+ return ActionResult.isFailure(error="pathQuery must start with '/' and include site name with syntax /site:/... e.g. /site:KM LayerFinance/Documents/Work")
+
+ # Check if upload_path contains search terms (words without proper path structure)
+ if not upload_path.startswith('/site:') and not upload_path.startswith('/Documents') and not upload_path.startswith('/Shared Documents'):
+ return ActionResult.isFailure(error=f"Invalid pathQuery '{upload_path}'. This appears to be search terms, not a valid SharePoint path. Use findDocumentPath action first to search for folders, then use the returned folder path as pathQuery.")
+
+ # For pathQuery, we need to discover sites to find the specific one
+ sites = await self._discoverSharePointSites(connection["accessToken"])
+ if not sites:
+ return ActionResult.isFailure(error="No SharePoint sites found or accessible")
+
if not sites:
- return ActionResult.isFailure(error="No SharePoint sites found or accessible")
+ return ActionResult.isFailure(error="No valid target site determined for upload")
- # Enforce site-scoped path usage when using sitePath directly (without resultDocument)
+ # Process upload paths based on whether pathObject was provided
upload_site_scope = None
- if not resultDocument:
- if not sitePath or not sitePath.startswith('/'):
- return ActionResult.isFailure(error="sitePath must start with '/' and include site name with syntax /site:/... e.g. /site:KM LayerFinance/Documents/Work")
-
- # Check if sitePath contains search terms (words without proper path structure)
- if not sitePath.startswith('/site:') and not sitePath.startswith('/Documents') and not sitePath.startswith('/Shared Documents'):
- # This looks like search terms, not a valid path
- return ActionResult.isFailure(error=f"Invalid sitePath '{sitePath}'. This appears to be search terms, not a valid SharePoint path. Use findDocumentPath action first to search for folders, then use the returned folder path as sitePath.")
-
- parsed = self._parse_site_scoped_path(sitePath)
+ if not pathObject:
+ # Parse the validated pathQuery to extract site and path information
+ parsed = self._parse_site_scoped_path(upload_path)
if not parsed:
- return ActionResult.isFailure(error="Invalid sitePath. Use /site:/")
- # find matching site
+ return ActionResult.isFailure(error="Invalid upload_path. Use /site:/")
+
+ # Find matching site
candidate_sites = self._filter_sites_by_hint(sites, parsed["siteName"]) # substring match
- # choose exact displayName match if available
+ # Choose exact displayName match if available
exact = [s for s in candidate_sites if (s.get("displayName") or "").strip().lower() == parsed["siteName"].strip().lower()]
selected_site = exact[0] if exact else (candidate_sites[0] if candidate_sites else None)
if not selected_site:
return ActionResult.isFailure(error=f"SharePoint site '{parsed['siteName']}' not found or not accessible")
+
upload_site_scope = selected_site
# Use the inner path portion as the actual upload target path
upload_paths = [f"/{parsed['innerPath'].lstrip('/')}"]
sites = [selected_site]
else:
- # Resolve path query into upload paths (fallback behavior when using resultDocument)
- upload_paths = self._resolvePathQuery(sitePath)
+ # When using pathObject, check if upload_path is a folder ID or a path
+ if upload_path.startswith('01PPXICCB') or upload_path.startswith('01'):
+ # It's a folder ID - use it directly
+ upload_paths = [upload_path]
+ logger.info(f"Using folder ID directly for upload: {upload_path}")
+ else:
+ # It's a path - resolve it normally
+ upload_paths = self._resolvePathQuery(upload_path)
# Process each document upload
upload_results = []
@@ -1155,12 +1240,26 @@ class MethodSharepoint(MethodBase):
# Use the first upload path or default to Documents
upload_path = upload_paths[0] if upload_paths else "/Documents"
- upload_path = upload_path.rstrip('/') + '/' + fileName
- upload_path_clean = upload_path.lstrip('/')
+
+ # Handle wildcard paths - replace with default Documents folder
+ if upload_path == "*":
+ upload_path = "/Documents"
+ logger.warning(f"Wildcard path '*' detected, using default '/Documents' folder for upload")
+
+ # Check if upload_path is a folder ID or a regular path
+ if upload_path.startswith('01PPXICCB') or upload_path.startswith('01'):
+ # It's a folder ID - use the folder-specific upload endpoint
+ upload_endpoint = f"sites/{site_id}/drive/items/{upload_path}:/{fileName}:/content"
+ logger.info(f"Using folder ID upload endpoint: {upload_endpoint}")
+ else:
+ # It's a regular path - use the root-based upload endpoint
+ upload_path = upload_path.rstrip('/') + '/' + fileName
+ upload_path_clean = upload_path.lstrip('/')
+ upload_endpoint = f"sites/{site_id}/drive/root:/{upload_path_clean}:/content"
+ logger.info(f"Using path-based upload endpoint: {upload_endpoint}")
# Upload endpoint for small files (< 4MB)
if len(file_data) < 4 * 1024 * 1024: # 4MB
- upload_endpoint = f"sites/{site_id}/drive/root:/{upload_path_clean}:/content"
# Upload the file
upload_result = await self._makeGraphApiCall(
@@ -1178,6 +1277,7 @@ class MethodSharepoint(MethodBase):
"siteName": site_name,
"siteUrl": site_url,
"uploadPath": upload_path,
+ "uploadEndpoint": upload_endpoint,
"sharepointFileId": upload_result.get("id"),
"webUrl": upload_result.get("webUrl"),
"size": upload_result.get("size"),
@@ -1212,7 +1312,7 @@ class MethodSharepoint(MethodBase):
# Create result data
result_data = {
"connectionReference": connectionReference,
- "sitePath": sitePath,
+ "pathQuery": upload_path,
"documentList": documentList,
"fileNames": fileNames,
"sitesAvailable": len(sites),
@@ -1225,18 +1325,10 @@ class MethodSharepoint(MethodBase):
"timestamp": get_utc_timestamp()
}
- # Determine output format based on expected formats
+ # Use default JSON format for output
output_extension = ".json" # Default
output_mime_type = "application/json" # Default
- if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
- # Use the first expected format
- expected_format = expectedDocumentFormats[0]
- output_extension = expected_format.get("extension", ".json")
- output_mime_type = expected_format.get("mimeType", "application/json")
- logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
- else:
- logger.info("No expected format specified, using default .json format")
return ActionResult(
success=True,
@@ -1263,82 +1355,194 @@ class MethodSharepoint(MethodBase):
Parameters:
connectionReference (str): Reference to the Microsoft connection
- searchQuery (str): [path:][type:][mode:]query - "Test Plan", "folders:Test Plan", "/Documents", "*"
- Note: Use "folders:Name" to search for folders anywhere, not "path:/Name" which looks only in root
- resultDocument (str, optional): JSON result document from findDocumentPath action (alternative to searchQuery)
+ pathObject (str, optional): Path object to locate documents. This can ONLY be a reference to a result from sharepoint.findDocumentPath action
+ pathQuery (str, optional): Path query to locate documents, only if no pathObject is provided (e.g., "/Documents/Project1", "*" for all sites)
includeSubfolders (bool, optional): Whether to include subfolders (default: False)
- expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description
"""
try:
connectionReference = parameters.get("connectionReference")
- searchQuery = parameters.get("searchQuery", "*")
- resultDocument = parameters.get("resultDocument")
+ pathObject = parameters.get("pathObject")
+ pathQuery = parameters.get("pathQuery")
includeSubfolders = parameters.get("includeSubfolders", False) # Default to False for better UX
- expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
+
+ list_query = pathQuery
+ logger.info(f"Using pathQuery: {pathQuery}")
if not connectionReference:
return ActionResult.isFailure(error="Connection reference is required")
- # If resultDocument is provided, resolve the reference and extract folder IDs from it
- if resultDocument:
+ # If pathObject is provided, resolve the reference and extract folder IDs from it
+ # Note: pathObject takes precedence over pathQuery when both are provided
+ if pathObject:
+ if pathQuery and pathQuery != "*":
+ logger.debug(f"Both pathObject and pathQuery provided - using pathObject (pathQuery '{pathQuery}' will be ignored)")
try:
import json
# Resolve the reference label to get the actual document list
- document_list = self.service.getChatDocumentsFromDocumentList([resultDocument])
+ document_list = self.service.getChatDocumentsFromDocumentList([pathObject])
if not document_list or len(document_list) == 0:
- return ActionResult.isFailure(error=f"No document list found for reference: {resultDocument}")
+ return ActionResult.isFailure(error=f"No document list found for reference: {pathObject}")
# Get the first document's content (which should be the JSON)
first_document = document_list[0]
logger.info(f"Document fileId: {first_document.fileId}, fileName: {first_document.fileName}")
file_data = self.service.getFileData(first_document.fileId)
if not file_data:
- return ActionResult.isFailure(error=f"No file data found for document: {resultDocument} (fileId: {first_document.fileId})")
+ return ActionResult.isFailure(error=f"No file data found for document: {pathObject} (fileId: {first_document.fileId})")
logger.info(f"File data length: {len(file_data) if file_data else 0}")
# Parse the JSON content
result_data = json.loads(file_data)
- found_documents = result_data.get("foundDocuments", [])
- # Extract folder IDs from the result
- folder_ids = []
+ # Debug: Log the structure of the result document
+ logger.info(f"Result document keys: {list(result_data.keys())}")
+
+ # Handle different result document formats
+ found_documents = []
+
+ # Check if it's a direct SharePoint result (has foundDocuments)
+ if "foundDocuments" in result_data:
+ found_documents = result_data.get("foundDocuments", [])
+ logger.info(f"Found {len(found_documents)} documents in foundDocuments array")
+ # Check if it's an AI validation result (has result string with validationReport)
+ elif "result" in result_data and "validationReport" in result_data["result"]:
+ try:
+ # Parse the nested JSON in the result field
+ nested_result = json.loads(result_data["result"])
+ validation_report = nested_result.get("validationReport", {})
+ document_details = validation_report.get("documentDetails", {})
+
+ if document_details:
+ # Convert the single document details to the expected format
+ doc = {
+ "id": document_details.get("id"),
+ "name": document_details.get("name"),
+ "type": document_details.get("type", "").lower(), # Convert "Folder" to "folder"
+ "siteName": document_details.get("siteName"),
+ "siteId": document_details.get("siteId"),
+ "fullPath": document_details.get("fullPath"),
+ "webUrl": document_details.get("webUrl", ""),
+ "parentPath": document_details.get("parentPath", "")
+ }
+ found_documents = [doc]
+ logger.info(f"Extracted 1 document from validation report")
+ except json.JSONDecodeError as e:
+ logger.error(f"Failed to parse nested JSON in result field: {e}")
+ return ActionResult.isFailure(error=f"Invalid nested JSON in pathObject: {str(e)}")
+
+ # Debug: Log what we found in the result document
+ logger.info(f"Result document contains {len(found_documents)} documents")
+ for i, doc in enumerate(found_documents):
+ logger.info(f" Document {i+1}: name='{doc.get('name')}', type='{doc.get('type')}', id='{doc.get('id')}'")
+
+ # Extract folder information from the result
+ folders = []
for doc in found_documents:
if doc.get("type") == "folder":
- folder_ids.append(doc.get("id"))
+ folders.append(doc)
- if folder_ids:
- # Use the first folder ID found
- searchQuery = folder_ids[0]
- logger.info(f"Using folder ID from resultDocument: {searchQuery}")
+ logger.info(f"Found {len(folders)} folders in result document")
+
+ if folders:
+ # Use the first folder found - prefer folder ID for direct API calls
+ first_folder = folders[0]
+ if first_folder.get("id"):
+ # Use folder ID directly for most reliable API calls
+ list_query = first_folder.get("id")
+ logger.info(f"Using folder ID from pathObject: {list_query}")
+ elif first_folder.get("fullPath"):
+ # Extract the correct path portion from fullPath by removing site name
+ full_path = first_folder.get("fullPath")
+ # fullPath format: \\SiteName\\Library\\Folder\\SubFolder
+ # We need to remove the first two parts (\\SiteName\\) to get the actual folder path
+ path_parts = full_path.lstrip('\\').split('\\')
+ if len(path_parts) > 1:
+ # Remove the first part (site name) and reconstruct the path
+ actual_path = '\\'.join(path_parts[1:])
+ list_query = actual_path
+ logger.info(f"Extracted path from fullPath: {list_query}")
+ else:
+ list_query = full_path
+ logger.info(f"Using full path from pathObject (no site name to remove): {list_query}")
+ else:
+ return ActionResult.isFailure(error="No valid folder information found in pathObject")
else:
- return ActionResult.isFailure(error="No folders found in resultDocument")
+ return ActionResult.isFailure(error="No folders found in pathObject")
except json.JSONDecodeError as e:
- return ActionResult.isFailure(error=f"Invalid JSON in resultDocument: {str(e)}")
+ return ActionResult.isFailure(error=f"Invalid JSON in pathObject: {str(e)}")
except Exception as e:
- return ActionResult.isFailure(error=f"Error resolving resultDocument reference: {str(e)}")
+ return ActionResult.isFailure(error=f"Error resolving pathObject reference: {str(e)}")
# Get Microsoft connection
connection = self._getMicrosoftConnection(connectionReference)
if not connection:
return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
- logger.info(f"Starting SharePoint listDocuments for searchQuery: {searchQuery}")
+ logger.info(f"Starting SharePoint listDocuments for list_query: {list_query}")
logger.debug(f"Connection ID: {connection['id']}")
- # Parse searchQuery to extract path, search terms, search type, and options
- pathQuery, fileQuery, searchType, searchOptions = self._parseSearchQuery(searchQuery)
+ # Parse list_query to extract path, search terms, search type, and options
+ pathQuery, fileQuery, searchType, searchOptions = self._parseSearchQuery(list_query)
+
+ # Determine sites to use - strict validation: pathObject → pathQuery → ERROR
+ sites = None
+
+ # Step 1: Check pathObject first
+ if pathObject:
+ # When pathObject is provided, we should have specific site information
+ # Extract site information from the pathObject result
+ try:
+ # Get the site information from the first folder in pathObject
+ if 'found_documents' in locals() and found_documents:
+ first_folder = found_documents[0]
+ site_name = first_folder.get("siteName")
+ site_id = first_folder.get("siteId")
+
+ if site_name and site_id:
+ # Use the specific site from pathObject instead of discovering all sites
+ sites = [{
+ "id": site_id,
+ "displayName": site_name,
+ "webUrl": first_folder.get("webUrl", "")
+ }]
+ logger.info(f"Using specific site from pathObject: {site_name} (ID: {site_id})")
+ else:
+ # Site info missing from pathObject - this is an error
+ return ActionResult.isFailure(error="Site information missing from pathObject. Cannot determine target site for list operation.")
+ else:
+ # No documents found in pathObject - this is an error
+ return ActionResult.isFailure(error="No valid folder information found in pathObject. Cannot determine target site for list operation.")
+ except Exception as e:
+ # Error processing pathObject - this is an error
+ return ActionResult.isFailure(error=f"Error processing pathObject: {str(e)}. Cannot determine target site for list operation.")
+
+ # Step 2: If no pathObject, check pathQuery
+ elif pathQuery and pathQuery.strip() != "" and pathQuery.strip() != "*":
+ # Validate pathQuery format
+ if not pathQuery.startswith('/'):
+ return ActionResult.isFailure(error="pathQuery must start with '/' and include site name with syntax /site:/... e.g. /site:KM LayerFinance/Documents/Work")
+
+ # Check if pathQuery contains search terms (words without proper path structure)
+ if not pathQuery.startswith('/site:') and not pathQuery.startswith('/Documents') and not pathQuery.startswith('/Shared Documents'):
+ return ActionResult.isFailure(error=f"Invalid pathQuery '{pathQuery}'. This appears to be search terms, not a valid SharePoint path. Use findDocumentPath action first to search for folders, then use the returned folder path as pathQuery.")
+
+ # For pathQuery, we need to discover sites to find the specific one
+ sites = await self._discoverSharePointSites(connection["accessToken"])
+ if not sites:
+ return ActionResult.isFailure(error="No SharePoint sites found or accessible")
+ else:
+ # Step 3: Both pathObject and pathQuery failed - ERROR, NO FALLBACK
+ return ActionResult.isFailure(error="No valid list path provided. Either provide pathObject (from findDocumentPath) or a valid pathQuery with specific site information.")
- # Discover all SharePoint sites accessible to the user
- sites = await self._discoverSharePointSites(connection["accessToken"])
if not sites:
- return ActionResult.isFailure(error="No SharePoint sites found or accessible")
+ return ActionResult.isFailure(error="No valid target site determined for list operation")
- # Check if searchQuery is a folder ID (starts with 01PPXICCB...)
- if searchQuery.startswith('01PPXICCB') or searchQuery.startswith('01'):
+ # Check if list_query is a folder ID (starts with 01PPXICCB...)
+ if list_query.startswith('01PPXICCB') or list_query.startswith('01'):
# Direct folder ID - use it directly
- folder_paths = [searchQuery]
- logger.info(f"Using direct folder ID: {searchQuery}")
+ folder_paths = [list_query]
+ logger.info(f"Using direct folder ID: {list_query}")
else:
# Resolve path query into folder paths
folder_paths = self._resolvePathQuery(pathQuery)
@@ -1382,6 +1586,19 @@ class MethodSharepoint(MethodBase):
processed_items = []
for item in items:
+ # Use improved folder detection logic
+ is_folder = False
+ if 'folder' in item:
+ is_folder = True
+ else:
+ # Try to detect by URL pattern or other indicators
+ web_url = item.get('webUrl', '')
+ name = item.get('name', '')
+
+ # Check if URL has no file extension and looks like a folder path
+ if '.' not in name and ('/' in web_url or '\\' in web_url):
+ is_folder = True
+
item_info = {
"id": item.get("id"),
"name": item.get("name"),
@@ -1389,7 +1606,7 @@ class MethodSharepoint(MethodBase):
"createdDateTime": item.get("createdDateTime"),
"lastModifiedDateTime": item.get("lastModifiedDateTime"),
"webUrl": item.get("webUrl"),
- "type": "folder" if "folder" in item else "file",
+ "type": "folder" if is_folder else "file",
"siteName": site_name,
"siteUrl": site_url
}
@@ -1411,7 +1628,8 @@ class MethodSharepoint(MethodBase):
# If include subfolders is enabled, get ONLY direct subfolder contents (1 level deep only)
if includeSubfolders:
- logger.info(f"Including subfolders - processing {len([item for item in processed_items if item['type'] == 'folder'])} folders")
+ folder_items = [item for item in processed_items if item['type'] == 'folder']
+ logger.info(f"Including subfolders - processing {len(folder_items)} folders")
subfolder_count = 0
max_subfolders = 10 # Limit to prevent infinite loops
@@ -1428,6 +1646,19 @@ class MethodSharepoint(MethodBase):
logger.debug(f"Found {len(subfolder_items)} items in subfolder {item['name']}")
for subfolder_item in subfolder_items:
+ # Use improved folder detection logic for subfolder items
+ subfolder_is_folder = False
+ if 'folder' in subfolder_item:
+ subfolder_is_folder = True
+ else:
+ # Try to detect by URL pattern or other indicators
+ subfolder_web_url = subfolder_item.get('webUrl', '')
+ subfolder_name = subfolder_item.get('name', '')
+
+ # Check if URL has no file extension and looks like a folder path
+ if '.' not in subfolder_name and ('/' in subfolder_web_url or '\\' in subfolder_web_url):
+ subfolder_is_folder = True
+
# Only add files and direct subfolders, NO RECURSION
subfolder_item_info = {
"id": subfolder_item.get("id"),
@@ -1436,7 +1667,7 @@ class MethodSharepoint(MethodBase):
"createdDateTime": subfolder_item.get("createdDateTime"),
"lastModifiedDateTime": subfolder_item.get("lastModifiedDateTime"),
"webUrl": subfolder_item.get("webUrl"),
- "type": "folder" if "folder" in subfolder_item else "file",
+ "type": "folder" if subfolder_is_folder else "file",
"parentPath": subfolder_path,
"siteName": site_name,
"siteUrl": site_url
@@ -1480,25 +1711,17 @@ class MethodSharepoint(MethodBase):
# Create result data
result_data = {
- "searchQuery": searchQuery,
+ "pathQuery": list_query,
"includeSubfolders": includeSubfolders,
"sitesSearched": len(sites),
"listResults": list_results,
"timestamp": get_utc_timestamp()
}
- # Determine output format based on expected formats
+ # Use default JSON format for output
output_extension = ".json" # Default
output_mime_type = "application/json" # Default
- if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
- # Use the first expected format
- expected_format = expectedDocumentFormats[0]
- output_extension = expected_format.get("extension", ".json")
- output_mime_type = expected_format.get("mimeType", "application/json")
- logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
- else:
- logger.info("No expected format specified, using default .json format")
return ActionResult(
success=True,
diff --git a/test_graph_search.py b/test_graph_search.py
new file mode 100644
index 00000000..981aa778
--- /dev/null
+++ b/test_graph_search.py
@@ -0,0 +1,311 @@
+#!/usr/bin/env python3
+"""
+Simple test script for Microsoft Graph Search API
+Tests folder search queries directly
+"""
+
+import requests
+import json
+import sys
+import os
+
+# Add the gateway modules to the path
+sys.path.append(os.path.dirname(os.path.abspath(__file__)))
+
def test_graph_folders_direct(access_token):
    """List the root children of the SSSRESYNachfolge site's default drive.

    Sends one direct Graph API GET (no Search API involved) and prints a
    folder/file breakdown of the results.

    Args:
        access_token: OAuth2 bearer token for Microsoft Graph.

    Returns:
        None — output is printed; errors are reported, never raised.
    """
    print("🔍 Testing direct Graph API folder listing...")

    # Address the site by hostname + server-relative path, then read the
    # default document library's root children in a single request.
    url = "https://graph.microsoft.com/v1.0/sites/pcuster.sharepoint.com:/sites/SSSRESYNachfolge:/drive/root/children"

    headers = {
        "Authorization": f"Bearer {access_token}",
        "Content-Type": "application/json"
    }

    try:
        # timeout added: requests without a timeout blocks forever if the
        # endpoint stalls — a known requests pitfall.
        response = requests.get(url, headers=headers, timeout=30)

        if response.status_code == 200:
            data = response.json()
            items = data.get('value', [])
            print(f"✅ SUCCESS - Found {len(items)} items in root")

            # Graph marks driveItems with a 'folder' or 'file' facet;
            # items carrying neither facet are ignored, as before.
            folders = [item for item in items if 'folder' in item]
            files = [item for item in items if 'folder' not in item and 'file' in item]

            print(f"   📁 Folders: {len(folders)}")
            print(f"   📄 Files: {len(files)}")

            if folders:
                print("\n📁 FOLDERS found:")
                for i, folder in enumerate(folders[:5], 1):
                    name = folder.get('name', 'No name')
                    web_url = folder.get('webUrl', 'No URL')
                    print(f"   {i}. {name}")
                    print(f"      URL: {web_url}")
                    print()

        else:
            print(f"❌ ERROR - Status {response.status_code}")
            print(f"Error: {response.text[:200]}")

    except Exception as e:
        print(f"Exception: {str(e)}")
+
# Extensions the heuristic treats as definitely-a-file when Graph omits
# the explicit 'file' facet on a search hit.
_FILE_EXTENSIONS = ('.pdf', '.docx', '.xlsx', '.pptx', '.txt',
                    '.cs', '.py', '.js', '.html', '.css')


def _classify_driveitem(resource):
    """Classify a Graph search hit's resource as 'folder', 'file' or 'unknown'.

    Prefers the explicit 'folder'/'file' facets Graph attaches to
    driveItems; falls back to a name/URL heuristic because search results
    sometimes omit both facets. Centralizing this fixes an inconsistency in
    the previous code, where the folder-filter pass never checked the
    'file' facet and could count a facet-bearing file as a folder.
    """
    if 'folder' in resource:
        return 'folder'
    if 'file' in resource:
        return 'file'

    web_url = resource.get('webUrl', '')
    name = resource.get('name', '')

    # Heuristic: a known file extension implies a file; an extension-less
    # name living under a URL path implies a folder.
    if '.' in name and name.lower().endswith(_FILE_EXTENSIONS):
        return 'file'
    if '.' not in name and ('/' in web_url or '\\' in web_url):
        return 'folder'
    return 'unknown'


def _site_relative_path(web_url):
    """Turn an absolute SharePoint webUrl into a backslash-separated,
    site-relative path; return the raw URL for items on other sites."""
    marker = '/sites/SSSRESYNachfolge/'
    if marker in web_url:
        return '\\' + web_url.split(marker)[-1].replace('/', '\\')
    return web_url


def test_graph_search(access_token, query_string):
    """Run one Microsoft Graph Search API query over driveItems and print
    a classified summary (files/folders/unknown) plus folder paths.

    Args:
        access_token: OAuth2 bearer token for Microsoft Graph.
        query_string: KQL-style search string (e.g. "kind:folder AND Venus").

    Returns:
        None — output is printed; errors are reported, never raised.
    """
    url = "https://graph.microsoft.com/v1.0/search/query"

    headers = {
        "Authorization": f"Bearer {access_token}",
        "Content-Type": "application/json"
    }

    payload = {
        "requests": [
            {
                "entityTypes": ["driveItem"],
                "query": {
                    "queryString": query_string
                },
                "from": 0,
                "size": 50
            }
        ]
    }

    print(f"Testing: {query_string}")
    print("-" * 50)

    try:
        # timeout added: a stalled Search endpoint must not hang the suite.
        response = requests.post(url, headers=headers, json=payload, timeout=30)

        if response.status_code != 200:
            print(f"❌ ERROR - Status {response.status_code}")
            error_text = response.text[:200] + "..." if len(response.text) > 200 else response.text
            print(f"Error: {error_text}")
            return

        data = response.json()

        if "value" not in data or not data["value"]:
            print("❌ SUCCESS but no value array in response")
            return

        hits = data["value"][0].get("hitsContainers", [])
        if not hits:
            print("❌ SUCCESS but no hits containers found")
            return

        total = hits[0].get("total", 0)
        results = hits[0].get("hits", [])
        print(f"✅ SUCCESS - Found {total} results")

        print(f"📊 Analyzing {len(results)} results...")

        # Debug aid: dump the structure of the first hit so facet
        # availability can be inspected by eye.
        if results:
            print("🔍 DEBUG: First result structure:")
            first_result = results[0]
            print(f"   Keys: {list(first_result.keys())}")
            if 'resource' in first_result:
                resource = first_result['resource']
                print(f"   Resource keys: {list(resource.keys())}")
                if 'folder' in resource:
                    print(f"   Folder info: {resource['folder']}")
                if 'file' in resource:
                    print(f"   File info: {resource['file']}")
            print()

        # One classification pass shared by the count, the sample listing
        # and the folder filter below (previously three divergent copies).
        counts = {'file': 0, 'folder': 0, 'unknown': 0}
        for result in results:
            counts[_classify_driveitem(result.get('resource', {}))] += 1

        print(f"   📄 Files: {counts['file']}")
        print(f"   📁 Folders: {counts['folder']}")
        print(f"   ❓ Other: {counts['unknown']}")
        print()

        print(f"📋 Sample results (showing first 5):")
        for i, result in enumerate(results[:5], 1):
            resource = result.get('resource', {})
            web_url = resource.get('webUrl', 'No URL')
            name = resource.get('name', 'No name')

            kind = _classify_driveitem(resource)
            if kind == 'folder':
                item_type = "📁 FOLDER"
            elif kind == 'file':
                file_info = resource.get('file', {})
                # No 'file' facet means the extension heuristic matched.
                mime_type = file_info.get('mimeType', 'Unknown type') if file_info else 'Detected by extension'
                item_type = f"📄 FILE ({mime_type})"
            else:
                item_type = "❓ UNKNOWN"

            print(f"   {i}. {item_type} - {name}")
            print(f"      Path: {_site_relative_path(web_url)}")
            print(f"      URL: {web_url}")
            print()

        if len(results) > 5:
            print(f"   ... and {len(results) - 5} more results")

        # Folder-only detail listing, using the same classifier as above.
        folder_results = [
            result for result in results
            if _classify_driveitem(result.get('resource', {})) == 'folder'
        ]

        if folder_results:
            print(f"\n📁 FOLDER DETAILS ({len(folder_results)} folders found):")
            for i, result in enumerate(folder_results, 1):
                resource = result.get('resource', {})
                web_url = resource.get('webUrl', 'No URL')
                name = resource.get('name', 'No name')
                print(f"   {i}. 📁 {name}")
                print(f"      Path: {_site_relative_path(web_url)}")
                print(f"      URL: {web_url}")
                print()
        else:
            print(f"\n❌ No folders found in results - all {total} results are files or other types")

    except Exception as e:
        print(f"Exception: {str(e)}")
+
def main():
    """Run the Microsoft Graph API test suite.

    SECURITY FIX: the access token was previously a live bearer JWT
    hard-coded in this file. Bearer tokens are credentials and must never
    be committed to source control; the token is now read from the
    GRAPH_ACCESS_TOKEN environment variable and the script exits with a
    clear message when it is missing.
    """
    access_token = os.environ.get("GRAPH_ACCESS_TOKEN")
    if not access_token:
        print("ERROR: set the GRAPH_ACCESS_TOKEN environment variable to a "
              "valid Microsoft Graph bearer token before running this script.")
        sys.exit(1)

    print("=" * 60)
    print("Microsoft Graph API Test Suite")
    print("=" * 60)

    # First test: direct folder listing (should work better than search)
    print("\nTEST 0: Direct Graph API folder listing")
    test_graph_folders_direct(access_token)

    # Test different query types to find both files and folders
    test_queries = [
        # Test 1: Test with Venus folder (empty folder created for testing)
        "Venus",

        # Test 2: Folder-specific searches for Venus
        "kind:folder AND Venus",

        # Test 3: Original specific query (found 8 results - all files)
        "Druckersteuerung AND Eskalation AND Logobject",

        # Test 4: Broader folder-focused queries
        "Druckersteuerung",
        "Eskalation",
        "Logobject",

        # Test 5: Folder-specific searches
        "kind:folder AND Druckersteuerung",
        "kind:folder AND Eskalation",

        # Test 6: General folder search to see what folders exist
        "kind:folder",
    ]

    for i, query in enumerate(test_queries, 1):
        print(f"\nTEST {i}: {query}")
        test_graph_search(access_token, query)
        print()


if __name__ == "__main__":
    main()