From a1ebcac5887464c19b830355c43048b47a534c34 Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Thu, 4 Sep 2025 23:40:07 +0200 Subject: [PATCH] Sharepoint integration completed --- modules/chat/handling/handlingTasks.py | 2 +- modules/chat/handling/promptFactory.py | 93 ++- modules/interfaces/interfaceChatModel.py | 36 + modules/methods/methodDocument.py | 35 +- modules/methods/methodOutlook.py | 5 +- modules/methods/methodSharepoint.py | 873 ++++++++++++++--------- test_graph_search.py | 311 ++++++++ 7 files changed, 992 insertions(+), 363 deletions(-) create mode 100644 test_graph_search.py diff --git a/modules/chat/handling/handlingTasks.py b/modules/chat/handling/handlingTasks.py index 88465b0e..feb7b335 100644 --- a/modules/chat/handling/handlingTasks.py +++ b/modules/chat/handling/handlingTasks.py @@ -792,7 +792,7 @@ class HandlingTasks: # Add specific error details if available if retry_context and hasattr(retry_context, 'previous_review_result') and retry_context.previous_review_result: - reason = retry_context.previous_review_result.get('reason', '') + reason = retry_context.previous_review_result.reason or '' if reason and reason != "Task failed after all retries.": error_message += f"{reason}\n\n" diff --git a/modules/chat/handling/promptFactory.py b/modules/chat/handling/promptFactory.py index 640aebba..ada386ba 100644 --- a/modules/chat/handling/promptFactory.py +++ b/modules/chat/handling/promptFactory.py @@ -228,20 +228,81 @@ async def createActionDefinitionPrompt(context: TaskContext, service) -> str: connRefs = service.getConnectionReferenceList() - # Debug logging for connections - logging.debug(f"Connection references retrieved: {connRefs}") - logging.debug(f"Connection references type: {type(connRefs)}") - logging.debug(f"Connection references length: {len(connRefs) if connRefs else 0}") - - # Log document availability for debugging - logging.debug(f"Enhanced document context length: {len(available_documents_str)}") - - available_methods_str = '' 
+ # Create a structured JSON format for better AI parsing + # This replaces the old hard-to-read format with a clean JSON structure + # that the AI can easily parse and understand + available_methods_json = {} for method, actions in method_actions.items(): - available_methods_str += f"- {method}:\n" + available_methods_json[method] = {} + # Get the method instance for accessing docstrings + method_instance = service.methods.get(method, {}).get('instance') if hasattr(service, 'methods') else None + for action, sig in actions: - available_methods_str += f" - {action}: {sig}\n" - + # Parse the signature to extract parameters + if '(' in sig and ')' in sig: + # Extract parameters from signature + params_start = sig.find('(') + params_end = sig.find(')') + params_str = sig[params_start+1:params_end] + + # Parse parameters directly from the docstring - much simpler and more reliable! + parameters = [] + + # Get the actual function's docstring + if method_instance and hasattr(method_instance, action): + func = getattr(method_instance, action) + if hasattr(func, '__doc__') and func.__doc__: + docstring = func.__doc__ + + # Parse Parameters section from docstring + lines = docstring.split('\n') + in_parameters = False + for i, line in enumerate(lines): + original_line = line + line = line.strip() + + if line == 'Parameters:': + in_parameters = True + continue + elif in_parameters and line and not original_line.startswith(' ') and not original_line.startswith('\t'): + # End of parameters section + break + elif in_parameters and (original_line.startswith(' ') or original_line.startswith('\t')): + # This is a parameter line - already stripped + # Format: "paramName (type): description" + if ':' in line: + # Find the colon that separates param from description + colon_pos = line.find(':') + param_part = line[:colon_pos].strip() + description = line[colon_pos+1:].strip() + + # Parse parameter name and type + if '(' in param_part and ')' in param_part: + param_name = 
param_part.split('(')[0].strip() + type_part = param_part[param_part.find('(')+1:param_part.find(')')].strip() + + # Check if optional + is_optional = 'optional' in type_part + param_type = type_part.replace('optional', '').strip().rstrip(',').strip() + + parameters.append({ + "name": param_name, + "type": param_type, + "description": description, + "required": not is_optional + }) + + available_methods_json[method][action] = { + "signature": sig, + "parameters": parameters, + "description": f"{method}.{action} action" + } + + # Convert to a compact, AI-friendly format + available_methods_str = f""" +AVAILABLE ACTIONS (JSON format for better AI parsing): +{json.dumps(available_methods_json, indent=1, separators=(',', ':'))} +""" retry_context = "" if context.retry_count and context.retry_count > 0: retry_context = f""" @@ -264,10 +325,10 @@ Previous action results that failed or were incomplete: if context.previous_review_result: retry_context += f""" Previous review feedback: -- Status: {context.previous_review_result.get('status', 'unknown') or 'unknown'} -- Reason: {context.previous_review_result.get('reason', 'No reason provided') or 'No reason provided'} -- Quality Score: {context.previous_review_result.get('quality_score', 0) or 0}/10 -- Unmet Criteria: {', '.join(context.previous_review_result.get('unmet_criteria', []) or [])} +- Status: {context.previous_review_result.status or 'unknown'} +- Reason: {context.previous_review_result.reason or 'No reason provided'} +- Quality Score: {context.previous_review_result.quality_score or 0}/10 +- Unmet Criteria: {', '.join(context.previous_review_result.unmet_criteria or [])} """ # Use Pydantic model directly - no need for getattr diff --git a/modules/interfaces/interfaceChatModel.py b/modules/interfaces/interfaceChatModel.py index 769e94c9..629b59ee 100644 --- a/modules/interfaces/interfaceChatModel.py +++ b/modules/interfaces/interfaceChatModel.py @@ -845,4 +845,40 @@ register_model_labels( } ) +# ===== Centralized 
AI Call Response Models ===== + +class AiResult(BaseModel, ModelMixin): + """Document result from centralized AI call""" + filename: str = Field(description="Name of the result document") + mimetype: str = Field(description="MIME type of the result document") + content: str = Field(description="Content of the result document") + +# Register labels for AiResult +register_model_labels( + "AiResult", + {"en": "Result Document", "fr": "Document de résultat"}, + { + "filename": {"en": "Filename", "fr": "Nom de fichier"}, + "mimetype": {"en": "MIME Type", "fr": "Type MIME"}, + "content": {"en": "Content", "fr": "Contenu"} + } +) + +class CentralizedAiResponse(BaseModel, ModelMixin): + """Standardized response format from centralized AI calls""" + aiResults: List[AiResult] = Field(default_factory=list, description="List of result documents") + success: bool = Field(description="Whether the AI call was successful") + error: Optional[str] = Field(None, description="Error message if the call failed") + +# Register labels for CentralizedAiResponse +register_model_labels( + "CentralizedAiResponse", + {"en": "Centralized AI Response", "fr": "Réponse IA centralisée"}, + { + "aiResults": {"en": "Result Documents", "fr": "Documents de résultat"}, + "success": {"en": "Success", "fr": "Succès"}, + "error": {"en": "Error", "fr": "Erreur"} + } +) + diff --git a/modules/methods/methodDocument.py b/modules/methods/methodDocument.py index 72d6d81b..d7dae427 100644 --- a/modules/methods/methodDocument.py +++ b/modules/methods/methodDocument.py @@ -618,23 +618,24 @@ class MethodDocument(MethodBase): # Create AI prompt for comprehensive report generation using user's prompt combinedContent = "\n\n".join(allContent) aiPrompt = f""" - {prompt} - - Report Title: {title} - - Additional Requirements: - 1. Create a professional, well-formatted HTML report - 2. Include an executive summary at the beginning - 3. Organize information logically with clear sections - 4. 
Highlight key findings and insights - 5. Include relevant data, statistics, and conclusions - 6. Use proper HTML formatting with headers, lists, and styling - 7. Make it readable and professional - - Document Content: - {combinedContent} - - Generate a complete HTML report that addresses the user's specific requirements and integrates all the information into a cohesive, professional document. +{prompt} + +Report Title: {title} + +Additional Requirements: +1. Create a professional, well-formatted HTML report +2. Include an executive summary at the beginning +3. Organize information logically with clear sections +4. Highlight key findings and insights +5. Include relevant data, statistics, and conclusions +6. Use proper HTML formatting with headers, lists, and styling +7. Make it readable and professional + +Document Content: +---START OF DOCUMENT CONTENT----------------------------------------------- +{combinedContent} +---END OF DOCUMENT CONTENT----------------------------------------------- +Generate a complete HTML report that addresses the user's specific requirements and integrates all the information into a cohesive, professional document. """ # Call AI to generate the report diff --git a/modules/methods/methodOutlook.py b/modules/methods/methodOutlook.py index 2c2c90ce..8a2b5b7d 100644 --- a/modules/methods/methodOutlook.py +++ b/modules/methods/methodOutlook.py @@ -379,10 +379,7 @@ class MethodOutlook(MethodBase): connectionReference (str): Reference to the Microsoft connection folder (str, optional): Email folder to read from (default: "Inbox") limit (int, optional): Maximum number of emails to read (default: 10) - filter (str, optional): Filter criteria for emails. Supports: - - Email address (e.g., "user@domain.com") - filters by sender - - Search queries (e.g., "from:user@domain.com", "subject:meeting") - - Text content (e.g., "project update") - searches in subject + filter (str, optional): Filter criteria for emails. 
Supports: Email address (e.g., "user@domain.com") - filters by sender, Search queries (e.g., "from:user@domain.com", "subject:meeting"), Text content (e.g., "project update") - searches in subject expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description """ try: diff --git a/modules/methods/methodSharepoint.py b/modules/methods/methodSharepoint.py index 4312bf58..19af4c1e 100644 --- a/modules/methods/methodSharepoint.py +++ b/modules/methods/methodSharepoint.py @@ -167,9 +167,16 @@ class MethodSharepoint(MethodBase): """ Parse searchQuery to extract path, search terms, search type, and search options. + CRITICAL: NEVER convert words to paths! Words stay as search terms. + - "root document lesson" → fileQuery="root document lesson" (NOT "/root/document/lesson") + - "root, gose" → fileQuery="root, gose" (NOT "/root/gose") + - "druckersteuerung eskalation logobject" → fileQuery="druckersteuerung eskalation logobject" + Parameters: searchQuery (str): Enhanced search query with options: - "budget" -> pathQuery="*", fileQuery="budget", searchType="all", options={} + - "root document lesson" -> pathQuery="*", fileQuery="root document lesson", searchType="all", options={} + - "root, gose" -> pathQuery="*", fileQuery="root, gose", searchType="all", options={} - "/Documents:budget" -> pathQuery="/Documents", fileQuery="budget", searchType="all", options={} - "files:budget" -> pathQuery="*", fileQuery="budget", searchType="files", options={} - "folders:DELTA" -> pathQuery="*", fileQuery="DELTA", searchType="folders", options={} @@ -187,6 +194,10 @@ class MethodSharepoint(MethodBase): searchQuery = searchQuery.strip() searchOptions = {} + + # CRITICAL: Do NOT convert space-separated or comma-separated words to paths! 
+ # "root document lesson" should stay as "root document lesson", NOT "/root/document/lesson" + # "root, gose" should stay as "root, gose", NOT "/root/gose" # Check for search type specification (files:, folders:, all:) FIRST searchType = "all" # Default @@ -277,12 +288,14 @@ class MethodSharepoint(MethodBase): return searchQuery, "*", searchType, searchOptions else: - # It's a search term only + # It's a search term only - keep words as-is, do NOT convert to paths + # "root document lesson" stays as "root document lesson" + # "root, gose" stays as "root, gose" return "*", searchQuery, searchType, searchOptions except Exception as e: logger.error(f"Error parsing searchQuery '{searchQuery}': {str(e)}") - return "*", "*", "all", {} + raise ValueError(f"Failed to parse searchQuery '{searchQuery}': {str(e)}") def _resolvePathQuery(self, pathQuery: str) -> List[str]: """ @@ -293,8 +306,9 @@ class MethodSharepoint(MethodBase): - Direct paths (e.g., "/Documents/Project1") - Wildcards (e.g., "/Documents/*") - Multiple paths separated by semicolons (e.g., "/Docs; /Files") - - Relative paths (e.g., "Project1" -> resolved to default folder) + - Single word relative paths (e.g., "Project1" -> resolved to default folder) - Empty string or "*" for global search + - Space-separated words are treated as search terms, NOT folder paths Returns: List[str]: List of resolved paths @@ -314,9 +328,21 @@ class MethodSharepoint(MethodBase): # Handle absolute paths elif raw_path.startswith('/'): resolved_paths.append(raw_path) - # Handle relative paths - prepend default folder - else: + # Handle single word relative paths - prepend default folder + # BUT NOT space-separated words (those are search terms, not paths) + elif ' ' not in raw_path: resolved_paths.append(f"/Documents/{raw_path}") + else: + # Check if this looks like a path (has path separators) or search terms + if '\\' in raw_path or '/' in raw_path: + # This looks like a path with spaces in folder names - treat as valid path + 
resolved_paths.append(raw_path) + logger.info(f"Path with spaces '{raw_path}' treated as valid folder path") + else: + # Space-separated words without path separators are search terms + # Return as "*" to search globally + logger.info(f"Space-separated words '{raw_path}' treated as search terms, not folder path") + resolved_paths.append("*") # Remove duplicates while preserving order seen = set() @@ -331,7 +357,7 @@ class MethodSharepoint(MethodBase): except Exception as e: logger.error(f"Error resolving pathQuery '{pathQuery}': {str(e)}") - return ["*"] # Fallback to global search + raise ValueError(f"Failed to resolve pathQuery '{pathQuery}': {str(e)}") def _parseSiteUrl(self, siteUrl: str) -> Dict[str, str]: """Parse SharePoint site URL to extract hostname and site path""" @@ -423,74 +449,28 @@ class MethodSharepoint(MethodBase): except Exception as e: logger.error(f"Error getting site ID: {str(e)}") return "" - + + @action async def findDocumentPath(self, parameters: Dict[str, Any]) -> ActionResult: """ - Find documents by searching their content, names, or metadata across all accessible SharePoint sites + Find documents/folders by searching their NAMES across SharePoint sites. Parameters: - connectionReference (str): Reference to the Microsoft connection - site (str, optional): SharePoint site name or hint to search within (e.g., "SSS", "KM XYZ"). 
If not provided, searches all accessible sites - searchQuery (str): [path:][type:][mode:]query - Enhanced search syntax: - - "budget", "/Documents:budget", "files:budget", "folders:DELTA", "*" - - "exact:\"Operations 2025\"" - exact phrase matching - - "regex:^Operations.*2025$" - regex pattern matching - - "case:DELTA" - case-sensitive search - - "and:DELTA AND 2025 Mars AND Group" - all terms must be present - - "folders:and:DELTA AND 2025 Mars AND Group" - combined options - - Site hint support: "folders:site=KM LayerFinance;name=Work" or "folders:site:KM LayerFinance Work" - - For quoted names: "folders:site=KM;name=\"page staten\"" - - For folder search: words like "part1 part2" will search for folders containing BOTH terms - Note: For storage locations, use "folders:" prefix. When searching for folders, multiple words are treated as search terms that must all appear in the folder name or path. - Site hints help narrow search to specific SharePoint sites for better accuracy. - resultDocument (str, optional): JSON result document from previous findDocumentPath action to refine search - searchScope (str, optional): Search scope - options: "all" (default), "documents" (files only), "pages" (SharePoint pages only) - maxResults (int, optional): Maximum number of results to return (default: 100) - expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description + connectionReference (str): Microsoft connection reference + site (str, optional): Site hint (e.g., "SSS", "KM XYZ") + searchQuery (str): Search query - "budget", "folders:alpha", "files:budget", "/Documents/Project1", "namepart1 namepart2 namepart3". 
Use "folders:" prefix when user wants to store files or find folders + maxResults (int, optional): Max results (default: 100) """ try: connectionReference = parameters.get("connectionReference") site = parameters.get("site") searchQuery = parameters.get("searchQuery", "*") - resultDocument = parameters.get("resultDocument") - searchScope = parameters.get("searchScope", "all") maxResults = parameters.get("maxResults", 100) - expectedDocumentFormats = parameters.get("expectedDocumentFormats", []) if not connectionReference: return ActionResult.isFailure(error="Connection reference is required") - # If resultDocument is provided, extract site information to refine search - if resultDocument: - try: - import json - # Resolve the reference label to get the actual document list - document_list = self.service.getChatDocumentsFromDocumentList([resultDocument]) - if not document_list or len(document_list) == 0: - return ActionResult.isFailure(error=f"No document list found for reference: {resultDocument}") - - # Get the first document's content (which should be the JSON) - first_document = document_list[0] - file_data = self.service.getFileData(first_document.fileId) - if not file_data: - return ActionResult.isFailure(error=f"No file data found for document: {resultDocument}") - - # Parse the JSON content - result_data = json.loads(file_data) - found_documents = result_data.get("foundDocuments", []) - - # Extract site information from the result for context - if found_documents: - # Use the site information from the previous search to refine current search - # This could be used to limit search to specific sites or add context - logger.info(f"Refining search using {len(found_documents)} documents from previous result") - - except json.JSONDecodeError as e: - return ActionResult.isFailure(error=f"Invalid JSON in resultDocument: {str(e)}") - except Exception as e: - return ActionResult.isFailure(error=f"Error resolving resultDocument reference: {str(e)}") - # Parse 
searchQuery to extract path, search terms, search type, and options pathQuery, fileQuery, searchType, searchOptions = self._parseSearchQuery(searchQuery) @@ -498,17 +478,22 @@ class MethodSharepoint(MethodBase): if not connection: return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference") - # Discover all SharePoint sites accessible to the user - sites = await self._discoverSharePointSites(connection["accessToken"]) - if not sites: - return ActionResult.isFailure(error="No SharePoint sites found or accessible") - - # Filter sites by site parameter if provided + # Discover SharePoint sites - use targeted approach when site parameter is provided if site: - sites = self._filter_sites_by_hint(sites, site) + # When site parameter is provided, discover all sites first, then filter + all_sites = await self._discoverSharePointSites(connection["accessToken"]) + if not all_sites: + return ActionResult.isFailure(error="No SharePoint sites found or accessible") + + sites = self._filter_sites_by_hint(all_sites, site) logger.info(f"Filtered sites by site parameter: '{site}' -> {len(sites)} sites") if not sites: return ActionResult.isFailure(error=f"No SharePoint sites found matching '{site}'") + else: + # No site parameter - discover all sites + sites = await self._discoverSharePointSites(connection["accessToken"]) + if not sites: + return ActionResult.isFailure(error="No SharePoint sites found or accessible") # Resolve path query into search paths search_paths = self._resolvePathQuery(pathQuery) @@ -518,25 +503,8 @@ class MethodSharepoint(MethodBase): found_documents = [] all_sites_searched = [] - # Apply site hint filtering if provided in search options + # Use simple approach like test file - no complex filtering site_scoped_sites = sites - strict_folder_name: Optional[str] = None - - # First check for explicit site hint in search options - if searchOptions.get("site_hint"): - site_scoped_sites = 
self._filter_sites_by_hint(sites, searchOptions["site_hint"]) - logger.info(f"Filtered sites by explicit site hint: '{searchOptions['site_hint']}' -> {len(site_scoped_sites)} sites") - - # Heuristic: if user searched for folders with pattern " ", - # prefer filtering sites by the first token(s) and match folder name exactly for the last token - elif searchType == "folders" and fileQuery and ' ' in fileQuery and not searchOptions.get("regex_match"): - # treat last token as folder name, preceding tokens combined as site hint - tokens = [t for t in fileQuery.split(' ') if t] - if len(tokens) >= 2: - strict_folder_name = tokens[-1] - site_hint = ' '.join(tokens[:-1]) - site_scoped_sites = self._filter_sites_by_hint(sites, site_hint) - logger.info(f"Filtered sites by heuristic site hint: '{site_hint}' -> {len(site_scoped_sites)} sites") for site in site_scoped_sites: site_id = site["id"] @@ -554,38 +522,20 @@ class MethodSharepoint(MethodBase): # For specific queries, use different approaches based on search type if searchType == "folders": # Use Microsoft Graph unified search endpoint: POST /search/query - # Scope by all drives in the site (e.g., Shared Documents, Documents, language variants) + # This approach works reliably for finding folders try: import json - # Discover drives for the site to build precise path scopes - drives_resp = await self._makeGraphApiCall(connection["accessToken"], f"sites/{site_id}/drives") - path_filters = [] - if not ("error" in drives_resp): - for drv in (drives_resp.get("value", []) or []): - web_url = (drv.get("webUrl") or "").rstrip('/') + '/' - if web_url: - # path:"/" - path_filters.append(f"path:\"{web_url}\"") - if not path_filters: - # fallback to site root if no drives found - scoped_path = site_url.rstrip('/') + '/' - path_filters = [f"path:\"{scoped_path}\""] - - # Use KQL syntax for folder search + + # Use Microsoft Graph Search API syntax (simple term search only) terms = [t for t in fileQuery.split() if t.strip()] + if 
len(terms) > 1: - # Multiple terms: first search for folders containing ANY of the terms (OR) - # This broadens the search to catch all potential matches - name_terms = " OR ".join([f"foldername:*{t}*" for t in terms]) - name_filter = f"({name_terms})" + # Multiple terms: search for ALL terms (AND) - more specific results + query_string = " AND ".join(terms) else: - # Single term: search for folders containing the term - single_term = terms[0] if terms else fileQuery - name_filter = f"foldername:*{single_term}*" - - # Use KQL syntax with isFolder:true - query_string = f"isFolder:true AND {name_filter}" - logger.info(f"Using KQL query: {query_string}") + # Single term: search for the term + query_string = terms[0] if terms else fileQuery + logger.info(f"Using search query for folders: {query_string}") payload = { "requests": [ @@ -598,14 +548,15 @@ class MethodSharepoint(MethodBase): ] } logger.info(f"Using unified search API for folders with queryString: {query_string}") - logger.info(f"Payload: {json.dumps(payload, indent=2)}") + + # Use global search endpoint (site-specific search not available) unified_result = await self._makeGraphApiCall( connection["accessToken"], "search/query", method="POST", data=json.dumps(payload).encode("utf-8") ) - logger.info(f"Unified search response: {json.dumps(unified_result, indent=2)}") + if "error" in unified_result: logger.warning(f"Unified search failed for site {site_name}: {unified_result['error']}") items = [] @@ -618,18 +569,32 @@ class MethodSharepoint(MethodBase): resource = hit.get("resource") if resource: items.append(resource) - logger.info(f"Unified search returned {len(items)} items (pre-filter)") - - # Post-filter: For multiple terms, filter results to only include folders that contain ALL terms - if len(terms) > 1: - filtered_items = [] + + logger.info(f"Unified search returned {len(items)} items (pre-filter)") + + # Apply our improved folder detection logic + folder_items = [] for item in items: - folder_name = 
item.get("name", "").lower() - # Check if folder name contains ALL search terms - if all(term.lower() in folder_name for term in terms): - filtered_items.append(item) - items = filtered_items - logger.info(f"Post-filtered to {len(items)} items containing all terms: {terms}") + resource = item + + # Use the same detection logic as our test + is_folder = False + if 'folder' in resource: + is_folder = True + else: + # Try to detect by URL pattern or other indicators + web_url = resource.get('webUrl', '') + name = resource.get('name', '') + + # Check if URL has no file extension and looks like a folder path + if '.' not in name and ('/' in web_url or '\\' in web_url): + is_folder = True + + if is_folder: + folder_items.append(item) + + items = folder_items + logger.info(f"Filtered to {len(items)} folders using improved detection logic") except Exception as e: logger.error(f"Error performing unified folder search: {str(e)}") @@ -652,79 +617,60 @@ class MethodSharepoint(MethodBase): for item in items: item_name = item.get("name", "") - item_type = "folder" if "folder" in item else "file" + + # Use improved folder detection logic + is_folder = False + if 'folder' in item: + is_folder = True + else: + # Try to detect by URL pattern or other indicators + web_url = item.get('webUrl', '') + name = item.get('name', '') + + # Check if URL has no file extension and looks like a folder path + if '.' 
not in name and ('/' in web_url or '\\' in web_url): + is_folder = True + + item_type = "folder" if is_folder else "file" item_path = item.get("parentReference", {}).get("path", "") logger.debug(f"Processing {item_type}: '{item_name}' at path: '{item_path}'") - # Filter by search scope if specified - if searchScope == "documents" and "folder" in item: - logger.debug(f"Skipping folder '{item_name}' due to documents scope") - continue - elif searchScope == "pages" and "file" in item and not item["file"].get("mimeType", "").startswith("text/html"): - logger.debug(f"Skipping file '{item_name}' due to pages scope") - continue + # Simple filtering like test file - just check search type + if searchType == "files" and is_folder: + continue # Skip folders when searching for files + elif searchType == "folders" and not is_folder: + continue # Skip files when searching for folders - # Filter by search type (files, folders, all) - if searchType == "files" and "folder" in item: - logger.debug(f"Skipping folder '{item_name}' due to files search type") - continue - elif searchType == "folders" and "file" in item: - logger.debug(f"Skipping file '{item_name}' due to folders search type") - continue - - # Enhanced post-filtering based on search options - if fileQuery != "*" and fileQuery.strip() and searchType != "folders": - # For non-folder searches, apply name filtering - # (Folder searches are already filtered by the recursive search) - search_target = item_name - - # Apply different filtering based on search options - if searchOptions.get("exact_match"): - # Exact phrase matching - if searchOptions.get("case_sensitive"): - if fileQuery not in search_target: - continue - else: - if fileQuery.lower() not in search_target.lower(): - continue - elif searchOptions.get("regex_match"): - # Regex pattern matching - import re - flags = 0 if searchOptions.get("case_sensitive") else re.IGNORECASE - if not re.search(fileQuery, search_target, flags): - continue - elif 
searchOptions.get("and_terms"): - # AND terms mode: Split by " AND " and ensure ALL terms are present - search_name = search_target.lower() if not searchOptions.get("case_sensitive") else search_target - and_terms = [term.strip() for term in fileQuery.split(" AND ") if term.strip()] - and_terms = [term.lower() if not searchOptions.get("case_sensitive") else term for term in and_terms] - if not all(term in search_name for term in and_terms): - continue # Skip this item if not all AND terms match - else: - # Default: ALL search terms must be present (space-separated) - search_name = search_target.lower() if not searchOptions.get("case_sensitive") else search_target - search_terms = [term.strip().lower() if not searchOptions.get("case_sensitive") else term.strip() - for term in fileQuery.split() if term.strip()] - if not all(term in search_name for term in search_terms): - continue # Skip this item if not all terms match - - # If strict folder name requested, enforce exact (case-insensitive) match on folders - if strict_folder_name: - item_is_folder = "folder" in item - item_name_ci = (item.get("name") or "").strip().lower() - if item_is_folder and item_name_ci != strict_folder_name.lower(): - logger.debug(f"Skipping folder '{item_name}' - doesn't match strict name '{strict_folder_name}'") - continue - - logger.debug(f"Item '{item_name}' passed all filters - adding to results") + # Simple approach like test file - no complex filtering + logger.debug(f"Item '{item_name}' found - adding to results") - # Create minimal result with only essential reference information + # Create result with full path information for proper action chaining + web_url = item.get("webUrl", "") + parent_path = item.get("parentReference", {}).get("path", "") + + # Extract the full SharePoint path from webUrl or parentReference + full_path = "" + if web_url: + # Extract path from webUrl: 
https://pcuster.sharepoint.com/sites/SSSRESYNachfolge/Freigegebene%20Dokumente/General/Eskalation%20LogObject/Druckersteuerung + if '/sites/' in web_url: + path_part = web_url.split('/sites/')[1] + # Decode URL encoding and convert to backslash format + import urllib.parse + decoded_path = urllib.parse.unquote(path_part) + full_path = "\\" + decoded_path.replace('/', '\\') + elif parent_path: + # Use parentReference path if available + full_path = parent_path.replace('/', '\\') + doc_info = { "id": item.get("id"), "name": item.get("name"), - "type": "folder" if "folder" in item else "file", + "type": "folder" if is_folder else "file", "siteName": site_name, - "siteId": site_id + "siteId": site_id, + "webUrl": web_url, + "fullPath": full_path, + "parentPath": parent_path } site_documents.append(doc_info) @@ -756,18 +702,10 @@ class MethodSharepoint(MethodBase): logger.error(f"Error searching SharePoint: {str(e)}") return ActionResult.isFailure(error=str(e)) - # Determine output format based on expected formats + # Use default JSON format for output output_extension = ".json" # Default output_mime_type = "application/json" # Default - if expectedDocumentFormats and len(expectedDocumentFormats) > 0: - # Use the first expected format - expected_format = expectedDocumentFormats[0] - output_extension = expected_format.get("extension", ".json") - output_mime_type = expected_format.get("mimeType", "application/json") - logger.info(f"Using expected format: {output_extension} ({output_mime_type})") - else: - logger.info("No expected format specified, using default .json format") return ActionResult( success=True, @@ -792,36 +730,37 @@ class MethodSharepoint(MethodBase): Parameters: documentList (str): Reference to the document list to read connectionReference (str): Reference to the Microsoft connection - pathQuery (str): Path query to locate documents (e.g., "/Documents/Project1", "*" for all sites) - resultDocument (str, optional): JSON result document from 
findDocumentPath action (alternative to pathQuery) + pathObject (str, optional): Path object to locate documents. This can ONLY be a reference to a result from sharepoint.findDocumentPath action + pathQuery (str, optional): Path query to locate documents, only if no pathObject is provided (e.g., "/Documents/Project1", "*" for all sites) includeMetadata (bool, optional): Whether to include metadata (default: True) - expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description """ try: documentList = parameters.get("documentList") connectionReference = parameters.get("connectionReference") pathQuery = parameters.get("pathQuery", "*") - resultDocument = parameters.get("resultDocument") + pathObject = parameters.get("pathObject") includeMetadata = parameters.get("includeMetadata", True) - expectedDocumentFormats = parameters.get("expectedDocumentFormats", []) if not documentList or not connectionReference: return ActionResult.isFailure(error="Document list reference and connection reference are required") - # If resultDocument is provided, extract folder IDs from it - if resultDocument: + # If pathObject is provided, extract folder IDs from it + # Note: pathObject takes precedence over pathQuery when both are provided + if pathObject: + if pathQuery and pathQuery != "*": + logger.debug(f"Both pathObject and pathQuery provided - using pathObject (pathQuery '{pathQuery}' will be ignored)") try: import json # Resolve the reference label to get the actual document list - document_list = self.service.getChatDocumentsFromDocumentList([resultDocument]) + document_list = self.service.getChatDocumentsFromDocumentList([pathObject]) if not document_list or len(document_list) == 0: - return ActionResult.isFailure(error=f"No document list found for reference: {resultDocument}") + return ActionResult.isFailure(error=f"No document list found for reference: {pathObject}") # Get the first document's content (which should be the JSON) 
first_document = document_list[0] file_data = self.service.getFileData(first_document.fileId) if not file_data: - return ActionResult.isFailure(error=f"No file data found for document: {resultDocument}") + return ActionResult.isFailure(error=f"No file data found for document: {pathObject}") # Parse the JSON content result_data = json.loads(file_data) @@ -836,14 +775,14 @@ class MethodSharepoint(MethodBase): if folder_ids: # Use the first folder ID found as pathQuery pathQuery = folder_ids[0] - logger.info(f"Using folder ID from resultDocument: {pathQuery}") + logger.info(f"Using folder ID from pathObject: {pathQuery}") else: - return ActionResult.isFailure(error="No folders found in resultDocument") + return ActionResult.isFailure(error="No folders found in pathObject") except json.JSONDecodeError as e: - return ActionResult.isFailure(error=f"Invalid JSON in resultDocument: {str(e)}") + return ActionResult.isFailure(error=f"Invalid JSON in pathObject: {str(e)}") except Exception as e: - return ActionResult.isFailure(error=f"Error resolving resultDocument reference: {str(e)}") + return ActionResult.isFailure(error=f"Error resolving pathObject reference: {str(e)}") # Get documents from reference - ensure documentList is a list, not a string if isinstance(documentList, str): @@ -857,10 +796,58 @@ class MethodSharepoint(MethodBase): if not connection: return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference") - # Discover all SharePoint sites accessible to the user - sites = await self._discoverSharePointSites(connection["accessToken"]) + # Determine sites to use - strict validation: pathObject → pathQuery → ERROR + sites = None + + # Step 1: Check pathObject first + if pathObject: + # When pathObject is provided, we should have specific site information + # Extract site information from the pathObject result + try: + # Get the site information from the first folder in pathObject + if 'found_documents' in locals() 
and found_documents: + first_folder = found_documents[0] + site_name = first_folder.get("siteName") + site_id = first_folder.get("siteId") + + if site_name and site_id: + # Use the specific site from pathObject instead of discovering all sites + sites = [{ + "id": site_id, + "displayName": site_name, + "webUrl": first_folder.get("webUrl", "") + }] + logger.info(f"Using specific site from pathObject: {site_name} (ID: {site_id})") + else: + # Site info missing from pathObject - this is an error + return ActionResult.isFailure(error="Site information missing from pathObject. Cannot determine target site for read operation.") + else: + # No documents found in pathObject - this is an error + return ActionResult.isFailure(error="No valid folder information found in pathObject. Cannot determine target site for read operation.") + except Exception as e: + # Error processing pathObject - this is an error + return ActionResult.isFailure(error=f"Error processing pathObject: {str(e)}. Cannot determine target site for read operation.") + + # Step 2: If no pathObject, check pathQuery + elif pathQuery and pathQuery.strip() != "" and pathQuery.strip() != "*": + # Validate pathQuery format + if not pathQuery.startswith('/'): + return ActionResult.isFailure(error="pathQuery must start with '/' and include site name with syntax /site:/... e.g. /site:KM LayerFinance/Documents/Work") + + # Check if pathQuery contains search terms (words without proper path structure) + if not pathQuery.startswith('/site:') and not pathQuery.startswith('/Documents') and not pathQuery.startswith('/Shared Documents'): + return ActionResult.isFailure(error=f"Invalid pathQuery '{pathQuery}'. This appears to be search terms, not a valid SharePoint path. 
Use findDocumentPath action first to search for folders, then use the returned folder path as pathQuery.")
+
+                # For pathQuery, we need to discover sites to find the specific one
+                sites = await self._discoverSharePointSites(connection["accessToken"])
+                if not sites:
+                    return ActionResult.isFailure(error="No SharePoint sites found or accessible")
+            else:
+                # Step 3: Both pathObject and pathQuery failed - ERROR, NO FALLBACK
+                return ActionResult.isFailure(error="No valid read path provided. Either provide pathObject (from findDocumentPath) or a valid pathQuery with specific site information.")
+
             if not sites:
-                return ActionResult.isFailure(error="No SharePoint sites found or accessible")
+                return ActionResult.isFailure(error="No valid target site determined for read operation")

             # Resolve path query into search paths
             search_paths = self._resolvePathQuery(pathQuery)
@@ -988,18 +975,10 @@ class MethodSharepoint(MethodBase):
                 "timestamp": get_utc_timestamp()
             }

-            # Determine output format based on expected formats
+            # Use default JSON format for output
             output_extension = ".json"  # Default
             output_mime_type = "application/json"  # Default

-            if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
-                # Use the first expected format
-                expected_format = expectedDocumentFormats[0]
-                output_extension = expected_format.get("extension", ".json")
-                output_mime_type = expected_format.get("mimeType", "application/json")
-                logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
-            else:
-                logger.info("No expected format specified, using default .json format")

             return ActionResult(
                 success=True,
@@ -1025,61 +1004,121 @@ class MethodSharepoint(MethodBase):

         Parameters:
             connectionReference (str): Reference to the Microsoft connection
-            sitePath (str): REQUIRED - Specific SharePoint path where to upload documents. 
Must be a valid SharePoint path format: - - For direct upload: "/site://" (e.g., "/site:KM XYZ/Documents/Work") - - If user provides words like "word1 word2", the system MUST call findDocumentPath first to locate the actual folder path, the result then to give to this parameter + pathObject (str, optional): Path object to locate documents. This can ONLY be a reference to a result from sharepoint.findDocumentPath action + pathQuery (str, optional): Path query to locate documents, only if no pathObject is provided (e.g., "/Documents/Project1", "*" for all sites) documentList (str): Reference to the document list to upload fileNames (List[str]): List of names for the uploaded files - resultDocument (str, optional): JSON result document from findDocumentPath action (alternative to sitePath) - expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description """ try: connectionReference = parameters.get("connectionReference") - sitePath = parameters.get("sitePath", "/Documents") + pathQuery = parameters.get("pathQuery") documentList = parameters.get("documentList") fileNames = parameters.get("fileNames") - resultDocument = parameters.get("resultDocument") - expectedDocumentFormats = parameters.get("expectedDocumentFormats", []) + pathObject = parameters.get("pathObject") + + upload_path = pathQuery + logger.debug(f"Using pathQuery: {pathQuery}") if not connectionReference or not documentList or not fileNames: return ActionResult.isFailure(error="Connection reference, document list, and file names are required") - # If resultDocument is provided, extract folder IDs from it - if resultDocument: + # If pathObject is provided, extract folder IDs from it + if pathObject: try: import json # Resolve the reference label to get the actual document list - document_list = self.service.getChatDocumentsFromDocumentList([resultDocument]) + document_list = self.service.getChatDocumentsFromDocumentList([pathObject]) if not document_list or 
len(document_list) == 0: - return ActionResult.isFailure(error=f"No document list found for reference: {resultDocument}") + return ActionResult.isFailure(error=f"No document list found for reference: {pathObject}") # Get the first document's content (which should be the JSON) first_document = document_list[0] file_data = self.service.getFileData(first_document.fileId) if not file_data: - return ActionResult.isFailure(error=f"No file data found for document: {resultDocument}") + return ActionResult.isFailure(error=f"No file data found for document: {pathObject}") # Parse the JSON content result_data = json.loads(file_data) - found_documents = result_data.get("foundDocuments", []) - # Extract folder IDs from the result - folder_ids = [] + # Debug: Log the structure of the result document + logger.info(f"Result document keys: {list(result_data.keys())}") + + # Handle different result document formats + found_documents = [] + + # Check if it's a direct SharePoint result (has foundDocuments) + if "foundDocuments" in result_data: + found_documents = result_data.get("foundDocuments", []) + logger.info(f"Found {len(found_documents)} documents in foundDocuments array") + # Check if it's an AI validation result (has result string with validationReport) + elif "result" in result_data and "validationReport" in result_data["result"]: + try: + # Parse the nested JSON in the result field + nested_result = json.loads(result_data["result"]) + validation_report = nested_result.get("validationReport", {}) + document_details = validation_report.get("documentDetails", {}) + + if document_details: + # Convert the single document details to the expected format + doc = { + "id": document_details.get("id"), + "name": document_details.get("name"), + "type": document_details.get("type", "").lower(), # Convert "Folder" to "folder" + "siteName": document_details.get("siteName"), + "siteId": document_details.get("siteId"), + "fullPath": document_details.get("fullPath"), + "webUrl": 
document_details.get("webUrl", ""), + "parentPath": document_details.get("parentPath", "") + } + found_documents = [doc] + logger.info(f"Extracted 1 document from validation report") + except json.JSONDecodeError as e: + logger.error(f"Failed to parse nested JSON in result field: {e}") + return ActionResult.isFailure(error=f"Invalid nested JSON in pathObject: {str(e)}") + + # Debug: Log what we found in the result document + logger.info(f"Result document contains {len(found_documents)} documents") + for i, doc in enumerate(found_documents): + logger.info(f" Document {i+1}: name='{doc.get('name')}', type='{doc.get('type')}', id='{doc.get('id')}'") + + # Extract folder information from the result + folders = [] for doc in found_documents: if doc.get("type") == "folder": - folder_ids.append(doc.get("id")) + folders.append(doc) - if folder_ids: - # Use the first folder ID found as sitePath - sitePath = folder_ids[0] - logger.info(f"Using folder ID from resultDocument: {sitePath}") + logger.info(f"Found {len(folders)} folders in result document") + + if folders: + # Use the first folder found - prefer folder ID for direct API calls + first_folder = folders[0] + if first_folder.get("id"): + # Use folder ID directly for most reliable API calls + upload_path = first_folder.get("id") + logger.info(f"Using folder ID from pathObject: {upload_path}") + elif first_folder.get("fullPath"): + # Extract the correct path portion from fullPath by removing site name + full_path = first_folder.get("fullPath") + # fullPath format: \\SiteName\\Library\\Folder\\SubFolder + # We need to remove the first two parts (\\SiteName\\) to get the actual folder path + path_parts = full_path.lstrip('\\').split('\\') + if len(path_parts) > 1: + # Remove the first part (site name) and reconstruct the path + actual_path = '\\'.join(path_parts[1:]) + upload_path = actual_path + logger.info(f"Extracted path from fullPath: {upload_path}") + else: + upload_path = full_path + logger.info(f"Using full path 
from pathObject (no site name to remove): {upload_path}") + else: + return ActionResult.isFailure(error="No valid folder information found in pathObject") else: - return ActionResult.isFailure(error="No folders found in resultDocument") + return ActionResult.isFailure(error="No folders found in pathObject") except json.JSONDecodeError as e: - return ActionResult.isFailure(error=f"Invalid JSON in resultDocument: {str(e)}") + return ActionResult.isFailure(error=f"Invalid JSON in pathObject: {str(e)}") except Exception as e: - return ActionResult.isFailure(error=f"Error resolving resultDocument reference: {str(e)}") + return ActionResult.isFailure(error=f"Error resolving pathObject reference: {str(e)}") # Get Microsoft connection connection = self._getMicrosoftConnection(connectionReference) @@ -1093,39 +1132,85 @@ class MethodSharepoint(MethodBase): if not chatDocuments: return ActionResult.isFailure(error="No documents found for the provided reference") - # Discover all SharePoint sites accessible to the user - sites = await self._discoverSharePointSites(connection["accessToken"]) + # Determine sites to use based on whether pathObject was provided + sites = None + if pathObject: + # When pathObject is provided, we should have specific site information + # Extract site information from the pathObject result + try: + # Get the site information from the first folder in pathObject + if 'found_documents' in locals() and found_documents: + first_folder = found_documents[0] + site_name = first_folder.get("siteName") + site_id = first_folder.get("siteId") + + if site_name and site_id: + # Use the specific site from pathObject instead of discovering all sites + sites = [{ + "id": site_id, + "displayName": site_name, + "webUrl": first_folder.get("webUrl", "") + }] + logger.info(f"Using specific site from pathObject: {site_name} (ID: {site_id})") + else: + # Site info missing from pathObject - this is an error, not a fallback + return ActionResult.isFailure(error="Site 
information missing from pathObject. Cannot determine target site for upload.") + else: + # No documents found in pathObject - this is an error + return ActionResult.isFailure(error="No valid folder information found in pathObject. Cannot determine target site for upload.") + except Exception as e: + # Error processing pathObject - this is an error, not a fallback + return ActionResult.isFailure(error=f"Error processing pathObject: {str(e)}. Cannot determine target site for upload.") + else: + # No pathObject provided - check if pathQuery is valid + if not upload_path or upload_path.strip() == "" or upload_path.strip() == "*": + return ActionResult.isFailure(error="No valid upload path provided. Either provide pathObject (from findDocumentPath) or a valid pathQuery with specific site information.") + + # Validate pathQuery format + if not upload_path.startswith('/'): + return ActionResult.isFailure(error="pathQuery must start with '/' and include site name with syntax /site:/... e.g. /site:KM LayerFinance/Documents/Work") + + # Check if upload_path contains search terms (words without proper path structure) + if not upload_path.startswith('/site:') and not upload_path.startswith('/Documents') and not upload_path.startswith('/Shared Documents'): + return ActionResult.isFailure(error=f"Invalid pathQuery '{upload_path}'. This appears to be search terms, not a valid SharePoint path. 
Use findDocumentPath action first to search for folders, then use the returned folder path as pathQuery.") + + # For pathQuery, we need to discover sites to find the specific one + sites = await self._discoverSharePointSites(connection["accessToken"]) + if not sites: + return ActionResult.isFailure(error="No SharePoint sites found or accessible") + if not sites: - return ActionResult.isFailure(error="No SharePoint sites found or accessible") + return ActionResult.isFailure(error="No valid target site determined for upload") - # Enforce site-scoped path usage when using sitePath directly (without resultDocument) + # Process upload paths based on whether pathObject was provided upload_site_scope = None - if not resultDocument: - if not sitePath or not sitePath.startswith('/'): - return ActionResult.isFailure(error="sitePath must start with '/' and include site name with syntax /site:/... e.g. /site:KM LayerFinance/Documents/Work") - - # Check if sitePath contains search terms (words without proper path structure) - if not sitePath.startswith('/site:') and not sitePath.startswith('/Documents') and not sitePath.startswith('/Shared Documents'): - # This looks like search terms, not a valid path - return ActionResult.isFailure(error=f"Invalid sitePath '{sitePath}'. This appears to be search terms, not a valid SharePoint path. Use findDocumentPath action first to search for folders, then use the returned folder path as sitePath.") - - parsed = self._parse_site_scoped_path(sitePath) + if not pathObject: + # Parse the validated pathQuery to extract site and path information + parsed = self._parse_site_scoped_path(upload_path) if not parsed: - return ActionResult.isFailure(error="Invalid sitePath. Use /site:/") - # find matching site + return ActionResult.isFailure(error="Invalid upload_path. 
Use /site:/") + + # Find matching site candidate_sites = self._filter_sites_by_hint(sites, parsed["siteName"]) # substring match - # choose exact displayName match if available + # Choose exact displayName match if available exact = [s for s in candidate_sites if (s.get("displayName") or "").strip().lower() == parsed["siteName"].strip().lower()] selected_site = exact[0] if exact else (candidate_sites[0] if candidate_sites else None) if not selected_site: return ActionResult.isFailure(error=f"SharePoint site '{parsed['siteName']}' not found or not accessible") + upload_site_scope = selected_site # Use the inner path portion as the actual upload target path upload_paths = [f"/{parsed['innerPath'].lstrip('/')}"] sites = [selected_site] else: - # Resolve path query into upload paths (fallback behavior when using resultDocument) - upload_paths = self._resolvePathQuery(sitePath) + # When using pathObject, check if upload_path is a folder ID or a path + if upload_path.startswith('01PPXICCB') or upload_path.startswith('01'): + # It's a folder ID - use it directly + upload_paths = [upload_path] + logger.info(f"Using folder ID directly for upload: {upload_path}") + else: + # It's a path - resolve it normally + upload_paths = self._resolvePathQuery(upload_path) # Process each document upload upload_results = [] @@ -1155,12 +1240,26 @@ class MethodSharepoint(MethodBase): # Use the first upload path or default to Documents upload_path = upload_paths[0] if upload_paths else "/Documents" - upload_path = upload_path.rstrip('/') + '/' + fileName - upload_path_clean = upload_path.lstrip('/') + + # Handle wildcard paths - replace with default Documents folder + if upload_path == "*": + upload_path = "/Documents" + logger.warning(f"Wildcard path '*' detected, using default '/Documents' folder for upload") + + # Check if upload_path is a folder ID or a regular path + if upload_path.startswith('01PPXICCB') or upload_path.startswith('01'): + # It's a folder ID - use the folder-specific 
upload endpoint + upload_endpoint = f"sites/{site_id}/drive/items/{upload_path}:/{fileName}:/content" + logger.info(f"Using folder ID upload endpoint: {upload_endpoint}") + else: + # It's a regular path - use the root-based upload endpoint + upload_path = upload_path.rstrip('/') + '/' + fileName + upload_path_clean = upload_path.lstrip('/') + upload_endpoint = f"sites/{site_id}/drive/root:/{upload_path_clean}:/content" + logger.info(f"Using path-based upload endpoint: {upload_endpoint}") # Upload endpoint for small files (< 4MB) if len(file_data) < 4 * 1024 * 1024: # 4MB - upload_endpoint = f"sites/{site_id}/drive/root:/{upload_path_clean}:/content" # Upload the file upload_result = await self._makeGraphApiCall( @@ -1178,6 +1277,7 @@ class MethodSharepoint(MethodBase): "siteName": site_name, "siteUrl": site_url, "uploadPath": upload_path, + "uploadEndpoint": upload_endpoint, "sharepointFileId": upload_result.get("id"), "webUrl": upload_result.get("webUrl"), "size": upload_result.get("size"), @@ -1212,7 +1312,7 @@ class MethodSharepoint(MethodBase): # Create result data result_data = { "connectionReference": connectionReference, - "sitePath": sitePath, + "pathQuery": upload_path, "documentList": documentList, "fileNames": fileNames, "sitesAvailable": len(sites), @@ -1225,18 +1325,10 @@ class MethodSharepoint(MethodBase): "timestamp": get_utc_timestamp() } - # Determine output format based on expected formats + # Use default JSON format for output output_extension = ".json" # Default output_mime_type = "application/json" # Default - if expectedDocumentFormats and len(expectedDocumentFormats) > 0: - # Use the first expected format - expected_format = expectedDocumentFormats[0] - output_extension = expected_format.get("extension", ".json") - output_mime_type = expected_format.get("mimeType", "application/json") - logger.info(f"Using expected format: {output_extension} ({output_mime_type})") - else: - logger.info("No expected format specified, using default .json 
format") return ActionResult( success=True, @@ -1263,82 +1355,194 @@ class MethodSharepoint(MethodBase): Parameters: connectionReference (str): Reference to the Microsoft connection - searchQuery (str): [path:][type:][mode:]query - "Test Plan", "folders:Test Plan", "/Documents", "*" - Note: Use "folders:Name" to search for folders anywhere, not "path:/Name" which looks only in root - resultDocument (str, optional): JSON result document from findDocumentPath action (alternative to searchQuery) + pathObject (str, optional): Path object to locate documents. This can ONLY be a reference to a result from sharepoint.findDocumentPath action + pathQuery (str, optional): Path query to locate documents, only if no pathObject is provided (e.g., "/Documents/Project1", "*" for all sites) includeSubfolders (bool, optional): Whether to include subfolders (default: False) - expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description """ try: connectionReference = parameters.get("connectionReference") - searchQuery = parameters.get("searchQuery", "*") - resultDocument = parameters.get("resultDocument") + pathObject = parameters.get("pathObject") + pathQuery = parameters.get("pathQuery") includeSubfolders = parameters.get("includeSubfolders", False) # Default to False for better UX - expectedDocumentFormats = parameters.get("expectedDocumentFormats", []) + + list_query = pathQuery + logger.info(f"Using pathQuery: {pathQuery}") if not connectionReference: return ActionResult.isFailure(error="Connection reference is required") - # If resultDocument is provided, resolve the reference and extract folder IDs from it - if resultDocument: + # If pathObject is provided, resolve the reference and extract folder IDs from it + # Note: pathObject takes precedence over pathQuery when both are provided + if pathObject: + if pathQuery and pathQuery != "*": + logger.debug(f"Both pathObject and pathQuery provided - using pathObject (pathQuery 
'{pathQuery}' will be ignored)") try: import json # Resolve the reference label to get the actual document list - document_list = self.service.getChatDocumentsFromDocumentList([resultDocument]) + document_list = self.service.getChatDocumentsFromDocumentList([pathObject]) if not document_list or len(document_list) == 0: - return ActionResult.isFailure(error=f"No document list found for reference: {resultDocument}") + return ActionResult.isFailure(error=f"No document list found for reference: {pathObject}") # Get the first document's content (which should be the JSON) first_document = document_list[0] logger.info(f"Document fileId: {first_document.fileId}, fileName: {first_document.fileName}") file_data = self.service.getFileData(first_document.fileId) if not file_data: - return ActionResult.isFailure(error=f"No file data found for document: {resultDocument} (fileId: {first_document.fileId})") + return ActionResult.isFailure(error=f"No file data found for document: {pathObject} (fileId: {first_document.fileId})") logger.info(f"File data length: {len(file_data) if file_data else 0}") # Parse the JSON content result_data = json.loads(file_data) - found_documents = result_data.get("foundDocuments", []) - # Extract folder IDs from the result - folder_ids = [] + # Debug: Log the structure of the result document + logger.info(f"Result document keys: {list(result_data.keys())}") + + # Handle different result document formats + found_documents = [] + + # Check if it's a direct SharePoint result (has foundDocuments) + if "foundDocuments" in result_data: + found_documents = result_data.get("foundDocuments", []) + logger.info(f"Found {len(found_documents)} documents in foundDocuments array") + # Check if it's an AI validation result (has result string with validationReport) + elif "result" in result_data and "validationReport" in result_data["result"]: + try: + # Parse the nested JSON in the result field + nested_result = json.loads(result_data["result"]) + validation_report = 
nested_result.get("validationReport", {}) + document_details = validation_report.get("documentDetails", {}) + + if document_details: + # Convert the single document details to the expected format + doc = { + "id": document_details.get("id"), + "name": document_details.get("name"), + "type": document_details.get("type", "").lower(), # Convert "Folder" to "folder" + "siteName": document_details.get("siteName"), + "siteId": document_details.get("siteId"), + "fullPath": document_details.get("fullPath"), + "webUrl": document_details.get("webUrl", ""), + "parentPath": document_details.get("parentPath", "") + } + found_documents = [doc] + logger.info(f"Extracted 1 document from validation report") + except json.JSONDecodeError as e: + logger.error(f"Failed to parse nested JSON in result field: {e}") + return ActionResult.isFailure(error=f"Invalid nested JSON in pathObject: {str(e)}") + + # Debug: Log what we found in the result document + logger.info(f"Result document contains {len(found_documents)} documents") + for i, doc in enumerate(found_documents): + logger.info(f" Document {i+1}: name='{doc.get('name')}', type='{doc.get('type')}', id='{doc.get('id')}'") + + # Extract folder information from the result + folders = [] for doc in found_documents: if doc.get("type") == "folder": - folder_ids.append(doc.get("id")) + folders.append(doc) - if folder_ids: - # Use the first folder ID found - searchQuery = folder_ids[0] - logger.info(f"Using folder ID from resultDocument: {searchQuery}") + logger.info(f"Found {len(folders)} folders in result document") + + if folders: + # Use the first folder found - prefer folder ID for direct API calls + first_folder = folders[0] + if first_folder.get("id"): + # Use folder ID directly for most reliable API calls + list_query = first_folder.get("id") + logger.info(f"Using folder ID from pathObject: {list_query}") + elif first_folder.get("fullPath"): + # Extract the correct path portion from fullPath by removing site name + full_path = 
first_folder.get("fullPath") + # fullPath format: \\SiteName\\Library\\Folder\\SubFolder + # We need to remove the first two parts (\\SiteName\\) to get the actual folder path + path_parts = full_path.lstrip('\\').split('\\') + if len(path_parts) > 1: + # Remove the first part (site name) and reconstruct the path + actual_path = '\\'.join(path_parts[1:]) + list_query = actual_path + logger.info(f"Extracted path from fullPath: {list_query}") + else: + list_query = full_path + logger.info(f"Using full path from pathObject (no site name to remove): {list_query}") + else: + return ActionResult.isFailure(error="No valid folder information found in pathObject") else: - return ActionResult.isFailure(error="No folders found in resultDocument") + return ActionResult.isFailure(error="No folders found in pathObject") except json.JSONDecodeError as e: - return ActionResult.isFailure(error=f"Invalid JSON in resultDocument: {str(e)}") + return ActionResult.isFailure(error=f"Invalid JSON in pathObject: {str(e)}") except Exception as e: - return ActionResult.isFailure(error=f"Error resolving resultDocument reference: {str(e)}") + return ActionResult.isFailure(error=f"Error resolving pathObject reference: {str(e)}") # Get Microsoft connection connection = self._getMicrosoftConnection(connectionReference) if not connection: return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference") - logger.info(f"Starting SharePoint listDocuments for searchQuery: {searchQuery}") + logger.info(f"Starting SharePoint listDocuments for list_query: {list_query}") logger.debug(f"Connection ID: {connection['id']}") - # Parse searchQuery to extract path, search terms, search type, and options - pathQuery, fileQuery, searchType, searchOptions = self._parseSearchQuery(searchQuery) + # Parse list_query to extract path, search terms, search type, and options + pathQuery, fileQuery, searchType, searchOptions = self._parseSearchQuery(list_query) + + # 
Determine sites to use - strict validation: pathObject → pathQuery → ERROR + sites = None + + # Step 1: Check pathObject first + if pathObject: + # When pathObject is provided, we should have specific site information + # Extract site information from the pathObject result + try: + # Get the site information from the first folder in pathObject + if 'found_documents' in locals() and found_documents: + first_folder = found_documents[0] + site_name = first_folder.get("siteName") + site_id = first_folder.get("siteId") + + if site_name and site_id: + # Use the specific site from pathObject instead of discovering all sites + sites = [{ + "id": site_id, + "displayName": site_name, + "webUrl": first_folder.get("webUrl", "") + }] + logger.info(f"Using specific site from pathObject: {site_name} (ID: {site_id})") + else: + # Site info missing from pathObject - this is an error + return ActionResult.isFailure(error="Site information missing from pathObject. Cannot determine target site for list operation.") + else: + # No documents found in pathObject - this is an error + return ActionResult.isFailure(error="No valid folder information found in pathObject. Cannot determine target site for list operation.") + except Exception as e: + # Error processing pathObject - this is an error + return ActionResult.isFailure(error=f"Error processing pathObject: {str(e)}. Cannot determine target site for list operation.") + + # Step 2: If no pathObject, check pathQuery + elif pathQuery and pathQuery.strip() != "" and pathQuery.strip() != "*": + # Validate pathQuery format + if not pathQuery.startswith('/'): + return ActionResult.isFailure(error="pathQuery must start with '/' and include site name with syntax /site:/... e.g. 
/site:KM LayerFinance/Documents/Work") + + # Check if pathQuery contains search terms (words without proper path structure) + if not pathQuery.startswith('/site:') and not pathQuery.startswith('/Documents') and not pathQuery.startswith('/Shared Documents'): + return ActionResult.isFailure(error=f"Invalid pathQuery '{pathQuery}'. This appears to be search terms, not a valid SharePoint path. Use findDocumentPath action first to search for folders, then use the returned folder path as pathQuery.") + + # For pathQuery, we need to discover sites to find the specific one + sites = await self._discoverSharePointSites(connection["accessToken"]) + if not sites: + return ActionResult.isFailure(error="No SharePoint sites found or accessible") + else: + # Step 3: Both pathObject and pathQuery failed - ERROR, NO FALLBACK + return ActionResult.isFailure(error="No valid list path provided. Either provide pathObject (from findDocumentPath) or a valid pathQuery with specific site information.") - # Discover all SharePoint sites accessible to the user - sites = await self._discoverSharePointSites(connection["accessToken"]) if not sites: - return ActionResult.isFailure(error="No SharePoint sites found or accessible") + return ActionResult.isFailure(error="No valid target site determined for list operation") - # Check if searchQuery is a folder ID (starts with 01PPXICCB...) - if searchQuery.startswith('01PPXICCB') or searchQuery.startswith('01'): + # Check if list_query is a folder ID (starts with 01PPXICCB...) 
+ if list_query.startswith('01PPXICCB') or list_query.startswith('01'): # Direct folder ID - use it directly - folder_paths = [searchQuery] - logger.info(f"Using direct folder ID: {searchQuery}") + folder_paths = [list_query] + logger.info(f"Using direct folder ID: {list_query}") else: # Resolve path query into folder paths folder_paths = self._resolvePathQuery(pathQuery) @@ -1382,6 +1586,19 @@ class MethodSharepoint(MethodBase): processed_items = [] for item in items: + # Use improved folder detection logic + is_folder = False + if 'folder' in item: + is_folder = True + else: + # Try to detect by URL pattern or other indicators + web_url = item.get('webUrl', '') + name = item.get('name', '') + + # Check if URL has no file extension and looks like a folder path + if '.' not in name and ('/' in web_url or '\\' in web_url): + is_folder = True + item_info = { "id": item.get("id"), "name": item.get("name"), @@ -1389,7 +1606,7 @@ class MethodSharepoint(MethodBase): "createdDateTime": item.get("createdDateTime"), "lastModifiedDateTime": item.get("lastModifiedDateTime"), "webUrl": item.get("webUrl"), - "type": "folder" if "folder" in item else "file", + "type": "folder" if is_folder else "file", "siteName": site_name, "siteUrl": site_url } @@ -1411,7 +1628,8 @@ class MethodSharepoint(MethodBase): # If include subfolders is enabled, get ONLY direct subfolder contents (1 level deep only) if includeSubfolders: - logger.info(f"Including subfolders - processing {len([item for item in processed_items if item['type'] == 'folder'])} folders") + folder_items = [item for item in processed_items if item['type'] == 'folder'] + logger.info(f"Including subfolders - processing {len(folder_items)} folders") subfolder_count = 0 max_subfolders = 10 # Limit to prevent infinite loops @@ -1428,6 +1646,19 @@ class MethodSharepoint(MethodBase): logger.debug(f"Found {len(subfolder_items)} items in subfolder {item['name']}") for subfolder_item in subfolder_items: + # Use improved folder detection 
logic for subfolder items + subfolder_is_folder = False + if 'folder' in subfolder_item: + subfolder_is_folder = True + else: + # Try to detect by URL pattern or other indicators + subfolder_web_url = subfolder_item.get('webUrl', '') + subfolder_name = subfolder_item.get('name', '') + + # Check if URL has no file extension and looks like a folder path + if '.' not in subfolder_name and ('/' in subfolder_web_url or '\\' in subfolder_web_url): + subfolder_is_folder = True + # Only add files and direct subfolders, NO RECURSION subfolder_item_info = { "id": subfolder_item.get("id"), @@ -1436,7 +1667,7 @@ class MethodSharepoint(MethodBase): "createdDateTime": subfolder_item.get("createdDateTime"), "lastModifiedDateTime": subfolder_item.get("lastModifiedDateTime"), "webUrl": subfolder_item.get("webUrl"), - "type": "folder" if "folder" in subfolder_item else "file", + "type": "folder" if subfolder_is_folder else "file", "parentPath": subfolder_path, "siteName": site_name, "siteUrl": site_url @@ -1480,25 +1711,17 @@ class MethodSharepoint(MethodBase): # Create result data result_data = { - "searchQuery": searchQuery, + "pathQuery": list_query, "includeSubfolders": includeSubfolders, "sitesSearched": len(sites), "listResults": list_results, "timestamp": get_utc_timestamp() } - # Determine output format based on expected formats + # Use default JSON format for output output_extension = ".json" # Default output_mime_type = "application/json" # Default - if expectedDocumentFormats and len(expectedDocumentFormats) > 0: - # Use the first expected format - expected_format = expectedDocumentFormats[0] - output_extension = expected_format.get("extension", ".json") - output_mime_type = expected_format.get("mimeType", "application/json") - logger.info(f"Using expected format: {output_extension} ({output_mime_type})") - else: - logger.info("No expected format specified, using default .json format") return ActionResult( success=True, diff --git a/test_graph_search.py 
b/test_graph_search.py new file mode 100644 index 00000000..981aa778 --- /dev/null +++ b/test_graph_search.py @@ -0,0 +1,311 @@ +#!/usr/bin/env python3 +""" +Simple test script for Microsoft Graph Search API +Tests folder search queries directly +""" + +import requests +import json +import sys +import os + +# Add the gateway modules to the path +sys.path.append(os.path.dirname(os.path.abspath(__file__))) + +def test_graph_folders_direct(access_token): + """Test direct Microsoft Graph API call to list folders""" + print("🔍 Testing direct Graph API folder listing...") + + # Try to list folders from the main site - need to get site ID first + # Let's try to find the site by name first + url = "https://graph.microsoft.com/v1.0/sites/pcuster.sharepoint.com:/sites/SSSRESYNachfolge:/drive/root/children" + + headers = { + "Authorization": f"Bearer {access_token}", + "Content-Type": "application/json" + } + + try: + response = requests.get(url, headers=headers) + + if response.status_code == 200: + data = response.json() + items = data.get('value', []) + print(f"✅ SUCCESS - Found {len(items)} items in root") + + folders = [] + files = [] + + for item in items: + if 'folder' in item: + folders.append(item) + elif 'file' in item: + files.append(item) + + print(f" 📁 Folders: {len(folders)}") + print(f" 📄 Files: {len(files)}") + + if folders: + print("\n📁 FOLDERS found:") + for i, folder in enumerate(folders[:5], 1): + name = folder.get('name', 'No name') + web_url = folder.get('webUrl', 'No URL') + print(f" {i}. 
{name}") + print(f" URL: {web_url}") + print() + + else: + print(f"❌ ERROR - Status {response.status_code}") + print(f"Error: {response.text[:200]}") + + except Exception as e: + print(f"Exception: {str(e)}") + +def test_graph_search(access_token, query_string): + """Test a Microsoft Graph Search API query and show resulting paths""" + + url = "https://graph.microsoft.com/v1.0/search/query" + + headers = { + "Authorization": f"Bearer {access_token}", + "Content-Type": "application/json" + } + + payload = { + "requests": [ + { + "entityTypes": ["driveItem"], + "query": { + "queryString": query_string + }, + "from": 0, + "size": 50 + } + ] + } + + print(f"Testing: {query_string}") + print("-" * 50) + + try: + response = requests.post(url, headers=headers, json=payload) + + if response.status_code == 200: + data = response.json() + + # Extract useful info + if "value" in data and len(data["value"]) > 0: + hits = data["value"][0].get("hitsContainers", []) + if hits: + total = hits[0].get("total", 0) + results = hits[0].get("hits", []) + print(f"✅ SUCCESS - Found {total} results") + + # First, let's see what types of results we're getting + print(f"📊 Analyzing {len(results)} results...") + + # Count different types of results with better detection + file_count = 0 + folder_count = 0 + other_count = 0 + + # Debug: Let's see what the actual resource structure looks like + if results: + print("🔍 DEBUG: First result structure:") + first_result = results[0] + print(f" Keys: {list(first_result.keys())}") + if 'resource' in first_result: + resource = first_result['resource'] + print(f" Resource keys: {list(resource.keys())}") + if 'folder' in resource: + print(f" Folder info: {resource['folder']}") + if 'file' in resource: + print(f" File info: {resource['file']}") + print() + + for result in results: + resource = result.get('resource', {}) + + # Better detection logic + is_folder = False + is_file = False + + # Check for explicit folder/file indicators + if 'folder' in 
resource: + is_folder = True + elif 'file' in resource: + is_file = True + else: + # Try to detect by URL pattern or other indicators + web_url = resource.get('webUrl', '') + name = resource.get('name', '') + + # Check if URL ends with a file extension (likely a file) + if '.' in name and any(name.lower().endswith(ext) for ext in ['.pdf', '.docx', '.xlsx', '.pptx', '.txt', '.cs', '.py', '.js', '.html', '.css']): + is_file = True + # Check if URL has no file extension and looks like a folder path + elif '.' not in name and ('/' in web_url or '\\' in web_url): + is_folder = True + + if is_folder: + folder_count += 1 + elif is_file: + file_count += 1 + else: + other_count += 1 + + print(f" 📄 Files: {file_count}") + print(f" 📁 Folders: {folder_count}") + print(f" ❓ Other: {other_count}") + print() + + # Show sample results regardless of type + print(f"📋 Sample results (showing first 5):") + for i, result in enumerate(results[:5], 1): + resource = result.get('resource', {}) + web_url = resource.get('webUrl', 'No URL') + name = resource.get('name', 'No name') + + # Determine type using same logic as counting + is_folder = False + is_file = False + + if 'folder' in resource: + is_folder = True + elif 'file' in resource: + is_file = True + else: + # Try to detect by URL pattern or other indicators + web_url = resource.get('webUrl', '') + name = resource.get('name', '') + + # Check if URL ends with a file extension (likely a file) + if '.' in name and any(name.lower().endswith(ext) for ext in ['.pdf', '.docx', '.xlsx', '.pptx', '.txt', '.cs', '.py', '.js', '.html', '.css']): + is_file = True + # Check if URL has no file extension and looks like a folder path + elif '.' 
not in name and ('/' in web_url or '\\' in web_url): + is_folder = True + + if is_folder: + item_type = "📁 FOLDER" + elif is_file: + file_info = resource.get('file', {}) + mime_type = file_info.get('mimeType', 'Unknown type') if file_info else 'Detected by extension' + item_type = f"📄 FILE ({mime_type})" + else: + item_type = "❓ UNKNOWN" + + # Extract path from webUrl + if '/sites/SSSRESYNachfolge/' in web_url: + path_part = web_url.split('/sites/SSSRESYNachfolge/')[-1] + path_with_backslashes = path_part.replace('/', '\\') + display_path = f"\\{path_with_backslashes}" + else: + display_path = web_url + + print(f" {i}. {item_type} - {name}") + print(f" Path: {display_path}") + print(f" URL: {web_url}") + print() + + if len(results) > 5: + print(f" ... and {len(results) - 5} more results") + + # Now filter and show only FOLDER results if any exist + folder_results = [] + for result in results: + resource = result.get('resource', {}) + + # Use the same detection logic as counting + is_folder = False + if 'folder' in resource: + is_folder = True + else: + # Try to detect by URL pattern or other indicators + web_url = resource.get('webUrl', '') + name = resource.get('name', '') + + # Check if URL has no file extension and looks like a folder path + if '.' not in name and ('/' in web_url or '\\' in web_url): + is_folder = True + + if is_folder: + folder_results.append(result) + + if folder_results: + print(f"\n📁 FOLDER DETAILS ({len(folder_results)} folders found):") + for i, result in enumerate(folder_results, 1): + web_url = result.get('resource', {}).get('webUrl', 'No URL') + name = result.get('resource', {}).get('name', 'No name') + + if '/sites/SSSRESYNachfolge/' in web_url: + path_part = web_url.split('/sites/SSSRESYNachfolge/')[-1] + path_with_backslashes = path_part.replace('/', '\\') + folder_path = f"\\{path_with_backslashes}" + else: + folder_path = web_url + + print(f" {i}. 
📁 {name}")
+                        print(f"     Path: {folder_path}")
+                        print(f"     URL: {web_url}")
+                        print()
+                else:
+                    print(f"\n❌ No folders found in results - all {total} results are files or other types")
+            else:
+                print("❌ SUCCESS but no hits containers found")
+        else:
+            print("❌ SUCCESS but no value array in response")
+
+    else:
+        print(f"❌ ERROR - Status {response.status_code}")
+        error_text = response.text[:200] + "..." if len(response.text) > 200 else response.text
+        print(f"Error: {error_text}")
+
+    except Exception as e:
+        print(f"Exception: {str(e)}")
+
+def main():
+    """Main test function"""
+
+    # SECURITY: never hard-code credentials. The bearer token previously
+    # committed here must be considered leaked and revoked; read it from the
+    # environment instead.
+    access_token = os.environ["GRAPH_ACCESS_TOKEN"]
+
+    print("=" * 60)
+    print("Microsoft Graph API Test Suite")
+    print("=" * 60)
+
+    # First test: Direct folder listing (should work better than search)
+    print("\nTEST 0: Direct Graph API folder listing")
+    test_graph_folders_direct(access_token)
+
+    # Test different query types to find both files and folders
+    test_queries = [
+        # Test 1: Test with Venus folder (empty folder created for testing)
+        "Venus",
+
+        # Test 2: Folder-specific searches for Venus
+        "kind:folder AND Venus",
+
+        # Test 3: Original specific query (found 8 results - all files)
+        "Druckersteuerung AND Eskalation AND 
Logobject", + + # Test 4: Broader folder-focused queries + "Druckersteuerung", + "Eskalation", + "Logobject", + + # Test 5: Folder-specific searches + "kind:folder AND Druckersteuerung", + "kind:folder AND Eskalation", + + # Test 6: General folder search to see what folders exist + "kind:folder", + ] + + for i, query in enumerate(test_queries, 1): + print(f"\nTEST {i}: {query}") + test_graph_search(access_token, query) + print() + +if __name__ == "__main__": + main()