diff --git a/modules/chat/handling/handlingTasks.py b/modules/chat/handling/handlingTasks.py
index 88465b0e..feb7b335 100644
--- a/modules/chat/handling/handlingTasks.py
+++ b/modules/chat/handling/handlingTasks.py
@@ -792,7 +792,7 @@ class HandlingTasks:
 
         # Add specific error details if available
         if retry_context and hasattr(retry_context, 'previous_review_result') and retry_context.previous_review_result:
-            reason = retry_context.previous_review_result.get('reason', '')
+            reason = retry_context.previous_review_result.reason or ''
             if reason and reason != "Task failed after all retries.":
                 error_message += f"{reason}\n\n"
diff --git a/modules/chat/handling/promptFactory.py b/modules/chat/handling/promptFactory.py
index 9faa06b3..ada386ba 100644
--- a/modules/chat/handling/promptFactory.py
+++ b/modules/chat/handling/promptFactory.py
@@ -11,6 +11,85 @@ logger = logging.getLogger(__name__)
 
 # Prompt creation helpers extracted from managerChat.py
 
+def _getPreviousRoundContext(service, workflow) -> str:
+    """Get context from previous workflow rounds to help understand follow-up prompts"""
+    try:
+        if not workflow or not hasattr(workflow, 'messages') or not workflow.messages:
+            return ""
+
+        # Get current round number
+        current_round = getattr(workflow, 'currentRound', 0)
+
+        # If this is round 0 or 1, there's no previous context
+        if current_round <= 1:
+            return ""
+
+        # Find messages from previous rounds (rounds before current)
+        previous_messages = []
+        for message in workflow.messages:
+            message_round = getattr(message, 'roundNumber', 0)
+            if message_round > 0 and message_round < current_round:
+                previous_messages.append(message)
+
+        if not previous_messages:
+            return ""
+
+        # Sort by round number and sequence to get chronological order
+        previous_messages.sort(key=lambda msg: (getattr(msg, 'roundNumber', 0), getattr(msg, 'sequenceNr', 0)))
+
+        # Group messages by round and categorize them
+        context_parts = []
+        current_round_context = {}
+
+        for message in previous_messages:
+            round_num = getattr(message, 'roundNumber', 0)
+            if round_num not in current_round_context:
+                current_round_context[round_num] = {
+                    'user_inputs': [],
+                    'assistant_responses': [],
+                    'task_outcomes': [],
+                    'documents_processed': []
+                }
+
+            # Categorize messages
+            if message.role == 'user':
+                current_round_context[round_num]['user_inputs'].append(message.message)
+            elif message.role == 'assistant':
+                # Check if it's a task completion or error message
+                if 'task' in message.message.lower() and ('completed' in message.message.lower() or 'failed' in message.message.lower() or 'error' in message.message.lower()):
+                    current_round_context[round_num]['task_outcomes'].append(message.message)
+                else:
+                    current_round_context[round_num]['assistant_responses'].append(message.message)
+
+            # Check for document processing
+            if hasattr(message, 'documents') and message.documents:
+                doc_names = [doc.fileName for doc in message.documents if hasattr(doc, 'fileName')]
+                if doc_names:
+                    current_round_context[round_num]['documents_processed'].extend(doc_names)
+
+        # Build context summary
+        for round_num in sorted(current_round_context.keys()):
+            round_data = current_round_context[round_num]
+            context_parts.append(f"ROUND {round_num} CONTEXT:")
+
+            if round_data['user_inputs']:
+                context_parts.append(f"  User requests: {'; '.join(round_data['user_inputs'])}")
+
+            if round_data['task_outcomes']:
+                context_parts.append(f"  Task outcomes: {'; '.join(round_data['task_outcomes'])}")
+
+            if round_data['documents_processed']:
+                context_parts.append(f"  Documents processed: {', '.join(set(round_data['documents_processed']))}")
+
+        if context_parts:
+            return "\n".join(context_parts)
+        else:
+            return ""
+
+    except Exception as e:
+        logger.error(f"Error getting previous round context: {str(e)}")
+        return ""
+
 def createTaskPlanningPrompt(context: TaskContext, service) -> str:
     """Create enhanced prompt for task planning with user-friendly message generation and language detection"""
     # Get user language directly from service.user.language
@@ -22,21 +101,29 @@ def createTaskPlanningPrompt(context: TaskContext, service) -> str:
 
     # Extract available documents from context - use Pydantic model directly
     available_documents = context.available_documents or "No documents available"
 
+    # Get previous workflow round context for better understanding of follow-up prompts
+    previous_round_context = _getPreviousRoundContext(service, context.workflow)
+
     return f"""You are a task planning AI that analyzes user requests and creates structured task plans with user-friendly feedback messages.
 
 USER REQUEST: {user_request}
 
 AVAILABLE DOCUMENTS: {available_documents}
 
+PREVIOUS WORKFLOW ROUNDS CONTEXT:
+{previous_round_context if previous_round_context else "No previous workflow rounds - this is the first round."}
+
 INSTRUCTIONS:
-1. Analyze the user request and available documents
-2. Group related topics and sequential steps into single, comprehensive tasks
-3. Focus on business outcomes, not technical operations
-4. Each task should produce meaningful, usable outputs
-5. Ensure proper handover between tasks using result labels
-6. Detect the language of the user request and include it in languageUserDetected
-7. Generate user-friendly messages for each task in the user's request language
-8. Return a JSON object with the exact structure shown below
+1. Analyze the user request, available documents, and previous workflow rounds context
+2. If the user request appears to be a follow-up (like "try again", "versuche es nochmals", "retry", etc.),
+   use the PREVIOUS WORKFLOW ROUNDS CONTEXT to understand what the user wants to retry or continue
+3. Group related topics and sequential steps into single, comprehensive tasks
+4. Focus on business outcomes, not technical operations
+5. Each task should produce meaningful, usable outputs
+6. Ensure proper handover between tasks using result labels
+7. Detect the language of the user request and include it in languageUserDetected
+8. Generate user-friendly messages for each task in the user's request language
+9. Return a JSON object with the exact structure shown below
 
 TASK GROUPING PRINCIPLES:
 - COMBINE RELATED TOPICS: Group related subjects, sequential steps, or workflow-structured activities into single tasks
@@ -67,6 +154,21 @@
 TASK PLANNING PRINCIPLES:
 - Group related activities to minimize task fragmentation
 - Only create multiple tasks when dealing with truly different, independent objectives
 
+FOLLOW-UP PROMPT HANDLING:
+- If the user request is a follow-up (e.g., "try again", "versuche es nochmals", "retry", "continue", "proceed"),
+  analyze the PREVIOUS WORKFLOW ROUNDS CONTEXT to understand what failed or was incomplete
+- Use the previous round's user requests and task outcomes to determine what the user wants to retry
+- If previous rounds failed due to missing documents, and documents are now available,
+  create tasks that use the newly available documents to accomplish the original request
+- Maintain the same business objective from previous rounds but adapt to current available resources
+
+SPECIFIC SCENARIO HANDLING:
+- If previous round failed with "documents missing" error and current round has documents available,
+  the user likely wants to retry the same operation with the newly provided documents
+- Example: Previous round "speichere mir die 3 dokumente im sharepoint unter xxx" failed due to missing documents,
+  current round "versuche es nochmals" with documents should retry the SharePoint save operation
+- Always check if the current request is a retry by looking for retry keywords and previous round context
+
 REQUIRED JSON STRUCTURE:
 {{
   "overview": "Brief description of the overall plan",
@@ -126,20 +228,81 @@ async def createActionDefinitionPrompt(context: TaskContext, service) -> str:
 
     connRefs = service.getConnectionReferenceList()
 
-    # Debug logging for connections
-    logging.debug(f"Connection references retrieved: {connRefs}")
-    logging.debug(f"Connection references type: {type(connRefs)}")
-    logging.debug(f"Connection references length: {len(connRefs) if connRefs else 0}")
-
-    # Log document availability for debugging
-    logging.debug(f"Enhanced document context length: {len(available_documents_str)}")
-
-    available_methods_str = ''
+    # Create a structured JSON format for better AI parsing
+    # This replaces the old hard-to-read format with a clean JSON structure
+    # that the AI can easily parse and understand
+    available_methods_json = {}
     for method, actions in method_actions.items():
-        available_methods_str += f"- {method}:\n"
+        available_methods_json[method] = {}
+        # Get the method instance for accessing docstrings
+        method_instance = service.methods.get(method, {}).get('instance') if hasattr(service, 'methods') else None
+
         for action, sig in actions:
-            available_methods_str += f"  - {action}: {sig}\n"
-
+            # Parse the signature to extract parameters
+            if '(' in sig and ')' in sig:
+                # Extract parameters from signature
+                params_start = sig.find('(')
+                params_end = sig.find(')')
+                params_str = sig[params_start+1:params_end]
+
+                # Parse parameters directly from the docstring - much simpler and more reliable!
+                parameters = []
+
+                # Get the actual function's docstring
+                if method_instance and hasattr(method_instance, action):
+                    func = getattr(method_instance, action)
+                    if hasattr(func, '__doc__') and func.__doc__:
+                        docstring = func.__doc__
+
+                        # Parse Parameters section from docstring
+                        lines = docstring.split('\n')
+                        in_parameters = False
+                        for i, line in enumerate(lines):
+                            original_line = line
+                            line = line.strip()
+
+                            if line == 'Parameters:':
+                                in_parameters = True
+                                continue
+                            elif in_parameters and line and not original_line.startswith(' ') and not original_line.startswith('\t'):
+                                # End of parameters section
+                                break
+                            elif in_parameters and (original_line.startswith(' ') or original_line.startswith('\t')):
+                                # This is a parameter line - already stripped
+                                # Format: "paramName (type): description"
+                                if ':' in line:
+                                    # Find the colon that separates param from description
+                                    colon_pos = line.find(':')
+                                    param_part = line[:colon_pos].strip()
+                                    description = line[colon_pos+1:].strip()
+
+                                    # Parse parameter name and type
+                                    if '(' in param_part and ')' in param_part:
+                                        param_name = param_part.split('(')[0].strip()
+                                        type_part = param_part[param_part.find('(')+1:param_part.find(')')].strip()
+
+                                        # Check if optional
+                                        is_optional = 'optional' in type_part
+                                        param_type = type_part.replace('optional', '').strip().rstrip(',').strip()
+
+                                        parameters.append({
+                                            "name": param_name,
+                                            "type": param_type,
+                                            "description": description,
+                                            "required": not is_optional
+                                        })
+
+                available_methods_json[method][action] = {
+                    "signature": sig,
+                    "parameters": parameters,
+                    "description": f"{method}.{action} action"
+                }
+
+    # Convert to a compact, AI-friendly format
+    available_methods_str = f"""
+AVAILABLE ACTIONS (JSON format for better AI parsing):
+{json.dumps(available_methods_json, indent=1, separators=(',', ':'))}
+"""
 
     retry_context = ""
     if context.retry_count and context.retry_count > 0:
         retry_context = f"""
@@ -162,10 +325,10 @@ Previous action results that failed or were incomplete:
     if context.previous_review_result:
         retry_context += f"""
 Previous review feedback:
-- Status: {context.previous_review_result.get('status', 'unknown') or 'unknown'}
-- Reason: {context.previous_review_result.get('reason', 'No reason provided') or 'No reason provided'}
-- Quality Score: {context.previous_review_result.get('quality_score', 0) or 0}/10
-- Unmet Criteria: {', '.join(context.previous_review_result.get('unmet_criteria', []) or [])}
+- Status: {context.previous_review_result.status or 'unknown'}
+- Reason: {context.previous_review_result.reason or 'No reason provided'}
+- Quality Score: {context.previous_review_result.quality_score or 0}/10
+- Unmet Criteria: {', '.join(context.previous_review_result.unmet_criteria or [])}
 """
 
     # Use Pydantic model directly - no need for getattr
diff --git a/modules/interfaces/interfaceChatModel.py b/modules/interfaces/interfaceChatModel.py
index 769e94c9..629b59ee 100644
--- a/modules/interfaces/interfaceChatModel.py
+++ b/modules/interfaces/interfaceChatModel.py
@@ -845,4 +845,40 @@ register_model_labels(
     }
 )
 
+# ===== Centralized AI Call Response Models =====
+
+class AiResult(BaseModel, ModelMixin):
+    """Document result from centralized AI call"""
+    filename: str = Field(description="Name of the result document")
+    mimetype: str = Field(description="MIME type of the result document")
+    content: str = Field(description="Content of the result document")
+
+# Register labels for AiResult
+register_model_labels(
+    "AiResult",
+    {"en": "Result Document", "fr": "Document de résultat"},
+    {
+        "filename": {"en": "Filename", "fr": "Nom de fichier"},
+        "mimetype": {"en": "MIME Type", "fr": "Type MIME"},
+        "content": {"en": "Content", "fr": "Contenu"}
+    }
+)
+
+class CentralizedAiResponse(BaseModel, ModelMixin):
+    """Standardized response format from centralized AI calls"""
+    aiResults: List[AiResult] = Field(default_factory=list, description="List of result documents")
+    success: bool = Field(description="Whether the AI call was successful")
+    error: Optional[str] = Field(None, description="Error message if the call failed")
+
+# Register labels for CentralizedAiResponse
+register_model_labels(
+    "CentralizedAiResponse",
+    {"en": "Centralized AI Response", "fr": "Réponse IA centralisée"},
+    {
+        "aiResults": {"en": "Result Documents", "fr": "Documents de résultat"},
+        "success": {"en": "Success", "fr": "Succès"},
+        "error": {"en": "Error", "fr": "Erreur"}
+    }
+)
+
diff --git a/modules/methods/methodDocument.py b/modules/methods/methodDocument.py
index 72d6d81b..d7dae427 100644
--- a/modules/methods/methodDocument.py
+++ b/modules/methods/methodDocument.py
@@ -618,23 +618,24 @@ class MethodDocument(MethodBase):
 
         # Create AI prompt for comprehensive report generation using user's prompt
         combinedContent = "\n\n".join(allContent)
         aiPrompt = f"""
-        {prompt}
-
-        Report Title: {title}
-
-        Additional Requirements:
-        1. Create a professional, well-formatted HTML report
-        2. Include an executive summary at the beginning
-        3. Organize information logically with clear sections
-        4. Highlight key findings and insights
-        5. Include relevant data, statistics, and conclusions
-        6. Use proper HTML formatting with headers, lists, and styling
-        7. Make it readable and professional
-
-        Document Content:
-        {combinedContent}
-
-        Generate a complete HTML report that addresses the user's specific requirements and integrates all the information into a cohesive, professional document.
+{prompt}
+
+Report Title: {title}
+
+Additional Requirements:
+1. Create a professional, well-formatted HTML report
+2. Include an executive summary at the beginning
+3. Organize information logically with clear sections
+4. Highlight key findings and insights
+5. Include relevant data, statistics, and conclusions
+6. Use proper HTML formatting with headers, lists, and styling
+7. Make it readable and professional
+
+Document Content:
+---START OF DOCUMENT CONTENT-----------------------------------------------
+{combinedContent}
+---END OF DOCUMENT CONTENT-----------------------------------------------
+Generate a complete HTML report that addresses the user's specific requirements and integrates all the information into a cohesive, professional document.
 """
 
         # Call AI to generate the report
diff --git a/modules/methods/methodOutlook.py b/modules/methods/methodOutlook.py
index 2c2c90ce..8a2b5b7d 100644
--- a/modules/methods/methodOutlook.py
+++ b/modules/methods/methodOutlook.py
@@ -379,10 +379,7 @@ class MethodOutlook(MethodBase):
             connectionReference (str): Reference to the Microsoft connection
             folder (str, optional): Email folder to read from (default: "Inbox")
             limit (int, optional): Maximum number of emails to read (default: 10)
-            filter (str, optional): Filter criteria for emails. Supports:
-                - Email address (e.g., "user@domain.com") - filters by sender
-                - Search queries (e.g., "from:user@domain.com", "subject:meeting")
-                - Text content (e.g., "project update") - searches in subject
+            filter (str, optional): Filter criteria for emails. Supports: Email address (e.g., "user@domain.com") - filters by sender, Search queries (e.g., "from:user@domain.com", "subject:meeting"), Text content (e.g., "project update") - searches in subject
             expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description
         """
         try:
diff --git a/modules/methods/methodSharepoint.py b/modules/methods/methodSharepoint.py
index a8257bc3..19af4c1e 100644
--- a/modules/methods/methodSharepoint.py
+++ b/modules/methods/methodSharepoint.py
@@ -117,13 +117,66 @@ class MethodSharepoint(MethodBase):
             logger.error(f"Error discovering SharePoint sites: {str(e)}")
             return []
 
+    def _filter_sites_by_hint(self, sites: List[Dict[str, Any]], site_hint: str) -> List[Dict[str, Any]]:
+        """Filter discovered sites by a human-entered site hint (case-insensitive substring)."""
+        try:
+            if not site_hint:
+                return sites
+            hint = site_hint.strip().lower()
+            filtered: List[Dict[str, Any]] = []
+            for site in sites:
+                name = (site.get("displayName") or "").lower()
+                web_url = (site.get("webUrl") or "").lower()
+                if hint in name or hint in web_url:
+                    filtered.append(site)
+            return filtered if filtered else sites
+        except Exception as e:
+            logger.error(f"Error filtering sites by hint '{site_hint}': {str(e)}")
+            return sites
+
+
+    def _parse_site_scoped_path(self, path_query: str) -> Optional[Dict[str, str]]:
+        """
+        Parse a site-scoped path of the form:
+        /site:KM LayerFinance/Documents/Work or /site:KM LayerFinance/Shared Documents/Work
+
+        Returns dict with keys: siteName, innerPath (no leading slash) on success, else None.
+        """
+        try:
+            if not path_query or not path_query.startswith('/'):
+                return None
+            # expected syntax prefix
+            prefix = '/site:'
+            if not path_query.startswith(prefix):
+                return None
+            remainder = path_query[len(prefix):]
+            # split once on the next '/'
+            if '/' not in remainder:
+                return None
+            site_name, inner = remainder.split('/', 1)
+            site_name = site_name.strip()
+            inner_path = inner.strip()
+            if not site_name or not inner_path:
+                return None
+            return {"siteName": site_name, "innerPath": inner_path}
+        except Exception as e:
+            logger.error(f"Error parsing site-scoped path '{path_query}': {str(e)}")
+            return None
+
     def _parseSearchQuery(self, searchQuery: str) -> tuple[str, str, str, dict]:
         """
         Parse searchQuery to extract path, search terms, search type, and search options.
 
+        CRITICAL: NEVER convert words to paths! Words stay as search terms.
+        - "root document lesson" → fileQuery="root document lesson" (NOT "/root/document/lesson")
+        - "root, gose" → fileQuery="root, gose" (NOT "/root/gose")
+        - "druckersteuerung eskalation logobject" → fileQuery="druckersteuerung eskalation logobject"
+
         Parameters:
             searchQuery (str): Enhanced search query with options:
                 - "budget" -> pathQuery="*", fileQuery="budget", searchType="all", options={}
+                - "root document lesson" -> pathQuery="*", fileQuery="root document lesson", searchType="all", options={}
+                - "root, gose" -> pathQuery="*", fileQuery="root, gose", searchType="all", options={}
                 - "/Documents:budget" -> pathQuery="/Documents", fileQuery="budget", searchType="all", options={}
                 - "files:budget" -> pathQuery="*", fileQuery="budget", searchType="files", options={}
                 - "folders:DELTA" -> pathQuery="*", fileQuery="DELTA", searchType="folders", options={}
@@ -142,12 +195,51 @@
             searchQuery = searchQuery.strip()
             searchOptions = {}
 
-            # Check for search type specification (files:, folders:, all:)
+            # CRITICAL: Do NOT convert space-separated or comma-separated words to paths!
+            # "root document lesson" should stay as "root document lesson", NOT "/root/document/lesson"
+            # "root, gose" should stay as "root, gose", NOT "/root/gose"
+
+            # Check for search type specification (files:, folders:, all:) FIRST
             searchType = "all"  # Default
             if searchQuery.startswith(("files:", "folders:", "all:")):
                 type_parts = searchQuery.split(':', 1)
                 searchType = type_parts[0].strip()
                 searchQuery = type_parts[1].strip()
+
+            # Extract optional site hint tokens: support "site=Name" or leading "site:Name"
+            def _extract_site_hint(q: str) -> tuple[str, Optional[str]]:
+                try:
+                    q_strip = q.strip()
+                    # Leading form: site:KM LayerFinance ...
+                    if q_strip.lower().startswith("site:"):
+                        after = q_strip[5:].lstrip()
+                        # site name until next space or end
+                        if ' ' in after:
+                            site_name, rest = after.split(' ', 1)
+                        else:
+                            site_name, rest = after, ''
+                        return rest.strip(), site_name.strip()
+                    # Inline key=value form anywhere
+                    m = re.search(r"\bsite=([^;\s]+)", q_strip, flags=re.IGNORECASE)
+                    if m:
+                        site_name = m.group(1).strip()
+                        # remove the token from query
+                        q_new = re.sub(r"\bsite=[^;\s]+;?", "", q_strip, flags=re.IGNORECASE).strip()
+                        return q_new, site_name
+                except Exception:
+                    pass
+                return q, None
+
+            searchQuery, extracted_site = _extract_site_hint(searchQuery)
+            if extracted_site:
+                searchOptions["site_hint"] = extracted_site
+                logger.info(f"Extracted site hint: '{extracted_site}'")
+
+            # Extract name="..." if present (for quoted multi-word names)
+            name_match = re.search(r"name=\"([^\"]+)\"", searchQuery)
+            if name_match:
+                searchQuery = name_match.group(1)
+                logger.info(f"Extracted name from quotes: '{searchQuery}'")
 
             # Check for search mode specification (exact:, regex:, case:, and:)
             if searchQuery.startswith(("exact:", "regex:", "case:", "and:")):
@@ -187,6 +279,7 @@
                 else:
                     fileQuery = search_part
 
+                # Use search_part as fileQuery (name extraction already handled above)
                 return pathQuery, fileQuery, searchType, searchOptions
 
             # No colon - check if it looks like a path
@@ -195,12 +288,14 @@
                 return searchQuery, "*", searchType, searchOptions
             else:
-                # It's a search term only
+                # It's a search term only - keep words as-is, do NOT convert to paths
+                # "root document lesson" stays as "root document lesson"
+                # "root, gose" stays as "root, gose"
                 return "*", searchQuery, searchType, searchOptions
 
         except Exception as e:
             logger.error(f"Error parsing searchQuery '{searchQuery}': {str(e)}")
-            return "*", "*", "all", {}
+            raise ValueError(f"Failed to parse searchQuery '{searchQuery}': {str(e)}")
 
     def _resolvePathQuery(self, pathQuery: str) -> List[str]:
         """
@@ -211,8 +306,9 @@
         - Direct paths (e.g., "/Documents/Project1")
         - Wildcards (e.g., "/Documents/*")
         - Multiple paths separated by semicolons (e.g., "/Docs; /Files")
-        - Relative paths (e.g., "Project1" -> resolved to default folder)
+        - Single word relative paths (e.g., "Project1" -> resolved to default folder)
         - Empty string or "*" for global search
+        - Space-separated words are treated as search terms, NOT folder paths
 
         Returns:
             List[str]: List of resolved paths
@@ -232,9 +328,21 @@
             # Handle absolute paths
             elif raw_path.startswith('/'):
                 resolved_paths.append(raw_path)
-            # Handle relative paths - prepend default folder
-            else:
+            # Handle single word relative paths - prepend default folder
+            # BUT NOT space-separated words (those are search terms, not paths)
+            elif ' ' not in raw_path:
                 resolved_paths.append(f"/Documents/{raw_path}")
+            else:
+                # Check if this looks like a path (has path separators) or search terms
+                if '\\' in raw_path or '/' in raw_path:
+                    # This looks like a path with spaces in folder names - treat as valid path
+                    resolved_paths.append(raw_path)
+                    logger.info(f"Path with spaces '{raw_path}' treated as valid folder path")
+                else:
+                    # Space-separated words without path separators are search terms
+                    # Return as "*" to search globally
+                    logger.info(f"Space-separated words '{raw_path}' treated as search terms, not folder path")
+                    resolved_paths.append("*")
 
             # Remove duplicates while preserving order
             seen = set()
@@ -249,7 +357,7 @@
         except Exception as e:
             logger.error(f"Error resolving pathQuery '{pathQuery}': {str(e)}")
-            return ["*"]  # Fallback to global search
+            raise ValueError(f"Failed to resolve pathQuery '{pathQuery}': {str(e)}")
 
     def _parseSiteUrl(self, siteUrl: str) -> Dict[str, str]:
         """Parse SharePoint site URL to extract hostname and site path"""
@@ -341,68 +449,28 @@
         except Exception as e:
             logger.error(f"Error getting site ID: {str(e)}")
             return ""
-    
+
+    @action
     async def findDocumentPath(self, parameters: Dict[str, Any]) -> ActionResult:
         """
-        Find documents by searching their content, names, or metadata across all accessible SharePoint sites
+        Find documents/folders by searching their NAMES across SharePoint sites.
 
         Parameters:
-            connectionReference (str): Reference to the Microsoft connection
-            searchQuery (str): [path:][type:][mode:]query - Enhanced search syntax:
-                - "budget", "/Documents:budget", "files:budget", "folders:DELTA", "*"
-                - "exact:\"Operations 2025\"" - exact phrase matching
-                - "regex:^Operations.*2025$" - regex pattern matching
-                - "case:DELTA" - case-sensitive search
-                - "and:DELTA AND 2025 Mars AND Group" - all terms must be present
-                - "folders:and:DELTA AND 2025 Mars AND Group" - combined options
-                Note: For storage locations, use "folders:" prefix. All search terms must be present by default.
-            resultDocument (str, optional): JSON result document from previous findDocumentPath action to refine search
-            searchScope (str, optional): Search scope - options: "all" (default), "documents" (files only), "pages" (SharePoint pages only)
-            maxResults (int, optional): Maximum number of results to return (default: 100)
-            expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description
+            connectionReference (str): Microsoft connection reference
+            site (str, optional): Site hint (e.g., "SSS", "KM XYZ")
+            searchQuery (str): Search query - "budget", "folders:alpha", "files:budget", "/Documents/Project1", "namepart1 namepart2 namepart3". Use "folders:" prefix when user wants to store files or find folders
+            maxResults (int, optional): Max results (default: 100)
         """
         try:
             connectionReference = parameters.get("connectionReference")
+            site = parameters.get("site")
             searchQuery = parameters.get("searchQuery", "*")
-            resultDocument = parameters.get("resultDocument")
-            searchScope = parameters.get("searchScope", "all")
             maxResults = parameters.get("maxResults", 100)
-            expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
 
             if not connectionReference:
                 return ActionResult.isFailure(error="Connection reference is required")
 
-            # If resultDocument is provided, extract site information to refine search
-            if resultDocument:
-                try:
-                    import json
-                    # Resolve the reference label to get the actual document list
-                    document_list = self.service.getChatDocumentsFromDocumentList([resultDocument])
-                    if not document_list or len(document_list) == 0:
-                        return ActionResult.isFailure(error=f"No document list found for reference: {resultDocument}")
-
-                    # Get the first document's content (which should be the JSON)
-                    first_document = document_list[0]
-                    file_data = self.service.getFileData(first_document.fileId)
-                    if not file_data:
-                        return ActionResult.isFailure(error=f"No file data found for document: {resultDocument}")
-
-                    # Parse the JSON content
-                    result_data = json.loads(file_data)
-                    found_documents = result_data.get("foundDocuments", [])
-
-                    # Extract site information from the result for context
-                    if found_documents:
-                        # Use the site information from the previous search to refine current search
-                        # This could be used to limit search to specific sites or add context
-                        logger.info(f"Refining search using {len(found_documents)} documents from previous result")
-
-                except json.JSONDecodeError as e:
-                    return ActionResult.isFailure(error=f"Invalid JSON in resultDocument: {str(e)}")
-                except Exception as e:
-                    return ActionResult.isFailure(error=f"Error resolving resultDocument reference: {str(e)}")
-
             # Parse searchQuery to extract path, search terms, search type, and options
             pathQuery, fileQuery, searchType, searchOptions = self._parseSearchQuery(searchQuery)
 
@@ -410,10 +478,22 @@
             if not connection:
                 return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
 
-            # Discover all SharePoint sites accessible to the user
-            sites = await self._discoverSharePointSites(connection["accessToken"])
-            if not sites:
-                return ActionResult.isFailure(error="No SharePoint sites found or accessible")
+            # Discover SharePoint sites - use targeted approach when site parameter is provided
+            if site:
+                # When site parameter is provided, discover all sites first, then filter
+                all_sites = await self._discoverSharePointSites(connection["accessToken"])
+                if not all_sites:
+                    return ActionResult.isFailure(error="No SharePoint sites found or accessible")
+
+                sites = self._filter_sites_by_hint(all_sites, site)
+                logger.info(f"Filtered sites by site parameter: '{site}' -> {len(sites)} sites")
+                if not sites:
+                    return ActionResult.isFailure(error=f"No SharePoint sites found matching '{site}'")
+            else:
+                # No site parameter - discover all sites
+                sites = await self._discoverSharePointSites(connection["accessToken"])
+                if not sites:
+                    return ActionResult.isFailure(error="No SharePoint sites found or accessible")
 
             # Resolve path query into search paths
             search_paths = self._resolvePathQuery(pathQuery)
@@ -423,87 +503,174 @@
             found_documents = []
             all_sites_searched = []
 
-            for site in sites:
+            # Use simple approach like test file - no complex filtering
+            site_scoped_sites = sites
+
+            for site in site_scoped_sites:
                 site_id = site["id"]
                 site_name = site["displayName"]
                 site_url = site["webUrl"]
 
                 logger.info(f"Searching in site: {site_name} ({site_url})")
 
-                # Use Microsoft Graph search API for this specific site
+                # Use Microsoft Graph API for this specific site
                 # Handle empty or wildcard queries
                 if not fileQuery or fileQuery.strip() == "" or fileQuery.strip() == "*":
                     # For wildcard/empty queries, list all items in the drive
                     endpoint = f"sites/{site_id}/drive/root/children"
                 else:
-                    # For specific queries, use search API
-                    search_query = fileQuery.replace("'", "''")  # Escape single quotes for OData
-                    endpoint = f"sites/{site_id}/drive/root/search(q='{search_query}')"
-
-                # Make the search API call
-                search_result = await self._makeGraphApiCall(connection["accessToken"], endpoint)
-
-                if "error" in search_result:
-                    logger.warning(f"Search failed for site {site_name}: {search_result['error']}")
-                    continue
-
-                # Process search results for this site
-                items = search_result.get("value", [])
+                    # For specific queries, use different approaches based on search type
+                    if searchType == "folders":
+                        # Use Microsoft Graph unified search endpoint: POST /search/query
+                        # This approach works reliably for finding folders
+                        try:
+                            import json
+
+                            # Use Microsoft Graph Search API syntax (simple term search only)
+                            terms = [t for t in fileQuery.split() if t.strip()]
+
+                            if len(terms) > 1:
+                                # Multiple terms: search for ALL terms (AND) - more specific results
+                                query_string = " AND ".join(terms)
+                            else:
+                                # Single term: search for the term
+                                query_string = terms[0] if terms else fileQuery
+                            logger.info(f"Using search query for folders: {query_string}")
+
+                            payload = {
+                                "requests": [
+                                    {
+                                        "entityTypes": ["driveItem"],
+                                        "query": {"queryString": query_string},
+                                        "from": 0,
+                                        "size": 50
+                                    }
+                                ]
+                            }
+                            logger.info(f"Using unified search API for folders with queryString: {query_string}")
+
+                            # Use global search endpoint (site-specific search not available)
+                            unified_result = await self._makeGraphApiCall(
+                                connection["accessToken"],
+                                "search/query",
+                                method="POST",
+                                data=json.dumps(payload).encode("utf-8")
+                            )
+
+                            if "error" in unified_result:
+                                logger.warning(f"Unified search failed for site {site_name}: {unified_result['error']}")
+                                items = []
+                            else:
+                                # Flatten hits -> driveItem resources
+                                items = []
+                                for container in (unified_result.get("value", []) or []):
+                                    for hits_container in (container.get("hitsContainers", []) or []):
+                                        for hit in (hits_container.get("hits", []) or []):
+                                            resource = hit.get("resource")
+                                            if resource:
+                                                items.append(resource)
+
+                                logger.info(f"Unified search returned {len(items)} items (pre-filter)")
+
+                                # Apply our improved folder detection logic
+                                folder_items = []
+                                for item in items:
+                                    resource = item
+
+                                    # Use the same detection logic as our test
+                                    is_folder = False
+                                    if 'folder' in resource:
+                                        is_folder = True
+                                    else:
+                                        # Try to detect by URL pattern or other indicators
+                                        web_url = resource.get('webUrl', '')
+                                        name = resource.get('name', '')
+
+                                        # Check if URL has no file extension and looks like a folder path
+                                        if '.' not in name and ('/' in web_url or '\\' in web_url):
+                                            is_folder = True
+
+                                    if is_folder:
+                                        folder_items.append(item)
+
+                                items = folder_items
+                                logger.info(f"Filtered to {len(items)} folders using improved detection logic")
+
+                        except Exception as e:
+                            logger.error(f"Error performing unified folder search: {str(e)}")
+                            items = []
+                    else:
+                        # For files, use regular search API
+                        search_query = fileQuery.replace("'", "''")  # Escape single quotes for OData
+                        endpoint = f"sites/{site_id}/drive/root/search(q='{search_query}')"
+                        logger.info(f"Using search API for files with query: '{search_query}'")
+
+                        # Make the search API call (files)
+                        search_result = await self._makeGraphApiCall(connection["accessToken"], endpoint)
+                        if "error" in search_result:
+                            logger.warning(f"Search failed for site {site_name}: {search_result['error']}")
+                            continue
+                        # Process search results for this site (files)
+                        items = search_result.get("value", [])
+
+                logger.info(f"Retrieved {len(items)} items from site {site_name}")
 
                 site_documents = []
                 for item in items:
-                    # Filter by search scope if specified
-                    if searchScope == "documents" and "folder" in item:
-                        continue
-                    elif searchScope == "pages" and "file" in item and not item["file"].get("mimeType", "").startswith("text/html"):
-                        continue
-
-                    # Filter by search type (files, folders, all)
-                    if searchType == "files" and "folder" in item:
-                        continue
-                    elif searchType == "folders" and "file" in item:
-                        continue
-
-                    # Enhanced post-filtering based on search options
                     item_name = item.get("name", "")
-                    if fileQuery != "*" and fileQuery.strip():
-                        # Apply different filtering based on search options
-                        if searchOptions.get("exact_match"):
-                            # Exact phrase matching
-                            if searchOptions.get("case_sensitive"):
-                                if fileQuery not in item_name:
-                                    continue
-                            else:
-                                if fileQuery.lower() not in item_name.lower():
-                                    continue
-                        elif searchOptions.get("regex_match"):
-                            # Regex pattern matching
-                            import re
-                            flags = 0 if searchOptions.get("case_sensitive") else re.IGNORECASE
-                            if not re.search(fileQuery, item_name, flags):
-                                continue
-                        elif searchOptions.get("and_terms"):
-                            # AND terms mode: Split by " AND " and ensure ALL terms are present
-                            search_name = item_name.lower() if not searchOptions.get("case_sensitive") else item_name
-                            and_terms = [term.strip() for term in fileQuery.split(" AND ") if term.strip()]
-                            and_terms = [term.lower() if not searchOptions.get("case_sensitive") else term for term in and_terms]
-                            if not all(term in search_name for term in and_terms):
-                                continue  # Skip this item if not all AND terms match
-                        else:
-                            # Default: ALL search terms must be present (space-separated)
-                            search_name = item_name.lower() if not searchOptions.get("case_sensitive") else item_name
-                            search_terms = [term.strip().lower() if not searchOptions.get("case_sensitive") else term.strip()
-                                            for term in fileQuery.split() if term.strip()]
-                            if not all(term in search_name for term in search_terms):
-                                continue  # Skip this item if not all terms match
-
-                    # Create minimal result with only essential reference information
+                    # Use improved folder detection logic
+                    is_folder = False
+                    if 'folder' in item:
+                        is_folder = True
+                    else:
+                        # Try to detect by URL pattern or other indicators
+                        web_url = item.get('webUrl', '')
+                        name = item.get('name', '')
+
+                        # Check if URL has no file extension and looks like a folder path
+                        if '.' not in name and ('/' in web_url or '\\' in web_url):
+                            is_folder = True
+
+                    item_type = "folder" if is_folder else "file"
+                    item_path = item.get("parentReference", {}).get("path", "")
+                    logger.debug(f"Processing {item_type}: '{item_name}' at path: '{item_path}'")
+
+                    # Simple filtering like test file - just check search type
+                    if searchType == "files" and is_folder:
+                        continue  # Skip folders when searching for files
+                    elif searchType == "folders" and not is_folder:
+                        continue  # Skip files when searching for folders
+
+                    # Simple approach like test file - no complex filtering
+                    logger.debug(f"Item '{item_name}' found - adding to results")
+
+                    # Create result with full path information for proper action chaining
+                    web_url = item.get("webUrl", "")
+                    parent_path = item.get("parentReference", {}).get("path", "")
+
+                    # Extract the full SharePoint path from webUrl or parentReference
+                    full_path = ""
+                    if web_url:
+                        # Extract path from webUrl: https://pcuster.sharepoint.com/sites/SSSRESYNachfolge/Freigegebene%20Dokumente/General/Eskalation%20LogObject/Druckersteuerung
+                        if '/sites/' in web_url:
+                            path_part = web_url.split('/sites/')[1]
+                            # Decode URL encoding and convert to backslash format
+                            import urllib.parse
+                            decoded_path = urllib.parse.unquote(path_part)
+                            full_path = "\\" + decoded_path.replace('/', '\\')
+                    elif parent_path:
+                        # Use parentReference path if available
+                        full_path = parent_path.replace('/', '\\')
+
                     doc_info = {
                         "id": item.get("id"),
                         "name": item.get("name"),
-                        "type": "folder" if "folder" in item else "file",
+                        "type": "folder" if is_folder else "file",
                         "siteName": site_name,
-                        "siteId": site_id
+                        "siteId": site_id,
+                        "webUrl": web_url,
+                        "fullPath": full_path,
+                        "parentPath": parent_path
                     }
 
                     site_documents.append(doc_info)
@@ -535,18 +702,10 @@
             logger.error(f"Error searching SharePoint: {str(e)}")
             return ActionResult.isFailure(error=str(e))
 
-        # Determine output format based on expected formats
+        # Use default JSON format for output
         output_extension = ".json"  # Default
         output_mime_type = "application/json"  # Default
-        if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
-            # Use the first expected format
-            expected_format = expectedDocumentFormats[0]
-            output_extension = expected_format.get("extension", ".json")
-            output_mime_type = expected_format.get("mimeType", "application/json")
-            logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
-        else:
-            logger.info("No expected format specified, using default .json format")
 
         return ActionResult(
             success=True,
@@ -571,36 +730,37 @@
         Parameters:
             documentList (str): Reference to the document list to read
             connectionReference (str): Reference to the Microsoft connection
-            pathQuery (str): Path query to locate documents (e.g., "/Documents/Project1", "*" for all sites)
-            resultDocument (str, optional): JSON result document from findDocumentPath action (alternative to pathQuery)
+            pathObject (str, optional): Path object to locate documents. This can ONLY be a reference to a result from sharepoint.findDocumentPath action
+            pathQuery (str, optional): Path query to locate documents, only if no pathObject is provided (e.g., "/Documents/Project1", "*" for all sites)
             includeMetadata (bool, optional): Whether to include metadata (default: True)
-            expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description
         """
         try:
            documentList = parameters.get("documentList")
            connectionReference = parameters.get("connectionReference")
            pathQuery = parameters.get("pathQuery", "*")
-           resultDocument = parameters.get("resultDocument")
+           pathObject = parameters.get("pathObject")
            includeMetadata = parameters.get("includeMetadata", True)
-           expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
 
            if not documentList or not connectionReference:
                return ActionResult.isFailure(error="Document list reference and connection reference are required")
 
-            # If resultDocument is provided, extract folder IDs from it
-            if resultDocument:
+            # If pathObject is provided, extract folder IDs from it
+            # Note: pathObject takes precedence over pathQuery when both are provided
+            if pathObject:
+                if pathQuery and pathQuery != "*":
+                    logger.debug(f"Both pathObject and pathQuery provided - using pathObject (pathQuery '{pathQuery}' will be ignored)")
                 try:
                     import json
                     # Resolve the reference label to get the actual document list
-                    document_list = self.service.getChatDocumentsFromDocumentList([resultDocument])
+                    document_list = self.service.getChatDocumentsFromDocumentList([pathObject])
                     if not document_list or len(document_list) == 0:
-                        return ActionResult.isFailure(error=f"No document list found for reference: {resultDocument}")
+                        return ActionResult.isFailure(error=f"No document list found for reference: {pathObject}")
 
                     # Get the first document's content (which should be the JSON)
                     first_document = document_list[0]
                     file_data = self.service.getFileData(first_document.fileId)
                     if not file_data:
-                        return ActionResult.isFailure(error=f"No file data found for document: {resultDocument}")
+                        return ActionResult.isFailure(error=f"No file data found for document: {pathObject}")
 
                     # Parse the JSON content
                     result_data = json.loads(file_data)
@@ -615,14 +775,14 @@
                     if folder_ids:
                         # Use the first folder ID found as pathQuery
                         pathQuery = folder_ids[0]
-                        logger.info(f"Using folder ID from resultDocument: {pathQuery}")
+                        logger.info(f"Using folder ID from pathObject: {pathQuery}")
                     else:
-                        return ActionResult.isFailure(error="No folders found in resultDocument")
+                        return ActionResult.isFailure(error="No folders found in pathObject")
 
                 except json.JSONDecodeError as e:
-                    return ActionResult.isFailure(error=f"Invalid JSON in resultDocument: {str(e)}")
+                    return ActionResult.isFailure(error=f"Invalid JSON in pathObject: {str(e)}")
                 except Exception as e:
-                    return ActionResult.isFailure(error=f"Error resolving resultDocument reference: {str(e)}")
+                    return ActionResult.isFailure(error=f"Error resolving pathObject reference: {str(e)}")
 
             # Get documents from reference - ensure documentList is a list, not a string
             if isinstance(documentList, str):
@@ -636,10 +796,58 @@
            if not connection:
                return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")
 
-            # Discover all SharePoint sites accessible to the user
-            sites = await self._discoverSharePointSites(connection["accessToken"])
+            # Determine sites to use - strict validation: pathObject → pathQuery → ERROR
+            sites = None
+
+            # Step 1: Check pathObject first
+            if pathObject:
+                # When pathObject is provided, we should have specific site information
+                # Extract site information from the pathObject result
+                try:
+                    # Get the site information from the first folder in pathObject
+                    if 'found_documents' in locals() and found_documents:
+                        first_folder = found_documents[0]
+                        site_name = first_folder.get("siteName")
+                        site_id = first_folder.get("siteId")
+
+                        if site_name and site_id:
+                            # Use the specific site from pathObject instead of discovering all sites
+                            sites = [{
+                                "id": site_id,
+                                "displayName": site_name,
+                                "webUrl": first_folder.get("webUrl", "")
+                            }]
+                            logger.info(f"Using specific site from pathObject: {site_name} (ID: {site_id})")
+                        else:
+                            # Site info missing from pathObject - this is an error
+                            return ActionResult.isFailure(error="Site information missing from pathObject. Cannot determine target site for read operation.")
+                    else:
+                        # No documents found in pathObject - this is an error
+                        return ActionResult.isFailure(error="No valid folder information found in pathObject. Cannot determine target site for read operation.")
+                except Exception as e:
+                    # Error processing pathObject - this is an error
+                    return ActionResult.isFailure(error=f"Error processing pathObject: {str(e)}. Cannot determine target site for read operation.")
+
+            # Step 2: If no pathObject, check pathQuery
+            elif pathQuery and pathQuery.strip() != "" and pathQuery.strip() != "*":
+                # Validate pathQuery format
+                if not pathQuery.startswith('/'):
+                    return ActionResult.isFailure(error="pathQuery must start with '/' and include the site name, syntax /site:<SiteName>/..., e.g. /site:KM LayerFinance/Documents/Work")
+
+                # Check if pathQuery contains search terms (words without proper path structure)
+                if not pathQuery.startswith('/site:') and not pathQuery.startswith('/Documents') and not pathQuery.startswith('/Shared Documents'):
+                    return ActionResult.isFailure(error=f"Invalid pathQuery '{pathQuery}'. This appears to be search terms, not a valid SharePoint path. Use findDocumentPath action first to search for folders, then use the returned folder path as pathQuery.")
+
+                # For pathQuery, we need to discover sites to find the specific one
+                sites = await self._discoverSharePointSites(connection["accessToken"])
+                if not sites:
+                    return ActionResult.isFailure(error="No SharePoint sites found or accessible")
+            else:
+                # Step 3: Both pathObject and pathQuery failed - ERROR, NO FALLBACK
+                return ActionResult.isFailure(error="No valid path provided. Either provide pathObject (from findDocumentPath) or a valid pathQuery with specific site information.")
+
             if not sites:
-                return ActionResult.isFailure(error="No SharePoint sites found or accessible")
+                return ActionResult.isFailure(error="No valid target site determined for read operation")
 
             # Resolve path query into search paths
             search_paths = self._resolvePathQuery(pathQuery)
@@ -767,18 +975,10 @@
                 "timestamp": get_utc_timestamp()
             }
 
-            # Determine output format based on expected formats
+            # Use default JSON format for output
            output_extension = ".json"  # Default
            output_mime_type = "application/json"  # Default
-            if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
-                # Use the first expected format
-                expected_format = expectedDocumentFormats[0]
-                output_extension = expected_format.get("extension", ".json")
-                output_mime_type = expected_format.get("mimeType", "application/json")
-                logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
-            else:
-                logger.info("No expected format specified, using default .json format")
 
             return ActionResult(
                 success=True,
@@ -804,59 +1004,121 @@
         Parameters:
             connectionReference (str): Reference to the Microsoft connection
-            pathQuery (str): Path query where to upload documents (e.g., "/Documents/Project1", "*" for default location)
+            pathObject (str, optional): Path object to locate documents. This can ONLY be a reference to a result from sharepoint.findDocumentPath action
+            pathQuery (str, optional): Path query to locate documents, only if no pathObject is provided (e.g., "/Documents/Project1", "*" for all sites)
             documentList (str): Reference to the document list to upload
             fileNames (List[str]): List of names for the uploaded files
-            resultDocument (str, optional): JSON result document from findDocumentPath action (alternative to pathQuery)
-            expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description
         """
         try:
            connectionReference = parameters.get("connectionReference")
-           pathQuery = parameters.get("pathQuery", "/Documents")
+           pathQuery = parameters.get("pathQuery")
            documentList = parameters.get("documentList")
            fileNames = parameters.get("fileNames")
-           resultDocument = parameters.get("resultDocument")
-           expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
+           pathObject = parameters.get("pathObject")
+
+           upload_path = pathQuery
+           logger.debug(f"Using pathQuery: {pathQuery}")
 
            if not connectionReference or not documentList or not fileNames:
                return ActionResult.isFailure(error="Connection reference, document list, and file names are required")
 
-            # If resultDocument is provided, extract folder IDs from it
-            if resultDocument:
+            # If pathObject is provided, extract folder IDs from it
+            if pathObject:
                 try:
                     import json
                     # Resolve the reference label to get the actual document list
-                    document_list = self.service.getChatDocumentsFromDocumentList([resultDocument])
+                    document_list = self.service.getChatDocumentsFromDocumentList([pathObject])
                     if not document_list or len(document_list) == 0:
-                        return ActionResult.isFailure(error=f"No document list found for reference: {resultDocument}")
+                        return ActionResult.isFailure(error=f"No document list found for reference: {pathObject}")
 
                     # Get the first document's content (which should be the JSON)
                     first_document = document_list[0]
                     file_data = self.service.getFileData(first_document.fileId)
                     if not file_data:
-                        return ActionResult.isFailure(error=f"No file data found for document: {resultDocument}")
+                        return ActionResult.isFailure(error=f"No file data found for document: {pathObject}")
 
                     # Parse the JSON content
                     result_data = json.loads(file_data)
-                    found_documents = result_data.get("foundDocuments", [])
 
-                    # Extract folder IDs from the result
-                    folder_ids = []
+                    # Debug: Log the structure of the result document
+                    logger.info(f"Result document keys: {list(result_data.keys())}")
+
+                    # Handle different result document formats
+                    found_documents = []
+
+                    # Check if it's a direct SharePoint result (has foundDocuments)
+                    if "foundDocuments" in result_data:
+                        found_documents = result_data.get("foundDocuments", [])
+                        logger.info(f"Found {len(found_documents)} documents in foundDocuments array")
+                    # Check if it's an AI validation result (has result string with validationReport)
+                    elif "result" in result_data and "validationReport" in result_data["result"]:
+                        try:
+                            # Parse the nested JSON in the result field
+                            nested_result = json.loads(result_data["result"])
+                            validation_report = nested_result.get("validationReport", {})
+                            document_details = validation_report.get("documentDetails", {})
+
+                            if document_details:
+                                # Convert the single document details to the expected format
+                                doc = {
+                                    "id": document_details.get("id"),
+                                    "name": document_details.get("name"),
+                                    "type": document_details.get("type", "").lower(),  # Convert "Folder" to "folder"
+                                    "siteName": document_details.get("siteName"),
+                                    "siteId": document_details.get("siteId"),
+                                    "fullPath": document_details.get("fullPath"),
+                                    "webUrl": document_details.get("webUrl", ""),
+                                    "parentPath": document_details.get("parentPath", "")
+                                }
+                                found_documents = [doc]
+                                logger.info(f"Extracted 1 document from validation report")
+                        except json.JSONDecodeError as e:
+                            logger.error(f"Failed to parse nested JSON in result field: {e}")
+                            return ActionResult.isFailure(error=f"Invalid nested JSON in pathObject: {str(e)}")
+
+                    # Debug: Log what we found in the result document
+                    logger.info(f"Result document contains {len(found_documents)} documents")
+                    for i, doc in enumerate(found_documents):
+                        logger.info(f"  Document {i+1}: name='{doc.get('name')}', type='{doc.get('type')}', id='{doc.get('id')}'")
+
+                    # Extract folder information from the result
+                    folders = []
                     for doc in found_documents:
                         if doc.get("type") == "folder":
-                            folder_ids.append(doc.get("id"))
+                            folders.append(doc)
 
-                    if folder_ids:
-                        # Use the first folder ID found as pathQuery
-                        pathQuery = folder_ids[0]
-                        logger.info(f"Using folder ID from resultDocument: {pathQuery}")
+                    logger.info(f"Found {len(folders)} folders in result document")
+
+                    if folders:
+                        # Use the first folder found - prefer folder ID for direct API calls
+                        first_folder = folders[0]
+                        if first_folder.get("id"):
+                            # Use folder ID directly for most reliable API calls
+                            upload_path = first_folder.get("id")
+                            logger.info(f"Using folder ID from pathObject: {upload_path}")
+                        elif first_folder.get("fullPath"):
+                            # Extract the correct path portion from fullPath by removing site name
+                            full_path = first_folder.get("fullPath")
+                            # fullPath format: \\SiteName\\Library\\Folder\\SubFolder
+                            # We need to remove the first two parts (\\SiteName\\) to get the actual folder path
+                            path_parts = full_path.lstrip('\\').split('\\')
+                            if len(path_parts) > 1:
+                                # Remove the first part (site name) and reconstruct the path
+                                actual_path = '\\'.join(path_parts[1:])
+                                upload_path = actual_path
+                                logger.info(f"Extracted path from fullPath: {upload_path}")
+                            else:
+                                upload_path = full_path
+                                logger.info(f"Using full path from pathObject (no site name to remove): {upload_path}")
+                        else:
+                            return ActionResult.isFailure(error="No valid folder information found in pathObject")
                     else:
-                        return ActionResult.isFailure(error="No folders found in resultDocument")
+                        return ActionResult.isFailure(error="No folders found in pathObject")
 
                 except json.JSONDecodeError as e:
-                    return ActionResult.isFailure(error=f"Invalid JSON in resultDocument: {str(e)}")
+                    return ActionResult.isFailure(error=f"Invalid JSON in pathObject: {str(e)}")
                 except Exception as e:
-                    return ActionResult.isFailure(error=f"Error resolving resultDocument reference: {str(e)}")
+                    return ActionResult.isFailure(error=f"Error resolving pathObject reference: {str(e)}")
 
             # Get Microsoft connection
             connection = self._getMicrosoftConnection(connectionReference)
@@ -870,13 +1132,85 @@
            if not chatDocuments:
                return ActionResult.isFailure(error="No documents found for the provided reference")
 
-            # Discover all SharePoint sites accessible to the user
-            sites = await self._discoverSharePointSites(connection["accessToken"])
-            if not sites:
-                return ActionResult.isFailure(error="No SharePoint sites found or accessible")
+            # Determine sites to use based on whether pathObject was provided
+            sites = None
+            if pathObject:
+                # When pathObject is provided, we should have specific site information
+                # Extract site information from the pathObject result
+                try:
+                    # Get the site information from the first folder in pathObject
+                    if 'found_documents' in locals() and found_documents:
+                        first_folder = found_documents[0]
+                        site_name = first_folder.get("siteName")
+                        site_id = first_folder.get("siteId")
+
+                        if site_name and site_id:
+                            # Use the specific site from pathObject instead of discovering all sites
+                            sites = [{
+                                "id": site_id,
+                                "displayName": site_name,
+                                "webUrl": first_folder.get("webUrl", "")
+                            }]
+                            logger.info(f"Using specific site from pathObject: {site_name} (ID: {site_id})")
+                        else:
+                            # Site info missing from pathObject - this is an error, not a fallback
+                            return ActionResult.isFailure(error="Site information missing from pathObject. Cannot determine target site for upload.")
+                    else:
+                        # No documents found in pathObject - this is an error
+                        return ActionResult.isFailure(error="No valid folder information found in pathObject. Cannot determine target site for upload.")
+                except Exception as e:
+                    # Error processing pathObject - this is an error, not a fallback
+                    return ActionResult.isFailure(error=f"Error processing pathObject: {str(e)}. Cannot determine target site for upload.")
+            else:
+                # No pathObject provided - check if pathQuery is valid
+                if not upload_path or upload_path.strip() == "" or upload_path.strip() == "*":
+                    return ActionResult.isFailure(error="No valid upload path provided. Either provide pathObject (from findDocumentPath) or a valid pathQuery with specific site information.")
+
+                # Validate pathQuery format
+                if not upload_path.startswith('/'):
+                    return ActionResult.isFailure(error="pathQuery must start with '/' and include the site name, syntax /site:<SiteName>/..., e.g. /site:KM LayerFinance/Documents/Work")
+
+                # Check if upload_path contains search terms (words without proper path structure)
+                if not upload_path.startswith('/site:') and not upload_path.startswith('/Documents') and not upload_path.startswith('/Shared Documents'):
+                    return ActionResult.isFailure(error=f"Invalid pathQuery '{upload_path}'. This appears to be search terms, not a valid SharePoint path. Use findDocumentPath action first to search for folders, then use the returned folder path as pathQuery.")
+
+                # For pathQuery, we need to discover sites to find the specific one
+                sites = await self._discoverSharePointSites(connection["accessToken"])
+                if not sites:
+                    return ActionResult.isFailure(error="No SharePoint sites found or accessible")
 
-            # Resolve path query into upload paths
-            upload_paths = self._resolvePathQuery(pathQuery)
+            if not sites:
+                return ActionResult.isFailure(error="No valid target site determined for upload")
+
+            # Process upload paths based on whether pathObject was provided
+            upload_site_scope = None
+            if not pathObject:
+                # Parse the validated pathQuery to extract site and path information
+                parsed = self._parse_site_scoped_path(upload_path)
+                if not parsed:
+                    return ActionResult.isFailure(error="Invalid upload_path. Use /site:<SiteName>/<path>, e.g. /site:KM LayerFinance/Documents/Work")
+
+                # Find matching site
+                candidate_sites = self._filter_sites_by_hint(sites, parsed["siteName"])  # substring match
+                # Choose exact displayName match if available
+                exact = [s for s in candidate_sites if (s.get("displayName") or "").strip().lower() == parsed["siteName"].strip().lower()]
+                selected_site = exact[0] if exact else (candidate_sites[0] if candidate_sites else None)
+                if not selected_site:
+                    return ActionResult.isFailure(error=f"SharePoint site '{parsed['siteName']}' not found or not accessible")
+
+                upload_site_scope = selected_site
+                # Use the inner path portion as the actual upload target path
+                upload_paths = [f"/{parsed['innerPath'].lstrip('/')}"]
+                sites = [selected_site]
+            else:
+                # When using pathObject, check if upload_path is a folder ID or a path
+                if upload_path.startswith('01PPXICCB') or upload_path.startswith('01'):
+                    # It's a folder ID - use it directly
+                    upload_paths = [upload_path]
+                    logger.info(f"Using folder ID directly for upload: {upload_path}")
+                else:
+                    # It's a path - resolve it normally
+                    upload_paths = self._resolvePathQuery(upload_path)
 
             # Process each document upload
             upload_results = []
@@ -906,12 +1240,26 @@
                 # Use the first upload path or default to Documents
                 upload_path = upload_paths[0] if upload_paths else "/Documents"
-                upload_path = upload_path.rstrip('/') + '/' + fileName
-                upload_path_clean = upload_path.lstrip('/')
+
+                # Handle wildcard paths - replace with default Documents folder
+                if upload_path == "*":
+                    upload_path = "/Documents"
+                    logger.warning(f"Wildcard path '*' detected, using default '/Documents' folder for upload")
+
+                # Check if upload_path is a folder ID or a regular path
+                if upload_path.startswith('01PPXICCB') or upload_path.startswith('01'):
+                    # It's a folder ID - use the folder-specific upload endpoint
+                    upload_endpoint = f"sites/{site_id}/drive/items/{upload_path}:/{fileName}:/content"
+                    logger.info(f"Using folder ID upload endpoint: {upload_endpoint}")
+                else:
+                    # It's a regular path - use the root-based upload endpoint
+                    upload_path = upload_path.rstrip('/') + '/' + fileName
+                    upload_path_clean = upload_path.lstrip('/')
+                    upload_endpoint = f"sites/{site_id}/drive/root:/{upload_path_clean}:/content"
+                    logger.info(f"Using path-based upload endpoint: {upload_endpoint}")
 
                 # Upload endpoint for small files (< 4MB)
                 if len(file_data) < 4 * 1024 * 1024:  # 4MB
-                    upload_endpoint = f"sites/{site_id}/drive/root:/{upload_path_clean}:/content"
 
                     # Upload the file
                     upload_result = await self._makeGraphApiCall(
@@ -929,6 +1277,7 @@
                             "siteName": site_name,
                             "siteUrl": site_url,
                             "uploadPath": upload_path,
+                            "uploadEndpoint": upload_endpoint,
                             "sharepointFileId": upload_result.get("id"),
                             "webUrl": upload_result.get("webUrl"),
                             "size": upload_result.get("size"),
@@ -963,7 +1312,7 @@
             # Create result data
             result_data = {
                 "connectionReference": connectionReference,
-                "pathQuery": pathQuery,
+                "pathQuery": upload_path,
                 "documentList": documentList,
                 "fileNames": fileNames,
                 "sitesAvailable": len(sites),
@@ -976,18 +1325,10 @@
                 "timestamp": get_utc_timestamp()
             }
 
-            # Determine output format based on expected formats
+            # Use default JSON format for output
            output_extension = ".json"  # Default
            output_mime_type = "application/json"  # Default
-            if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
-                # Use the first expected format
-                expected_format = expectedDocumentFormats[0]
-                output_extension = expected_format.get("extension", ".json")
-                output_mime_type = expected_format.get("mimeType", "application/json")
-                logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
-            else:
-                logger.info("No expected format specified, using default .json format")
 
             return ActionResult(
                 success=True,
@@ -1014,82 +1355,194 @@
         Parameters:
             connectionReference (str): Reference to the Microsoft connection
-            searchQuery (str): [path:][type:][mode:]query - "Test Plan", "folders:Test Plan", "/Documents", "*"
-                Note: Use "folders:Name" to search for folders anywhere, not "path:/Name" which looks only in root
-            resultDocument (str, optional): JSON result document from findDocumentPath action (alternative to searchQuery)
+            pathObject (str, optional): Path object to locate documents. This can ONLY be a reference to a result from sharepoint.findDocumentPath action
+            pathQuery (str, optional): Path query to locate documents, only if no pathObject is provided (e.g., "/Documents/Project1", "*" for all sites)
             includeSubfolders (bool, optional): Whether to include subfolders (default: False)
-            expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description
         """
         try:
            connectionReference = parameters.get("connectionReference")
-           searchQuery = parameters.get("searchQuery", "*")
-           resultDocument = parameters.get("resultDocument")
+           pathObject = parameters.get("pathObject")
+           pathQuery = parameters.get("pathQuery")
            includeSubfolders = parameters.get("includeSubfolders", False)  # Default to False for better UX
-           expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
+
+           list_query = pathQuery
+           logger.info(f"Using pathQuery: {pathQuery}")
 
            if not connectionReference:
                return ActionResult.isFailure(error="Connection reference is required")
 
-            # If resultDocument is provided, resolve the reference and extract folder IDs from it
-            if resultDocument:
+            # If pathObject is provided, resolve the reference and extract folder IDs from it
+            # Note: pathObject takes precedence over pathQuery when both are provided
+            if pathObject:
+                if pathQuery and pathQuery != "*":
+                    logger.debug(f"Both pathObject and pathQuery provided - using pathObject (pathQuery '{pathQuery}' will be ignored)")
                 try:
                     import json
                     # Resolve the reference label to get the actual document list
-                    document_list = self.service.getChatDocumentsFromDocumentList([resultDocument])
+                    document_list = self.service.getChatDocumentsFromDocumentList([pathObject])
                     if not document_list or len(document_list) == 0:
-                        return
ActionResult.isFailure(error=f"No document list found for reference: {pathObject}") # Get the first document's content (which should be the JSON) first_document = document_list[0] logger.info(f"Document fileId: {first_document.fileId}, fileName: {first_document.fileName}") file_data = self.service.getFileData(first_document.fileId) if not file_data: - return ActionResult.isFailure(error=f"No file data found for document: {resultDocument} (fileId: {first_document.fileId})") + return ActionResult.isFailure(error=f"No file data found for document: {pathObject} (fileId: {first_document.fileId})") logger.info(f"File data length: {len(file_data) if file_data else 0}") # Parse the JSON content result_data = json.loads(file_data) - found_documents = result_data.get("foundDocuments", []) - # Extract folder IDs from the result - folder_ids = [] + # Debug: Log the structure of the result document + logger.info(f"Result document keys: {list(result_data.keys())}") + + # Handle different result document formats + found_documents = [] + + # Check if it's a direct SharePoint result (has foundDocuments) + if "foundDocuments" in result_data: + found_documents = result_data.get("foundDocuments", []) + logger.info(f"Found {len(found_documents)} documents in foundDocuments array") + # Check if it's an AI validation result (has result string with validationReport) + elif "result" in result_data and "validationReport" in result_data["result"]: + try: + # Parse the nested JSON in the result field + nested_result = json.loads(result_data["result"]) + validation_report = nested_result.get("validationReport", {}) + document_details = validation_report.get("documentDetails", {}) + + if document_details: + # Convert the single document details to the expected format + doc = { + "id": document_details.get("id"), + "name": document_details.get("name"), + "type": document_details.get("type", "").lower(), # Convert "Folder" to "folder" + "siteName": document_details.get("siteName"), + "siteId": document_details.get("siteId"), + "fullPath": document_details.get("fullPath"), + "webUrl": document_details.get("webUrl", ""), + "parentPath": document_details.get("parentPath", "") + } + found_documents = [doc] + logger.info(f"Extracted 1 document from validation report") + except json.JSONDecodeError as e: + logger.error(f"Failed to parse nested JSON in result field: {e}") + return ActionResult.isFailure(error=f"Invalid nested JSON in pathObject: {str(e)}") + + # Debug: Log what we found in the result document + logger.info(f"Result document contains {len(found_documents)} documents") + for i, doc in enumerate(found_documents): + logger.info(f" Document {i+1}: name='{doc.get('name')}', type='{doc.get('type')}', id='{doc.get('id')}'") + + # Extract folder information from the result + folders = [] for doc in found_documents: if doc.get("type") == "folder": - folder_ids.append(doc.get("id")) + folders.append(doc) - if folder_ids: - # Use the first folder ID found - searchQuery = folder_ids[0] - logger.info(f"Using folder ID from resultDocument: {searchQuery}") + logger.info(f"Found {len(folders)} folders in result document") + + if folders: + # Use the first folder found - prefer folder ID for direct API calls + first_folder = folders[0] + if first_folder.get("id"): + # Use folder ID directly for most reliable API calls + list_query = first_folder.get("id") + logger.info(f"Using folder ID from pathObject: {list_query}") + elif first_folder.get("fullPath"): + # Extract the correct path portion from fullPath by removing site name + 
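+                            # e.g. \\KM LayerFinance\\Documents\\Work -> Documents\\Work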
+                            full_path = first_folder.get("fullPath")
+                            # fullPath format: \\SiteName\\Library\\Folder\\SubFolder
+                            # We need to remove the first part (\\SiteName\\) to get the actual folder path
+                            path_parts = full_path.lstrip('\\').split('\\')
+                            if len(path_parts) > 1:
+                                # Remove the first part (site name) and reconstruct the path
+                                actual_path = '\\'.join(path_parts[1:])
+                                list_query = actual_path
+                                logger.info(f"Extracted path from fullPath: {list_query}")
+                            else:
+                                list_query = full_path
+                                logger.info(f"Using full path from pathObject (no site name to remove): {list_query}")
+                        else:
+                            return ActionResult.isFailure(error="No valid folder information found in pathObject")
                     else:
-                        return ActionResult.isFailure(error="No folders found in resultDocument")
+                        return ActionResult.isFailure(error="No folders found in pathObject")
                 except json.JSONDecodeError as e:
-                    return ActionResult.isFailure(error=f"Invalid JSON in resultDocument: {str(e)}")
+                    return ActionResult.isFailure(error=f"Invalid JSON in pathObject: {str(e)}")
                 except Exception as e:
-                    return ActionResult.isFailure(error=f"Error resolving resultDocument reference: {str(e)}")
+                    return ActionResult.isFailure(error=f"Error resolving pathObject reference: {str(e)}")

             # Get Microsoft connection
             connection = self._getMicrosoftConnection(connectionReference)
             if not connection:
                 return ActionResult.isFailure(error="No valid Microsoft connection found for the provided connection reference")

-            logger.info(f"Starting SharePoint listDocuments for searchQuery: {searchQuery}")
+            logger.info(f"Starting SharePoint listDocuments for list_query: {list_query}")
             logger.debug(f"Connection ID: {connection['id']}")

-            # Parse searchQuery to extract path, search terms, search type, and options
-            pathQuery, fileQuery, searchType, searchOptions = self._parseSearchQuery(searchQuery)
+            # Parse list_query to extract path, search terms, search type, and options
+            pathQuery, fileQuery, searchType, searchOptions = self._parseSearchQuery(list_query)
+
+            # Determine sites to use - strict validation: pathObject → pathQuery → ERROR
+            sites = None
+
+            # Step 1: Check pathObject first
+            if pathObject:
+                # When pathObject is provided, we should have specific site information
+                # Extract site information from the pathObject result
+                try:
+                    # Get the site information from the first folder in pathObject
+                    if 'found_documents' in locals() and found_documents:
+                        first_folder = found_documents[0]
+                        site_name = first_folder.get("siteName")
+                        site_id = first_folder.get("siteId")
+
+                        if site_name and site_id:
+                            # Use the specific site from pathObject instead of discovering all sites
+                            sites = [{
+                                "id": site_id,
+                                "displayName": site_name,
+                                "webUrl": first_folder.get("webUrl", "")
+                            }]
+                            logger.info(f"Using specific site from pathObject: {site_name} (ID: {site_id})")
+                        else:
+                            # Site info missing from pathObject - this is an error
+                            return ActionResult.isFailure(error="Site information missing from pathObject. Cannot determine target site for list operation.")
+                    else:
+                        # No documents found in pathObject - this is an error
+                        return ActionResult.isFailure(error="No valid folder information found in pathObject. Cannot determine target site for list operation.")
+                except Exception as e:
+                    # Error processing pathObject - this is an error
+                    return ActionResult.isFailure(error=f"Error processing pathObject: {str(e)}. Cannot determine target site for list operation.")
+
+            # Step 2: If no pathObject, check pathQuery
+            elif pathQuery and pathQuery.strip() != "" and pathQuery.strip() != "*":
+                # Validate pathQuery format
+                if not pathQuery.startswith('/'):
+                    return ActionResult.isFailure(error="pathQuery must start with '/' and include the site name, syntax /site:<SiteName>/... e.g. /site:KM LayerFinance/Documents/Work")
+
+                # Check if pathQuery contains search terms (words without proper path structure)
+                if not pathQuery.startswith('/site:') and not pathQuery.startswith('/Documents') and not pathQuery.startswith('/Shared Documents'):
+                    return ActionResult.isFailure(error=f"Invalid pathQuery '{pathQuery}'. This appears to be search terms, not a valid SharePoint path. Use findDocumentPath action first to search for folders, then use the returned folder path as pathQuery.")
+
+                # For pathQuery, we need to discover sites to find the specific one
+                sites = await self._discoverSharePointSites(connection["accessToken"])
+                if not sites:
+                    return ActionResult.isFailure(error="No SharePoint sites found or accessible")
+            else:
+                # Step 3: Both pathObject and pathQuery failed - ERROR, NO FALLBACK
+                return ActionResult.isFailure(error="No valid list path provided. Either provide pathObject (from findDocumentPath) or a valid pathQuery with specific site information.")

-            # Discover all SharePoint sites accessible to the user
-            sites = await self._discoverSharePointSites(connection["accessToken"])
             if not sites:
-                return ActionResult.isFailure(error="No SharePoint sites found or accessible")
+                return ActionResult.isFailure(error="No valid target site determined for list operation")

-            # Check if searchQuery is a folder ID (starts with 01PPXICCB...)
-            if searchQuery.startswith('01PPXICCB') or searchQuery.startswith('01'):
+            # Check if list_query is a folder ID (starts with 01PPXICCB...)
+            if list_query.startswith('01PPXICCB') or list_query.startswith('01'):
                 # Direct folder ID - use it directly
-                folder_paths = [searchQuery]
-                logger.info(f"Using direct folder ID: {searchQuery}")
+                folder_paths = [list_query]
+                logger.info(f"Using direct folder ID: {list_query}")
             else:
                 # Resolve path query into folder paths
                 folder_paths = self._resolvePathQuery(pathQuery)
@@ -1133,6 +1586,19 @@ class MethodSharepoint(MethodBase):
             processed_items = []
             for item in items:
+                # Use improved folder detection logic
+                is_folder = False
+                if 'folder' in item:
+                    is_folder = True
+                else:
+                    # Try to detect by URL pattern or other indicators
+                    web_url = item.get('webUrl', '')
+                    name = item.get('name', '')
+
+                    # Check if the name has no file extension and the URL looks like a folder path
+                    if '.' not in name and ('/' in web_url or '\\' in web_url):
+                        is_folder = True
+
                 item_info = {
                     "id": item.get("id"),
                     "name": item.get("name"),
@@ -1140,7 +1606,7 @@ class MethodSharepoint(MethodBase):
                     "createdDateTime": item.get("createdDateTime"),
                     "lastModifiedDateTime": item.get("lastModifiedDateTime"),
                     "webUrl": item.get("webUrl"),
-                    "type": "folder" if "folder" in item else "file",
+                    "type": "folder" if is_folder else "file",
                     "siteName": site_name,
                     "siteUrl": site_url
                 }
@@ -1162,7 +1628,8 @@ class MethodSharepoint(MethodBase):
             # If include subfolders is enabled, get ONLY direct subfolder contents (1 level deep only)
             if includeSubfolders:
-                logger.info(f"Including subfolders - processing {len([item for item in processed_items if item['type'] == 'folder'])} folders")
+                folder_items = [item for item in processed_items if item['type'] == 'folder']
+                logger.info(f"Including subfolders - processing {len(folder_items)} folders")
                 subfolder_count = 0
                 max_subfolders = 10  # Limit to prevent infinite loops
@@ -1179,6 +1646,19 @@ class MethodSharepoint(MethodBase):
                     logger.debug(f"Found {len(subfolder_items)} items in subfolder {item['name']}")
                     for subfolder_item in subfolder_items:
+                        # Use improved folder detection logic for subfolder items
+                        subfolder_is_folder = False
+                        if 'folder' in subfolder_item:
+                            subfolder_is_folder = True
+                        else:
+                            # Try to detect by URL pattern or other indicators
+                            subfolder_web_url = subfolder_item.get('webUrl', '')
+                            subfolder_name = subfolder_item.get('name', '')
+
+                            # Check if the name has no file extension and the URL looks like a folder path
+                            if '.' not in subfolder_name and ('/' in subfolder_web_url or '\\' in subfolder_web_url):
+                                subfolder_is_folder = True
+
                         # Only add files and direct subfolders, NO RECURSION
                         subfolder_item_info = {
                             "id": subfolder_item.get("id"),
@@ -1187,7 +1667,7 @@ class MethodSharepoint(MethodBase):
                             "createdDateTime": subfolder_item.get("createdDateTime"),
                             "lastModifiedDateTime": subfolder_item.get("lastModifiedDateTime"),
                             "webUrl": subfolder_item.get("webUrl"),
-                            "type": "folder" if "folder" in subfolder_item else "file",
+                            "type": "folder" if subfolder_is_folder else "file",
                             "parentPath": subfolder_path,
                             "siteName": site_name,
                             "siteUrl": site_url
@@ -1231,25 +1711,17 @@ class MethodSharepoint(MethodBase):
         # Create result data
         result_data = {
-            "searchQuery": searchQuery,
+            "pathQuery": list_query,
             "includeSubfolders": includeSubfolders,
             "sitesSearched": len(sites),
             "listResults": list_results,
             "timestamp": get_utc_timestamp()
         }

-        # Determine output format based on expected formats
+        # Use default JSON format for output
        output_extension = ".json"  # Default
        output_mime_type = "application/json"  # Default
-        if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
-            # Use the first expected format
-            expected_format = expectedDocumentFormats[0]
-            output_extension = expected_format.get("extension", ".json")
-            output_mime_type = expected_format.get("mimeType", "application/json")
-            logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
-        else:
-            logger.info("No expected format specified, using default .json format")

         return ActionResult(
             success=True,
diff --git a/modules/routes/routeSecurityGoogle.py b/modules/routes/routeSecurityGoogle.py
index 33f77c49..d3921b62 100644
--- a/modules/routes/routeSecurityGoogle.py
+++ b/modules/routes/routeSecurityGoogle.py
@@ -120,7 +120,7 @@ async def login(
         access_type="offline",
         include_granted_scopes="true",
         state=state_param,
-        prompt="select_account"
+        prompt="consent select_account"
     )
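+    # "consent" makes Google re-issue a refresh_token on repeat authorizations;
+    # without it the token is only returned on the first grant, which is why the
+    # callback below also falls back to a previously stored refresh token.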
logger.info(f"Generated Google OAuth URL using OAuth2Session: {auth_url}") @@ -166,6 +166,33 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse "token_type": token_data.get("token_type", "bearer"), "expires_in": token_data.get("expires_in", 0) } + + # If Google did not return a refresh_token, try to reuse an existing one for this user/connection + if not token_response.get("refresh_token"): + try: + rootInterface = getRootInterface() + # Prefer connection flow reuse; fallback to user access token + if connection_id: + existing_tokens = rootInterface.db.getRecordset("tokens", recordFilter={ + "connectionId": connection_id, + "authority": AuthAuthority.GOOGLE + }) + if existing_tokens: + # Use most recent by createdAt + existing_tokens.sort(key=lambda x: x.get("createdAt", 0), reverse=True) + token_response["refresh_token"] = existing_tokens[0].get("tokenRefresh", "") + if not token_response.get("refresh_token") and user_id: + existing_access_tokens = rootInterface.db.getRecordset("tokens", recordFilter={ + "userId": user_id, + "connectionId": None, + "authority": AuthAuthority.GOOGLE + }) + if existing_access_tokens: + existing_access_tokens.sort(key=lambda x: x.get("createdAt", 0), reverse=True) + token_response["refresh_token"] = existing_access_tokens[0].get("tokenRefresh", "") + except Exception: + # Non-fatal; continue without refresh token + pass diff --git a/modules/security/tokenManager.py b/modules/security/tokenManager.py index c7cf4cf0..ce34433a 100644 --- a/modules/security/tokenManager.py +++ b/modules/security/tokenManager.py @@ -98,8 +98,7 @@ class TokenManager: "client_id": self.google_client_id, "client_secret": self.google_client_secret, "grant_type": "refresh_token", - "refresh_token": refresh_token, - "scope": "https://www.googleapis.com/auth/gmail.readonly https://www.googleapis.com/auth/userinfo.profile https://www.googleapis.com/auth/userinfo.email openid" + "refresh_token": refresh_token } # Make refresh request diff --git a/test_graph_search.py b/test_graph_search.py new file mode 100644 index 00000000..981aa778 --- /dev/null +++ b/test_graph_search.py @@ -0,0 +1,311 @@ +#!/usr/bin/env python3 +""" +Simple test script for Microsoft Graph Search API +Tests folder search queries directly +""" + +import requests +import json +import sys +import os + +# Add the gateway modules to the path +sys.path.append(os.path.dirname(os.path.abspath(__file__))) + +def test_graph_folders_direct(access_token): + """Test direct Microsoft Graph API call to list folders""" + print("🔍 Testing direct Graph API folder listing...") + + # Try to list folders from the main site - need to get site ID first + # Let's try to find the site by name first + url = "https://graph.microsoft.com/v1.0/sites/pcuster.sharepoint.com:/sites/SSSRESYNachfolge:/drive/root/children" + + headers = { + "Authorization": f"Bearer {access_token}", + "Content-Type": "application/json" + } + + try: + response = requests.get(url, headers=headers) + + if response.status_code == 200: + data = response.json() + items = data.get('value', []) + print(f"✅ SUCCESS - Found {len(items)} items in root") + + folders = [] + files = [] + + for item in items: + if 'folder' in item: + folders.append(item) + elif 'file' in item: + files.append(item) + + print(f" 📁 Folders: {len(folders)}") + print(f" 📄 Files: {len(files)}") + + if folders: + print("\n📁 FOLDERS found:") + for i, folder in enumerate(folders[:5], 1): + name = folder.get('name', 'No name') + web_url = folder.get('webUrl', 'No URL') + 
print(f" {i}. {name}") + print(f" URL: {web_url}") + print() + + else: + print(f"❌ ERROR - Status {response.status_code}") + print(f"Error: {response.text[:200]}") + + except Exception as e: + print(f"Exception: {str(e)}") + +def test_graph_search(access_token, query_string): + """Test a Microsoft Graph Search API query and show resulting paths""" + + url = "https://graph.microsoft.com/v1.0/search/query" + + headers = { + "Authorization": f"Bearer {access_token}", + "Content-Type": "application/json" + } + + payload = { + "requests": [ + { + "entityTypes": ["driveItem"], + "query": { + "queryString": query_string + }, + "from": 0, + "size": 50 + } + ] + } + + print(f"Testing: {query_string}") + print("-" * 50) + + try: + response = requests.post(url, headers=headers, json=payload) + + if response.status_code == 200: + data = response.json() + + # Extract useful info + if "value" in data and len(data["value"]) > 0: + hits = data["value"][0].get("hitsContainers", []) + if hits: + total = hits[0].get("total", 0) + results = hits[0].get("hits", []) + print(f"✅ SUCCESS - Found {total} results") + + # First, let's see what types of results we're getting + print(f"📊 Analyzing {len(results)} results...") + + # Count different types of results with better detection + file_count = 0 + folder_count = 0 + other_count = 0 + + # Debug: Let's see what the actual resource structure looks like + if results: + print("🔍 DEBUG: First result structure:") + first_result = results[0] + print(f" Keys: {list(first_result.keys())}") + if 'resource' in first_result: + resource = first_result['resource'] + print(f" Resource keys: {list(resource.keys())}") + if 'folder' in resource: + print(f" Folder info: {resource['folder']}") + if 'file' in resource: + print(f" File info: {resource['file']}") + print() + + for result in results: + resource = result.get('resource', {}) + + # Better detection logic + is_folder = False + is_file = False + + # Check for explicit folder/file indicators + if 'folder' in resource: + is_folder = True + elif 'file' in resource: + is_file = True + else: + # Try to detect by URL pattern or other indicators + web_url = resource.get('webUrl', '') + name = resource.get('name', '') + + # Check if URL ends with a file extension (likely a file) + if '.' in name and any(name.lower().endswith(ext) for ext in ['.pdf', '.docx', '.xlsx', '.pptx', '.txt', '.cs', '.py', '.js', '.html', '.css']): + is_file = True + # Check if URL has no file extension and looks like a folder path + elif '.' not in name and ('/' in web_url or '\\' in web_url): + is_folder = True + + if is_folder: + folder_count += 1 + elif is_file: + file_count += 1 + else: + other_count += 1 + + print(f" 📄 Files: {file_count}") + print(f" 📁 Folders: {folder_count}") + print(f" ❓ Other: {other_count}") + print() + + # Show sample results regardless of type + print(f"📋 Sample results (showing first 5):") + for i, result in enumerate(results[:5], 1): + resource = result.get('resource', {}) + web_url = resource.get('webUrl', 'No URL') + name = resource.get('name', 'No name') + + # Determine type using same logic as counting + is_folder = False + is_file = False + + if 'folder' in resource: + is_folder = True + elif 'file' in resource: + is_file = True + else: + # Try to detect by URL pattern or other indicators + web_url = resource.get('webUrl', '') + name = resource.get('name', '') + + # Check if URL ends with a file extension (likely a file) + if '.' 
in name and any(name.lower().endswith(ext) for ext in ['.pdf', '.docx', '.xlsx', '.pptx', '.txt', '.cs', '.py', '.js', '.html', '.css']): + is_file = True + # Check if URL has no file extension and looks like a folder path + elif '.' not in name and ('/' in web_url or '\\' in web_url): + is_folder = True + + if is_folder: + item_type = "📁 FOLDER" + elif is_file: + file_info = resource.get('file', {}) + mime_type = file_info.get('mimeType', 'Unknown type') if file_info else 'Detected by extension' + item_type = f"📄 FILE ({mime_type})" + else: + item_type = "❓ UNKNOWN" + + # Extract path from webUrl + if '/sites/SSSRESYNachfolge/' in web_url: + path_part = web_url.split('/sites/SSSRESYNachfolge/')[-1] + path_with_backslashes = path_part.replace('/', '\\') + display_path = f"\\{path_with_backslashes}" + else: + display_path = web_url + + print(f" {i}. {item_type} - {name}") + print(f" Path: {display_path}") + print(f" URL: {web_url}") + print() + + if len(results) > 5: + print(f" ... and {len(results) - 5} more results") + + # Now filter and show only FOLDER results if any exist + folder_results = [] + for result in results: + resource = result.get('resource', {}) + + # Use the same detection logic as counting + is_folder = False + if 'folder' in resource: + is_folder = True + else: + # Try to detect by URL pattern or other indicators + web_url = resource.get('webUrl', '') + name = resource.get('name', '') + + # Check if URL has no file extension and looks like a folder path + if '.' not in name and ('/' in web_url or '\\' in web_url): + is_folder = True + + if is_folder: + folder_results.append(result) + + if folder_results: + print(f"\n📁 FOLDER DETAILS ({len(folder_results)} folders found):") + for i, result in enumerate(folder_results, 1): + web_url = result.get('resource', {}).get('webUrl', 'No URL') + name = result.get('resource', {}).get('name', 'No name') + + if '/sites/SSSRESYNachfolge/' in web_url: + path_part = web_url.split('/sites/SSSRESYNachfolge/')[-1] + path_with_backslashes = path_part.replace('/', '\\') + folder_path = f"\\{path_with_backslashes}" + else: + folder_path = web_url + + print(f" {i}. 📁 {name}") + print(f" Path: {folder_path}") + print(f" URL: {web_url}") + print() + else: + print(f"\n❌ No folders found in results - all {total} results are files or other types") + else: + print("❌ SUCCESS but no hits containers found") + else: + print("❌ SUCCESS but no value array in response") + + else: + print(f"❌ ERROR - Status {response.status_code}") + error_text = response.text[:200] + "..." 
+            print(f"Error: {error_text}")
+
+    except Exception as e:
+        print(f"Exception: {str(e)}")
+
+def main():
+    """Main test function"""
+
+    # Use a valid Graph access token from the environment (never hardcode live bearer tokens)
+    access_token = os.environ.get("GRAPH_ACCESS_TOKEN", "")
+    if not access_token:
+        print("❌ GRAPH_ACCESS_TOKEN is not set - export a valid Microsoft Graph access token first")
+        sys.exit(1)
+
+    print("=" * 60)
+    print("Microsoft Graph API Test Suite")
+    print("=" * 60)
+
+    # First test: Direct folder listing (should work better than search)
+    print("\nTEST 0: Direct Graph API folder listing")
+    test_graph_folders_direct(access_token)
+
+    # Test different query types to find both files and folders
+    test_queries = [
+        # Test 1: Test with Venus folder (empty folder created for testing)
+        "Venus",
+
+        # Test 2: Folder-specific searches for Venus
+        "kind:folder AND Venus",
+
+        # Test 3: Original specific query (found 8 results - all files)
+        "Druckersteuerung AND Eskalation AND Logobject",
+
+        # Test 4: Broader folder-focused queries
+        "Druckersteuerung",
+        "Eskalation",
+        "Logobject",
+
+        # Test 5: Folder-specific searches
+        "kind:folder AND Druckersteuerung",
+        "kind:folder AND Eskalation",
+
+        # Test 6: General folder search to see what folders exist
+        "kind:folder",
+    ]
+
+    for i, query in enumerate(test_queries, 1):
+        print(f"\nTEST {i}: {query}")
+        test_graph_search(access_token, query)
+        print()
+
+if __name__ == "__main__":
+    main()