# gateway/test_graph_search.py

#!/usr/bin/env python3
"""
Simple test script for Microsoft Graph Search API
Tests folder search queries directly
"""

import requests
import json
import sys
import os

# Append the directory containing this script to sys.path so that modules
# living next to it (the gateway modules) can be imported.
# NOTE(review): nothing in the visible part of this file imports such a
# module - confirm this line is still needed.
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

def test_graph_folders_direct(access_token, timeout=30.0):
    """List the children of the site's default drive root via Microsoft Graph.

    Sends one GET request to the ``drive/root/children`` endpoint of the
    hard-coded SharePoint site, prints how many folders and files came back,
    and shows name and URL for at most five folders. Nothing is returned;
    results and errors are only printed.

    Args:
        access_token: Bearer token placed in the ``Authorization`` header.
        timeout: Seconds to wait for the server before giving up.
    """
    print("🔍 Testing direct Graph API folder listing...")

    # The site is addressed by hostname plus server-relative path, so no
    # separate lookup of the site ID is needed.
    url = "https://graph.microsoft.com/v1.0/sites/pcuster.sharepoint.com:/sites/SSSRESYNachfolge:/drive/root/children"

    headers = {
        "Authorization": f"Bearer {access_token}",
        "Content-Type": "application/json"
    }

    try:
        # Without an explicit timeout requests waits forever on a stalled
        # connection, which would hang the whole script.
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException as e:
        print(f"Exception: {str(e)}")
        return

    if response.status_code != 200:
        print(f"❌ ERROR - Status {response.status_code}")
        # Same truncation rule as test_graph_search: mark cut-off bodies.
        error_text = response.text[:200] + "..." if len(response.text) > 200 else response.text
        print(f"Error: {error_text}")
        return

    try:
        data = response.json()
    except ValueError as e:
        # A 200 response whose body is not valid JSON.
        print(f"Exception: {str(e)}")
        return

    # Only the items of this single response are inspected; a possible
    # '@odata.nextLink' continuation is not followed.
    items = data.get('value', [])
    print(f"✅ SUCCESS - Found {len(items)} items in root")

    # An item is a folder if it carries the 'folder' facet; otherwise it is
    # a file if it carries the 'file' facet. Anything else is ignored.
    folders = [item for item in items if 'folder' in item]
    files = [item for item in items if 'file' in item and 'folder' not in item]

    print(f"   📁 Folders: {len(folders)}")
    print(f"   📄 Files: {len(files)}")

    if folders:
        print("\n📁 FOLDERS found:")
        for i, folder in enumerate(folders[:5], 1):
            name = folder.get('name', 'No name')
            web_url = folder.get('webUrl', 'No URL')
            print(f"  {i}. {name}")
            print(f"     URL: {web_url}")
            print()

# File-name suffixes that mark a hit as a file when it carries neither the
# 'folder' nor the 'file' facet.
_KNOWN_FILE_EXTENSIONS = (
    '.pdf', '.docx', '.xlsx', '.pptx', '.txt',
    '.cs', '.py', '.js', '.html', '.css',
)

# Part of a webUrl after which the site-relative path starts.
_SITE_URL_MARKER = '/sites/SSSRESYNachfolge/'


def _classify_resource(resource):
    """Return 'folder', 'file' or 'other' for one search-hit resource dict."""
    # Explicit facets win over any guessing.
    if 'folder' in resource:
        return 'folder'
    if 'file' in resource:
        return 'file'

    # No facet present: fall back to a name/URL heuristic.
    name = resource.get('name') or ''
    web_url = resource.get('webUrl') or ''
    if name.lower().endswith(_KNOWN_FILE_EXTENSIONS):
        return 'file'
    if '.' not in name and ('/' in web_url or '\\' in web_url):
        return 'folder'
    return 'other'


def _site_relative_path(web_url):
    """Return the part of web_url below the site as a backslash path.

    URLs that do not contain the site marker are returned unchanged.
    """
    if _SITE_URL_MARKER not in web_url:
        return web_url
    tail = web_url.split(_SITE_URL_MARKER)[-1]
    return '\\' + tail.replace('/', '\\')


def test_graph_search(access_token, query_string, timeout=30.0):
    """Run one Microsoft Graph Search query and print the resulting paths.

    Posts a ``driveItem`` search for ``query_string`` (first 50 hits),
    prints the structure of the first hit, a file/folder/other breakdown,
    the first five hits, and finally every hit classified as a folder.
    Nothing is returned; results and errors are only printed.

    Args:
        access_token: Bearer token placed in the ``Authorization`` header.
        query_string: Query text sent as ``query.queryString``.
        timeout: Seconds to wait for the server before giving up.
    """
    url = "https://graph.microsoft.com/v1.0/search/query"

    headers = {
        "Authorization": f"Bearer {access_token}",
        "Content-Type": "application/json"
    }

    payload = {
        "requests": [
            {
                "entityTypes": ["driveItem"],
                "query": {
                    "queryString": query_string
                },
                "from": 0,
                "size": 50
            }
        ]
    }

    print(f"Testing: {query_string}")
    print("-" * 50)

    try:
        # Without an explicit timeout requests waits forever on a stalled
        # connection, which would hang the whole script.
        response = requests.post(url, headers=headers, json=payload, timeout=timeout)
    except requests.RequestException as e:
        print(f"Exception: {str(e)}")
        return

    if response.status_code != 200:
        print(f"❌ ERROR - Status {response.status_code}")
        error_text = response.text[:200] + "..." if len(response.text) > 200 else response.text
        print(f"Error: {error_text}")
        return

    try:
        data = response.json()
    except ValueError as e:
        # A 200 response whose body is not valid JSON.
        print(f"Exception: {str(e)}")
        return

    search_responses = data.get("value") or []
    if not search_responses:
        print("❌ SUCCESS but no value array in response")
        return

    containers = search_responses[0].get("hitsContainers", [])
    if not containers:
        print("❌ SUCCESS but no hits containers found")
        return

    # 'total' is the count reported by the service; 'results' is only the
    # page of hits actually returned (at most 50, see payload).
    total = containers[0].get("total", 0)
    results = containers[0].get("hits", [])
    print(f"✅ SUCCESS - Found {total} results")

    print(f"📊 Analyzing {len(results)} results...")

    # Dump the shape of the first hit to make the response format visible.
    if results:
        print("🔍 DEBUG: First result structure:")
        first_result = results[0]
        print(f"   Keys: {list(first_result.keys())}")
        if 'resource' in first_result:
            resource = first_result['resource']
            print(f"   Resource keys: {list(resource.keys())}")
            if 'folder' in resource:
                print(f"   Folder info: {resource['folder']}")
            if 'file' in resource:
                print(f"   File info: {resource['file']}")
        print()

    # Classify every hit exactly once so that the counts, the sample list
    # and the folder list below can never disagree with each other.
    kinds = [_classify_resource(hit.get('resource', {})) for hit in results]

    print(f"   📄 Files: {kinds.count('file')}")
    print(f"   📁 Folders: {kinds.count('folder')}")
    print(f"   ❓ Other: {kinds.count('other')}")
    print()

    print("📋 Sample results (showing first 5):")
    for i, (hit, kind) in enumerate(zip(results[:5], kinds), 1):
        resource = hit.get('resource', {})
        web_url = resource.get('webUrl', 'No URL')
        name = resource.get('name', 'No name')

        if kind == 'folder':
            item_type = "📁 FOLDER"
        elif kind == 'file':
            file_info = resource.get('file', {})
            mime_type = file_info.get('mimeType', 'Unknown type') if file_info else 'Detected by extension'
            item_type = f"📄 FILE ({mime_type})"
        else:
            item_type = "❓ UNKNOWN"

        print(f"  {i}. {item_type} - {name}")
        print(f"     Path: {_site_relative_path(web_url)}")
        print(f"     URL: {web_url}")
        print()

    if len(results) > 5:
        print(f"     ... and {len(results) - 5} more results")

    folder_results = [hit for hit, kind in zip(results, kinds) if kind == 'folder']

    if folder_results:
        print(f"\n📁 FOLDER DETAILS ({len(folder_results)} folders found):")
        for i, hit in enumerate(folder_results, 1):
            resource = hit.get('resource', {})
            web_url = resource.get('webUrl', 'No URL')
            name = resource.get('name', 'No name')

            print(f"  {i}. 📁 {name}")
            print(f"     Path: {_site_relative_path(web_url)}")
            print(f"     URL: {web_url}")
            print()
    else:
        # Report the number of hits that were actually inspected, not the
        # service-side total, which can exceed the returned page.
        print(f"\n❌ No folders found in results - all {len(results)} results are files or other types")

def main():
    """Run the direct folder listing followed by every search query.

    The bearer token is read from the ``GRAPH_ACCESS_TOKEN`` environment
    variable and passed to each check.

    Raises:
        SystemExit: If ``GRAPH_ACCESS_TOKEN`` is unset or empty.
    """
    # A bearer token is a credential and must never be committed to source
    # control, so it is taken from the environment instead of a literal.
    access_token = os.environ.get("GRAPH_ACCESS_TOKEN", "").strip()
    if not access_token:
        raise SystemExit(
            "GRAPH_ACCESS_TOKEN is not set - export a valid Microsoft Graph "
            "access token before running this script."
        )

    print("=" * 60)
    print("Microsoft Graph API Test Suite")
    print("=" * 60)

    # Direct folder listing runs first, before any search query.
    print("\nTEST 0: Direct Graph API folder listing")
    test_graph_folders_direct(access_token)

    # Every entry is run as its own numbered TEST, in this order.
    test_queries = [
        # Plain search for the "Venus" folder (empty folder created for testing)
        "Venus",

        # Folder-restricted search for Venus
        "kind:folder AND Venus",

        # Original specific query (found 8 results - all files)
        "Druckersteuerung AND Eskalation AND Logobject",

        # Broader single-term queries
        "Druckersteuerung",
        "Eskalation",
        "Logobject",

        # Folder-restricted single-term queries
        "kind:folder AND Druckersteuerung",
        "kind:folder AND Eskalation",

        # General folder search to see what folders exist
        "kind:folder",
    ]

    for i, query in enumerate(test_queries, 1):
        print(f"\nTEST {i}: {query}")
        test_graph_search(access_token, query)
        print()

# Run the suite only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()