gateway/test_graph_search.py
2025-09-04 23:40:07 +02:00

311 lines
16 KiB
Python

#!/usr/bin/env python3
"""
Simple test script for Microsoft Graph Search API
Tests folder search queries directly
"""
import requests
import json
import sys
import os
# Append the directory containing this script to sys.path so that modules
# located next to it (the gateway modules) can be imported when it is run directly.
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
def test_graph_folders_direct(access_token, *, timeout=30):
    """List the children of the site's drive root via Microsoft Graph.

    Sends a single GET request to the hard-coded ``/drive/root/children``
    endpoint of the SSSRESYNachfolge site and prints how many folders and
    files were returned, followed by the name and web URL of up to five
    folders.

    Args:
        access_token: Bearer token sent in the ``Authorization`` header.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout ``requests`` may block indefinitely.

    Returns:
        None. Results and errors are reported on stdout only.
    """
    print("🔍 Testing direct Graph API folder listing...")

    # The site is addressed by hostname and server-relative path, so no
    # separate site-ID lookup is required.
    url = "https://graph.microsoft.com/v1.0/sites/pcuster.sharepoint.com:/sites/SSSRESYNachfolge:/drive/root/children"
    headers = {
        "Authorization": f"Bearer {access_token}",
        "Content-Type": "application/json",
    }

    try:
        response = requests.get(url, headers=headers, timeout=timeout)

        if response.status_code != 200:
            print(f"❌ ERROR - Status {response.status_code}")
            print(f"Error: {response.text[:200]}")
            return

        items = response.json().get('value', [])
        print(f"✅ SUCCESS - Found {len(items)} items in root")

        # An item carrying a 'folder' facet is a folder; otherwise a 'file'
        # facet marks it as a file. Items with neither facet are ignored.
        folders = [item for item in items if 'folder' in item]
        files = [item for item in items if 'folder' not in item and 'file' in item]

        print(f" 📁 Folders: {len(folders)}")
        print(f" 📄 Files: {len(files)}")

        if folders:
            print("\n📁 FOLDERS found:")
            for i, folder in enumerate(folders[:5], 1):
                name = folder.get('name', 'No name')
                web_url = folder.get('webUrl', 'No URL')
                print(f" {i}. {name}")
                print(f" URL: {web_url}")
                print()
    except Exception as e:
        # Deliberately broad: this is a diagnostic script and a failure here
        # (network error, timeout, malformed JSON) must not stop later tests.
        print(f"Exception: {e}")
# Extensions that mark a hit as a file when it carries neither a 'folder'
# nor a 'file' facet.
_KNOWN_FILE_EXTENSIONS = (
    '.pdf', '.docx', '.xlsx', '.pptx', '.txt',
    '.cs', '.py', '.js', '.html', '.css',
)

# Everything after this marker in a webUrl is shown as the site-relative path.
_SITE_URL_MARKER = '/sites/SSSRESYNachfolge/'


def _classify_resource(resource):
    """Return ``'folder'``, ``'file'`` or ``'other'`` for a hit's resource dict.

    Explicit ``folder`` / ``file`` keys win. Without them the name and webUrl
    are used as a heuristic: a known file extension means file, a dot-free
    name together with a path separator in the URL means folder.
    """
    if 'folder' in resource:
        return 'folder'
    if 'file' in resource:
        return 'file'

    name = resource.get('name', '')
    web_url = resource.get('webUrl', '')
    if name.lower().endswith(_KNOWN_FILE_EXTENSIONS):
        return 'file'
    if '.' not in name and ('/' in web_url or '\\' in web_url):
        return 'folder'
    return 'other'


def _site_relative_path(web_url):
    """Return the part of *web_url* after the site marker, backslash-separated.

    URLs that do not contain the site marker are returned unchanged.
    """
    if _SITE_URL_MARKER not in web_url:
        return web_url
    relative = web_url.split(_SITE_URL_MARKER)[-1]
    return "\\" + relative.replace('/', '\\')


def _print_first_hit_structure(first_hit):
    """Print the keys of the first hit and of its resource, for debugging."""
    print("🔍 DEBUG: First result structure:")
    print(f" Keys: {list(first_hit.keys())}")
    if 'resource' in first_hit:
        resource = first_hit['resource']
        print(f" Resource keys: {list(resource.keys())}")
        if 'folder' in resource:
            print(f" Folder info: {resource['folder']}")
        if 'file' in resource:
            print(f" File info: {resource['file']}")
    print()


def _print_sample_hits(classified, limit=5):
    """Print type, name, path and URL for the first *limit* classified hits."""
    print("📋 Sample results (showing first 5):")
    for i, (resource, kind) in enumerate(classified[:limit], 1):
        web_url = resource.get('webUrl', 'No URL')
        name = resource.get('name', 'No name')

        if kind == 'folder':
            item_type = "📁 FOLDER"
        elif kind == 'file':
            file_info = resource.get('file', {})
            # No (or empty) 'file' entry means the extension heuristic decided.
            mime_type = (
                file_info.get('mimeType', 'Unknown type')
                if file_info else 'Detected by extension'
            )
            item_type = f"📄 FILE ({mime_type})"
        else:
            item_type = "❓ UNKNOWN"

        print(f" {i}. {item_type} - {name}")
        print(f" Path: {_site_relative_path(web_url)}")
        print(f" URL: {web_url}")
        print()

    if len(classified) > limit:
        print(f" ... and {len(classified) - limit} more results")


def _print_folder_details(folder_resources, total):
    """Print every folder hit, or a notice when the results contain none."""
    if not folder_resources:
        print(f"\n❌ No folders found in results - all {total} results are files or other types")
        return

    print(f"\n📁 FOLDER DETAILS ({len(folder_resources)} folders found):")
    for i, resource in enumerate(folder_resources, 1):
        web_url = resource.get('webUrl', 'No URL')
        name = resource.get('name', 'No name')
        print(f" {i}. 📁 {name}")
        print(f" Path: {_site_relative_path(web_url)}")
        print(f" URL: {web_url}")
        print()


def test_graph_search(access_token, query_string, *, timeout=30):
    """Run one Microsoft Graph Search query for drive items and print the hits.

    Posts *query_string* to ``/search/query`` (entity type ``driveItem``,
    first 50 hits), then prints a debug dump of the first hit, a count of
    files / folders / other items, the first five hits, and finally every hit
    classified as a folder.

    Every hit is classified exactly once, so the counts, the sample listing
    and the folder details always agree with each other.

    Args:
        access_token: Bearer token sent in the ``Authorization`` header.
        query_string: Search expression placed in ``query.queryString``.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout ``requests`` may block indefinitely.

    Returns:
        None. Results and errors are reported on stdout only.
    """
    url = "https://graph.microsoft.com/v1.0/search/query"
    headers = {
        "Authorization": f"Bearer {access_token}",
        "Content-Type": "application/json",
    }
    payload = {
        "requests": [
            {
                "entityTypes": ["driveItem"],
                "query": {
                    "queryString": query_string
                },
                "from": 0,
                "size": 50,
            }
        ]
    }

    print(f"Testing: {query_string}")
    print("-" * 50)

    try:
        response = requests.post(url, headers=headers, json=payload, timeout=timeout)

        if response.status_code != 200:
            print(f"❌ ERROR - Status {response.status_code}")
            body = response.text
            error_text = body[:200] + "..." if len(body) > 200 else body
            print(f"Error: {error_text}")
            return

        data = response.json()
        if not data.get("value"):
            print("❌ SUCCESS but no value array in response")
            return

        containers = data["value"][0].get("hitsContainers", [])
        if not containers:
            print("❌ SUCCESS but no hits containers found")
            return

        total = containers[0].get("total", 0)
        results = containers[0].get("hits", [])
        print(f"✅ SUCCESS - Found {total} results")
        print(f"📊 Analyzing {len(results)} results...")

        if results:
            _print_first_hit_structure(results[0])

        # Classify once and reuse the verdict for every report below.
        classified = []
        for hit in results:
            resource = hit.get('resource', {})
            classified.append((resource, _classify_resource(resource)))

        kinds = [kind for _, kind in classified]
        print(f" 📄 Files: {kinds.count('file')}")
        print(f" 📁 Folders: {kinds.count('folder')}")
        print(f" ❓ Other: {kinds.count('other')}")
        print()

        _print_sample_hits(classified)

        folder_resources = [res for res, kind in classified if kind == 'folder']
        _print_folder_details(folder_resources, total)
    except Exception as e:
        # Deliberately broad: this is a diagnostic script and one failing
        # query (network error, timeout, unexpected payload shape) must not
        # prevent the remaining queries from running.
        print(f"Exception: {e}")
def main():
    """Run the direct folder listing followed by a fixed series of search queries.

    The bearer token is taken from the first command-line argument or, when no
    argument is given, from the ``GRAPH_ACCESS_TOKEN`` environment variable.
    It is intentionally not embedded in the source: an access token is a
    credential and must never be committed to version control.

    Raises:
        SystemExit: If no access token was supplied.
    """
    if len(sys.argv) > 1:
        access_token = sys.argv[1].strip()
    else:
        access_token = os.environ.get("GRAPH_ACCESS_TOKEN", "").strip()

    if not access_token:
        raise SystemExit(
            "No access token supplied. Pass it as the first argument or set "
            "the GRAPH_ACCESS_TOKEN environment variable."
        )

    print("=" * 60)
    print("Microsoft Graph API Test Suite")
    print("=" * 60)

    # First test: direct folder listing (should work better than search)
    print("\nTEST 0: Direct Graph API folder listing")
    test_graph_folders_direct(access_token)

    # Different query types, to find both files and folders
    test_queries = [
        # Venus folder (empty folder created for testing)
        "Venus",
        # Folder-specific search for Venus
        "kind:folder AND Venus",
        # Original specific query (found 8 results - all files)
        "Druckersteuerung AND Eskalation AND Logobject",
        # Broader single-term queries
        "Druckersteuerung",
        "Eskalation",
        "Logobject",
        # Folder-specific searches
        "kind:folder AND Druckersteuerung",
        "kind:folder AND Eskalation",
        # General folder search to see what folders exist
        "kind:folder",
    ]

    for i, query in enumerate(test_queries, 1):
        print(f"\nTEST {i}: {query}")
        test_graph_search(access_token, query)
        print()
# Run the test suite only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()