# gateway/modules/services/serviceSharepoint/mainSharepoint.py
"""Connector for SharePoint operations using Microsoft Graph API."""
import logging
import json
import aiohttp
import asyncio
from typing import Dict, Any, List, Optional
from datetime import datetime, UTC
logger = logging.getLogger(__name__)
class SharepointService:
    """SharePoint connector using Microsoft Graph API for reliable authentication."""

    def __init__(self, access_token: str):
        """Initialize with access token.

        Args:
            access_token: Microsoft Graph access token
        """
        self.access_token = access_token
        self.base_url = "https://graph.microsoft.com/v1.0"

    async def _make_graph_api_call(self, endpoint: str, method: str = "GET",
                                   data: Optional[bytes] = None) -> Dict[str, Any]:
        """Make a Microsoft Graph API call with proper error handling."""
        if method not in ("GET", "PUT", "POST"):
            return {"error": f"Unsupported HTTP method: {method}"}
        try:
            headers = {
                "Authorization": f"Bearer {self.access_token}",
                # Raw bytes pushed via PUT (file uploads) go out as octet-stream;
                # everything else is JSON.
                "Content-Type": "application/octet-stream" if data is not None and method == "PUT" else "application/json"
            }
            # Remove any leading slash from the endpoint to avoid a double slash
            url = f"{self.base_url}/{endpoint.lstrip('/')}"
            logger.debug(f"Making Graph API call: {method} {url}")
            timeout = aiohttp.ClientTimeout(total=30)
            async with aiohttp.ClientSession(timeout=timeout) as session:
                async with session.request(method, url, headers=headers, data=data) as response:
                    # GET succeeds with 200; PUT and POST may also return 201 Created.
                    if response.status in (200, 201):
                        return await response.json()
                    error_text = await response.text()
                    logger.error(f"Graph API call failed: {response.status} - {error_text}")
                    return {"error": f"API call failed: {response.status} - {error_text}"}
        except asyncio.TimeoutError:
            logger.error(f"Graph API call timed out after 30 seconds: {endpoint}")
            return {"error": f"API call timed out after 30 seconds: {endpoint}"}
        except Exception as e:
            logger.error(f"Error making Graph API call: {str(e)}")
            return {"error": f"Error making Graph API call: {str(e)}"}

    async def discover_sites(self) -> List[Dict[str, Any]]:
        """Discover all SharePoint sites accessible to the user."""
        try:
            result = await self._make_graph_api_call("sites?search=*")
            if "error" in result:
                logger.error(f"Error discovering SharePoint sites: {result['error']}")
                return []
            sites = result.get("value", [])
            logger.info(f"Discovered {len(sites)} SharePoint sites")
            processed_sites = []
            for site in sites:
                site_info = {
                    "id": site.get("id"),
                    "displayName": site.get("displayName"),
                    "name": site.get("name"),
                    "webUrl": site.get("webUrl"),
                    "description": site.get("description"),
                    "createdDateTime": site.get("createdDateTime"),
                    "lastModifiedDateTime": site.get("lastModifiedDateTime")
                }
                processed_sites.append(site_info)
                logger.debug(f"Site: {site_info['displayName']} - {site_info['webUrl']}")
            return processed_sites
        except Exception as e:
            logger.error(f"Error discovering SharePoint sites: {str(e)}")
            return []

    async def find_site_by_name(self, site_name: str) -> Optional[Dict[str, Any]]:
        """Find a specific SharePoint site by name using a direct Graph API call."""
        try:
            # Try to get the site directly by name via the Graph API
            endpoint = f"sites/{site_name}"
            result = await self._make_graph_api_call(endpoint)
            if result and "error" not in result:
                site_info = {
                    "id": result.get("id"),
                    "displayName": result.get("displayName"),
                    "name": result.get("name"),
                    "webUrl": result.get("webUrl"),
                    "description": result.get("description"),
                    "createdDateTime": result.get("createdDateTime"),
                    "lastModifiedDateTime": result.get("lastModifiedDateTime")
                }
                logger.info(f"Found site directly: {site_info['displayName']} - {site_info['webUrl']}")
                return site_info
        except Exception as e:
            logger.debug(f"Direct site lookup failed for '{site_name}': {str(e)}")
        # Fall back to discovery if the direct lookup fails
        logger.info(f"Direct lookup failed, trying discovery for site: {site_name}")
        sites = await self.discover_sites()
        if not sites:
            logger.warning("No sites discovered")
            return None
        logger.info(f"Discovered {len(sites)} SharePoint sites:")
        for site in sites:
            logger.info(f"  - {site.get('displayName', 'Unknown')} (ID: {site.get('id', 'Unknown')})")
        # Try an exact match first
        for site in sites:
            if site.get("displayName", "").strip().lower() == site_name.strip().lower():
                logger.info(f"Found exact match: {site.get('displayName')}")
                return site
        # Then fall back to a partial match
        for site in sites:
            if site_name.lower() in site.get("displayName", "").lower():
                logger.info(f"Found partial match: {site.get('displayName')}")
                return site
        logger.warning(f"No site found matching: {site_name}")
        return None

    async def find_site_by_web_url(self, web_url: str) -> Optional[Dict[str, Any]]:
        """Find a SharePoint site using its web URL (useful for guest sites)."""
        try:
            # Use the web URL addressing format: sites/{hostname}:/sites/{site-path}
            if not web_url.startswith("https://"):
                web_url = f"https://{web_url}"
            # Parse the URL to extract the hostname and site path
            parsed = urlparse(web_url)
            hostname = parsed.hostname
            path_parts = parsed.path.strip('/').split('/')
            if len(path_parts) >= 2 and path_parts[0] == 'sites':
                site_path = '/'.join(path_parts[1:])  # Everything after 'sites/'
            else:
                logger.error(f"Invalid SharePoint URL format: {web_url}")
                return None
            endpoint = f"sites/{hostname}:/sites/{site_path}"
            logger.debug(f"Trying web URL format: {endpoint}")
            result = await self._make_graph_api_call(endpoint)
            if result and "error" not in result:
                site_info = {
                    "id": result.get("id"),
                    "displayName": result.get("displayName"),
                    "name": result.get("name"),
                    "webUrl": result.get("webUrl"),
                    "description": result.get("description"),
                    "createdDateTime": result.get("createdDateTime"),
                    "lastModifiedDateTime": result.get("lastModifiedDateTime")
                }
                logger.info(f"Found site by web URL: {site_info['displayName']} - {site_info['webUrl']} (ID: {site_info['id']})")
                return site_info
            else:
                logger.warning(f"Site not found using web URL: {web_url}")
                return None
        except Exception as e:
            logger.error(f"Error finding site by web URL: {str(e)}")
            return None

    async def find_site_by_url(self, hostname: str, site_path: str) -> Optional[Dict[str, Any]]:
        """Find a SharePoint site using the site URL format."""
        try:
            # For guest sites, try several URL format variants
            url_formats = [
                f"sites/{hostname}:/sites/{site_path}",           # Standard format
                f"sites/{hostname}:/sites/{site_path}/",          # With trailing slash
                f"sites/{hostname}:/sites/{site_path.lower()}",   # Lowercase
                f"sites/{hostname}:/sites/{site_path.lower()}/",  # Lowercase with slash
            ]
            for endpoint in url_formats:
                logger.debug(f"Trying URL format: {endpoint}")
                result = await self._make_graph_api_call(endpoint)
                if result and "error" not in result:
                    site_info = {
                        "id": result.get("id"),
                        "displayName": result.get("displayName"),
                        "name": result.get("name"),
                        "webUrl": result.get("webUrl"),
                        "description": result.get("description"),
                        "createdDateTime": result.get("createdDateTime"),
                        "lastModifiedDateTime": result.get("lastModifiedDateTime")
                    }
                    logger.info(f"Found site by URL: {site_info['displayName']} - {site_info['webUrl']} (ID: {site_info['id']})")
                    return site_info
                else:
                    logger.debug(f"URL format failed: {endpoint} - {result.get('error', 'Unknown error')}")
            logger.warning(f"Site not found using any URL format for: {hostname}:/sites/{site_path}")
            return None
        except Exception as e:
            logger.error(f"Error finding site by URL: {str(e)}")
            return None

    async def get_folder_by_path(self, site_id: str, folder_path: str) -> Optional[Dict[str, Any]]:
        """Get folder information by path within a site."""
        try:
            # Clean the path
            clean_path = folder_path.lstrip('/')
            endpoint = f"sites/{site_id}/drive/root:/{clean_path}"
            result = await self._make_graph_api_call(endpoint)
            if "error" in result:
                logger.warning(f"Folder not found at path {folder_path}: {result['error']}")
                return None
            return result
        except Exception as e:
            logger.error(f"Error getting folder by path: {str(e)}")
            return None

    async def upload_file(self, site_id: str, folder_path: str, file_name: str, content: bytes) -> Dict[str, Any]:
        """Upload a file to SharePoint.

        Note: this uses Graph's simple upload (PUT to :/content), which is
        subject to a maximum file size; larger files need an upload session
        (see the sketch below).
        """
        try:
            # Clean the path
            clean_path = folder_path.lstrip('/')
            upload_path = f"{clean_path.rstrip('/')}/{file_name}"
            endpoint = f"sites/{site_id}/drive/root:/{upload_path}:/content"
            logger.info(f"Uploading file to: {endpoint}")
            result = await self._make_graph_api_call(endpoint, method="PUT", data=content)
            if "error" in result:
                logger.error(f"Upload failed: {result['error']}")
                return result
            logger.info(f"File uploaded successfully: {file_name}")
            return result
        except Exception as e:
            logger.error(f"Error uploading file: {str(e)}")
            return {"error": f"Error uploading file: {str(e)}"}

    async def download_file(self, site_id: str, file_id: str) -> Optional[bytes]:
        """Download a file from SharePoint."""
        try:
            endpoint = f"sites/{site_id}/drive/items/{file_id}/content"
            headers = {"Authorization": f"Bearer {self.access_token}"}
            timeout = aiohttp.ClientTimeout(total=30)
            async with aiohttp.ClientSession(timeout=timeout) as session:
                async with session.get(f"{self.base_url}/{endpoint}", headers=headers) as response:
                    if response.status == 200:
                        return await response.read()
                    logger.error(f"Download failed: {response.status}")
                    return None
        except Exception as e:
            logger.error(f"Error downloading file: {str(e)}")
            return None

    async def list_folder_contents(self, site_id: str, folder_path: str = "") -> List[Dict[str, Any]]:
        """List contents of a folder."""
        try:
            if not folder_path or folder_path == "/":
                endpoint = f"sites/{site_id}/drive/root/children"
            else:
                clean_path = folder_path.lstrip('/')
                endpoint = f"sites/{site_id}/drive/root:/{clean_path}:/children"
            result = await self._make_graph_api_call(endpoint)
            if "error" in result:
                logger.warning(f"Failed to list folder contents: {result['error']}")
                return []  # match the declared return type instead of returning None
            items = result.get("value", [])
            processed_items = []
            for item in items:
                # Distinguish folders from files via the presence of a 'folder' facet
                is_folder = 'folder' in item
                item_info = {
                    "id": item.get("id"),
                    "name": item.get("name"),
                    "type": "folder" if is_folder else "file",
                    "size": item.get("size", 0),
                    "createdDateTime": item.get("createdDateTime"),
                    "lastModifiedDateTime": item.get("lastModifiedDateTime"),
                    "webUrl": item.get("webUrl")
                }
                if "file" in item:
                    item_info["mimeType"] = item["file"].get("mimeType")
                    item_info["downloadUrl"] = item.get("@microsoft.graph.downloadUrl")
                if "folder" in item:
                    item_info["childCount"] = item["folder"].get("childCount", 0)
                processed_items.append(item_info)
            return processed_items
        except Exception as e:
            logger.error(f"Error listing folder contents: {str(e)}")
            return []

    async def search_files(self, site_id: str, query: str) -> List[Dict[str, Any]]:
        """Search for files in a site."""
        try:
            search_query = query.replace("'", "''")  # Escape single quotes for OData
            endpoint = f"sites/{site_id}/drive/root/search(q='{search_query}')"
            result = await self._make_graph_api_call(endpoint)
            if "error" in result:
                logger.warning(f"Search failed: {result['error']}")
                return []
            items = result.get("value", [])
            processed_items = []
            for item in items:
                is_folder = 'folder' in item
                item_info = {
                    "id": item.get("id"),
                    "name": item.get("name"),
                    "type": "folder" if is_folder else "file",
                    "size": item.get("size", 0),
                    "createdDateTime": item.get("createdDateTime"),
                    "lastModifiedDateTime": item.get("lastModifiedDateTime"),
                    "webUrl": item.get("webUrl"),
                    "parentPath": item.get("parentReference", {}).get("path", "")
                }
                if "file" in item:
                    item_info["mimeType"] = item["file"].get("mimeType")
                    item_info["downloadUrl"] = item.get("@microsoft.graph.downloadUrl")
                processed_items.append(item_info)
            return processed_items
        except Exception as e:
            logger.error(f"Error searching files: {str(e)}")
            return []

    async def copy_file_async(self, site_id: str, source_folder: str, source_file: str, dest_folder: str, dest_file: str) -> None:
        """Copy a file from a source to a destination folder (like the original synchronizer)."""
        # Build the source path before the try block so it is always available
        # in the error messages below
        source_path = f"{source_folder}/{source_file}"
        try:
            # First, download the source file
            file_content = await self.download_file_by_path(site_id=site_id, file_path=source_path)
            if not file_content:
                raise Exception(f"Failed to download source file: {source_path}")
            # Upload to the destination
            await self.upload_file(
                site_id=site_id,
                folder_path=dest_folder,
                file_name=dest_file,
                content=file_content
            )
            logger.info(f"File copied: {source_file} -> {dest_file}")
        except Exception as e:
            # Provide more specific error information
            error_msg = str(e)
            if "itemNotFound" in error_msg or "404" in error_msg:
                raise Exception(f"Source file not found (404): {source_path} - {error_msg}")
            else:
                raise Exception(f"Error copying file: {error_msg}")

    async def download_file_by_path(self, site_id: str, file_path: str) -> Optional[bytes]:
        """Download a file by its path within a site."""
        try:
            # Clean the path
            clean_path = file_path.strip('/')
            endpoint = f"sites/{site_id}/drive/root:/{clean_path}:/content"
            # Use a direct HTTP call for file downloads (binary content)
            headers = {
                "Authorization": f"Bearer {self.access_token}",
            }
            url = f"{self.base_url}/{endpoint}"
            logger.debug(f"Downloading file: GET {url}")
            timeout = aiohttp.ClientTimeout(total=30)
            async with aiohttp.ClientSession(timeout=timeout) as session:
                async with session.get(url, headers=headers) as response:
                    if response.status == 200:
                        return await response.read()
                    error_text = await response.text()
                    logger.error(f"File download failed: {response.status} - {error_text}")
                    return None
        except Exception as e:
            logger.error(f"Error downloading file by path: {str(e)}")
            return None
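

# --- Usage sketch (illustrative, not part of the original module) -----------
# A minimal example of how this connector might be driven. It assumes a valid
# Microsoft Graph access token in the GRAPH_TOKEN environment variable; the
# site name "TeamSite" and the "Shared Documents" folder are placeholders.
if __name__ == "__main__":
    import os

    async def _demo() -> None:
        service = SharepointService(access_token=os.environ["GRAPH_TOKEN"])
        site = await service.find_site_by_name("TeamSite")
        if site is not None:
            items = await service.list_folder_contents(site["id"], "Shared Documents")
            for item in items:
                print(f"{item['type']:6} {item['name']}")

    asyncio.run(_demo())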