gateway/modules/services/serviceSharepoint/mainServiceSharepoint.py
2025-09-24 23:18:10 +02:00

492 lines
22 KiB
Python

"""Connector for SharePoint operations using Microsoft Graph API."""
import logging
import json
import aiohttp
import asyncio
from typing import Dict, Any, List, Optional
from datetime import datetime, UTC
logger = logging.getLogger(__name__)
class SharepointService:
    """Client for SharePoint operations built on the Microsoft Graph v1.0 API."""

    def __init__(self, serviceCenter=None):
        """Create a service instance that has no access token yet.

        Args:
            serviceCenter: Service center instance for accessing other
                services. It is stored unchanged.

        A token must be configured with setAccessToken() before any API
        call is made.
        """
        # Root URL that every Graph endpoint is appended to.
        self.base_url = "https://graph.microsoft.com/v1.0"
        # Bearer token; stays None until setAccessToken() succeeds.
        self.access_token = None
        self.serviceCenter = serviceCenter
def setAccessToken(self, userConnection, interfaceApp) -> bool:
    """Fetch a fresh token for a connection and store it on this service.

    Args:
        userConnection: Connection object; its ``id`` selects the token.
        interfaceApp: Passed through to ``TokenManager.getFreshToken``.

    Returns:
        bool: True when a token was obtained and stored. False when the
        connection is missing, no token is returned, or an exception occurs
        (the exception is logged, not propagated).
    """
    try:
        if userConnection:
            # Imported at call time rather than at module import time.
            from modules.security.tokenManager import TokenManager

            fresh_token = TokenManager().getFreshToken(interfaceApp, userConnection.id)
            if fresh_token:
                self.access_token = fresh_token.tokenAccess
                logger.info(f"Access token set for connection {userConnection.id}")
                return True

            logger.error(f"No token found for connection {userConnection.id}")
            return False

        logger.error("UserConnection is required to set access token")
        return False
    except Exception as e:
        logger.error(f"Error setting access token: {str(e)}")
        return False
async def _make_graph_api_call(self, endpoint: str, method: str = "GET", data: bytes = None) -> Dict[str, Any]:
    """Send one request to Microsoft Graph and return the decoded JSON body.

    Args:
        endpoint: Path relative to ``self.base_url``; leading slashes are
            stripped so the joined URL never contains a double slash.
        method: "GET", "PUT" or "POST" (case-insensitive).
        data: Request body for PUT/POST. Not sent for GET.

    Returns:
        The parsed JSON response when the status is 200 (GET) or 200/201
        (PUT/POST). On any failure -- token not set, unsupported method,
        unexpected status, timeout, or another ``Exception`` -- a dict with a
        single ``"error"`` key describing the problem, so callers can always
        test ``"error" in result``.
    """
    # Status codes treated as success for each supported HTTP method.
    success_codes = {"GET": (200,), "PUT": (200, 201), "POST": (200, 201)}
    try:
        if self.access_token is None:
            message = "Access token is not set. Please call setAccessToken() before using the SharePoint service."
            logger.error(message)
            return {"error": message}

        method = method.upper()
        if method not in success_codes:
            # Reject unknown verbs explicitly; falling through would return
            # None and break every caller that inspects the result dict.
            message = f"Unsupported HTTP method: {method}"
            logger.error(message)
            return {"error": message}

        # Only a PUT that carries a body is sent as raw bytes (file upload);
        # every other request is declared as JSON.
        is_binary_upload = bool(data) and method == "PUT"
        headers = {
            "Authorization": f"Bearer {self.access_token}",
            "Content-Type": "application/octet-stream" if is_binary_upload else "application/json",
        }

        url = f"{self.base_url}/{endpoint.lstrip('/')}"
        logger.debug(f"Making Graph API call: {method} {url}")

        # GET requests never carry a body, even if the caller supplied one.
        body = None if method == "GET" else data
        timeout = aiohttp.ClientTimeout(total=30)
        async with aiohttp.ClientSession(timeout=timeout) as session:
            async with session.request(method, url, headers=headers, data=body) as response:
                if response.status in success_codes[method]:
                    return await response.json()
                error_text = await response.text()
                logger.error(f"Graph API call failed: {response.status} - {error_text}")
                return {"error": f"API call failed: {response.status} - {error_text}"}
    except asyncio.TimeoutError:
        logger.error(f"Graph API call timed out after 30 seconds: {endpoint}")
        return {"error": f"API call timed out after 30 seconds: {endpoint}"}
    except Exception as e:
        logger.error(f"Error making Graph API call: {str(e)}")
        return {"error": f"Error making Graph API call: {str(e)}"}
async def discover_sites(self) -> List[Dict[str, Any]]:
    """Return a summary of every site reported by the ``sites?search=*`` query.

    Returns:
        One dict per site holding ``id``, ``displayName``, ``name``,
        ``webUrl``, ``description``, ``createdDateTime`` and
        ``lastModifiedDateTime`` (missing fields become None). An empty list
        is returned when the API reports an error or an exception occurs.
        Only the ``value`` array of the single response is read.
    """
    summary_fields = (
        "id",
        "displayName",
        "name",
        "webUrl",
        "description",
        "createdDateTime",
        "lastModifiedDateTime",
    )
    try:
        response = await self._make_graph_api_call("sites?search=*")
        if "error" in response:
            logger.error(f"Error discovering SharePoint sites: {response['error']}")
            return []

        raw_sites = response.get("value", [])
        logger.info(f"Discovered {len(raw_sites)} SharePoint sites")

        summaries = []
        for raw_site in raw_sites:
            # Keep only the fields the rest of the service relies on.
            summary = {field: raw_site.get(field) for field in summary_fields}
            summaries.append(summary)
            logger.debug(f"Site: {summary['displayName']} - {summary['webUrl']}")
        return summaries
    except Exception as e:
        logger.error(f"Error discovering SharePoint sites: {str(e)}")
        return []
async def find_site_by_name(self, site_name: str) -> Optional[Dict[str, Any]]:
    """Find a SharePoint site by name.

    The name is first tried as a direct ``sites/{name}`` lookup. If that
    fails, all discoverable sites are listed and matched against their
    ``displayName``: a case-insensitive exact match wins, otherwise the first
    site whose display name contains the requested name is returned.

    Args:
        site_name: Site identifier or display name to look for.

    Returns:
        A site summary dict, or None when the name is blank or nothing
        matches.
    """
    wanted = (site_name or "").strip().lower()
    if not wanted:
        # A blank name would "partially match" every site and return an
        # arbitrary one, so refuse it up front.
        logger.warning("No site name provided for lookup")
        return None

    try:
        # Try to get the site directly by name using Graph API
        endpoint = f"sites/{site_name.strip()}"
        result = await self._make_graph_api_call(endpoint)
        if result and "error" not in result:
            site_info = {
                "id": result.get("id"),
                "displayName": result.get("displayName"),
                "name": result.get("name"),
                "webUrl": result.get("webUrl"),
                "description": result.get("description"),
                "createdDateTime": result.get("createdDateTime"),
                "lastModifiedDateTime": result.get("lastModifiedDateTime"),
            }
            logger.info(f"Found site directly: {site_info['displayName']} - {site_info['webUrl']}")
            return site_info
    except Exception as e:
        logger.debug(f"Direct site lookup failed for '{site_name}': {str(e)}")

    # Fallback to discovery if direct lookup fails
    logger.info(f"Direct lookup failed, trying discovery for site: {site_name}")
    sites = await self.discover_sites()
    if not sites:
        logger.warning("No sites discovered")
        return None

    logger.info(f"Discovered {len(sites)} SharePoint sites:")
    for site in sites:
        logger.info(f"  - {site.get('displayName', 'Unknown')} (ID: {site.get('id', 'Unknown')})")

    # Normalise each display name once. The key may be present with a None
    # value, so `or ""` is required -- a .get() default would not apply.
    candidates = [((site.get("displayName") or "").strip().lower(), site) for site in sites]

    # Try exact match first
    for display_name, site in candidates:
        if display_name == wanted:
            logger.info(f"Found exact match: {site.get('displayName')}")
            return site

    # Try partial match
    for display_name, site in candidates:
        if wanted in display_name:
            logger.info(f"Found partial match: {site.get('displayName')}")
            return site

    logger.warning(f"No site found matching: {site_name}")
    return None
async def find_site_by_web_url(self, web_url: str) -> Optional[Dict[str, Any]]:
    """Find a SharePoint site using its web URL (useful for guest sites).

    The URL is converted to the Graph form
    ``sites/{hostname}:/sites/{site-path}``.

    Args:
        web_url: Site URL such as ``https://host/sites/name``. A missing
            scheme is assumed to be https; an existing scheme (including
            ``http://``) is left as given, since only host and path are used.

    Returns:
        A site summary dict, or None when the URL has no hostname, does not
        start with a ``/sites/`` path, the lookup fails, or an exception
        occurs.
    """
    try:
        from urllib.parse import urlparse

        web_url = web_url.strip()
        # Only add a scheme when none is present; blindly prefixing would
        # turn "http://host/..." into "https://http://host/...".
        if "://" not in web_url:
            web_url = f"https://{web_url}"

        # Parse the URL to extract hostname and site path
        parsed = urlparse(web_url)
        hostname = parsed.hostname
        path_parts = parsed.path.strip('/').split('/')

        if not hostname or len(path_parts) < 2 or path_parts[0] != 'sites':
            logger.error(f"Invalid SharePoint URL format: {web_url}")
            return None

        site_path = '/'.join(path_parts[1:])  # Everything after 'sites/'
        endpoint = f"sites/{hostname}:/sites/{site_path}"
        logger.debug(f"Trying web URL format: {endpoint}")

        result = await self._make_graph_api_call(endpoint)
        if result and "error" not in result:
            site_info = {
                "id": result.get("id"),
                "displayName": result.get("displayName"),
                "name": result.get("name"),
                "webUrl": result.get("webUrl"),
                "description": result.get("description"),
                "createdDateTime": result.get("createdDateTime"),
                "lastModifiedDateTime": result.get("lastModifiedDateTime"),
            }
            logger.info(f"Found site by web URL: {site_info['displayName']} - {site_info['webUrl']} (ID: {site_info['id']})")
            return site_info

        logger.warning(f"Site not found using web URL: {web_url}")
        return None
    except Exception as e:
        logger.error(f"Error finding site by web URL: {str(e)}")
        return None
async def find_site_by_url(self, hostname: str, site_path: str) -> Optional[Dict[str, Any]]:
    """Find a SharePoint site from its hostname and site path.

    Several spellings of ``sites/{hostname}:/sites/{site_path}`` are tried in
    order (as given, with trailing slash, lowercase, lowercase with trailing
    slash) and the first successful response wins.

    Args:
        hostname: SharePoint host name.
        site_path: Path of the site below ``/sites/``.

    Returns:
        A site summary dict, or None when every variant fails or an
        exception occurs.
    """
    try:
        lowered_path = site_path.lower()
        # dict.fromkeys keeps the order but drops duplicates, so an already
        # lowercase path does not cause the same request to be sent twice.
        url_formats = list(dict.fromkeys([
            f"sites/{hostname}:/sites/{site_path}",    # Standard format
            f"sites/{hostname}:/sites/{site_path}/",   # With trailing slash
            f"sites/{hostname}:/sites/{lowered_path}",   # Lowercase
            f"sites/{hostname}:/sites/{lowered_path}/",  # Lowercase with slash
        ]))

        for endpoint in url_formats:
            logger.debug(f"Trying URL format: {endpoint}")
            result = await self._make_graph_api_call(endpoint)
            if result and "error" not in result:
                site_info = {
                    "id": result.get("id"),
                    "displayName": result.get("displayName"),
                    "name": result.get("name"),
                    "webUrl": result.get("webUrl"),
                    "description": result.get("description"),
                    "createdDateTime": result.get("createdDateTime"),
                    "lastModifiedDateTime": result.get("lastModifiedDateTime"),
                }
                logger.info(f"Found site by URL: {site_info['displayName']} - {site_info['webUrl']} (ID: {site_info['id']})")
                return site_info
            # `result or {}` keeps the loop going even for an empty/None reply.
            logger.debug(f"URL format failed: {endpoint} - {(result or {}).get('error', 'Unknown error')}")

        logger.warning(f"Site not found using any URL format for: {hostname}:/sites/{site_path}")
        return None
    except Exception as e:
        logger.error(f"Error finding site by URL: {str(e)}")
        return None
async def get_folder_by_path(self, site_id: str, folder_path: str) -> Optional[Dict[str, Any]]:
    """Get the drive item found at a path within a site's default drive.

    Args:
        site_id: Graph identifier of the site.
        folder_path: Path relative to the drive root. Leading and trailing
            slashes are ignored; an empty path or "/" addresses the drive
            root itself.

    Returns:
        The item as returned by Graph, or None when the API reports an error
        or an exception occurs.
    """
    try:
        # Clean the path
        clean_path = (folder_path or "").strip('/')
        if clean_path:
            endpoint = f"sites/{site_id}/drive/root:/{clean_path}"
        else:
            # "root:/" with nothing after it is not a usable path address;
            # ask for the root item directly instead.
            endpoint = f"sites/{site_id}/drive/root"

        result = await self._make_graph_api_call(endpoint)
        if "error" in result:
            logger.warning(f"Folder not found at path {folder_path}: {result['error']}")
            return None
        return result
    except Exception as e:
        logger.error(f"Error getting folder by path: {str(e)}")
        return None
async def upload_file(self, site_id: str, folder_path: str, file_name: str, content: bytes) -> Dict[str, Any]:
    """Upload a file to a folder of the site's default drive with one PUT.

    Args:
        site_id: Graph identifier of the site.
        folder_path: Destination folder relative to the drive root. Leading
            and trailing slashes are ignored; an empty path or "/" uploads
            into the drive root.
        file_name: Name of the file to create or replace.
        content: Raw file bytes.

    Returns:
        The Graph response for the uploaded item, or a dict with an
        ``"error"`` key when the upload fails or an exception occurs.

    NOTE(review): the whole content is sent in a single request; Graph
    documents a size limit for this endpoint -- confirm large files are not
    expected here.
    """
    try:
        # Clean the path
        folder = (folder_path or "").strip('/')
        # Join only when a folder exists, otherwise the endpoint would
        # contain "root://name" with a doubled slash.
        upload_path = f"{folder}/{file_name}" if folder else file_name
        endpoint = f"sites/{site_id}/drive/root:/{upload_path}:/content"
        logger.info(f"Uploading file to: {endpoint}")

        result = await self._make_graph_api_call(endpoint, method="PUT", data=content)
        if "error" in result:
            logger.error(f"Upload failed: {result['error']}")
            return result

        logger.info(f"File uploaded successfully: {file_name}")
        return result
    except Exception as e:
        logger.error(f"Error uploading file: {str(e)}")
        return {"error": f"Error uploading file: {str(e)}"}
async def download_file(self, site_id: str, file_id: str) -> Optional[bytes]:
    """Fetch the raw content of a drive item identified by its item id.

    Args:
        site_id: Graph identifier of the site.
        file_id: Identifier of the drive item to download.

    Returns:
        The file bytes on HTTP 200; None when no access token is set, the
        response has any other status, or an exception occurs.
    """
    try:
        if self.access_token is None:
            logger.error("Access token is not set. Please call setAccessToken() before using the SharePoint service.")
            return None

        url = f"{self.base_url}/sites/{site_id}/drive/items/{file_id}/content"
        auth_headers = {"Authorization": f"Bearer {self.access_token}"}

        # 30 second budget for the whole request.
        async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=30)) as session:
            async with session.get(url, headers=auth_headers) as response:
                if response.status != 200:
                    logger.error(f"Download failed: {response.status}")
                    return None
                return await response.read()
    except Exception as e:
        logger.error(f"Error downloading file: {str(e)}")
        return None
async def list_folder_contents(self, site_id: str, folder_path: str = "") -> List[Dict[str, Any]]:
    """List the children of a folder in the site's default drive.

    Args:
        site_id: Graph identifier of the site.
        folder_path: Folder path relative to the drive root. Leading and
            trailing slashes are ignored; an empty path or "/" lists the
            drive root.

    Returns:
        One dict per child with ``id``, ``name``, ``type`` ("folder" or
        "file"), ``size``, ``createdDateTime``, ``lastModifiedDateTime`` and
        ``webUrl``. Files additionally carry ``mimeType`` and
        ``downloadUrl``; folders carry ``childCount``. An empty list is
        returned when the API reports an error or an exception occurs, so
        the result is always iterable.
    """
    try:
        clean_path = (folder_path or "").strip('/')
        if clean_path:
            endpoint = f"sites/{site_id}/drive/root:/{clean_path}:/children"
        else:
            endpoint = f"sites/{site_id}/drive/root/children"

        result = await self._make_graph_api_call(endpoint)
        if "error" in result:
            logger.warning(f"Failed to list folder contents: {result['error']}")
            # Same failure value as the exception path below; returning None
            # here would contradict the declared list return type.
            return []

        processed_items = []
        for item in result.get("value", []):
            # An item is a folder when it carries the "folder" facet.
            is_folder = "folder" in item
            item_info = {
                "id": item.get("id"),
                "name": item.get("name"),
                "type": "folder" if is_folder else "file",
                "size": item.get("size", 0),
                "createdDateTime": item.get("createdDateTime"),
                "lastModifiedDateTime": item.get("lastModifiedDateTime"),
                "webUrl": item.get("webUrl"),
            }
            if "file" in item:
                item_info["mimeType"] = item["file"].get("mimeType")
                item_info["downloadUrl"] = item.get("@microsoft.graph.downloadUrl")
            if is_folder:
                item_info["childCount"] = item["folder"].get("childCount", 0)
            processed_items.append(item_info)
        return processed_items
    except Exception as e:
        logger.error(f"Error listing folder contents: {str(e)}")
        return []
async def search_files(self, site_id: str, query: str) -> List[Dict[str, Any]]:
    """Search the site's default drive for items matching a query.

    Args:
        site_id: Graph identifier of the site.
        query: Free-text search string. Single quotes are doubled for OData
            and the text is percent-encoded before it is placed in the URL.

    Returns:
        One dict per hit with ``id``, ``name``, ``type`` ("folder" or
        "file"), ``size``, ``createdDateTime``, ``lastModifiedDateTime``,
        ``webUrl`` and ``parentPath``; files additionally carry ``mimeType``
        and ``downloadUrl``. An empty list is returned when the API reports
        an error or an exception occurs.
    """
    try:
        from urllib.parse import quote

        search_query = query.replace("'", "''")  # Escape single quotes for OData
        # The query becomes part of the URL path, so characters such as
        # '#', '?' or '%' must be encoded or they would cut off or alter the
        # request URL.
        encoded_query = quote(search_query, safe="")
        endpoint = f"sites/{site_id}/drive/root/search(q='{encoded_query}')"

        result = await self._make_graph_api_call(endpoint)
        if "error" in result:
            logger.warning(f"Search failed: {result['error']}")
            return []

        processed_items = []
        for item in result.get("value", []):
            item_info = {
                "id": item.get("id"),
                "name": item.get("name"),
                "type": "folder" if "folder" in item else "file",
                "size": item.get("size", 0),
                "createdDateTime": item.get("createdDateTime"),
                "lastModifiedDateTime": item.get("lastModifiedDateTime"),
                "webUrl": item.get("webUrl"),
                "parentPath": item.get("parentReference", {}).get("path", ""),
            }
            if "file" in item:
                item_info["mimeType"] = item["file"].get("mimeType")
                item_info["downloadUrl"] = item.get("@microsoft.graph.downloadUrl")
            processed_items.append(item_info)
        return processed_items
    except Exception as e:
        logger.error(f"Error searching files: {str(e)}")
        return []
async def copy_file_async(self, site_id: str, source_folder: str, source_file: str, dest_folder: str, dest_file: str) -> None:
    """Copy a file within a site by downloading it and uploading it again.

    Args:
        site_id: Graph identifier of the site.
        source_folder: Folder containing the file to copy.
        source_file: Name of the file to copy.
        dest_folder: Folder that receives the copy.
        dest_file: Name of the copy.

    Raises:
        Exception: When the source cannot be downloaded or the upload
            reports an error. The original exception is chained as the
            cause.
    """
    # Built outside the try block so the error handler can always refer to it.
    source_path = f"{source_folder}/{source_file}"
    upload_started = False
    try:
        # First, download the source file
        file_content = await self.download_file_by_path(site_id=site_id, file_path=source_path)
        # Only None signals a failed download; b"" is a valid empty file.
        if file_content is None:
            raise Exception(f"Failed to download source file: {source_path}")

        # Upload to destination
        upload_started = True
        upload_result = await self.upload_file(
            site_id=site_id,
            folder_path=dest_folder,
            file_name=dest_file,
            content=file_content,
        )
        # upload_file reports failures through an "error" key instead of
        # raising, so it has to be checked before claiming success.
        if isinstance(upload_result, dict) and "error" in upload_result:
            raise Exception(
                f"Failed to upload destination file: {dest_folder}/{dest_file} - {upload_result['error']}"
            )

        logger.info(f"File copied: {source_file} -> {dest_file}")
    except Exception as e:
        # Provide more specific error information
        error_msg = str(e)
        # A 404 during upload concerns the destination, not the source.
        if not upload_started and ("itemNotFound" in error_msg or "404" in error_msg):
            raise Exception(f"Source file not found (404): {source_path} - {error_msg}") from e
        raise Exception(f"Error copying file: {error_msg}") from e
async def download_file_by_path(self, site_id: str, file_path: str) -> Optional[bytes]:
    """Fetch the raw content of a file addressed by its path in a site.

    Args:
        site_id: Graph identifier of the site.
        file_path: File path relative to the drive root; leading and
            trailing slashes are removed.

    Returns:
        The file bytes on HTTP 200; None when no access token is set, the
        response has any other status, or an exception occurs.
    """
    try:
        if self.access_token is None:
            logger.error("Access token is not set. Please call setAccessToken() before using the SharePoint service.")
            return None

        relative_path = file_path.strip('/')
        # The body is binary, so the request is made directly here and the
        # response is read as bytes rather than decoded as JSON.
        url = f"{self.base_url}/sites/{site_id}/drive/root:/{relative_path}:/content"
        auth_headers = {"Authorization": f"Bearer {self.access_token}"}
        logger.debug(f"Downloading file: GET {url}")

        # 30 second budget for the whole request.
        async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=30)) as session:
            async with session.get(url, headers=auth_headers) as response:
                if response.status != 200:
                    failure_body = await response.text()
                    logger.error(f"File download failed: {response.status} - {failure_body}")
                    return None
                return await response.read()
    except Exception as e:
        logger.error(f"Error downloading file by path: {str(e)}")
        return None