gateway/modules/services/serviceSharepoint/mainServiceSharepoint.py
2025-10-24 23:57:17 +02:00

489 lines
22 KiB
Python

"""Connector for SharePoint operations using Microsoft Graph API."""
import logging
import aiohttp
import asyncio
from typing import Dict, Any, List, Optional
logger = logging.getLogger(__name__)
class SharepointService:
"""SharePoint connector using Microsoft Graph API for reliable authentication."""
def __init__(self, serviceCenter=None):
"""Initialize SharePoint service without access token.
Args:
serviceCenter: Service center instance for accessing other services
Use setAccessTokenFromConnection() method to configure the access token before making API calls.
"""
self.services = serviceCenter
self.access_token = None
self.base_url = "https://graph.microsoft.com/v1.0"
def setAccessTokenFromConnection(self, userConnection) -> bool:
    """Fetch a fresh token for a connection and store it on this instance.

    Args:
        userConnection: UserConnection object; its ``id`` is passed to
            TokenManager.getFreshToken() to look up the token.

    Returns:
        bool: True when a token was found and ``self.access_token`` was set.
        False when no connection was given, no token was found, or any
        exception occurred (the exception is logged, not raised).
    """
    try:
        if userConnection:
            # Imported at call time, exactly where the token is needed.
            from modules.security.tokenManager import TokenManager

            manager = TokenManager()
            fresh_token = manager.getFreshToken(userConnection.id)
            if fresh_token:
                self.access_token = fresh_token.tokenAccess
                logger.info(f"Access token set for connection {userConnection.id}")
                return True

            logger.error(f"No token found for connection {userConnection.id}")
            return False

        logger.error("UserConnection is required to set access token")
        return False
    except Exception as exc:
        logger.error(f"Error setting access token: {exc!s}")
        return False
async def _make_graph_api_call(self, endpoint: str, method: str = "GET", data: bytes = None) -> Dict[str, Any]:
    """Send one request to Microsoft Graph and return the decoded JSON body.

    Failures are never raised; they are logged and reported to the caller as
    ``{"error": "<message>"}`` so callers can test ``"error" in result``.

    Args:
        endpoint: Path relative to ``self.base_url``; leading slashes are
            stripped to avoid a double slash in the final URL.
        method: HTTP verb. "GET", "PUT" and "POST" are supported
            (case-insensitive). Any other verb yields an error dict instead
            of silently returning None.
        data: Request body for PUT/POST. It is not sent for GET.

    Returns:
        The parsed JSON response when the status is 200 (GET) or 200/201
        (PUT, POST); otherwise a dict with a single "error" key.
    """
    # Status codes treated as success, per supported verb.
    accepted_statuses = {
        "GET": (200,),
        "PUT": (200, 201),
        "POST": (200, 201),
    }
    try:
        if self.access_token is None:
            message = "Access token is not set. Please call setAccessTokenFromConnection() before using the SharePoint service."
            logger.error(message)
            return {"error": message}

        verb = method.upper()
        ok_statuses = accepted_statuses.get(verb)
        if ok_statuses is None:
            # Previously an unknown verb fell through and returned None,
            # which broke every caller's `"error" in result` check.
            message = f"Unsupported HTTP method: {method}"
            logger.error(message)
            return {"error": message}

        # Raw bytes are only sent as a binary upload via PUT; everything
        # else (including requests without a body) is declared as JSON.
        content_type = "application/octet-stream" if data and verb == "PUT" else "application/json"
        headers = {
            "Authorization": f"Bearer {self.access_token}",
            "Content-Type": content_type,
        }

        # Remove leading slash from endpoint to avoid double slash
        url = f"{self.base_url}/{endpoint.lstrip('/')}"
        logger.debug(f"Making Graph API call: {verb} {url}")

        request_kwargs = {"headers": headers}
        if verb != "GET":
            request_kwargs["data"] = data

        timeout = aiohttp.ClientTimeout(total=30)
        async with aiohttp.ClientSession(timeout=timeout) as session:
            async with session.request(verb, url, **request_kwargs) as response:
                if response.status in ok_statuses:
                    return await response.json()
                error_text = await response.text()
                logger.error(f"Graph API call failed: {response.status} - {error_text}")
                return {"error": f"API call failed: {response.status} - {error_text}"}
    except asyncio.TimeoutError:
        logger.error(f"Graph API call timed out after 30 seconds: {endpoint}")
        return {"error": f"API call timed out after 30 seconds: {endpoint}"}
    except Exception as e:
        logger.error(f"Error making Graph API call: {str(e)}")
        return {"error": f"Error making Graph API call: {str(e)}"}
async def discover_sites(self) -> List[Dict[str, Any]]:
    """Return summary records for the sites found by a wildcard site search.

    Calls the Graph endpoint ``sites?search=*`` and reduces every entry of
    the response's ``value`` list to a fixed set of fields.

    Returns:
        A list of dicts with the keys id, displayName, name, webUrl,
        description, createdDateTime and lastModifiedDateTime (missing
        fields are None). An empty list is returned when the API call
        reports an error or an exception occurs.
    """
    summary_fields = (
        "id",
        "displayName",
        "name",
        "webUrl",
        "description",
        "createdDateTime",
        "lastModifiedDateTime",
    )
    try:
        response = await self._make_graph_api_call("sites?search=*")
        if "error" in response:
            logger.error(f"Error discovering SharePoint sites: {response['error']}")
            return []

        raw_sites = response.get("value", [])
        logger.info(f"Discovered {len(raw_sites)} SharePoint sites")

        summaries = []
        for raw_site in raw_sites:
            summary = {field: raw_site.get(field) for field in summary_fields}
            summaries.append(summary)
            logger.debug(f"Site: {summary['displayName']} - {summary['webUrl']}")
        return summaries
    except Exception as exc:
        logger.error(f"Error discovering SharePoint sites: {exc!s}")
        return []
async def find_site_by_name(self, site_name: str) -> Optional[Dict[str, Any]]:
    """Find a site by name: direct Graph lookup first, then discovery matching.

    The name is first used verbatim in ``sites/{site_name}``. If that does
    not return a site, all discoverable sites are listed and compared by
    display name, case-insensitively: an exact match wins over a substring
    match.

    Args:
        site_name: Site identifier or display name to look for. Surrounding
            whitespace is ignored when matching display names.

    Returns:
        A dict with id, displayName, name, webUrl, description,
        createdDateTime and lastModifiedDateTime, or None when nothing
        matches. An empty/blank name never matches during discovery
        (it would otherwise be a substring of every display name).
    """
    try:
        # Try to get the site directly by name using Graph API
        result = await self._make_graph_api_call(f"sites/{site_name}")
        if result and "error" not in result:
            site_info = {
                "id": result.get("id"),
                "displayName": result.get("displayName"),
                "name": result.get("name"),
                "webUrl": result.get("webUrl"),
                "description": result.get("description"),
                "createdDateTime": result.get("createdDateTime"),
                "lastModifiedDateTime": result.get("lastModifiedDateTime")
            }
            logger.info(f"Found site directly: {site_info['displayName']} - {site_info['webUrl']}")
            return site_info
    except Exception as e:
        logger.debug(f"Direct site lookup failed for '{site_name}': {str(e)}")

    # Fallback to discovery if direct lookup fails
    logger.info(f"Direct lookup failed, trying discovery for site: {site_name}")
    sites = await self.discover_sites()
    if not sites:
        logger.warning("No sites discovered")
        return None

    logger.info(f"Discovered {len(sites)} SharePoint sites:")
    for site in sites:
        logger.info(f" - {site.get('displayName', 'Unknown')} (ID: {site.get('id', 'Unknown')})")

    needle = (site_name or "").strip().lower()
    if needle:
        # discover_sites() always sets the "displayName" key, possibly to
        # None, so a .get() default is not enough; fall back with `or`.
        normalized = [
            ((site.get("displayName") or "").strip().lower(), site)
            for site in sites
        ]

        # Try exact match first
        for display_name, site in normalized:
            if display_name == needle:
                logger.info(f"Found exact match: {site.get('displayName')}")
                return site

        # Try partial match
        for display_name, site in normalized:
            if needle in display_name:
                logger.info(f"Found partial match: {site.get('displayName')}")
                return site

    logger.warning(f"No site found matching: {site_name}")
    return None
async def find_site_by_web_url(self, web_url: str) -> Optional[Dict[str, Any]]:
    """Find a SharePoint site using its web URL (useful for guest sites).

    The URL is converted to the Graph address
    ``sites/{hostname}:/sites/{site-path}``, where the site path is
    everything that follows the leading ``sites/`` path segment.

    Args:
        web_url: Site URL, with or without scheme, e.g.
            ``https://contoso.sharepoint.com/sites/Team`` or
            ``contoso.sharepoint.com/sites/Team``.

    Returns:
        A dict with id, displayName, name, webUrl, description,
        createdDateTime and lastModifiedDateTime, or None when the URL is
        not a ``/sites/...`` URL, the site is not found, or an error occurs.
    """
    try:
        from urllib.parse import urlparse

        web_url = (web_url or "").strip()
        # Only add a scheme when none is present; blindly prefixing
        # "https://" to an "http://" URL would make "http" the hostname.
        if not web_url.lower().startswith(("http://", "https://")):
            web_url = f"https://{web_url}"

        # Parse the URL to extract hostname and site path
        parsed = urlparse(web_url)
        hostname = parsed.hostname
        # Drop empty segments so stray or doubled slashes do not matter.
        path_parts = [part for part in parsed.path.split('/') if part]

        if not hostname or len(path_parts) < 2 or path_parts[0].lower() != 'sites':
            logger.error(f"Invalid SharePoint URL format: {web_url}")
            return None

        site_path = '/'.join(path_parts[1:])  # Everything after 'sites/'
        endpoint = f"sites/{hostname}:/sites/{site_path}"
        logger.debug(f"Trying web URL format: {endpoint}")

        result = await self._make_graph_api_call(endpoint)
        if result and "error" not in result:
            site_info = {
                "id": result.get("id"),
                "displayName": result.get("displayName"),
                "name": result.get("name"),
                "webUrl": result.get("webUrl"),
                "description": result.get("description"),
                "createdDateTime": result.get("createdDateTime"),
                "lastModifiedDateTime": result.get("lastModifiedDateTime")
            }
            logger.info(f"Found site by web URL: {site_info['displayName']} - {site_info['webUrl']} (ID: {site_info['id']})")
            return site_info

        logger.warning(f"Site not found using web URL: {web_url}")
        return None
    except Exception as e:
        logger.error(f"Error finding site by web URL: {str(e)}")
        return None
async def find_site_by_url(self, hostname: str, site_path: str) -> Optional[Dict[str, Any]]:
    """Find a SharePoint site by hostname and site path, trying several spellings.

    The address ``sites/{hostname}:/sites/{site_path}`` is tried as given,
    with a trailing slash, lowercased, and lowercased with a trailing
    slash, in that order. Spellings that turn out identical (e.g. when
    ``site_path`` is already lowercase) are requested only once.

    Args:
        hostname: SharePoint host name.
        site_path: Path of the site below ``/sites/``.

    Returns:
        A dict with id, displayName, name, webUrl, description,
        createdDateTime and lastModifiedDateTime for the first spelling that
        resolves, or None when none does or an error occurs.
    """
    try:
        # For guest sites, try different URL formats
        url_formats = [
            f"sites/{hostname}:/sites/{site_path}",  # Standard format
            f"sites/{hostname}:/sites/{site_path}/",  # With trailing slash
            f"sites/{hostname}:/sites/{site_path.lower()}",  # Lowercase
            f"sites/{hostname}:/sites/{site_path.lower()}/",  # Lowercase with slash
        ]

        # dict.fromkeys() drops duplicates while keeping first-seen order,
        # so an identical endpoint is never requested twice.
        for endpoint in dict.fromkeys(url_formats):
            logger.debug(f"Trying URL format: {endpoint}")
            result = await self._make_graph_api_call(endpoint)
            if result and "error" not in result:
                site_info = {
                    "id": result.get("id"),
                    "displayName": result.get("displayName"),
                    "name": result.get("name"),
                    "webUrl": result.get("webUrl"),
                    "description": result.get("description"),
                    "createdDateTime": result.get("createdDateTime"),
                    "lastModifiedDateTime": result.get("lastModifiedDateTime")
                }
                logger.info(f"Found site by URL: {site_info['displayName']} - {site_info['webUrl']} (ID: {site_info['id']})")
                return site_info

            # A non-dict result must not abort the remaining attempts.
            error_detail = result.get('error', 'Unknown error') if isinstance(result, dict) else 'Unknown error'
            logger.debug(f"URL format failed: {endpoint} - {error_detail}")

        logger.warning(f"Site not found using any URL format for: {hostname}:/sites/{site_path}")
        return None
    except Exception as e:
        logger.error(f"Error finding site by URL: {str(e)}")
        return None
async def get_folder_by_path(self, site_id: str, folder_path: str) -> Optional[Dict[str, Any]]:
    """Get folder information by path within a site's default drive.

    Args:
        site_id: Graph identifier of the site.
        folder_path: Folder path relative to the drive root. Leading and
            trailing slashes are ignored; an empty path or "/" addresses
            the drive root itself.

    Returns:
        The item as returned by the Graph API, or None when the API
        reports an error or an exception occurs.
    """
    try:
        # Clean the path
        clean_path = folder_path.strip('/') if folder_path else ""
        if clean_path:
            endpoint = f"sites/{site_id}/drive/root:/{clean_path}"
        else:
            # "root:/" with nothing after it is not a path address; the
            # root is addressed as plain "root", as in list_folder_contents().
            endpoint = f"sites/{site_id}/drive/root"

        result = await self._make_graph_api_call(endpoint)
        if "error" in result:
            logger.warning(f"Folder not found at path {folder_path}: {result['error']}")
            return None
        return result
    except Exception as e:
        logger.error(f"Error getting folder by path: {str(e)}")
        return None
async def upload_file(self, site_id: str, folder_path: str, file_name: str, content: bytes) -> Dict[str, Any]:
    """Upload a file to a folder of the site's default drive.

    The content is sent with a single PUT to
    ``sites/{site_id}/drive/root:/{folder}/{file_name}:/content``.

    Args:
        site_id: Graph identifier of the site.
        folder_path: Target folder relative to the drive root. Leading and
            trailing slashes are ignored; an empty path or "/" uploads into
            the drive root.
        file_name: Name of the file to create or replace.
        content: Raw file bytes.

    Returns:
        The API response on success, or a dict with an "error" key on
        failure. Errors are logged and never raised.
    """
    try:
        if not file_name:
            message = "Error uploading file: file name is required"
            logger.error(message)
            return {"error": message}

        # Clean the path; with an empty folder the original join produced
        # "root://name", so only prepend the folder when there is one.
        clean_path = folder_path.strip('/') if folder_path else ""
        upload_path = f"{clean_path}/{file_name}" if clean_path else file_name
        endpoint = f"sites/{site_id}/drive/root:/{upload_path}:/content"
        logger.info(f"Uploading file to: {endpoint}")

        result = await self._make_graph_api_call(endpoint, method="PUT", data=content)
        if "error" in result:
            logger.error(f"Upload failed: {result['error']}")
            return result

        logger.info(f"File uploaded successfully: {file_name}")
        return result
    except Exception as e:
        logger.error(f"Error uploading file: {str(e)}")
        return {"error": f"Error uploading file: {str(e)}"}
async def download_file(self, site_id: str, file_id: str) -> Optional[bytes]:
    """Fetch the raw bytes of a drive item identified by its item id.

    Args:
        site_id: Graph identifier of the site.
        file_id: Identifier of the drive item to download.

    Returns:
        The response body as bytes when the request returns HTTP 200;
        None when no access token is set, the status is anything else,
        or an exception occurs (all of which are logged).
    """
    try:
        if self.access_token is None:
            logger.error("Access token is not set. Please call setAccessTokenFromConnection() before using the SharePoint service.")
            return None

        item_endpoint = f"sites/{site_id}/drive/items/{file_id}/content"
        auth_headers = {"Authorization": f"Bearer {self.access_token}"}
        request_timeout = aiohttp.ClientTimeout(total=30)
        async with aiohttp.ClientSession(timeout=request_timeout) as http:
            async with http.get(f"{self.base_url}/{item_endpoint}", headers=auth_headers) as resp:
                if resp.status != 200:
                    logger.error(f"Download failed: {resp.status}")
                    return None
                return await resp.read()
    except Exception as exc:
        logger.error(f"Error downloading file: {exc!s}")
        return None
async def list_folder_contents(self, site_id: str, folder_path: str = "") -> List[Dict[str, Any]]:
    """List the children of a folder in the site's default drive.

    Args:
        site_id: Graph identifier of the site.
        folder_path: Folder path relative to the drive root. Leading and
            trailing slashes are ignored; an empty path or "/" lists the
            drive root.

    Returns:
        One dict per child with id, name, type ("folder" or "file"), size,
        createdDateTime, lastModifiedDateTime and webUrl. Files additionally
        carry mimeType and downloadUrl; folders carry childCount. An empty
        list is returned when the API reports an error or an exception
        occurs, so the result can always be iterated.
    """
    try:
        clean_path = folder_path.strip('/') if folder_path else ""
        if clean_path:
            endpoint = f"sites/{site_id}/drive/root:/{clean_path}:/children"
        else:
            endpoint = f"sites/{site_id}/drive/root/children"

        result = await self._make_graph_api_call(endpoint)
        if "error" in result:
            logger.warning(f"Failed to list folder contents: {result['error']}")
            # Previously None, which contradicted the declared return type
            # and the [] returned from the except branch below.
            return []

        processed_items = []
        for item in result.get("value", []):
            # Determine if it's a folder or file
            is_folder = "folder" in item
            item_info = {
                "id": item.get("id"),
                "name": item.get("name"),
                "type": "folder" if is_folder else "file",
                "size": item.get("size", 0),
                "createdDateTime": item.get("createdDateTime"),
                "lastModifiedDateTime": item.get("lastModifiedDateTime"),
                "webUrl": item.get("webUrl")
            }
            if "file" in item:
                item_info["mimeType"] = item["file"].get("mimeType")
                item_info["downloadUrl"] = item.get("@microsoft.graph.downloadUrl")
            if is_folder:
                item_info["childCount"] = item["folder"].get("childCount", 0)
            processed_items.append(item_info)
        return processed_items
    except Exception as e:
        logger.error(f"Error listing folder contents: {str(e)}")
        return []
async def search_files(self, site_id: str, query: str) -> List[Dict[str, Any]]:
    """Run a drive search on a site and return summarized hits.

    Args:
        site_id: Graph identifier of the site.
        query: Search text; single quotes are doubled before it is placed
            inside the ``search(q='...')`` expression.

    Returns:
        One dict per hit with id, name, type ("folder" or "file"), size,
        createdDateTime, lastModifiedDateTime, webUrl and parentPath; file
        hits additionally carry mimeType and downloadUrl. An empty list is
        returned when the API reports an error or an exception occurs.
    """

    def summarize(entry: Dict[str, Any]) -> Dict[str, Any]:
        """Reduce one raw search hit to the fields exposed by this method."""
        summary = {
            "id": entry.get("id"),
            "name": entry.get("name"),
            "type": "file" if "folder" not in entry else "folder",
            "size": entry.get("size", 0),
            "createdDateTime": entry.get("createdDateTime"),
            "lastModifiedDateTime": entry.get("lastModifiedDateTime"),
            "webUrl": entry.get("webUrl"),
            "parentPath": entry.get("parentReference", {}).get("path", "")
        }
        if "file" in entry:
            summary["mimeType"] = entry["file"].get("mimeType")
            summary["downloadUrl"] = entry.get("@microsoft.graph.downloadUrl")
        return summary

    try:
        # Escape single quotes for OData by doubling them.
        escaped_query = "''".join(query.split("'"))
        response = await self._make_graph_api_call(
            f"sites/{site_id}/drive/root/search(q='{escaped_query}')"
        )
        if "error" in response:
            logger.warning(f"Search failed: {response['error']}")
            return []
        return [summarize(hit) for hit in response.get("value", [])]
    except Exception as exc:
        logger.error(f"Error searching files: {exc!s}")
        return []
async def copy_file_async(self, site_id: str, source_folder: str, source_file: str, dest_folder: str, dest_file: str) -> None:
    """Copy a file within a site by downloading it and uploading it again.

    Args:
        site_id: Graph identifier of the site.
        source_folder: Folder containing the file to copy.
        source_file: Name of the file to copy.
        dest_folder: Folder to copy the file into.
        dest_file: Name of the copy.

    Raises:
        Exception: When the source cannot be downloaded, when the upload
            reports an error, or when any other error occurs. The original
            exception, if any, is attached as ``__cause__``.
    """
    source_path = f"{source_folder}/{source_file}"
    upload_error = None
    try:
        # First, download the source file
        file_content = await self.download_file_by_path(site_id=site_id, file_path=source_path)
        # download_file_by_path() signals failure with None; empty bytes
        # are a legitimate (empty) file and must still be copied.
        if file_content is None:
            raise Exception(f"Failed to download source file: {source_path}")

        # Upload to destination
        upload_result = await self.upload_file(
            site_id=site_id,
            folder_path=dest_folder,
            file_name=dest_file,
            content=file_content
        )
        # upload_file() never raises; it reports failure via an "error" key.
        if isinstance(upload_result, dict):
            upload_error = upload_result.get("error")
    except Exception as e:
        # Provide more specific error information
        error_msg = str(e)
        if "itemNotFound" in error_msg or "404" in error_msg:
            raise Exception(f"Source file not found (404): {source_path} - {error_msg}") from e
        raise Exception(f"Error copying file: {error_msg}") from e

    # Raised outside the try block so an upload failure is not re-labelled
    # as a missing *source* file by the 404 check above.
    if upload_error:
        raise Exception(
            f"Error copying file: Failed to upload destination file: {dest_folder}/{dest_file} - {upload_error}"
        )

    logger.info(f"File copied: {source_file} -> {dest_file}")
async def download_file_by_path(self, site_id: str, file_path: str) -> Optional[bytes]:
    """Fetch the raw bytes of a file addressed by its path in the site's drive.

    Args:
        site_id: Graph identifier of the site.
        file_path: File path relative to the drive root; leading and
            trailing slashes are removed.

    Returns:
        The response body as bytes when the request returns HTTP 200;
        None when no access token is set, the status is anything else,
        or an exception occurs (all of which are logged).
    """
    try:
        if self.access_token is None:
            logger.error("Access token is not set. Please call setAccessTokenFromConnection() before using the SharePoint service.")
            return None

        relative_path = file_path.strip('/')
        # The endpoint never starts with a slash, so it can be joined to
        # the base URL directly.
        url = f"{self.base_url}/sites/{site_id}/drive/root:/{relative_path}:/content"
        # Direct HTTP call: the response is binary content, not JSON.
        auth_headers = {"Authorization": f"Bearer {self.access_token}"}
        logger.debug(f"Downloading file: GET {url}")

        request_timeout = aiohttp.ClientTimeout(total=30)
        async with aiohttp.ClientSession(timeout=request_timeout) as http:
            async with http.get(url, headers=auth_headers) as resp:
                if resp.status != 200:
                    failure_body = await resp.text()
                    logger.error(f"File download failed: {resp.status} - {failure_body}")
                    return None
                return await resp.read()
    except Exception as exc:
        logger.error(f"Error downloading file by path: {exc!s}")
        return None