"""Connector for SharePoint operations using Microsoft Graph API."""

import logging
import json
import aiohttp
import asyncio
from typing import Dict, Any, List, Optional
from datetime import datetime, UTC

logger = logging.getLogger(__name__)

# Message used (logged and returned) when an API call is attempted before a
# token has been configured.
_TOKEN_NOT_SET_MSG = (
    "Access token is not set. Please call setAccessTokenFromConnection() "
    "before using the SharePoint service."
)

# Total per-request timeout, in seconds, applied to every HTTP call.
_REQUEST_TIMEOUT_SECONDS = 30

# HTTP status codes treated as success, per supported HTTP method.
_SUCCESS_STATUSES = {
    "GET": (200,),
    "PUT": (200, 201),
    "POST": (200, 201),
}

# Keys copied from a Graph "site" resource into the dicts returned to callers.
_SITE_FIELDS = (
    "id",
    "displayName",
    "name",
    "webUrl",
    "description",
    "createdDateTime",
    "lastModifiedDateTime",
)


class SharepointService:
    """SharePoint connector using Microsoft Graph API for reliable authentication."""

    def __init__(self, serviceCenter=None):
        """Initialize SharePoint service without access token.

        Args:
            serviceCenter: Service center instance for accessing other services

        Use setAccessTokenFromConnection() method to configure the access token
        before making API calls.
        """
        self.services = serviceCenter
        self.access_token = None
        self.base_url = "https://graph.microsoft.com/v1.0"

    def setAccessTokenFromConnection(self, userConnection) -> bool:
        """Set access token from UserConnection.

        Args:
            userConnection: UserConnection object containing token information

        Returns:
            bool: True if token was set successfully, False otherwise
        """
        try:
            if not userConnection:
                logger.error("UserConnection is required to set access token")
                return False

            # Get a fresh token for this specific connection.
            # Imported lazily, as in the original code (presumably to avoid an
            # import cycle - TODO confirm).
            from modules.security.tokenManager import TokenManager

            token = TokenManager().getFreshToken(userConnection.id)
            if not token:
                logger.error(f"No token found for connection {userConnection.id}")
                return False

            self.access_token = token.tokenAccess
            logger.info(f"Access token set for connection {userConnection.id}")
            return True
        except Exception as e:
            logger.error(f"Error setting access token: {str(e)}")
            return False

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _extract_site_info(site: Dict[str, Any]) -> Dict[str, Any]:
        """Return the subset of a Graph site resource exposed to callers."""
        return {field: site.get(field) for field in _SITE_FIELDS}

    @staticmethod
    def _extract_item_info(item: Dict[str, Any]) -> Dict[str, Any]:
        """Return the fields shared by every drive item (file or folder) summary."""
        return {
            "id": item.get("id"),
            "name": item.get("name"),
            # An item is classified as a folder when it carries a "folder" facet.
            "type": "folder" if "folder" in item else "file",
            "size": item.get("size", 0),
            "createdDateTime": item.get("createdDateTime"),
            "lastModifiedDateTime": item.get("lastModifiedDateTime"),
            "webUrl": item.get("webUrl"),
        }

    async def _download_bytes(self, endpoint: str, failure_label: str) -> Optional[bytes]:
        """GET ``endpoint`` and return the raw response body.

        Args:
            endpoint: Graph endpoint relative to ``self.base_url``.
            failure_label: Prefix for the error log entry on a non-200 status.

        Returns:
            The response bytes on HTTP 200, otherwise None (after logging the
            status and response text). Transport errors propagate to the caller.
        """
        url = f"{self.base_url}/{endpoint.lstrip('/')}"
        headers = {"Authorization": f"Bearer {self.access_token}"}
        logger.debug(f"Downloading file: GET {url}")

        timeout = aiohttp.ClientTimeout(total=_REQUEST_TIMEOUT_SECONDS)
        async with aiohttp.ClientSession(timeout=timeout) as session:
            async with session.get(url, headers=headers) as response:
                if response.status == 200:
                    return await response.read()
                error_text = await response.text()
                logger.error(f"{failure_label}: {response.status} - {error_text}")
                return None

    async def _make_graph_api_call(self, endpoint: str, method: str = "GET", data: Optional[bytes] = None) -> Dict[str, Any]:
        """Make a Microsoft Graph API call with proper error handling.

        Args:
            endpoint: Graph endpoint relative to ``self.base_url`` (a leading
                slash is tolerated).
            method: HTTP method; "GET", "PUT" and "POST" are supported.
            data: Optional request body. It is sent for PUT/POST only.

        Returns:
            The decoded JSON response on success. On any failure (missing
            token, unsupported method, non-success status, timeout, other
            exception) a dict with a single "error" key is returned; this
            method does not raise.
        """
        if self.access_token is None:
            logger.error(_TOKEN_NOT_SET_MSG)
            return {"error": _TOKEN_NOT_SET_MSG}

        try:
            method = method.upper()
            accepted_statuses = _SUCCESS_STATUSES.get(method)
            if accepted_statuses is None:
                # Previously an unknown method fell through every branch and
                # returned None, which crashed callers testing `"error" in result`.
                message = f"Unsupported HTTP method: {method}"
                logger.error(message)
                return {"error": message}

            # Only PUT bodies are sent as raw bytes; everything else is JSON.
            # `is not None` so that an empty payload still counts as a body.
            is_binary_upload = data is not None and method == "PUT"
            headers = {
                "Authorization": f"Bearer {self.access_token}",
                "Content-Type": "application/octet-stream" if is_binary_upload else "application/json",
            }

            # Remove leading slash from endpoint to avoid double slash
            clean_endpoint = endpoint.lstrip('/')
            url = f"{self.base_url}/{clean_endpoint}"
            logger.debug(f"Making Graph API call: {method} {url}")

            # GET requests never carry a body.
            body = None if method == "GET" else data

            timeout = aiohttp.ClientTimeout(total=_REQUEST_TIMEOUT_SECONDS)
            async with aiohttp.ClientSession(timeout=timeout) as session:
                async with session.request(method, url, headers=headers, data=body) as response:
                    if response.status in accepted_statuses:
                        return await response.json()
                    error_text = await response.text()
                    logger.error(f"Graph API call failed: {response.status} - {error_text}")
                    return {"error": f"API call failed: {response.status} - {error_text}"}
        except asyncio.TimeoutError:
            logger.error(f"Graph API call timed out after {_REQUEST_TIMEOUT_SECONDS} seconds: {endpoint}")
            return {"error": f"API call timed out after {_REQUEST_TIMEOUT_SECONDS} seconds: {endpoint}"}
        except Exception as e:
            logger.error(f"Error making Graph API call: {str(e)}")
            return {"error": f"Error making Graph API call: {str(e)}"}

    # ------------------------------------------------------------------
    # Site discovery
    # ------------------------------------------------------------------

    async def discover_sites(self) -> List[Dict[str, Any]]:
        """Discover all SharePoint sites accessible to the user.

        Returns:
            A list of site summaries (see ``_SITE_FIELDS`` for the keys), or an
            empty list when the call fails. Only the "value" array of the first
            response is read.
        """
        try:
            result = await self._make_graph_api_call("sites?search=*")
            if "error" in result:
                logger.error(f"Error discovering SharePoint sites: {result['error']}")
                return []

            sites = result.get("value", [])
            logger.info(f"Discovered {len(sites)} SharePoint sites")

            processed_sites = []
            for site in sites:
                site_info = self._extract_site_info(site)
                processed_sites.append(site_info)
                logger.debug(f"Site: {site_info['displayName']} - {site_info['webUrl']}")
            return processed_sites
        except Exception as e:
            logger.error(f"Error discovering SharePoint sites: {str(e)}")
            return []

    async def find_site_by_name(self, site_name: str) -> Optional[Dict[str, Any]]:
        """Find a specific SharePoint site by name using direct Graph API call.

        A direct ``sites/{site_name}`` lookup is tried first. If it fails, all
        discoverable sites are listed and matched against ``displayName``:
        first case-insensitive exact match, then case-insensitive substring.

        Args:
            site_name: Site identifier or display name to look for.

        Returns:
            The site summary dict, or None when nothing matches or
            ``site_name`` is empty.
        """
        if not site_name or not site_name.strip():
            # An empty name would otherwise match the first discovered site,
            # because "" is a substring of every display name.
            logger.warning(f"No site found matching: {site_name}")
            return None

        try:
            # Try to get the site directly by name using Graph API
            endpoint = f"sites/{site_name}"
            result = await self._make_graph_api_call(endpoint)
            if result and "error" not in result:
                site_info = self._extract_site_info(result)
                logger.info(f"Found site directly: {site_info['displayName']} - {site_info['webUrl']}")
                return site_info
        except Exception as e:
            logger.debug(f"Direct site lookup failed for '{site_name}': {str(e)}")

        # Fallback to discovery if direct lookup fails
        logger.info(f"Direct lookup failed, trying discovery for site: {site_name}")
        sites = await self.discover_sites()
        if not sites:
            logger.warning("No sites discovered")
            return None

        logger.info(f"Discovered {len(sites)} SharePoint sites:")
        for site in sites:
            logger.info(f" - {site.get('displayName', 'Unknown')} (ID: {site.get('id', 'Unknown')})")

        wanted = site_name.strip().lower()

        def display_name(site: Dict[str, Any]) -> str:
            # discover_sites() always sets the key, possibly to None, so a
            # .get() default is not enough to guarantee a string.
            return site.get("displayName") or ""

        # Try exact match first
        for site in sites:
            if display_name(site).strip().lower() == wanted:
                logger.info(f"Found exact match: {site.get('displayName')}")
                return site

        # Try partial match
        partial = site_name.lower()
        for site in sites:
            if partial in display_name(site).lower():
                logger.info(f"Found partial match: {site.get('displayName')}")
                return site

        logger.warning(f"No site found matching: {site_name}")
        return None

    async def find_site_by_web_url(self, web_url: str) -> Optional[Dict[str, Any]]:
        """Find a SharePoint site using its web URL (useful for guest sites).

        Args:
            web_url: Site URL of the form ``[scheme://]{hostname}/sites/{path}``.
                ``https://`` is assumed when no scheme is given.

        Returns:
            The site summary dict, or None if the URL is not of the expected
            form, the lookup fails, or an exception occurs.
        """
        try:
            # Use the web URL format: sites/{hostname}:/sites/{site-path}
            # Only add a scheme when none is present, so "http://..." input
            # is not turned into "https://http://...".
            if "://" not in web_url:
                web_url = f"https://{web_url}"

            # Parse the URL to extract hostname and site path
            from urllib.parse import urlparse

            parsed = urlparse(web_url)
            hostname = parsed.hostname
            path_parts = parsed.path.strip('/').split('/')

            if not hostname or len(path_parts) < 2 or path_parts[0] != 'sites':
                logger.error(f"Invalid SharePoint URL format: {web_url}")
                return None

            site_path = '/'.join(path_parts[1:])  # Everything after 'sites/'
            endpoint = f"sites/{hostname}:/sites/{site_path}"
            logger.debug(f"Trying web URL format: {endpoint}")

            result = await self._make_graph_api_call(endpoint)
            if result and "error" not in result:
                site_info = self._extract_site_info(result)
                logger.info(f"Found site by web URL: {site_info['displayName']} - {site_info['webUrl']} (ID: {site_info['id']})")
                return site_info

            logger.warning(f"Site not found using web URL: {web_url}")
            return None
        except Exception as e:
            logger.error(f"Error finding site by web URL: {str(e)}")
            return None

    async def find_site_by_url(self, hostname: str, site_path: str) -> Optional[Dict[str, Any]]:
        """Find a SharePoint site using the site URL format.

        Several spellings of ``sites/{hostname}:/sites/{site_path}`` are tried
        in order (as given, with trailing slash, lowercase, lowercase with
        trailing slash); the first successful response wins.

        Args:
            hostname: SharePoint host name.
            site_path: Path of the site below ``/sites/``.

        Returns:
            The site summary dict, or None if no spelling succeeds or an
            exception occurs.
        """
        try:
            # For guest sites, try different URL formats
            candidates = [
                f"sites/{hostname}:/sites/{site_path}",  # Standard format
                f"sites/{hostname}:/sites/{site_path}/",  # With trailing slash
                f"sites/{hostname}:/sites/{site_path.lower()}",  # Lowercase
                f"sites/{hostname}:/sites/{site_path.lower()}/",  # Lowercase with slash
            ]
            # Drop duplicates (path already lowercase) while keeping the order,
            # so the same endpoint is never requested twice.
            url_formats = list(dict.fromkeys(candidates))

            for endpoint in url_formats:
                logger.debug(f"Trying URL format: {endpoint}")
                result = await self._make_graph_api_call(endpoint)
                if result and "error" not in result:
                    site_info = self._extract_site_info(result)
                    logger.info(f"Found site by URL: {site_info['displayName']} - {site_info['webUrl']} (ID: {site_info['id']})")
                    return site_info
                logger.debug(f"URL format failed: {endpoint} - {result.get('error', 'Unknown error')}")

            logger.warning(f"Site not found using any URL format for: {hostname}:/sites/{site_path}")
            return None
        except Exception as e:
            logger.error(f"Error finding site by URL: {str(e)}")
            return None

    # ------------------------------------------------------------------
    # Files and folders
    # ------------------------------------------------------------------

    async def get_folder_by_path(self, site_id: str, folder_path: str) -> Optional[Dict[str, Any]]:
        """Get folder information by path within a site.

        Args:
            site_id: Graph site identifier.
            folder_path: Folder path relative to the drive root; an empty path
                or "/" addresses the drive root itself.

        Returns:
            The raw Graph response dict, or None if the call fails.
        """
        try:
            # Clean the path
            clean_path = (folder_path or "").strip('/')
            if clean_path:
                endpoint = f"sites/{site_id}/drive/root:/{clean_path}"
            else:
                # "root:/" with nothing after it is not a valid path address.
                endpoint = f"sites/{site_id}/drive/root"

            result = await self._make_graph_api_call(endpoint)
            if "error" in result:
                logger.warning(f"Folder not found at path {folder_path}: {result['error']}")
                return None
            return result
        except Exception as e:
            logger.error(f"Error getting folder by path: {str(e)}")
            return None

    async def upload_file(self, site_id: str, folder_path: str, file_name: str, content: bytes) -> Dict[str, Any]:
        """Upload a file to SharePoint.

        Args:
            site_id: Graph site identifier.
            folder_path: Destination folder relative to the drive root; empty
                or "/" uploads into the root folder.
            file_name: Name of the file to create or replace.
            content: Raw file bytes, sent as the PUT body.

        Returns:
            The Graph response dict on success, or a dict with an "error" key
            on failure. This method does not raise.
        """
        try:
            # Clean the path; avoid a double slash when uploading to the root.
            clean_path = (folder_path or "").strip('/')
            upload_path = f"{clean_path}/{file_name}" if clean_path else file_name
            endpoint = f"sites/{site_id}/drive/root:/{upload_path}:/content"
            logger.info(f"Uploading file to: {endpoint}")

            result = await self._make_graph_api_call(endpoint, method="PUT", data=content)
            if "error" in result:
                logger.error(f"Upload failed: {result['error']}")
                return result

            logger.info(f"File uploaded successfully: {file_name}")
            return result
        except Exception as e:
            logger.error(f"Error uploading file: {str(e)}")
            return {"error": f"Error uploading file: {str(e)}"}

    async def download_file(self, site_id: str, file_id: str) -> Optional[bytes]:
        """Download a file from SharePoint.

        Args:
            site_id: Graph site identifier.
            file_id: Drive item identifier of the file.

        Returns:
            The file bytes, or None if the token is missing, the response
            status is not 200, or an exception occurs.
        """
        try:
            if self.access_token is None:
                logger.error(_TOKEN_NOT_SET_MSG)
                return None

            endpoint = f"sites/{site_id}/drive/items/{file_id}/content"
            return await self._download_bytes(endpoint, "Download failed")
        except Exception as e:
            logger.error(f"Error downloading file: {str(e)}")
            return None

    async def list_folder_contents(self, site_id: str, folder_path: str = "") -> Optional[List[Dict[str, Any]]]:
        """List contents of a folder.

        Args:
            site_id: Graph site identifier.
            folder_path: Folder path relative to the drive root; empty or "/"
                lists the root folder.

        Returns:
            A list of item summaries. Files additionally carry "mimeType" and
            "downloadUrl"; folders carry "childCount". Returns None when the
            API call reports an error and an empty list when an exception is
            raised (both behaviours are kept from the original implementation).
        """
        try:
            clean_path = (folder_path or "").strip('/')
            if clean_path:
                endpoint = f"sites/{site_id}/drive/root:/{clean_path}:/children"
            else:
                endpoint = f"sites/{site_id}/drive/root/children"

            result = await self._make_graph_api_call(endpoint)
            if "error" in result:
                logger.warning(f"Failed to list folder contents: {result['error']}")
                return None

            processed_items = []
            for item in result.get("value", []):
                item_info = self._extract_item_info(item)
                if "file" in item:
                    item_info["mimeType"] = item["file"].get("mimeType")
                    item_info["downloadUrl"] = item.get("@microsoft.graph.downloadUrl")
                if "folder" in item:
                    item_info["childCount"] = item["folder"].get("childCount", 0)
                processed_items.append(item_info)
            return processed_items
        except Exception as e:
            logger.error(f"Error listing folder contents: {str(e)}")
            return []

    async def search_files(self, site_id: str, query: str) -> List[Dict[str, Any]]:
        """Search for files in a site.

        Args:
            site_id: Graph site identifier.
            query: Search text; single quotes are doubled before being placed
                in the ``search(q='...')`` expression.

        Returns:
            A list of item summaries, each with a "parentPath" key; files
            additionally carry "mimeType" and "downloadUrl". Returns an empty
            list on any failure.
        """
        try:
            search_query = query.replace("'", "''")  # Escape single quotes for OData
            endpoint = f"sites/{site_id}/drive/root/search(q='{search_query}')"

            result = await self._make_graph_api_call(endpoint)
            if "error" in result:
                logger.warning(f"Search failed: {result['error']}")
                return []

            processed_items = []
            for item in result.get("value", []):
                item_info = self._extract_item_info(item)
                item_info["parentPath"] = item.get("parentReference", {}).get("path", "")
                if "file" in item:
                    item_info["mimeType"] = item["file"].get("mimeType")
                    item_info["downloadUrl"] = item.get("@microsoft.graph.downloadUrl")
                processed_items.append(item_info)
            return processed_items
        except Exception as e:
            logger.error(f"Error searching files: {str(e)}")
            return []

    async def copy_file_async(self, site_id: str, source_folder: str, source_file: str, dest_folder: str, dest_file: str) -> None:
        """Copy a file from source to destination folder (like original synchronizer).

        The source is downloaded in full and then uploaded to the destination.

        Args:
            site_id: Graph site identifier (source and destination share it).
            source_folder: Folder containing the source file.
            source_file: Name of the source file.
            dest_folder: Destination folder.
            dest_file: Name of the file to create in the destination folder.

        Raises:
            Exception: If the download or the upload fails. The original
                exception, if any, is attached as the cause.
        """
        # Built before the try block so the error handler can always use it.
        source_path = f"{source_folder}/{source_file}"
        try:
            # First, download the source file
            file_content = await self.download_file_by_path(site_id=site_id, file_path=source_path)
            # `is None` rather than falsiness: an empty file (b"") is valid.
            if file_content is None:
                raise Exception(f"Failed to download source file: {source_path}")

            # Upload to destination
            upload_result = await self.upload_file(
                site_id=site_id,
                folder_path=dest_folder,
                file_name=dest_file,
                content=file_content,
            )
        except Exception as e:
            # Provide more specific error information
            error_msg = str(e)
            if "itemNotFound" in error_msg or "404" in error_msg:
                raise Exception(f"Source file not found (404): {source_path} - {error_msg}") from e
            raise Exception(f"Error copying file: {error_msg}") from e

        # upload_file() reports failures through an "error" key instead of
        # raising; without this check a failed upload was logged as a success.
        if isinstance(upload_result, dict) and "error" in upload_result:
            raise Exception(f"Error copying file: {upload_result['error']}")

        logger.info(f"File copied: {source_file} -> {dest_file}")

    async def download_file_by_path(self, site_id: str, file_path: str) -> Optional[bytes]:
        """Download a file by its path within a site.

        Args:
            site_id: Graph site identifier.
            file_path: File path relative to the drive root; leading and
                trailing slashes are ignored.

        Returns:
            The file bytes, or None if the token is missing, the response
            status is not 200, or an exception occurs.
        """
        try:
            if self.access_token is None:
                logger.error(_TOKEN_NOT_SET_MSG)
                return None

            # Clean the path
            clean_path = file_path.strip('/')
            endpoint = f"sites/{site_id}/drive/root:/{clean_path}:/content"

            # Use direct HTTP call for file downloads (binary content)
            return await self._download_bytes(endpoint, "File download failed")
        except Exception as e:
            logger.error(f"Error downloading file by path: {str(e)}")
            return None