import asyncio
import logging
from typing import Any, Dict, List, Optional
from urllib.parse import unquote, urlparse

from modules.datamodels.datamodelUam import User
from modules.datamodels.datamodelNeutralizer import DataNeutralizerAttributes, DataNeutraliserConfig
from modules.services.serviceNeutralization.mainServiceNeutralization import NeutralizationService

logger = logging.getLogger(__name__)


class NeutralizationPlayground:
    """Feature/UI wrapper around NeutralizationService for playground & routes."""

    def __init__(self, currentUser: User):
        """Bind the wrapper to *currentUser* and create its NeutralizationService."""
        self.currentUser = currentUser
        self.service = NeutralizationService(currentUser)

    def processText(self, text: str) -> Dict[str, Any]:
        """Delegate to ``NeutralizationService.processText`` and return its result."""
        return self.service.processText(text)

    def processFiles(self, fileIds: List[str]) -> Dict[str, Any]:
        """Process each file ID through the service and summarize the outcome.

        A failure on one file is logged and recorded in ``errors``; the
        remaining files are still processed.

        Returns:
            A dict with ``success`` (True only when no file failed),
            ``total_files``, ``successful_files``, ``failed_files``,
            ``results`` (one entry per successful file) and ``errors``
            (one ``"<fileId>: <message>"`` string per failed file).
        """
        results: List[Dict[str, Any]] = []
        errors: List[str] = []

        for fileId in fileIds:
            try:
                res = self.service.processFile(fileId)
                results.append({
                    'file_id': fileId,
                    'neutralized_file_name': res.get('neutralized_file_name'),
                    'attributes_count': len(res.get('attributes', [])),
                })
            except Exception as e:
                # Best effort: one bad file must not abort the whole batch.
                logger.error(f"Error processing file {fileId}: {str(e)}")
                errors.append(f"{fileId}: {str(e)}")

        return {
            'success': len(errors) == 0,
            'total_files': len(fileIds),
            'successful_files': len(results),
            'failed_files': len(errors),
            'results': results,
            'errors': errors,
        }

    async def processSharepointFiles(self, sourcePath: str, targetPath: str) -> Dict[str, Any]:
        """Run the SharePoint processor for *sourcePath* -> *targetPath*.

        The processor class is imported lazily from
        ``modules.features.neutralizePlayground.sharepoint``.
        """
        from modules.features.neutralizePlayground.sharepoint import SharepointProcessor

        processor = SharepointProcessor(self.currentUser, self.service)
        return await processor.processSharepointFiles(sourcePath, targetPath)

    # Cleanup attributes
    def cleanAttributes(self, fileId: str) -> bool:
        """Delete the neutralization attributes stored for *fileId*.

        Returns False when the service has no ``app_interface``; otherwise
        returns whatever ``deleteNeutralizationAttributes`` returns.
        """
        if not self.service.app_interface:
            return False
        return self.service.app_interface.deleteNeutralizationAttributes(fileId)

    # Stats
    def getStats(self) -> Dict[str, Any]:
        """Aggregate counts over all attributes returned by the service.

        Returns:
            On success: ``total_attributes``, ``unique_files``,
            ``pattern_counts`` (pattern type -> occurrences) and
            ``mandate_id``. On failure: zeroed counters plus an ``error``
            message (no ``mandate_id`` key).
        """
        try:
            allAttributes = self.service._getAttributes()

            patternCounts: Dict[str, int] = {}
            for attr in allAttributes:
                patternType = attr.patternType
                patternCounts[patternType] = patternCounts.get(patternType, 0) + 1

            # Attributes without a file ID are not counted as a file.
            uniqueFiles = {attr.fileId for attr in allAttributes if attr.fileId}

            return {
                'total_attributes': len(allAttributes),
                'unique_files': len(uniqueFiles),
                'pattern_counts': patternCounts,
                'mandate_id': self.currentUser.mandateId if self.currentUser else None,
            }
        except Exception as e:
            logger.error(f"Error getting stats: {str(e)}")
            return {
                'total_attributes': 0,
                'unique_files': 0,
                'pattern_counts': {},
                'error': str(e),
            }

    # Additional methods needed by the route
    def get_config(self) -> Optional[DataNeutraliserConfig]:
        """Get neutralization configuration"""
        return self.service.getConfig()

    def save_config(self, config_data: Dict[str, Any]) -> DataNeutraliserConfig:
        """Save neutralization configuration"""
        return self.service.saveConfig(config_data)

    def neutralize_text(self, text: str, file_id: Optional[str] = None) -> Dict[str, Any]:
        """Neutralize text content.

        NOTE(review): ``file_id`` is accepted but not forwarded to the
        service; only ``text`` is used. Confirm whether the service should
        receive it.
        """
        return self.service.processText(text)

    def resolve_text(self, text: str) -> str:
        """Resolve UIDs in neutralized text back to original text"""
        return self.service.resolveText(text)

    def get_attributes(self, file_id: Optional[str] = None) -> List[DataNeutralizerAttributes]:
        """Get neutralization attributes, optionally filtered by file ID.

        Returns an empty list when the service has no ``app_interface`` or
        when fetching the attributes raises.
        """
        if not self.service.app_interface:
            return []

        try:
            all_attributes = self.service._getAttributes()
            if file_id:
                return [attr for attr in all_attributes if attr.fileId == file_id]
            return all_attributes
        except Exception as e:
            logger.error(f"Error getting attributes: {str(e)}")
            return []

    async def process_sharepoint_files(self, source_path: str, target_path: str) -> Dict[str, Any]:
        """Process files from SharePoint source path and store neutralized files in target path"""
        return await self.processSharepointFiles(source_path, target_path)

    def batch_neutralize_files(self, files_data: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Process multiple files for neutralization.

        Entries without a truthy ``fileId`` are skipped.
        """
        file_ids = [
            file_data.get('fileId')
            for file_data in files_data
            if file_data.get('fileId')
        ]
        return self.processFiles(file_ids)

    def get_processing_stats(self) -> Dict[str, Any]:
        """Get neutralization processing statistics"""
        return self.getStats()

    def cleanup_file_attributes(self, file_id: str) -> bool:
        """Clean up neutralization attributes for a specific file"""
        return self.cleanAttributes(file_id)


# Internal SharePoint helper module separated to keep feature logic tidy
class SharepointProcessor:
    """Downloads files from a SharePoint folder, neutralizes them and uploads the results."""

    def __init__(self, currentUser: User, service: NeutralizationService):
        """Store the user and the neutralization service to operate with."""
        self.currentUser = currentUser
        self.service = service

    async def processSharepointFiles(self, sourcePath: str, targetPath: str) -> Dict[str, Any]:
        """Resolve a connection and token, then process *sourcePath* into *targetPath*.

        Never raises: every failure is reported as a dict with
        ``success=False``, a ``message``, ``processed_files=0`` and
        ``errors``.
        """
        try:
            logger.info(f"Processing SharePoint files from {sourcePath} to {targetPath}")

            connection = await self._getSharepointConnection(sourcePath)
            if not connection:
                return {
                    'success': False,
                    'message': 'No SharePoint connection found for user',
                    'processed_files': 0,
                    'errors': ['No SharePoint connection found'],
                }

            from modules.security.tokenManager import TokenManager

            token = TokenManager().getFreshToken(self.service.app_interface, connection['id'])
            if not token:
                return {
                    'success': False,
                    'message': 'No SharePoint access token found',
                    'processed_files': 0,
                    'errors': ['No SharePoint access token found'],
                }

            return await self._processSharepointFilesAsync(sourcePath, targetPath, token.tokenAccess)
        except Exception as e:
            logger.error(f"Error processing SharePoint files: {str(e)}")
            return {
                'success': False,
                'message': f'Error processing SharePoint files: {str(e)}',
                'processed_files': 0,
                'errors': [str(e)],
            }

    async def _getSharepointConnection(self, sharepointPath: Optional[str] = None):
        """Pick the user's connection whose ``authority`` is ``'msft'``.

        With exactly one such connection it is returned directly. With
        several, and a *sharepointPath* given, the choice is delegated to
        ``_matchConnectionToPath``; without a path the first one is used.

        Returns:
            The chosen connection record, or None when the user has no
            ``'msft'`` connection or the lookup raised.
        """
        try:
            from modules.datamodels.datamodelUam import UserConnection

            connections = self.service.app_interface.db.getRecordset(
                UserConnection,
                recordFilter={"userId": self.service.app_interface.userId},
            )
            msftConnections = [c for c in connections if c.get('authority') == 'msft']

            if not msftConnections:
                logger.warning('No Microsoft connections found for user')
                return None

            if len(msftConnections) == 1:
                logger.info(f"Found single Microsoft connection: {msftConnections[0].get('id')}")
                return msftConnections[0]

            if sharepointPath:
                return await self._matchConnectionToPath(msftConnections, sharepointPath)

            logger.info(f"Multiple Microsoft connections found, using first one: {msftConnections[0].get('id')}")
            return msftConnections[0]
        except Exception as e:
            # Include the cause; the bare message alone made failures undiagnosable.
            logger.error(f"Error getting SharePoint connection: {str(e)}")
            return None

    async def _matchConnectionToPath(self, connections: list, sharepointPath: str):
        """Return the first connection whose token can access *sharepointPath*.

        Falls back to the first connection when none matches or when the
        matching itself fails; returns None only if *connections* is empty
        in that failure case.
        """
        try:
            targetDomain = urlparse(sharepointPath).netloc.lower()
            logger.info(f"Looking for connection matching domain: {targetDomain}")

            from modules.security.tokenManager import TokenManager

            for connection in connections:
                try:
                    token = TokenManager().getFreshToken(self.service.app_interface, connection['id'])
                    if not token:
                        continue
                    if await self._testSharepointAccess(token.tokenAccess, sharepointPath):
                        logger.info(f"Found matching connection for domain {targetDomain}: {connection.get('id')}")
                        return connection
                except Exception as e:
                    # A broken connection only disqualifies itself; keep probing the rest.
                    logger.debug(f"Skipping connection {connection.get('id')}: {str(e)}")
                    continue

            logger.warning(f"No specific connection match found for {targetDomain}, using first available")
            return connections[0]
        except Exception as e:
            logger.error(f"Error matching connection to path: {str(e)}")
            return connections[0] if connections else None

    async def _testSharepointAccess(self, accessToken: str, sharepointPath: str) -> bool:
        """Return True when *accessToken* can resolve the site in *sharepointPath*; never raises."""
        try:
            return await self._testSharepointAccessAsync(accessToken, sharepointPath)
        except Exception:
            return False

    async def _testSharepointAccessAsync(self, accessToken: str, sharepointPath: str) -> bool:
        """Look up the site of *sharepointPath* with *accessToken*.

        Returns False when the path cannot be parsed, the site is not found,
        or any exception occurs.
        """
        try:
            from modules.services.serviceSharepoint.mainSharepoint import SharepointService

            connector = SharepointService(access_token=accessToken)
            siteUrl, _ = self._parseSharepointPath(sharepointPath)
            if not siteUrl:
                return False

            siteInfo = await connector.find_site_by_web_url(siteUrl)
            return siteInfo is not None
        except Exception:
            return False

    async def _processSharepointFilesAsync(self, sourcePath: str, targetPath: str, accessToken: str) -> Dict[str, Any]:
        """Neutralize every file in the source folder and upload it to the target folder.

        Each entry of type ``'file'`` is downloaded, decoded as UTF-8 (with a
        latin-1 fallback), neutralized, and uploaded under the name
        ``neutralized_<original name>``. Files are handled concurrently via
        ``asyncio.gather``.

        Returns:
            A dict with ``success`` (True when at least one file was
            processed), ``message``, ``processed_files``, ``files`` and
            ``errors``. Early failures (bad path, site or folder not found)
            return ``success=False`` without a ``files`` key; the
            folder-not-found case additionally carries ``available_folders``.
        """
        try:
            from modules.services.serviceSharepoint.mainSharepoint import SharepointService

            connector = SharepointService(access_token=accessToken)

            sourceSite, sourceFolder = self._parseSharepointPath(sourcePath)
            targetSite, targetFolder = self._parseSharepointPath(targetPath)
            if not sourceSite or not targetSite:
                return {
                    'success': False,
                    'message': 'Invalid SharePoint path format',
                    'processed_files': 0,
                    'errors': ['Invalid SharePoint path format'],
                }

            sourceSiteInfo = await connector.find_site_by_web_url(sourceSite)
            if not sourceSiteInfo:
                return {
                    'success': False,
                    'message': f'Source site not found: {sourceSite}',
                    'processed_files': 0,
                    'errors': [f'Source site not found: {sourceSite}'],
                }

            targetSiteInfo = await connector.find_site_by_web_url(targetSite)
            if not targetSiteInfo:
                return {
                    'success': False,
                    'message': f'Target site not found: {targetSite}',
                    'processed_files': 0,
                    'errors': [f'Target site not found: {targetSite}'],
                }

            logger.info(f"Listing files in folder: {sourceFolder} for site: {sourceSiteInfo['id']}")
            files = await connector.list_folder_contents(sourceSiteInfo['id'], sourceFolder)

            if not files:
                # Nothing in the requested folder: list the root so the caller
                # can be told which folders actually exist.
                logger.warning(f"No files found in folder '{sourceFolder}', trying root folder")
                files = await connector.list_folder_contents(sourceSiteInfo['id'], '')
                if files:
                    folders = [f for f in files if f.get('type') == 'folder']
                    folderNames = [f.get('name') for f in folders]
                    logger.info(f"Available folders in root: {folderNames}")
                    folderList = ", ".join(folderNames) if folderNames else "None"
                    return {
                        'success': False,
                        'message': f"Folder '{sourceFolder}' not found. Available folders in root: {folderList}",
                        'processed_files': 0,
                        'errors': [f"Folder '{sourceFolder}' not found. Available folders: {folderList}"],
                        'available_folders': folderNames,
                    }
                else:
                    return {
                        'success': False,
                        'message': f'No files found in source folder: {sourceFolder}',
                        'processed_files': 0,
                        'errors': [f'No files found in source folder: {sourceFolder}'],
                    }

            textFiles = [f for f in files if f.get('type') == 'file']
            processed: List[Dict[str, Any]] = []
            errors: List[str] = []

            async def _processSingle(fileInfo: Dict[str, Any]):
                """Download, neutralize and upload one file; return a result or ``{'error': ...}``."""
                try:
                    fileContent = await connector.download_file(sourceSiteInfo['id'], fileInfo['id'])
                    if not fileContent:
                        return {'error': f"Failed to download file: {fileInfo['name']}"}

                    try:
                        textContent = fileContent.decode('utf-8')
                    except UnicodeDecodeError:
                        # latin-1 maps every byte, so this fallback cannot fail.
                        textContent = fileContent.decode('latin-1')

                    result = self.service._neutralizeText(textContent, 'text')
                    neutralizedFilename = f"neutralized_{fileInfo['name']}"
                    uploadResult = await connector.upload_file(
                        targetSiteInfo['id'],
                        targetFolder,
                        neutralizedFilename,
                        result['neutralized_text'].encode('utf-8'),
                    )
                    if 'error' in uploadResult:
                        return {'error': f"Failed to upload neutralized file: {neutralizedFilename} - {uploadResult['error']}"}

                    return {
                        'success': True,
                        'original_name': fileInfo['name'],
                        'neutralized_name': neutralizedFilename,
                        'attributes_count': len(result.get('attributes', [])),
                    }
                except Exception as e:
                    return {'error': f"Error processing file {fileInfo['name']}: {str(e)}"}

            tasks = [_processSingle(f) for f in textFiles]
            results = await asyncio.gather(*tasks, return_exceptions=True)

            # gather preserves task order, so results[i] belongs to textFiles[i].
            for i, r in enumerate(results):
                if isinstance(r, Exception):
                    errors.append(f"Exception processing file {textFiles[i]['name']}: {str(r)}")
                elif isinstance(r, dict) and 'error' in r:
                    errors.append(r['error'])
                elif isinstance(r, dict) and r.get('success'):
                    processed.append({
                        'original_name': r['original_name'],
                        'neutralized_name': r['neutralized_name'],
                        'attributes_count': r['attributes_count'],
                    })
                else:
                    errors.append(f"Unknown result processing file {textFiles[i]['name']}: {r}")

            return {
                'success': len(processed) > 0,
                'message': f"Processed {len(processed)} files successfully",
                'processed_files': len(processed),
                'files': processed,
                'errors': errors,
            }
        except Exception as e:
            logger.error(f"Error in async SharePoint processing: {str(e)}")
            return {
                'success': False,
                'message': f'Error in async SharePoint processing: {str(e)}',
                'processed_files': 0,
                'errors': [str(e)],
            }

    def _parseSharepointPath(self, path: str) -> tuple[Optional[str], Optional[str]]:
        """Split a SharePoint URL into ``(siteUrl, folderPath)``.

        The query string is dropped and the folder part is URL-decoded.
        For example
        ``https://contoso.sharepoint.com/sites/Team/Shared%20Documents/In?x=1``
        yields ``("https://contoso.sharepoint.com/sites/Team",
        "Shared Documents/In")``.

        Returns:
            ``(None, None)`` when *path* does not start with ``https://``,
            contains no ``/sites/`` segment, has an empty site name, or
            cannot be parsed at all (e.g. it is not a string).
        """
        try:
            if not path.startswith('https://'):
                return None, None

            if '?' in path:
                path = path.split('?')[0]

            if '/sites/' not in path:
                return None, None

            # The membership test above guarantees exactly two parts here.
            parts = path.split('/sites/', 1)
            domain = parts[0].replace('https://', '')
            segments = parts[1].split('/')
            siteName = segments[0]
            if not siteName:
                # "https://host/sites/" names no site; treat it as invalid
                # rather than building a site URL that can never resolve.
                return None, None

            siteUrl = f"https://{domain}/sites/{siteName}"
            folderParts = segments[1:]
            folderPath = unquote('/'.join(folderParts) if folderParts else '')
            return siteUrl, folderPath
        except Exception as e:
            logger.error(f"Error parsing SharePoint path '{path}': {str(e)}")
            return None, None