333 lines
16 KiB
Python
333 lines
16 KiB
Python
import logging
|
|
from typing import Any, Dict, List, Optional
|
|
|
|
from modules.datamodels.datamodelUam import User
|
|
from modules.datamodels.datamodelNeutralizer import DataNeutralizerAttributes, DataNeutraliserConfig
|
|
from modules.services.serviceNeutralization.mainServiceNeutralization import NeutralizationService
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class NeutralizationPlayground:
    """Feature/UI wrapper around NeutralizationService for playground & routes.

    Exposes camelCase methods used by the playground UI plus snake_case
    aliases expected by the route layer; everything delegates to a single
    NeutralizationService instance scoped to the current user.
    """

    def __init__(self, currentUser: "User"):
        self.currentUser = currentUser
        self.service = NeutralizationService(currentUser)

    def processText(self, text: str) -> Dict[str, Any]:
        """Neutralize sensitive patterns in a raw text string via the service."""
        return self.service.processText(text)

    def processFiles(self, fileIds: List[str]) -> Dict[str, Any]:
        """Neutralize a batch of files by ID.

        Each file is processed independently: a failure on one file is
        logged and recorded in ``errors`` without aborting the batch.

        Returns a summary dict with an overall ``success`` flag (True only
        when no file failed), counts, per-file results, and error messages.
        """
        results: List[Dict[str, Any]] = []
        errors: List[str] = []
        for fileId in fileIds:
            try:
                res = self.service.processFile(fileId)
                results.append({
                    'file_id': fileId,
                    'neutralized_file_name': res.get('neutralized_file_name'),
                    'attributes_count': len(res.get('attributes', []))
                })
            except Exception as e:
                logger.error(f"Error processing file {fileId}: {str(e)}")
                errors.append(f"{fileId}: {str(e)}")
        return {
            'success': len(errors) == 0,
            'total_files': len(fileIds),
            'successful_files': len(results),
            'failed_files': len(errors),
            'results': results,
            'errors': errors,
        }

    async def processSharepointFiles(self, sourcePath: str, targetPath: str) -> Dict[str, Any]:
        """Neutralize files from a SharePoint source folder into a target folder.

        The helper module is imported lazily to avoid a circular import
        between the feature module and its SharePoint helper.
        """
        from modules.features.neutralizePlayground.sharepoint import SharepointProcessor
        processor = SharepointProcessor(self.currentUser, self.service)
        return await processor.processSharepointFiles(sourcePath, targetPath)

    def cleanAttributes(self, fileId: str) -> bool:
        """Delete all stored neutralization attributes for a file.

        Returns False when no app interface is available.
        """
        if not self.service.app_interface:
            return False
        return self.service.app_interface.deleteNeutralizationAttributes(fileId)

    def getStats(self) -> Dict[str, Any]:
        """Aggregate neutralization statistics for the current user.

        Returns total attribute count, distinct file count, per-pattern
        counts, and the mandate id. On failure it returns zeroed stats with
        an ``error`` message instead of raising, so the UI can still render.
        """
        try:
            allAttributes = self.service._getAttributes()
            patternCounts: Dict[str, int] = {}
            for attr in allAttributes:
                patternType = attr.patternType
                patternCounts[patternType] = patternCounts.get(patternType, 0) + 1
            uniqueFiles = set(attr.fileId for attr in allAttributes if attr.fileId)
            return {
                'total_attributes': len(allAttributes),
                'unique_files': len(uniqueFiles),
                'pattern_counts': patternCounts,
                'mandate_id': self.currentUser.mandateId if self.currentUser else None,
            }
        except Exception as e:
            logger.error(f"Error getting stats: {str(e)}")
            return {
                'total_attributes': 0,
                'unique_files': 0,
                'pattern_counts': {},
                'error': str(e),
            }

    # --- snake_case aliases used by the route layer ---

    def get_config(self) -> Optional["DataNeutraliserConfig"]:
        """Get neutralization configuration"""
        return self.service.getConfig()

    def save_config(self, config_data: Dict[str, Any]) -> "DataNeutraliserConfig":
        """Save neutralization configuration"""
        return self.service.saveConfig(config_data)

    def neutralize_text(self, text: str, file_id: Optional[str] = None) -> Dict[str, Any]:
        """Neutralize text content.

        ``file_id`` is accepted for route compatibility but is currently
        not forwarded to the service.
        """
        return self.service.processText(text)

    def resolve_text(self, text: str) -> str:
        """Resolve UIDs in neutralized text back to original text"""
        return self.service.resolveText(text)

    def get_attributes(self, file_id: Optional[str] = None) -> List["DataNeutralizerAttributes"]:
        """Get neutralization attributes, optionally filtered by file ID"""
        if not self.service.app_interface:
            return []
        try:
            all_attributes = self.service._getAttributes()
            if file_id:
                return [attr for attr in all_attributes if attr.fileId == file_id]
            return all_attributes
        except Exception as e:
            logger.error(f"Error getting attributes: {str(e)}")
            return []

    async def process_sharepoint_files(self, source_path: str, target_path: str) -> Dict[str, Any]:
        """Process files from SharePoint source path and store neutralized files in target path"""
        return await self.processSharepointFiles(source_path, target_path)

    def batch_neutralize_files(self, files_data: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Process multiple files for neutralization.

        Entries without a ``fileId`` key are silently skipped.
        """
        file_ids = [file_data.get('fileId') for file_data in files_data if file_data.get('fileId')]
        return self.processFiles(file_ids)

    def get_processing_stats(self) -> Dict[str, Any]:
        """Get neutralization processing statistics"""
        return self.getStats()

    def cleanup_file_attributes(self, file_id: str) -> bool:
        """Clean up neutralization attributes for a specific file"""
        return self.cleanAttributes(file_id)
|
|
|
|
|
|
# Internal SharePoint helper module separated to keep feature logic tidy
|
|
class SharepointProcessor:
    """Internal helper: downloads files from a SharePoint source folder,
    runs them through the neutralization service, and uploads the results
    to a target folder. All public entry points return a summary dict
    rather than raising."""

    def __init__(self, currentUser: "User", service: "NeutralizationService"):
        self.currentUser = currentUser
        self.service = service

    async def processSharepointFiles(self, sourcePath: str, targetPath: str) -> Dict[str, Any]:
        """Resolve a Microsoft connection + fresh token, then process the folder.

        Returns a dict with ``success``, ``message``, ``processed_files``
        and ``errors``; never raises.
        """
        try:
            logger.info(f"Processing SharePoint files from {sourcePath} to {targetPath}")
            connection = await self._getSharepointConnection(sourcePath)
            if not connection:
                return {
                    'success': False,
                    'message': 'No SharePoint connection found for user',
                    'processed_files': 0,
                    'errors': ['No SharePoint connection found'],
                }
            from modules.security.tokenManager import TokenManager
            token = TokenManager().getFreshToken(self.service.app_interface, connection['id'])
            if not token:
                return {
                    'success': False,
                    'message': 'No SharePoint access token found',
                    'processed_files': 0,
                    'errors': ['No SharePoint access token found'],
                }
            return await self._processSharepointFilesAsync(sourcePath, targetPath, token.tokenAccess)
        except Exception as e:
            logger.error(f"Error processing SharePoint files: {str(e)}")
            return {
                'success': False,
                'message': f'Error processing SharePoint files: {str(e)}',
                'processed_files': 0,
                'errors': [str(e)],
            }

    async def _getSharepointConnection(self, sharepointPath: Optional[str] = None):
        """Pick the user's Microsoft connection to use for *sharepointPath*.

        With a single 'msft' connection, use it; with several, try to match
        one against the path (falling back to the first). Returns None when
        the user has no Microsoft connection or lookup fails.
        """
        try:
            from modules.datamodels.datamodelUam import UserConnection
            connections = self.service.app_interface.db.getRecordset(
                UserConnection,
                recordFilter={"userId": self.service.app_interface.userId}
            )
            msftConnections = [c for c in connections if c.get('authority') == 'msft']
            if not msftConnections:
                logger.warning('No Microsoft connections found for user')
                return None
            if len(msftConnections) == 1:
                logger.info(f"Found single Microsoft connection: {msftConnections[0].get('id')}")
                return msftConnections[0]
            if sharepointPath:
                return await self._matchConnectionToPath(msftConnections, sharepointPath)
            logger.info(f"Multiple Microsoft connections found, using first one: {msftConnections[0].get('id')}")
            return msftConnections[0]
        except Exception:
            # logger.exception keeps the traceback that a bare error() would drop
            logger.exception('Error getting SharePoint connection')
            return None

    async def _matchConnectionToPath(self, connections: list, sharepointPath: str):
        """Probe each connection's token against *sharepointPath*; return the
        first that can access it, else fall back to the first connection."""
        try:
            from urllib.parse import urlparse
            targetDomain = urlparse(sharepointPath).netloc.lower()
            logger.info(f"Looking for connection matching domain: {targetDomain}")
            from modules.security.tokenManager import TokenManager
            for connection in connections:
                try:
                    token = TokenManager().getFreshToken(self.service.app_interface, connection['id'])
                    if not token:
                        continue
                    if await self._testSharepointAccess(token.tokenAccess, sharepointPath):
                        logger.info(f"Found matching connection for domain {targetDomain}: {connection.get('id')}")
                        return connection
                except Exception:
                    # best-effort probe: a failing connection just means "try the next one"
                    continue
            logger.warning(f"No specific connection match found for {targetDomain}, using first available")
            return connections[0]
        except Exception:
            logger.exception('Error matching connection to path')
            return connections[0] if connections else None

    async def _testSharepointAccess(self, accessToken: str, sharepointPath: str) -> bool:
        """Return True when *accessToken* can resolve the site of *sharepointPath*."""
        try:
            return await self._testSharepointAccessAsync(accessToken, sharepointPath)
        except Exception:
            return False

    async def _testSharepointAccessAsync(self, accessToken: str, sharepointPath: str) -> bool:
        """Resolve the site URL from the path and look it up via Graph."""
        try:
            from modules.services.serviceSharepoint.mainSharepoint import SharepointService
            connector = SharepointService(access_token=accessToken)
            siteUrl, _ = self._parseSharepointPath(sharepointPath)
            if not siteUrl:
                return False
            siteInfo = await connector.find_site_by_web_url(siteUrl)
            return siteInfo is not None
        except Exception:
            return False

    async def _processSharepointFilesAsync(self, sourcePath: str, targetPath: str, accessToken: str) -> Dict[str, Any]:
        """Download every file in the source folder, neutralize it, and upload
        the result as ``neutralized_<name>`` to the target folder.

        Files are processed concurrently via asyncio.gather; individual
        failures are collected in ``errors``. Overall ``success`` is True
        as soon as at least one file was processed.
        """
        try:
            import asyncio
            from modules.services.serviceSharepoint.mainSharepoint import SharepointService
            connector = SharepointService(access_token=accessToken)
            sourceSite, sourceFolder = self._parseSharepointPath(sourcePath)
            targetSite, targetFolder = self._parseSharepointPath(targetPath)
            if not sourceSite or not targetSite:
                return {'success': False, 'message': 'Invalid SharePoint path format', 'processed_files': 0, 'errors': ['Invalid SharePoint path format']}
            sourceSiteInfo = await connector.find_site_by_web_url(sourceSite)
            if not sourceSiteInfo:
                return {'success': False, 'message': f'Source site not found: {sourceSite}', 'processed_files': 0, 'errors': [f'Source site not found: {sourceSite}']}
            targetSiteInfo = await connector.find_site_by_web_url(targetSite)
            if not targetSiteInfo:
                return {'success': False, 'message': f'Target site not found: {targetSite}', 'processed_files': 0, 'errors': [f'Target site not found: {targetSite}']}
            logger.info(f"Listing files in folder: {sourceFolder} for site: {sourceSiteInfo['id']}")
            files = await connector.list_folder_contents(sourceSiteInfo['id'], sourceFolder)
            if not files:
                # Folder may be missing or misspelled: list the root to give the
                # caller a helpful set of candidate folder names.
                logger.warning(f"No files found in folder '{sourceFolder}', trying root folder")
                files = await connector.list_folder_contents(sourceSiteInfo['id'], '')
                if files:
                    folders = [f for f in files if f.get('type') == 'folder']
                    folderNames = [f.get('name') for f in folders]
                    logger.info(f"Available folders in root: {folderNames}")
                    folderList = ", ".join(folderNames) if folderNames else "None"
                    return {
                        'success': False,
                        'message': f"Folder '{sourceFolder}' not found. Available folders in root: {folderList}",
                        'processed_files': 0,
                        'errors': [f"Folder '{sourceFolder}' not found. Available folders: {folderList}"],
                        'available_folders': folderNames,
                    }
                else:
                    return {'success': False, 'message': f'No files found in source folder: {sourceFolder}', 'processed_files': 0, 'errors': [f'No files found in source folder: {sourceFolder}']}

            textFiles = [f for f in files if f.get('type') == 'file']
            processed: List[Dict[str, Any]] = []
            errors: List[str] = []

            async def _processSingle(fileInfo: Dict[str, Any]):
                # Per-file pipeline: download -> decode -> neutralize -> upload.
                # Returns {'error': ...} on failure so gather() results are uniform.
                try:
                    fileContent = await connector.download_file(sourceSiteInfo['id'], fileInfo['id'])
                    if not fileContent:
                        return {'error': f"Failed to download file: {fileInfo['name']}"}
                    try:
                        textContent = fileContent.decode('utf-8')
                    except UnicodeDecodeError:
                        # latin-1 never fails to decode, so this is a safe fallback
                        textContent = fileContent.decode('latin-1')
                    result = self.service._neutralizeText(textContent, 'text')
                    neutralizedFilename = f"neutralized_{fileInfo['name']}"
                    uploadResult = await connector.upload_file(targetSiteInfo['id'], targetFolder, neutralizedFilename, result['neutralized_text'].encode('utf-8'))
                    if 'error' in uploadResult:
                        return {'error': f"Failed to upload neutralized file: {neutralizedFilename} - {uploadResult['error']}"}
                    return {
                        'success': True,
                        'original_name': fileInfo['name'],
                        'neutralized_name': neutralizedFilename,
                        'attributes_count': len(result.get('attributes', [])),
                    }
                except Exception as e:
                    return {'error': f"Error processing file {fileInfo['name']}: {str(e)}"}

            tasks = [ _processSingle(f) for f in textFiles ]
            results = await asyncio.gather(*tasks, return_exceptions=True)
            for i, r in enumerate(results):
                if isinstance(r, Exception):
                    errors.append(f"Exception processing file {textFiles[i]['name']}: {str(r)}")
                elif isinstance(r, dict) and 'error' in r:
                    errors.append(r['error'])
                elif isinstance(r, dict) and r.get('success'):
                    processed.append({
                        'original_name': r['original_name'],
                        'neutralized_name': r['neutralized_name'],
                        'attributes_count': r['attributes_count'],
                    })
                else:
                    errors.append(f"Unknown result processing file {textFiles[i]['name']}: {r}")
            return {
                'success': len(processed) > 0,
                'message': f"Processed {len(processed)} files successfully",
                'processed_files': len(processed),
                'files': processed,
                'errors': errors,
            }
        except Exception as e:
            logger.error(f"Error in async SharePoint processing: {str(e)}")
            return {'success': False, 'message': f'Error in async SharePoint processing: {str(e)}', 'processed_files': 0, 'errors': [str(e)]}

    def _parseSharepointPath(self, path: str) -> tuple[Optional[str], Optional[str]]:
        """Split a SharePoint URL into (site URL, folder path).

        E.g. ``https://x.sharepoint.com/sites/Team/Docs/Sub`` ->
        ``("https://x.sharepoint.com/sites/Team", "Docs/Sub")``. Query
        strings are stripped; the folder path is URL-decoded. Returns
        ``(None, None)`` for anything that is not an https ``/sites/`` URL.
        """
        try:
            if not path.startswith('https://'):
                return None, None
            if '?' in path:
                path = path.split('?')[0]
            if '/sites/' not in path:
                return None, None
            parts = path.split('/sites/', 1)
            if len(parts) != 2:
                return None, None
            domain = parts[0].replace('https://', '')
            siteName = parts[1].split('/')[0]
            siteUrl = f"https://{domain}/sites/{siteName}"
            folderParts = parts[1].split('/')[1:]
            from urllib.parse import unquote
            folderPath = unquote('/'.join(folderParts) if folderParts else '')
            return siteUrl, folderPath
        except Exception:
            logger.exception(f"Error parsing SharePoint path '{path}'")
            return None, None
|