gateway/modules/features/neutralizePlayground/mainNeutralizePlayground.py
2025-09-23 22:47:54 +02:00

285 lines
14 KiB
Python

import logging
from typing import Any, Dict, List, Optional
from modules.interfaces.interfaceAppModel import User
from modules.services.serviceNeutralization.mainNeutralization import NeutralizationService
logger = logging.getLogger(__name__)
class NeutralizationPlayground:
    """Feature/UI wrapper around NeutralizationService for playground & routes.

    Every method delegates to ``self.service`` and shapes the outcome into a
    plain dictionary (or bool) for the caller.
    """

    def __init__(self, currentUser: User):
        """Store *currentUser* and create the NeutralizationService for it."""
        self.currentUser = currentUser
        self.service = NeutralizationService(currentUser)

    def processText(self, text: str) -> Dict[str, Any]:
        """Return the result of ``service.processText(text)`` unchanged."""
        return self.service.processText(text)

    def processFiles(self, fileIds: List[str]) -> Dict[str, Any]:
        """Run ``service.processFile`` for every id and summarize the outcome.

        A failure on one file is logged and recorded in ``errors``; it does
        not stop the remaining files from being processed.

        Returns:
            Dict with ``success`` (True only when no file failed),
            ``total_files``, ``successful_files``, ``failed_files``,
            ``results`` (one entry per successful file) and ``errors``
            (one ``"<fileId>: <message>"`` string per failed file).
        """
        results: List[Dict[str, Any]] = []
        errors: List[str] = []
        for fileId in fileIds:
            try:
                res = self.service.processFile(fileId)
                results.append({
                    'file_id': fileId,
                    'neutralized_file_name': res.get('neutralized_file_name'),
                    # `or []` also covers a key that is present but None,
                    # which would otherwise make len() raise.
                    'attributes_count': len(res.get('attributes') or []),
                })
            except Exception as e:
                # logger.exception keeps the traceback, not just the message.
                logger.exception(f"Error processing file {fileId}: {str(e)}")
                errors.append(f"{fileId}: {str(e)}")
        return {
            'success': not errors,
            'total_files': len(fileIds),
            'successful_files': len(results),
            'failed_files': len(errors),
            'results': results,
            'errors': errors,
        }

    async def processSharepointFiles(self, sourcePath: str, targetPath: str) -> Dict[str, Any]:
        """Neutralize the files under *sourcePath* into *targetPath* on SharePoint.

        Delegates to ``SharepointProcessor.processSharepointFiles`` and returns
        its result dictionary unchanged.
        """
        try:
            from modules.features.neutralizePlayground.sharepoint import (
                SharepointProcessor as processorClass,
            )
        except ImportError as e:
            # NOTE(review): a class named SharepointProcessor is also defined
            # in this file. Fall back to it when the dedicated module cannot
            # be imported, instead of failing the whole request - confirm
            # which of the two locations is the intended one.
            logger.debug(f"SharePoint helper module not importable, using in-file class: {str(e)}")
            processorClass = SharepointProcessor
        processor = processorClass(self.currentUser, self.service)
        return await processor.processSharepointFiles(sourcePath, targetPath)

    # Cleanup attributes
    def cleanAttributes(self, fileId: str) -> bool:
        """Delete the stored neutralization attributes of *fileId*.

        Returns False when the service has no ``app_interface``; otherwise
        returns whatever ``deleteNeutralizationAttributes`` returns.
        """
        appInterface = self.service.app_interface
        if not appInterface:
            return False
        return appInterface.deleteNeutralizationAttributes(fileId)

    # Stats
    def getStats(self) -> Dict[str, Any]:
        """Aggregate the attributes returned by ``service._getAttributes()``.

        Returns:
            Dict with ``total_attributes``, ``unique_files`` (number of
            distinct truthy ``fileId`` values), ``pattern_counts`` (count per
            ``patternType``) and ``mandate_id``. On failure the counters are
            zeroed and an ``error`` message is added; the exception is logged,
            never raised.
        """
        try:
            allAttributes = self.service._getAttributes()
            patternCounts: Dict[str, int] = {}
            for attr in allAttributes:
                patternType = attr.patternType
                patternCounts[patternType] = patternCounts.get(patternType, 0) + 1
            uniqueFiles = {attr.fileId for attr in allAttributes if attr.fileId}
            return {
                'total_attributes': len(allAttributes),
                'unique_files': len(uniqueFiles),
                'pattern_counts': patternCounts,
                'mandate_id': self.currentUser.mandateId if self.currentUser else None,
            }
        except Exception as e:
            logger.exception(f"Error getting stats: {str(e)}")
            return {
                'total_attributes': 0,
                'unique_files': 0,
                'pattern_counts': {},
                # Same key as the success payload so consumers see one shape;
                # getattr avoids raising again inside the error handler.
                'mandate_id': getattr(self.currentUser, 'mandateId', None),
                'error': str(e),
            }
# Internal SharePoint helper, kept in its own class so the feature logic above stays tidy
class SharepointProcessor:
    """Download files from a SharePoint folder, neutralize them, upload the result.

    All public and private coroutines report failures through their return
    value (a result dictionary, ``None`` or ``False``) and log the exception
    instead of raising.
    """

    def __init__(self, currentUser: User, service: NeutralizationService):
        """Keep references to the acting user and the neutralization service."""
        self.currentUser = currentUser
        self.service = service

    @staticmethod
    def _failure(message: str, errors: Optional[List[str]] = None, **extra: Any) -> Dict[str, Any]:
        """Build the failure payload shared by all error returns of this class.

        ``errors`` defaults to ``[message]``; ``extra`` keys are appended as-is.
        """
        payload: Dict[str, Any] = {
            'success': False,
            'message': message,
            'processed_files': 0,
            'errors': [message] if errors is None else errors,
        }
        payload.update(extra)
        return payload

    async def processSharepointFiles(self, sourcePath: str, targetPath: str) -> Dict[str, Any]:
        """Neutralize every file in *sourcePath* and upload it to *targetPath*.

        Picks a Microsoft connection for the user, obtains a fresh token for it
        and hands over to ``_processSharepointFilesAsync``.

        Returns:
            Result dictionary with ``success``, ``message``,
            ``processed_files`` and ``errors`` (plus ``files`` when the
            transfer itself ran).
        """
        try:
            logger.info(f"Processing SharePoint files from {sourcePath} to {targetPath}")
            connection = await self._getSharepointConnection(sourcePath)
            if not connection:
                return self._failure(
                    'No SharePoint connection found for user',
                    errors=['No SharePoint connection found'],
                )
            from modules.security.tokenManager import TokenManager
            token = TokenManager().getFreshToken(self.service.app_interface, connection['id'])
            if not token:
                return self._failure('No SharePoint access token found')
            return await self._processSharepointFilesAsync(sourcePath, targetPath, token.tokenAccess)
        except Exception as e:
            logger.exception(f"Error processing SharePoint files: {str(e)}")
            return self._failure(f'Error processing SharePoint files: {str(e)}', errors=[str(e)])

    async def _getSharepointConnection(self, sharepointPath: Optional[str] = None):
        """Return the user's Microsoft connection record to use, or None.

        With exactly one ``authority == 'msft'`` connection that one is
        returned. With several, the one that can reach *sharepointPath* is
        preferred (see ``_matchConnectionToPath``); without a path the first
        one is used. Any error is logged and yields None.
        """
        try:
            from modules.interfaces.interfaceAppModel import UserConnection
            appInterface = self.service.app_interface
            connections = appInterface.db.getRecordset(
                UserConnection,
                recordFilter={"userId": appInterface.userId},
            )
            msftConnections = [c for c in connections if c.get('authority') == 'msft']
            if not msftConnections:
                logger.warning('No Microsoft connections found for user')
                return None
            if len(msftConnections) == 1:
                logger.info(f"Found single Microsoft connection: {msftConnections[0].get('id')}")
                return msftConnections[0]
            if sharepointPath:
                return await self._matchConnectionToPath(msftConnections, sharepointPath)
            logger.info(f"Multiple Microsoft connections found, using first one: {msftConnections[0].get('id')}")
            return msftConnections[0]
        except Exception:
            # Keep the traceback: the bare message alone hides the cause.
            logger.exception('Error getting SharePoint connection')
            return None

    async def _matchConnectionToPath(self, connections: list, sharepointPath: str):
        """Return the first connection whose token can reach *sharepointPath*.

        Falls back to ``connections[0]`` when none matches or when matching
        itself fails; returns None only for an empty list.
        """
        try:
            from urllib.parse import urlparse
            targetDomain = urlparse(sharepointPath).netloc.lower()
            logger.info(f"Looking for connection matching domain: {targetDomain}")
            from modules.security.tokenManager import TokenManager
            for connection in connections:
                try:
                    token = TokenManager().getFreshToken(self.service.app_interface, connection['id'])
                    if not token:
                        continue
                    if await self._testSharepointAccess(token.tokenAccess, sharepointPath):
                        logger.info(f"Found matching connection for domain {targetDomain}: {connection.get('id')}")
                        return connection
                except Exception:
                    # Best effort: a broken connection must not stop the
                    # search, but leave a trace for diagnosis.
                    logger.debug('Skipping connection while matching SharePoint path', exc_info=True)
                    continue
            logger.warning(f"No specific connection match found for {targetDomain}, using first available")
            return connections[0]
        except Exception:
            logger.exception('Error matching connection to path')
            return connections[0] if connections else None

    async def _testSharepointAccess(self, accessToken: str, sharepointPath: str) -> bool:
        """Return True when *accessToken* can resolve the site of *sharepointPath*."""
        try:
            return await self._testSharepointAccessAsync(accessToken, sharepointPath)
        except Exception:
            return False

    async def _testSharepointAccessAsync(self, accessToken: str, sharepointPath: str) -> bool:
        """Look up the site of *sharepointPath* with *accessToken*.

        Returns False for an unparsable path, an unknown site or any error.
        """
        try:
            from modules.services.serviceSharepoint.mainSharepoint import SharepointService
            connector = SharepointService(access_token=accessToken)
            siteUrl, _ = self._parseSharepointPath(sharepointPath)
            if not siteUrl:
                return False
            siteInfo = await connector.find_site_by_web_url(siteUrl)
            return siteInfo is not None
        except Exception:
            logger.debug('SharePoint access test failed', exc_info=True)
            return False

    async def _neutralizeAndUploadFile(
        self,
        connector: Any,
        sourceSiteId: str,
        targetSiteId: str,
        targetFolder: str,
        fileInfo: Dict[str, Any],
    ) -> Dict[str, Any]:
        """Download one file, neutralize its text and upload it as ``neutralized_<name>``.

        Returns ``{'error': ...}`` on any failure, otherwise a dict with
        ``success``, ``original_name``, ``neutralized_name`` and
        ``attributes_count``. Never raises for ordinary exceptions.
        """
        fileName = fileInfo.get('name')
        if not fileName:
            # Without a name there is nothing sensible to upload to; report
            # it here so one malformed entry cannot abort the whole batch.
            return {'error': f"Skipping file entry without a name: {fileInfo.get('id')}"}
        try:
            fileContent = await connector.download_file(sourceSiteId, fileInfo['id'])
            if not fileContent:
                # NOTE(review): an empty file is also falsy and therefore
                # reported as a download failure - confirm that is intended.
                return {'error': f"Failed to download file: {fileName}"}
            try:
                textContent = fileContent.decode('utf-8')
            except UnicodeDecodeError:
                # latin-1 maps every byte, so this fallback cannot fail.
                textContent = fileContent.decode('latin-1')
            result = self.service._neutralizeText(textContent, 'text')
            neutralizedFilename = f"neutralized_{fileName}"
            uploadResult = await connector.upload_file(
                targetSiteId,
                targetFolder,
                neutralizedFilename,
                result['neutralized_text'].encode('utf-8'),
            )
            if 'error' in uploadResult:
                return {'error': f"Failed to upload neutralized file: {neutralizedFilename} - {uploadResult['error']}"}
            return {
                'success': True,
                'original_name': fileName,
                'neutralized_name': neutralizedFilename,
                'attributes_count': len(result.get('attributes', [])),
            }
        except Exception as e:
            return {'error': f"Error processing file {fileName}: {str(e)}"}

    async def _processSharepointFilesAsync(self, sourcePath: str, targetPath: str, accessToken: str) -> Dict[str, Any]:
        """Transfer all entries of type ``'file'`` from *sourcePath* to *targetPath*.

        Files are processed concurrently. When the source folder lists nothing,
        the site's root folder is listed instead so the failure message can
        name the folders that do exist.

        Returns:
            Result dictionary; ``success`` is True when at least one file was
            processed, ``files`` lists the processed files and ``errors`` the
            per-file failures.
        """
        try:
            import asyncio
            from modules.services.serviceSharepoint.mainSharepoint import SharepointService
            connector = SharepointService(access_token=accessToken)
            sourceSite, sourceFolder = self._parseSharepointPath(sourcePath)
            targetSite, targetFolder = self._parseSharepointPath(targetPath)
            if not sourceSite or not targetSite:
                return self._failure('Invalid SharePoint path format')
            sourceSiteInfo = await connector.find_site_by_web_url(sourceSite)
            if not sourceSiteInfo:
                return self._failure(f'Source site not found: {sourceSite}')
            targetSiteInfo = await connector.find_site_by_web_url(targetSite)
            if not targetSiteInfo:
                return self._failure(f'Target site not found: {targetSite}')
            sourceSiteId = sourceSiteInfo['id']
            targetSiteId = targetSiteInfo['id']

            logger.info(f"Listing files in folder: {sourceFolder} for site: {sourceSiteId}")
            files = await connector.list_folder_contents(sourceSiteId, sourceFolder)
            if not files:
                logger.warning(f"No files found in folder '{sourceFolder}', trying root folder")
                rootEntries = await connector.list_folder_contents(sourceSiteId, '')
                if not rootEntries:
                    return self._failure(f'No files found in source folder: {sourceFolder}')
                folderNames = [entry.get('name') for entry in rootEntries if entry.get('type') == 'folder']
                logger.info(f"Available folders in root: {folderNames}")
                folderList = ", ".join(folderNames) if folderNames else "None"
                return self._failure(
                    f"Folder '{sourceFolder}' not found. Available folders in root: {folderList}",
                    errors=[f"Folder '{sourceFolder}' not found. Available folders: {folderList}"],
                    available_folders=folderNames,
                )

            # NOTE(review): every entry of type 'file' is decoded as text,
            # whatever its format - confirm binary files are not expected here.
            textFiles = [f for f in files if f.get('type') == 'file']
            outcomes = await asyncio.gather(
                *(
                    self._neutralizeAndUploadFile(connector, sourceSiteId, targetSiteId, targetFolder, fileInfo)
                    for fileInfo in textFiles
                ),
                return_exceptions=True,
            )

            processed: List[Dict[str, Any]] = []
            errors: List[str] = []
            for fileInfo, outcome in zip(textFiles, outcomes):
                fileName = fileInfo.get('name')
                # gather(return_exceptions=True) may hand back BaseException
                # instances (e.g. CancelledError), not only Exception.
                if isinstance(outcome, BaseException):
                    errors.append(f"Exception processing file {fileName}: {str(outcome)}")
                elif isinstance(outcome, dict) and 'error' in outcome:
                    errors.append(outcome['error'])
                elif isinstance(outcome, dict) and outcome.get('success'):
                    processed.append({
                        'original_name': outcome['original_name'],
                        'neutralized_name': outcome['neutralized_name'],
                        'attributes_count': outcome['attributes_count'],
                    })
                else:
                    errors.append(f"Unknown result processing file {fileName}: {outcome}")
            return {
                'success': len(processed) > 0,
                'message': f"Processed {len(processed)} files successfully",
                'processed_files': len(processed),
                'files': processed,
                'errors': errors,
            }
        except Exception as e:
            logger.exception(f"Error in async SharePoint processing: {str(e)}")
            return self._failure(f'Error in async SharePoint processing: {str(e)}', errors=[str(e)])

    def _parseSharepointPath(self, path: str) -> tuple[Optional[str], Optional[str]]:
        """Split a SharePoint URL into ``(siteUrl, folderPath)``.

        Only ``https://<host>/sites/<site>[/<folder>...]`` URLs are accepted;
        a query string is ignored. ``folderPath`` is URL-decoded, has no
        leading, trailing or duplicate slashes, and is ``''`` for the site
        root.

        Returns:
            ``(siteUrl, folderPath)``, or ``(None, None)`` when the path is
            not a valid site URL or cannot be parsed.
        """
        try:
            if not path.startswith('https://'):
                return None, None
            path = path.split('?', 1)[0]
            if '/sites/' not in path:
                return None, None
            prefix, remainder = path.split('/sites/', 1)
            # Slice instead of str.replace so only the scheme is removed.
            domain = prefix[len('https://'):]
            siteName, _, folderRaw = remainder.partition('/')
            if not domain or not siteName:
                # e.g. "https://host/sites/" or "https://sites/x"
                return None, None
            from urllib.parse import unquote
            # Dropping empty segments removes trailing and duplicate slashes.
            folderPath = unquote('/'.join(segment for segment in folderRaw.split('/') if segment))
            return f"https://{domain}/sites/{siteName}", folderPath
        except Exception:
            logger.exception(f"Error parsing SharePoint path '{path}'")
            return None, None