Refactor full workflow engine 3.0

This commit is contained in:
parent 1019cb7a65
commit 472353fea0

40 changed files with 2605 additions and 3235 deletions
app.py (49 changed lines)

@@ -4,7 +4,7 @@ os.environ["NUMEXPR_MAX_THREADS"] = "12"
from fastapi import FastAPI, HTTPException, Depends, Body, status, Response
from fastapi.middleware.cors import CORSMiddleware
from contextlib import asynccontextmanager
from zoneinfo import ZoneInfo

import logging
from logging.handlers import RotatingFileHandler

@@ -12,8 +12,7 @@ from datetime import timedelta, datetime
import pathlib

from modules.shared.configuration import APP_CONFIG
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.triggers.cron import CronTrigger
from modules.shared.eventManagement import eventManager


class DailyRotatingFileHandler(RotatingFileHandler):

@@ -202,46 +201,15 @@ instanceLabel = APP_CONFIG.get("APP_ENV_LABEL")
# Define lifespan context manager for application startup/shutdown events
@asynccontextmanager
async def lifespan(app: FastAPI):
    # Startup logic
    logger.info("Application is starting up")

    # Setup APScheduler for JIRA sync
    scheduler = AsyncIOScheduler(timezone=ZoneInfo("Europe/Zurich"))
    try:
        from modules.features.syncDelta.mainSyncDelta import perform_sync_jira_delta_group
        # Schedule sync every 20 minutes (at minutes 00, 20, 40)
        scheduler.add_job(
            perform_sync_jira_delta_group,
            CronTrigger(minute="0,20,40"),
            id="jira_delta_group_sync",
            replace_existing=True,
            coalesce=True,
            max_instances=1,
            misfire_grace_time=1800,
        )
        scheduler.start()
        logger.info("APScheduler started (jira_delta_group_sync every 20 minutes at 00, 20, 40)")

        # Run initial sync on startup (non-blocking failure)
        try:
            logger.info("Running initial JIRA sync on app startup...")
            await perform_sync_jira_delta_group()
            logger.info("Initial JIRA sync completed successfully")
        except Exception as e:
            logger.error(f"Initial JIRA sync failed: {str(e)}")
    except Exception as e:
        logger.error(f"Failed to initialize scheduler or JIRA sync: {str(e)}")

    eventManager.start()
    yield

    # Shutdown logic
    eventManager.stop()
    logger.info("Application has been shut down")
    try:
        if 'scheduler' in locals() and scheduler.running:
            scheduler.shutdown(wait=False)
            logger.info("APScheduler stopped")
    except Exception as e:
        logger.error(f"Error shutting down scheduler: {str(e)}")


# START APP
app = FastAPI(

@@ -250,7 +218,6 @@ app = FastAPI(
    lifespan=lifespan
)

# Parse CORS origins from environment variable
def get_allowed_origins():
    origins_str = APP_CONFIG.get("APP_ALLOWED_ORIGINS", "http://localhost:8080")
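Taken together, these app.py hunks move scheduler ownership out of the web app: the APScheduler wiring above is deleted, and the lifespan keeps only the shared event manager. A minimal sketch of the resulting lifespan, assuming the new app.py keeps the same logging (this block is illustration, not part of the commit):

@asynccontextmanager
async def lifespan(app: FastAPI):
    # Startup: scheduled jobs are owned by the shared event manager;
    # feature modules register their cron jobs on import.
    logger.info("Application is starting up")
    eventManager.start()
    yield
    # Shutdown
    eventManager.stop()
    logger.info("Application has been shut down")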
@@ -24,8 +24,11 @@ async def chatStart(interfaceChat, currentUser: User, userInput: UserInputReques
"""
|
||||
try:
|
||||
from modules.workflows.workflowManager import WorkflowManager
|
||||
workflowManager = WorkflowManager(interfaceChat, currentUser)
|
||||
return await workflowManager.workflowStart(userInput, workflowId, workflowMode)
|
||||
from modules.services import getInterface as getServices
|
||||
services = getServices(currentUser, None)
|
||||
workflowManager = WorkflowManager(services)
|
||||
workflow = await workflowManager.workflowStart(userInput, workflowId, workflowMode)
|
||||
return workflow
|
||||
except Exception as e:
|
||||
logger.error(f"Error starting chat: {str(e)}")
|
||||
raise
|
||||
|
|
@@ -34,7 +37,9 @@ async def chatStop(interfaceChat, currentUser: User, workflowId: str) -> ChatWor
"""Stops a running chat."""
|
||||
try:
|
||||
from modules.workflows.workflowManager import WorkflowManager
|
||||
workflowManager = WorkflowManager(interfaceChat, currentUser)
|
||||
from modules.services import getInterface as getServices
|
||||
services = getServices(currentUser, None)
|
||||
workflowManager = WorkflowManager(services)
|
||||
return await workflowManager.workflowStop(workflowId)
|
||||
except Exception as e:
|
||||
logger.error(f"Error stopping chat: {str(e)}")
|
||||
|
|
|
|||
|
|
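Both chat handlers now resolve a central services facade and hand it to WorkflowManager, rather than passing the chat interface and the user separately. A sketch of the shared pattern, assuming a handler that already has currentUser, userInput, workflowId and workflowMode in scope (illustration, not part of the commit):

from modules.services import getInterface as getServices
from modules.workflows.workflowManager import WorkflowManager

services = getServices(currentUser, None)  # None: no explicit chat interface here
workflowManager = WorkflowManager(services)
workflow = await workflowManager.workflowStart(userInput, workflowId, workflowMode)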
@@ -1,587 +1,285 @@
"""
|
||||
Data Neutralization Service
|
||||
Handles file processing for data neutralization including SharePoint integration
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import uuid
|
||||
from typing import Dict, List, Any, Optional, Tuple
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
import mimetypes
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from modules.interfaces.interfaceAppObjects import getInterface
|
||||
from modules.interfaces.interfaceAppModel import User, DataNeutraliserConfig, DataNeutralizerAttributes
|
||||
from modules.services.serviceNeutralization.neutralizer import DataAnonymizer
|
||||
from modules.shared.timezoneUtils import get_utc_timestamp
|
||||
from modules.interfaces.interfaceAppModel import User
|
||||
from modules.services.serviceNeutralization.mainNeutralization import NeutralizationService
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class NeutralizationService:
|
||||
"""Service for handling data neutralization operations"""
|
||||
|
||||
def __init__(self, current_user: User):
|
||||
"""Initialize the service with user context"""
|
||||
self.current_user = current_user
|
||||
self.app_interface = getInterface(current_user)
|
||||
|
||||
def get_config(self) -> Optional[DataNeutraliserConfig]:
|
||||
"""Get the neutralization configuration for the current user's mandate"""
|
||||
return self.app_interface.getNeutralizationConfig()
|
||||
|
||||
def save_config(self, config_data: Dict[str, Any]) -> DataNeutraliserConfig:
|
||||
"""Save or update the neutralization configuration"""
|
||||
return self.app_interface.createOrUpdateNeutralizationConfig(config_data)
|
||||
|
||||
def neutralize_text(self, text: str, file_id: Optional[str] = None) -> Dict[str, Any]:
|
||||
"""Neutralize text content and return results with attribute mappings"""
|
||||
return self.app_interface.neutralizeText(text, file_id)
|
||||
|
||||
def get_attributes(self, file_id: Optional[str] = None) -> List[DataNeutralizerAttributes]:
|
||||
"""Get neutralization attributes, optionally filtered by file ID"""
|
||||
return self.app_interface.getNeutralizationAttributes(file_id)
|
||||
|
||||
def resolve_text(self, text: str) -> str:
|
||||
"""Resolve UIDs in neutralized text back to original text"""
|
||||
return self.app_interface.resolveNeutralizedText(text)
|
||||
|
||||
async def process_sharepoint_files(self, source_path: str, target_path: str) -> Dict[str, Any]:
|
||||
"""
|
||||
Process files from SharePoint source path, neutralize them, and store in target path
|
||||
|
||||
Args:
|
||||
source_path: SharePoint path to read files from
|
||||
target_path: SharePoint path to store neutralized files
|
||||
|
||||
Returns:
|
||||
Dictionary with processing results
|
||||
"""
|
||||
|
||||
class NeutralizationPlayground:
|
||||
"""Feature/UI wrapper around NeutralizationService for playground & routes."""
|
||||
|
||||
def __init__(self, currentUser: User):
|
||||
self.currentUser = currentUser
|
||||
self.service = NeutralizationService(currentUser)
|
||||
|
||||
def processText(self, text: str) -> Dict[str, Any]:
|
||||
return self.service.processText(text)
|
||||
|
||||
def processFiles(self, fileIds: List[str]) -> Dict[str, Any]:
|
||||
results: List[Dict[str, Any]] = []
|
||||
errors: List[str] = []
|
||||
for fileId in fileIds:
|
||||
try:
|
||||
res = self.service.processFile(fileId)
|
||||
results.append({
|
||||
'file_id': fileId,
|
||||
'neutralized_file_name': res.get('neutralized_file_name'),
|
||||
'attributes_count': len(res.get('attributes', []))
|
||||
})
|
||||
except Exception as e:
|
||||
logger.error(f"Error processing file {fileId}: {str(e)}")
|
||||
errors.append(f"{fileId}: {str(e)}")
|
||||
return {
|
||||
'success': len(errors) == 0,
|
||||
'total_files': len(fileIds),
|
||||
'successful_files': len(results),
|
||||
'failed_files': len(errors),
|
||||
'results': results,
|
||||
'errors': errors,
|
||||
}
|
||||
|
||||
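A usage sketch for the new playground wrapper (illustration, not part of the commit; the file IDs are placeholders), with the result keys taken from processFiles above:

playground = NeutralizationPlayground(currentUser)
report = playground.processFiles(["file-1", "file-2"])
if not report['success']:
    logger.warning(f"{report['failed_files']} of {report['total_files']} files failed: {report['errors']}")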
    async def processSharepointFiles(self, sourcePath: str, targetPath: str) -> Dict[str, Any]:
        from modules.features.neutralizePlayground.sharepoint import SharepointProcessor
        processor = SharepointProcessor(self.currentUser, self.service)
        return await processor.processSharepointFiles(sourcePath, targetPath)

    # Cleanup attributes
    def cleanAttributes(self, fileId: str) -> bool:
        if not self.service.app_interface:
            return False
        return self.service.app_interface.deleteNeutralizationAttributes(fileId)

    # Stats
    def getStats(self) -> Dict[str, Any]:
        try:
            logger.info(f"Processing SharePoint files from {source_path} to {target_path}")

            # Get user's SharePoint connection that matches the source path
            sharepoint_connection = await self._get_sharepoint_connection(source_path)
            if not sharepoint_connection:
            allAttributes = self.service._getAttributes()
            patternCounts: Dict[str, int] = {}
            for attr in allAttributes:
                patternType = attr.patternType
                patternCounts[patternType] = patternCounts.get(patternType, 0) + 1
            uniqueFiles = set(attr.fileId for attr in allAttributes if attr.fileId)
            return {
                'total_attributes': len(allAttributes),
                'unique_files': len(uniqueFiles),
                'pattern_counts': patternCounts,
                'mandate_id': self.currentUser.mandateId if self.currentUser else None,
            }
        except Exception as e:
            logger.error(f"Error getting stats: {str(e)}")
            return {
                'total_attributes': 0,
                'unique_files': 0,
                'pattern_counts': {},
                'error': str(e),
            }


# Internal SharePoint helper module separated to keep feature logic tidy
class SharepointProcessor:
    def __init__(self, currentUser: User, service: NeutralizationService):
        self.currentUser = currentUser
        self.service = service

    async def processSharepointFiles(self, sourcePath: str, targetPath: str) -> Dict[str, Any]:
        try:
            logger.info(f"Processing SharePoint files from {sourcePath} to {targetPath}")
            connection = await self._getSharepointConnection(sourcePath)
            if not connection:
                return {
                    "success": False,
                    "message": "No SharePoint connection found for user",
                    "processed_files": 0,
                    "errors": ["No SharePoint connection found"]
                    'success': False,
                    'message': 'No SharePoint connection found for user',
                    'processed_files': 0,
                    'errors': ['No SharePoint connection found'],
                }

            logger.info(f"Using SharePoint connection: {sharepoint_connection.get('id')} for path: {source_path}")

            # Get SharePoint access token
            sharepoint_token = self.app_interface.getConnectionToken(sharepoint_connection["id"])
            if not sharepoint_token:
            from modules.security.tokenManager import TokenManager
            token = TokenManager().getFreshToken(self.service.app_interface, connection['id'])
            if not token:
                return {
                    "success": False,
                    "message": "No SharePoint access token found",
                    "processed_files": 0,
                    "errors": ["No SharePoint access token found"]
                    'success': False,
                    'message': 'No SharePoint access token found',
                    'processed_files': 0,
                    'errors': ['No SharePoint access token found'],
                }

            # Process files asynchronously
            return await self._process_sharepoint_files_async(
                source_path, target_path, sharepoint_token.tokenAccess
            )

            return await self._processSharepointFilesAsync(sourcePath, targetPath, token.tokenAccess)
        except Exception as e:
            logger.error(f"Error processing SharePoint files: {str(e)}")
            return {
                "success": False,
                "message": f"Error processing SharePoint files: {str(e)}",
                "processed_files": 0,
                "errors": [str(e)]
                'success': False,
                'message': f'Error processing SharePoint files: {str(e)}',
                'processed_files': 0,
                'errors': [str(e)],
            }

    async def _get_sharepoint_connection(self, sharepoint_path: str = None):
        """Get user's SharePoint connection that matches the given path"""

    async def _getSharepointConnection(self, sharepointPath: str = None):
        try:
            # Get all user connections
            from modules.interfaces.interfaceAppModel import UserConnection
            connections = self.app_interface.db.getRecordset(
            connections = self.service.app_interface.db.getRecordset(
                UserConnection,
                recordFilter={"userId": self.app_interface.userId}
                recordFilter={"userId": self.service.app_interface.userId}
            )

            # Find all Microsoft connections
            msft_connections = [conn for conn in connections if conn.get("authority") == "msft"]

            if not msft_connections:
                logger.warning("No Microsoft connections found for user")
            msftConnections = [c for c in connections if c.get('authority') == 'msft']
            if not msftConnections:
                logger.warning('No Microsoft connections found for user')
                return None

            if len(msft_connections) == 1:
                logger.info(f"Found single Microsoft connection: {msft_connections[0].get('id')}")
                return msft_connections[0]

            # If multiple connections and we have a path, try to match
            if sharepoint_path:
                return await self._match_connection_to_path(msft_connections, sharepoint_path)

            # If no path provided, return the first one
            logger.info(f"Multiple Microsoft connections found, using first one: {msft_connections[0].get('id')}")
            return msft_connections[0]

        except Exception as e:
            logger.error(f"Error getting SharePoint connection: {str(e)}")
            if len(msftConnections) == 1:
                logger.info(f"Found single Microsoft connection: {msftConnections[0].get('id')}")
                return msftConnections[0]
            if sharepointPath:
                return await self._matchConnectionToPath(msftConnections, sharepointPath)
            logger.info(f"Multiple Microsoft connections found, using first one: {msftConnections[0].get('id')}")
            return msftConnections[0]
        except Exception:
            logger.error('Error getting SharePoint connection')
            return None
    async def _match_connection_to_path(self, connections: list, sharepoint_path: str):
        """Match a connection to the SharePoint path by testing access"""

    async def _matchConnectionToPath(self, connections: list, sharepointPath: str):
        try:
            # Extract domain from the path
            from urllib.parse import urlparse
            parsed_url = urlparse(sharepoint_path)
            target_domain = parsed_url.netloc.lower()

            logger.info(f"Looking for connection matching domain: {target_domain}")

            # Try each connection to see which one can access the site
            targetDomain = urlparse(sharepointPath).netloc.lower()
            logger.info(f"Looking for connection matching domain: {targetDomain}")
            from modules.security.tokenManager import TokenManager
            for connection in connections:
                try:
                    # Get token for this connection
                    token = self.app_interface.getConnectionToken(connection["id"])
                    token = TokenManager().getFreshToken(self.service.app_interface, connection['id'])
                    if not token:
                        continue

                    # Test if this connection can access the SharePoint site
                    if await self._test_sharepoint_access(token.tokenAccess, sharepoint_path):
                        logger.info(f"Found matching connection for domain {target_domain}: {connection.get('id')}")
                    if await self._testSharepointAccess(token.tokenAccess, sharepointPath):
                        logger.info(f"Found matching connection for domain {targetDomain}: {connection.get('id')}")
                        return connection

                except Exception as e:
                except Exception:
                    continue

            # If no specific match found, return the first connection
            logger.warning(f"No specific connection match found for {target_domain}, using first available")
            logger.warning(f"No specific connection match found for {targetDomain}, using first available")
            return connections[0]

        except Exception as e:
            logger.error(f"Error matching connection to path: {str(e)}")
        except Exception:
            logger.error('Error matching connection to path')
            return connections[0] if connections else None

    async def _test_sharepoint_access(self, access_token: str, sharepoint_path: str) -> bool:
        """Test if the access token can access the given SharePoint path"""

    async def _testSharepointAccess(self, accessToken: str, sharepointPath: str) -> bool:
        try:
            return await self._test_sharepoint_access_async(access_token, sharepoint_path)
        except Exception as e:
            return await self._testSharepointAccessAsync(accessToken, sharepointPath)
        except Exception:
            return False

    async def _test_sharepoint_access_async(self, access_token: str, sharepoint_path: str) -> bool:
        """Async test for SharePoint access"""

    async def _testSharepointAccessAsync(self, accessToken: str, sharepointPath: str) -> bool:
        try:
            from modules.connectors.connectorSharepoint import ConnectorSharepoint

            connector = ConnectorSharepoint(access_token=access_token)

            # Parse the path to get site URL
            site_url, _ = self._parse_sharepoint_path(sharepoint_path)
            if not site_url:
            from modules.services.serviceSharepoint.mainSharepoint import SharepointService
            connector = SharepointService(access_token=accessToken)
            siteUrl, _ = self._parseSharepointPath(sharepointPath)
            if not siteUrl:
                return False

            # Try to find the site
            site_info = await connector.find_site_by_web_url(site_url)
            return site_info is not None

        except Exception as e:
            siteInfo = await connector.find_site_by_web_url(siteUrl)
            return siteInfo is not None
        except Exception:
            return False

    async def _process_sharepoint_files_async(self, source_path: str, target_path: str, access_token: str) -> Dict[str, Any]:
        """Process SharePoint files asynchronously"""

    async def _processSharepointFilesAsync(self, sourcePath: str, targetPath: str, accessToken: str) -> Dict[str, Any]:
        try:
            import asyncio
            from modules.connectors.connectorSharepoint import ConnectorSharepoint

            # Initialize SharePoint connector
            connector = ConnectorSharepoint(access_token=access_token)

            # Parse source and target paths to extract site and folder info
            source_site, source_folder = self._parse_sharepoint_path(source_path)
            target_site, target_folder = self._parse_sharepoint_path(target_path)

            if not source_site or not target_site:
                return {
                    "success": False,
                    "message": "Invalid SharePoint path format",
                    "processed_files": 0,
                    "errors": ["Invalid SharePoint path format"]
                }

            # Find source site
            source_site_info = await connector.find_site_by_web_url(source_site)
            if not source_site_info:
                return {
                    "success": False,
                    "message": f"Source site not found: {source_site}",
                    "processed_files": 0,
                    "errors": [f"Source site not found: {source_site}"]
                }

            # Find target site
            target_site_info = await connector.find_site_by_web_url(target_site)
            if not target_site_info:
                return {
                    "success": False,
                    "message": f"Target site not found: {target_site}",
                    "processed_files": 0,
                    "errors": [f"Target site not found: {target_site}"]
                }

            # List files in source folder
            logger.info(f"Listing files in folder: {source_folder} for site: {source_site_info['id']}")
            files = await connector.list_folder_contents(source_site_info["id"], source_folder)

            # If no files found, try listing the root folder to see what's available
            from modules.services.serviceSharepoint.mainSharepoint import SharepointService
            connector = SharepointService(access_token=accessToken)
            sourceSite, sourceFolder = self._parseSharepointPath(sourcePath)
            targetSite, targetFolder = self._parseSharepointPath(targetPath)
            if not sourceSite or not targetSite:
                return {'success': False, 'message': 'Invalid SharePoint path format', 'processed_files': 0, 'errors': ['Invalid SharePoint path format']}
            sourceSiteInfo = await connector.find_site_by_web_url(sourceSite)
            if not sourceSiteInfo:
                return {'success': False, 'message': f'Source site not found: {sourceSite}', 'processed_files': 0, 'errors': [f'Source site not found: {sourceSite}']}
            targetSiteInfo = await connector.find_site_by_web_url(targetSite)
            if not targetSiteInfo:
                return {'success': False, 'message': f'Target site not found: {targetSite}', 'processed_files': 0, 'errors': [f'Target site not found: {targetSite}']}
            logger.info(f"Listing files in folder: {sourceFolder} for site: {sourceSiteInfo['id']}")
            files = await connector.list_folder_contents(sourceSiteInfo['id'], sourceFolder)
            if not files:
                logger.warning(f"No files found in folder '{source_folder}', trying root folder")
                files = await connector.list_folder_contents(source_site_info["id"], "")

                logger.warning(f"No files found in folder '{sourceFolder}', trying root folder")
                files = await connector.list_folder_contents(sourceSiteInfo['id'], '')
                if files:
                    # List available folders for debugging
                    folders = [f for f in files if f.get("type") == "folder"]
                    folder_names = [f.get('name') for f in folders]
                    logger.info(f"Available folders in root: {folder_names}")

                    # Format folder list for better UI display
                    folder_list = ", ".join(folder_names) if folder_names else "None"

                    folders = [f for f in files if f.get('type') == 'folder']
                    folderNames = [f.get('name') for f in folders]
                    logger.info(f"Available folders in root: {folderNames}")
                    folderList = ", ".join(folderNames) if folderNames else "None"
                    return {
                        "success": False,
                        "message": f"Folder '{source_folder}' not found. Available folders in root: {folder_list}",
                        "processed_files": 0,
                        "errors": [f"Folder '{source_folder}' not found. Available folders: {folder_list}"],
                        "available_folders": folder_names
                        'success': False,
                        'message': f"Folder '{sourceFolder}' not found. Available folders in root: {folderList}",
                        'processed_files': 0,
                        'errors': [f"Folder '{sourceFolder}' not found. Available folders: {folderList}"],
                        'available_folders': folderNames,
                    }
                else:
                    return {
                        "success": False,
                        "message": f"No files found in source folder: {source_folder}",
                        "processed_files": 0,
                        "errors": [f"No files found in source folder: {source_folder}"]
                    }

            # Filter for text files only
            text_files = [f for f in files if f.get("type") == "file" and self._is_text_file(f.get("name", ""))]

            if not text_files:
                return {
                    "success": False,
                    "message": "No text files found in source folder",
                    "processed_files": 0,
                    "errors": ["No text files found in source folder"]
                }

            # Process files in parallel for better performance
            processed_files = []
            errors = []

            # Create tasks for parallel processing
            async def process_single_file(file_info):
                """Process a single file - download, neutralize, upload"""
                    return {'success': False, 'message': f'No files found in source folder: {sourceFolder}', 'processed_files': 0, 'errors': [f'No files found in source folder: {sourceFolder}']}

            textFiles = [f for f in files if f.get('type') == 'file']
            processed: List[Dict[str, Any]] = []
            errors: List[str] = []

            async def _processSingle(fileInfo: Dict[str, Any]):
                try:
                    # Download file
                    file_content = await connector.download_file(source_site_info["id"], file_info["id"])
                    if not file_content:
                        return {"error": f"Failed to download file: {file_info['name']}"}

                    # Convert to text
                    fileContent = await connector.download_file(sourceSiteInfo['id'], fileInfo['id'])
                    if not fileContent:
                        return {'error': f"Failed to download file: {fileInfo['name']}"}
                    try:
                        text_content = file_content.decode('utf-8')
                        textContent = fileContent.decode('utf-8')
                    except UnicodeDecodeError:
                        text_content = file_content.decode('latin-1')

                    # Neutralize the text
                    neutralization_result = self.app_interface.neutralizeText(text_content, file_info["id"])

                    # Create neutralized filename
                    neutralized_filename = f"neutralized_{file_info['name']}"

                    # Upload neutralized file
                    neutralized_content = neutralization_result["neutralized_text"].encode('utf-8')
                    upload_result = await connector.upload_file(
                        target_site_info["id"],
                        target_folder,
                        neutralized_filename,
                        neutralized_content
                    )

                    if "error" in upload_result:
                        return {"error": f"Failed to upload neutralized file: {neutralized_filename} - {upload_result['error']}"}
                    else:
                        return {
                            "success": True,
                            "original_name": file_info["name"],
                            "neutralized_name": neutralized_filename,
                            "attributes_count": len(neutralization_result.get("attributes", []))
                        }

                        textContent = fileContent.decode('latin-1')
                    result = self.service._neutralizeText(textContent, 'text')
                    neutralizedFilename = f"neutralized_{fileInfo['name']}"
                    uploadResult = await connector.upload_file(targetSiteInfo['id'], targetFolder, neutralizedFilename, result['neutralized_text'].encode('utf-8'))
                    if 'error' in uploadResult:
                        return {'error': f"Failed to upload neutralized file: {neutralizedFilename} - {uploadResult['error']}"}
                    return {
                        'success': True,
                        'original_name': fileInfo['name'],
                        'neutralized_name': neutralizedFilename,
                        'attributes_count': len(result.get('attributes', [])),
                    }
                except Exception as e:
                    error_msg = f"Error processing file {file_info['name']}: {str(e)}"
                    logger.error(error_msg)
                    return {"error": error_msg}

            # Process all files in parallel
            logger.info(f"Processing {len(text_files)} files in parallel...")
            tasks = [process_single_file(file_info) for file_info in text_files]
                    return {'error': f"Error processing file {fileInfo['name']}: {str(e)}"}

            tasks = [_processSingle(f) for f in textFiles]
            results = await asyncio.gather(*tasks, return_exceptions=True)

            # Process results
            for i, result in enumerate(results):
                if isinstance(result, Exception):
                    error_msg = f"Exception processing file {text_files[i]['name']}: {str(result)}"
                    errors.append(error_msg)
                    logger.error(error_msg)
                elif isinstance(result, dict) and "error" in result:
                    errors.append(result["error"])
                elif isinstance(result, dict) and result.get("success"):
                    processed_files.append({
                        "original_name": result["original_name"],
                        "neutralized_name": result["neutralized_name"],
                        "attributes_count": result["attributes_count"]
            for i, r in enumerate(results):
                if isinstance(r, Exception):
                    errors.append(f"Exception processing file {textFiles[i]['name']}: {str(r)}")
                elif isinstance(r, dict) and 'error' in r:
                    errors.append(r['error'])
                elif isinstance(r, dict) and r.get('success'):
                    processed.append({
                        'original_name': r['original_name'],
                        'neutralized_name': r['neutralized_name'],
                        'attributes_count': r['attributes_count'],
                    })
                    logger.info(f"Successfully processed file: {result['original_name']} -> {result['neutralized_name']}")
                else:
                    error_msg = f"Unknown result processing file {text_files[i]['name']}: {result}"
                    errors.append(error_msg)
                    logger.error(error_msg)

                    errors.append(f"Unknown result processing file {textFiles[i]['name']}: {r}")
            return {
                "success": len(processed_files) > 0,
                "message": f"Processed {len(processed_files)} files successfully",
                "processed_files": len(processed_files),
                "files": processed_files,
                "errors": errors
                'success': len(processed) > 0,
                'message': f"Processed {len(processed)} files successfully",
                'processed_files': len(processed),
                'files': processed,
                'errors': errors,
            }

        except Exception as e:
            logger.error(f"Error in async SharePoint processing: {str(e)}")
            return {
                "success": False,
                "message": f"Error in async SharePoint processing: {str(e)}",
                "processed_files": 0,
                "errors": [str(e)]
            }
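The per-file fan-out above relies on asyncio.gather with return_exceptions=True, so a single failing download cannot abort the batch; exceptions come back as ordinary results and are sorted into the error list afterwards. A self-contained sketch of the same pattern (illustration, not part of the commit):

import asyncio

async def _work(item: str) -> dict:
    # Stand-in for download -> neutralize -> upload of one file.
    if item == "bad":
        raise RuntimeError("download failed")
    return {"success": True, "name": item}

async def main():
    items = ["a", "bad", "b"]
    results = await asyncio.gather(*[_work(i) for i in items], return_exceptions=True)
    errors = [f"{items[i]}: {r}" for i, r in enumerate(results) if isinstance(r, Exception)]
    processed = [r for r in results if isinstance(r, dict) and r.get("success")]
    print(processed, errors)

asyncio.run(main())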
    def _parse_sharepoint_path(self, path: str) -> tuple[str, str]:
        """Parse SharePoint path to extract site URL and folder path"""
            return {'success': False, 'message': f'Error in async SharePoint processing: {str(e)}', 'processed_files': 0, 'errors': [str(e)]}

    def _parseSharepointPath(self, path: str) -> tuple[str, str]:
        try:
            # Expected format: https://domain.sharepoint.com/sites/sitename/folder/path
            if not path.startswith("https://"):
            if not path.startswith('https://'):
                return None, None

            # Remove query parameters
            if "?" in path:
                path = path.split("?")[0]

            # Split by /sites/
            if "/sites/" not in path:
            if '?' in path:
                path = path.split('?')[0]
            if '/sites/' not in path:
                return None, None

            parts = path.split("/sites/", 1)
            parts = path.split('/sites/', 1)
            if len(parts) != 2:
                return None, None

            # Extract domain and site name
            domain = parts[0].replace("https://", "")
            site_name = parts[1].split("/")[0]

            # Create proper site URL for Graph API
            site_url = f"https://{domain}/sites/{site_name}"

            # Extract folder path (everything after the site name)
            folder_parts = parts[1].split("/")[1:]
            folder_path = "/".join(folder_parts) if folder_parts else ""

            # URL decode the folder path
            domain = parts[0].replace('https://', '')
            siteName = parts[1].split('/')[0]
            siteUrl = f"https://{domain}/sites/{siteName}"
            folderParts = parts[1].split('/')[1:]
            from urllib.parse import unquote
            folder_path = unquote(folder_path)

            return site_url, folder_path

        except Exception as e:
            logger.error(f"Error parsing SharePoint path '{path}': {str(e)}")
            folderPath = unquote('/'.join(folderParts) if folderParts else '')
            return siteUrl, folderPath
        except Exception:
            logger.error(f"Error parsing SharePoint path '{path}'")
            return None, None
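A worked example of the parser above (contoso is a placeholder tenant, not from this commit):

# _parseSharepointPath("https://contoso.sharepoint.com/sites/Finance/Shared%20Documents/Reports?web=1")
# 1. strip the query string     -> https://contoso.sharepoint.com/sites/Finance/Shared%20Documents/Reports
# 2. split on "/sites/"         -> site URL  https://contoso.sharepoint.com/sites/Finance
# 3. join and unquote the rest  -> folder path "Shared Documents/Reports"
# returns ("https://contoso.sharepoint.com/sites/Finance", "Shared Documents/Reports")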
    def _is_text_file(self, filename: str) -> bool:
        """Check if file is a text file based on extension"""
        text_extensions = [
            '.txt', '.csv', '.json', '.xml', '.md', '.log',
            '.doc', '.docx', '.rtf', '.odt',  # Document formats
            '.html', '.htm', '.css', '.js', '.ts', '.py', '.java', '.cpp', '.c', '.h',  # Code files
            '.ini', '.cfg', '.conf', '.properties',  # Config files
            '.sql', '.yaml', '.yml', '.toml',  # Data/config files
            '.ps1', '.bat', '.sh', '.bash'  # Script files
        ]
        return any(filename.lower().endswith(ext) for ext in text_extensions)

    def process_file_content(self, file_content: bytes, file_name: str, mime_type: str) -> Dict[str, Any]:
        """
        Process file content for neutralization

        Args:
            file_content: Binary file content
            file_name: Name of the file
            mime_type: MIME type of the file

        Returns:
            Dictionary with neutralization results
        """
        try:
            # Determine content type based on MIME type
            content_type = self._get_content_type_from_mime(mime_type)

            # Decode content to text
            try:
                text_content = file_content.decode('utf-8')
            except UnicodeDecodeError:
                # Try with different encodings
                for encoding in ['latin-1', 'cp1252', 'iso-8859-1']:
                    try:
                        text_content = file_content.decode(encoding)
                        break
                    except UnicodeDecodeError:
                        continue
                else:
                    raise ValueError("Unable to decode file content")

            # Generate a temporary file ID for tracking
            temp_file_id = str(uuid.uuid4())

            # Neutralize the content
            neutralization_result = self.neutralize_text(text_content, temp_file_id)

            # Encode the neutralized content back to bytes
            neutralized_content = neutralization_result["neutralized_text"].encode('utf-8')

            # Generate neutralized file name
            neutralized_file_name = f"neutralized_{file_name}"

            return {
                "success": True,
                "original_content": text_content,
                "neutralized_content": neutralization_result["neutralized_text"],
                "neutralized_file_name": neutralized_file_name,
                "attributes": neutralization_result["attributes"],
                "mapping": neutralization_result["mapping"],
                "file_id": temp_file_id
            }

        except Exception as e:
            logger.error(f"Error processing file content: {str(e)}")
            return {
                "success": False,
                "error": str(e),
                "original_content": None,
                "neutralized_content": None
            }

    def _get_content_type_from_mime(self, mime_type: str) -> str:
        """Determine content type from MIME type for neutralization processing"""
        if mime_type.startswith('text/'):
            return 'text'
        elif mime_type in ['application/json', 'application/xml', 'text/xml']:
            return 'json' if 'json' in mime_type else 'xml'
        elif mime_type in ['text/csv', 'application/csv']:
            return 'csv'
        else:
            return 'text'  # Default to text processing

    def batch_neutralize_files(self, files_data: List[Dict[str, Any]]) -> Dict[str, Any]:
        """
        Process multiple files for neutralization

        Args:
            files_data: List of dictionaries containing file information
                        Each dict should have: content, name, mime_type

        Returns:
            Dictionary with batch processing results
        """
        try:
            results = []
            total_files = len(files_data)
            successful_files = 0
            errors = []

            for file_data in files_data:
                try:
                    result = self.process_file_content(
                        file_data['content'],
                        file_data['name'],
                        file_data['mime_type']
                    )

                    if result['success']:
                        successful_files += 1
                        results.append({
                            'file_name': file_data['name'],
                            'neutralized_file_name': result['neutralized_file_name'],
                            'file_id': result['file_id'],
                            'attributes_count': len(result['attributes'])
                        })
                    else:
                        errors.append(f"Failed to process {file_data['name']}: {result['error']}")

                except Exception as e:
                    error_msg = f"Error processing {file_data['name']}: {str(e)}"
                    errors.append(error_msg)
                    logger.error(error_msg)

            return {
                "success": len(errors) == 0,
                "total_files": total_files,
                "successful_files": successful_files,
                "failed_files": len(errors),
                "results": results,
                "errors": errors
            }

        except Exception as e:
            logger.error(f"Error in batch neutralization: {str(e)}")
            return {
                "success": False,
                "total_files": len(files_data),
                "successful_files": 0,
                "failed_files": len(files_data),
                "results": [],
                "errors": [str(e)]
            }

    def cleanup_file_attributes(self, file_id: str) -> bool:
        """Clean up neutralization attributes for a specific file"""
        return self.app_interface.deleteNeutralizationAttributes(file_id)

    def get_processing_stats(self) -> Dict[str, Any]:
        """Get statistics about neutralization processing"""
        try:
            # Get all attributes for the current mandate
            all_attributes = self.get_attributes()

            # Group by pattern type
            pattern_counts = {}
            for attr in all_attributes:
                pattern_type = attr.patternType
                pattern_counts[pattern_type] = pattern_counts.get(pattern_type, 0) + 1

            # Get unique files
            unique_files = set(attr.fileId for attr in all_attributes if attr.fileId)

            return {
                "total_attributes": len(all_attributes),
                "unique_files": len(unique_files),
                "pattern_counts": pattern_counts,
                "mandate_id": self.current_user.mandateId
            }

        except Exception as e:
            logger.error(f"Error getting processing stats: {str(e)}")
            return {
                "total_attributes": 0,
                "unique_files": 0,
                "pattern_counts": {},
                "error": str(e)
            }
@@ -11,7 +11,7 @@ import csv
import io
from datetime import datetime, UTC
from typing import Dict, Any, List, Optional
from modules.connectors.connectorSharepoint import ConnectorSharepoint
from modules.services.serviceSharepoint.mainSharepoint import SharepointService
from modules.connectors.connectorTicketJira import ConnectorTicketJira
from modules.interfaces.interfaceAppObjects import getRootInterface
from modules.interfaces.interfaceAppModel import UserInDB
@@ -232,6 +232,10 @@ class ManagerSyncDelta:
        self.jira_connector = None
        self.sharepoint_connector = None
        self.target_site = None
        # Initialize centralized services with root user
        from modules.services import getInterface as getServices
        root_user = self.root_interface.getUserByUsername("admin")
        self.services = getServices(root_user, None)

    def get_sync_file_name(self) -> str:
        """Get the appropriate sync file name based on the sync mode."""
@@ -294,8 +298,9 @@ class ManagerSyncDelta:

        logger.info(f"Found SharePoint connection: {sharepoint_connection.id}")

        # Get SharePoint token for this connection
        sharepoint_token = self.root_interface.getConnectionToken(sharepoint_connection.id)
        # Get fresh SharePoint token for this connection
        from modules.security.tokenManager import TokenManager
        sharepoint_token = TokenManager().getFreshToken(self.root_interface, sharepoint_connection.id)
        if not sharepoint_token:
            logger.error("No SharePoint token found for Delta Group user connection")
            return False
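The refactor consistently swaps stored-token lookups (getConnectionToken) for TokenManager().getFreshToken(interface, connectionId). TokenManager's internals are not part of this diff; a purely hypothetical sketch of the refresh-if-stale contract it appears to fulfil (expiresAt and refreshConnectionToken are assumed names, not confirmed by the commit):

from datetime import datetime, timezone

class TokenManager:
    """Hypothetical illustration only; the real modules.security.tokenManager is not shown here."""

    def getFreshToken(self, interface, connectionId, skewSeconds: int = 60):
        token = interface.getConnectionToken(connectionId)
        if token is None:
            return None
        # Assumed attribute: token.expiresAt as a timezone-aware datetime.
        now = datetime.now(timezone.utc)
        if token.expiresAt is not None and (token.expiresAt - now).total_seconds() < skewSeconds:
            token = interface.refreshConnectionToken(connectionId)  # assumed refresh hook
        return token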
@@ -303,7 +308,7 @@ class ManagerSyncDelta:
        logger.info(f"Found SharePoint token: {sharepoint_token.id}")

        # Initialize SharePoint connector with Graph API
        self.sharepoint_connector = ConnectorSharepoint(access_token=sharepoint_token.tokenAccess)
        self.sharepoint_connector = SharepointService(access_token=sharepoint_token.tokenAccess)

        # Resolve the site by hostname + site path to get the real site ID
        logger.info(
@@ -552,3 +557,21 @@ async def perform_sync_jira_delta_group() -> bool:
    except Exception as e:
        logger.error(f"Error in perform_sync_jira_delta_group: {str(e)}")
        return False

# Register scheduled job on import using the shared event manager
try:
    from modules.shared.eventManagement import eventManager

    # Schedule sync every 20 minutes (at minutes 00, 20, 40)
    eventManager.register_cron(
        job_id="jira_delta_group_sync",
        func=perform_sync_jira_delta_group,
        cron_kwargs={"minute": "0,20,40"},
        replace_existing=True,
        coalesce=True,
        max_instances=1,
        misfire_grace_time=1800,
    )
    logger.info("Registered jira_delta_group_sync via EventManagement (every 20 minutes)")
except Exception as e:
    logger.error(f"Failed to register jira_delta_group_sync: {str(e)}")
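register_cron's keyword set (replace_existing, coalesce, max_instances, misfire_grace_time) mirrors APScheduler's add_job, which suggests EventManagement is a thin wrapper around an AsyncIOScheduler. A minimal sketch under that assumption (the real modules.shared.eventManagement is not shown in this diff):

from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.triggers.cron import CronTrigger

class EventManagement:
    """Hypothetical sketch of modules.shared.eventManagement; not the actual implementation."""

    def __init__(self):
        self._scheduler = AsyncIOScheduler()

    def register_cron(self, job_id, func, cron_kwargs, **job_kwargs):
        # job_kwargs carries replace_existing, coalesce, max_instances, misfire_grace_time,
        # all of which are regular APScheduler add_job options.
        self._scheduler.add_job(func, CronTrigger(**cron_kwargs), id=job_id, **job_kwargs)

    def start(self):
        if not self._scheduler.running:
            self._scheduler.start()

    def stop(self):
        if self._scheduler.running:
            self._scheduler.shutdown(wait=False)

eventManager = EventManagement()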
@@ -1,527 +0,0 @@
import logging
from typing import Dict, Any, List, Union, Optional
from modules.connectors.connectorAiOpenai import AiOpenai, ContextLengthExceededException
from modules.connectors.connectorAiAnthropic import AiAnthropic
from modules.services.serviceDocument.documentExtraction import DocumentExtraction
from modules.interfaces.interfaceChatModel import ChatDocument

logger = logging.getLogger(__name__)

# AI Model Registry with Performance Data
AI_MODELS = {
    "openai_gpt4o": {
        "connector": "openai",
        "max_tokens": 128000,
        "cost_per_1k_tokens": 0.03,  # Input
        "cost_per_1k_tokens_output": 0.06,  # Output
        "speed_rating": 8,  # 1-10
        "quality_rating": 9,  # 1-10
        "supports_images": True,
        "supports_documents": True,
        "context_length": 128000,
        "model_name": "gpt-4o"
    },
    "openai_gpt35": {
        "connector": "openai",
        "max_tokens": 16000,
        "cost_per_1k_tokens": 0.0015,
        "cost_per_1k_tokens_output": 0.002,
        "speed_rating": 9,
        "quality_rating": 7,
        "supports_images": False,
        "supports_documents": True,
        "context_length": 16000,
        "model_name": "gpt-3.5-turbo"
    },
    "anthropic_claude": {
        "connector": "anthropic",
        "max_tokens": 200000,
        "cost_per_1k_tokens": 0.015,
        "cost_per_1k_tokens_output": 0.075,
        "speed_rating": 7,
        "quality_rating": 10,
        "supports_images": True,
        "supports_documents": True,
        "context_length": 200000,
        "model_name": "claude-3-sonnet-20240229"
    }
}
class AiCalls:
    """Interface for AI service interactions with centralized call method"""

    def __init__(self):
        self.openaiService = AiOpenai()
        self.anthropicService = AiAnthropic()
        self.document_extractor = DocumentExtraction()

    async def callAi(
        self,
        prompt: str,
        documents: List[ChatDocument] = None,
        operation_type: str = "general",
        priority: str = "balanced",  # "speed", "quality", "cost", "balanced"
        compress_prompt: bool = True,
        compress_documents: bool = True,
        process_documents_individually: bool = False,
        max_cost: float = None,
        max_processing_time: int = None
    ) -> str:
        """
        Central AI call method with intelligent model selection and content processing.

        Args:
            prompt: The main prompt for the AI
            documents: List of documents to process
            operation_type: Kind of operation ("general", "document_analysis", "image_analysis", etc.)
            priority: Priority for model selection ("speed", "quality", "cost", "balanced")
            compress_prompt: Whether the prompt should be compressed
            compress_documents: Whether documents should be compressed
            process_documents_individually: Whether documents should be processed one by one
            max_cost: Maximum cost for the call
            max_processing_time: Maximum processing time in seconds

        Returns:
            The AI response as a string
        """
        try:
            # 1. Process documents if present
            document_content = ""
            if documents:
                document_content = await self._process_documents_for_ai(
                    documents,
                    operation_type,
                    compress_documents,
                    process_documents_individually
                )

            # 2. Select the best model based on priority and content
            selected_model = self._select_optimal_model(
                prompt,
                document_content,
                priority,
                operation_type,
                max_cost,
                max_processing_time
            )

            # 3. Optimize content for the selected model
            optimized_prompt, optimized_content = await self._optimize_content_for_model(
                prompt,
                document_content,
                selected_model,
                compress_prompt,
                compress_documents
            )

            # 4. Execute the AI call with failover
            return await self._execute_ai_call_with_failover(
                selected_model,
                optimized_prompt,
                optimized_content
            )

        except Exception as e:
            logger.error(f"Error in centralized AI call: {str(e)}")
            return f"Error: {str(e)}"
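A usage sketch for this removed entry point, matching how the legacy wrappers further down invoked it (chat_document stands in for a real ChatDocument instance; illustration only):

ai = AiCalls()
response = await ai.callAi(
    prompt="Summarize the attached contract",
    documents=[chat_document],
    operation_type="document_analysis",
    priority="cost",        # one of "speed", "quality", "cost", "balanced"
    max_cost=0.50,          # models whose estimated cost exceeds this are skipped
)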
    def _select_optimal_model(
        self,
        prompt: str,
        document_content: str,
        priority: str,
        operation_type: str,
        max_cost: float = None,
        max_processing_time: int = None
    ) -> str:
        """Selects the optimal model based on priority and content"""

        # Compute the content size
        total_content_size = len(prompt.encode('utf-8')) + len(document_content.encode('utf-8'))

        # Filter the available models
        available_models = {}
        for model_name, model_info in AI_MODELS.items():
            # Check whether the model can fit the content size
            if total_content_size > model_info["context_length"] * 0.8:  # 80% reserved for content
                continue

            # Check the cost limit
            if max_cost:
                estimated_cost = self._estimate_cost(model_info, total_content_size)
                if estimated_cost > max_cost:
                    continue

            # Check operation-type compatibility
            if operation_type == "image_analysis" and not model_info["supports_images"]:
                continue

            available_models[model_name] = model_info

        if not available_models:
            # Fall back to the smallest model
            return "openai_gpt35"

        # Select the model based on priority
        if priority == "speed":
            return max(available_models.keys(), key=lambda x: available_models[x]["speed_rating"])
        elif priority == "quality":
            return max(available_models.keys(), key=lambda x: available_models[x]["quality_rating"])
        elif priority == "cost":
            return min(available_models.keys(), key=lambda x: available_models[x]["cost_per_1k_tokens"])
        else:  # balanced
            # Weighted score: 40% quality, 30% speed, 30% cost
            def balanced_score(model_name):
                model_info = available_models[model_name]
                quality_score = model_info["quality_rating"] * 0.4
                speed_score = model_info["speed_rating"] * 0.3
                cost_score = (10 - (model_info["cost_per_1k_tokens"] * 1000)) * 0.3  # lower cost = higher score
                return quality_score + speed_score + cost_score

            return max(available_models.keys(), key=balanced_score)

    def _estimate_cost(self, model_info: Dict, content_size: int) -> float:
        """Estimates the cost of an AI call"""
        # Rough estimate: 1 token ≈ 4 characters
        estimated_tokens = content_size / 4
        input_cost = (estimated_tokens / 1000) * model_info["cost_per_1k_tokens"]
        output_cost = (estimated_tokens / 1000) * model_info["cost_per_1k_tokens_output"] * 0.1  # assume output is ~10% of input
        return input_cost + output_cost
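A worked example of this estimate: a 100,000-character call (prompt plus documents) is taken as 100,000 / 4 = 25,000 tokens; on openai_gpt4o that gives an input cost of 25 x $0.03 = $0.75 plus the assumed 10% output share of 25 x $0.06 x 0.1 = $0.15, so roughly $0.90 for the call.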
    async def _process_documents_for_ai(
        self,
        documents: List[ChatDocument],
        operation_type: str,
        compress_documents: bool,
        process_individually: bool
    ) -> str:
        """Processes documents for the AI call using documentExtraction.py"""

        if not documents:
            return ""

        processed_contents = []

        for doc in documents:
            try:
                # Extract content using documentExtraction.py
                extracted = await self.document_extractor.processFileData(
                    doc.fileData,
                    doc.fileName,
                    doc.mimeType,
                    prompt=f"Extract relevant content for {operation_type}",
                    documentId=doc.id,
                    enableAI=True
                )

                # Combine all content items
                doc_content = []
                for content_item in extracted.contents:
                    if content_item.data and content_item.data.strip():
                        doc_content.append(content_item.data)

                if doc_content:
                    combined_doc_content = "\n\n".join(doc_content)

                    # Compress if requested
                    if compress_documents and len(combined_doc_content.encode('utf-8')) > 10000:  # 10 KB limit
                        combined_doc_content = await self._compress_content(
                            combined_doc_content,
                            10000,
                            "document"
                        )

                    processed_contents.append(f"Document: {doc.fileName}\n{combined_doc_content}")

            except Exception as e:
                logger.warning(f"Error processing document {doc.fileName}: {str(e)}")
                processed_contents.append(f"Document: {doc.fileName}\n[Error processing document: {str(e)}]")

        return "\n\n---\n\n".join(processed_contents)

    async def _optimize_content_for_model(
        self,
        prompt: str,
        document_content: str,
        model_name: str,
        compress_prompt: bool,
        compress_documents: bool
    ) -> tuple[str, str]:
        """Optimizes content for the selected model"""

        model_info = AI_MODELS[model_name]
        max_content_size = model_info["context_length"] * 0.7  # 70% reserved for content

        optimized_prompt = prompt
        optimized_content = document_content

        # Compress the prompt if requested
        if compress_prompt and len(prompt.encode('utf-8')) > 2000:  # 2 KB limit for the prompt
            optimized_prompt = await self._compress_content(prompt, 2000, "prompt")

        # Compress the document content if requested
        if compress_documents and document_content:
            content_size = len(document_content.encode('utf-8'))
            if content_size > max_content_size:
                optimized_content = await self._compress_content(
                    document_content,
                    int(max_content_size),
                    "document"
                )

        return optimized_prompt, optimized_content

    async def _compress_content(self, content: str, target_size: int, content_type: str) -> str:
        """Intelligently compresses content based on its type"""

        if len(content.encode('utf-8')) <= target_size:
            return content

        try:
            # Use AI for intelligent compression
            compression_prompt = f"""
            Compress the following {content_type} to at most {target_size} characters,
            but keep all important information:

            {content}

            Return only the compressed content, without additional explanations.
            """

            # Use the fastest available model for compression
            compression_model = "openai_gpt35"
            model_info = AI_MODELS[compression_model]
            connector = getattr(self, f"{model_info['connector']}Service")

            messages = [{"role": "user", "content": compression_prompt}]

            if model_info["connector"] == "openai":
                compressed = await connector.callAiBasic(messages)
            else:
                response = await connector.callAiBasic(messages)
                compressed = response["choices"][0]["message"]["content"]

            return compressed

        except Exception as e:
            logger.warning(f"AI compression failed, using truncation: {str(e)}")
            # Fallback: simple truncation
            return content[:target_size] + "... [truncated]"
    async def _execute_ai_call_with_failover(
        self,
        model_name: str,
        prompt: str,
        document_content: str
    ) -> str:
        """Executes the AI call with automatic failover"""

        try:
            model_info = AI_MODELS[model_name]
            connector = getattr(self, f"{model_info['connector']}Service")

            # Prepare the messages
            messages = []
            if document_content:
                messages.append({
                    "role": "system",
                    "content": f"Context from documents:\n{document_content}"
                })

            messages.append({
                "role": "user",
                "content": prompt
            })

            # Execute the AI call
            if model_info["connector"] == "openai":
                return await connector.callAiBasic(messages)
            else:  # anthropic
                response = await connector.callAiBasic(messages)
                return response["choices"][0]["message"]["content"]

        except ContextLengthExceededException:
            logger.warning(f"Context length exceeded for {model_name}, trying fallback")
            # Fall back to a model with a larger context
            fallback_model = self._find_fallback_model(model_name)
            if fallback_model:
                return await self._execute_ai_call_with_failover(fallback_model, prompt, document_content)
            else:
                # Last resort: compress the content further
                compressed_prompt = await self._compress_content(prompt, 1000, "prompt")
                compressed_content = await self._compress_content(document_content, 5000, "document")
                return await self._execute_ai_call_with_failover("openai_gpt35", compressed_prompt, compressed_content)

        except Exception as e:
            logger.warning(f"AI call failed with {model_name}: {e}")
            # Generic fallback
            return await self._execute_ai_call_with_failover("openai_gpt35", prompt, document_content)

    def _find_fallback_model(self, current_model: str) -> Optional[str]:
        """Finds a fallback model with a larger context"""
        current_context = AI_MODELS[current_model]["context_length"]

        # Look for a model with a larger context
        for model_name, model_info in AI_MODELS.items():
            if model_info["context_length"] > current_context:
                return model_name

        return None
    # Legacy methods

    async def callAiTextBasic(self, prompt: str, context: Optional[str] = None) -> str:
        """
        Basic text processing - now uses centralized AI call method.

        Args:
            prompt: The user prompt to process
            context: Optional system context/prompt

        Returns:
            The AI response as text
        """
        # Combine context with prompt if provided
        full_prompt = prompt
        if context:
            full_prompt = f"Context: {context}\n\nUser Request: {prompt}"

        # Use centralized AI call with speed priority for basic calls
        return await self.callAi(
            prompt=full_prompt,
            priority="speed",
            compress_prompt=True,
            compress_documents=False
        )

    async def callAiTextAdvanced(self, prompt: str, context: Optional[str] = None, _is_fallback: bool = False) -> str:
        """
        Advanced text processing - now uses centralized AI call method.

        Args:
            prompt: The user prompt to process
            context: Optional system context/prompt
            _is_fallback: Internal flag (kept for compatibility)

        Returns:
            The AI response as text
        """
        # Combine context with prompt if provided
        full_prompt = prompt
        if context:
            full_prompt = f"Context: {context}\n\nUser Request: {prompt}"

        # Use centralized AI call with quality priority for advanced calls
        return await self.callAi(
            prompt=full_prompt,
            priority="quality",
            compress_prompt=False,
            compress_documents=False
        )

    async def callAiImageBasic(self, prompt: str, imageData: Union[str, bytes], mimeType: str = None) -> str:
        """
        Basic image processing - now uses centralized AI call method.

        Args:
            prompt: The prompt for image analysis
            imageData: The image data (file path or bytes)
            mimeType: Optional MIME type of the image

        Returns:
            The AI response as text
        """
        try:
            # For image processing, use the original connector directly
            # as the centralized method doesn't handle images yet
            return await self.openaiService.callAiImage(prompt, imageData, mimeType)
        except Exception as e:
            logger.error(f"Error in OpenAI image call: {str(e)}")
            return f"Error: {str(e)}"

    async def callAiImageAdvanced(self, prompt: str, imageData: Union[str, bytes], mimeType: str = None) -> str:
        """
        Advanced image processing - now uses centralized AI call method.

        Args:
            prompt: The prompt for image analysis
            imageData: The image data (file path or bytes)
            mimeType: Optional MIME type of the image

        Returns:
            The AI response as text
        """
        try:
            # For image processing, use the original connector directly
            # as the centralized method doesn't handle images yet
            return await self.anthropicService.callAiImage(prompt, imageData, mimeType)
        except Exception as e:
            logger.error(f"Error in Anthropic image call: {str(e)}")
            return f"Error: {str(e)}"

    # Convenience methods for common use cases

    async def callAiForDocumentAnalysis(
        self,
        prompt: str,
        documents: List[ChatDocument],
        priority: str = "balanced"
    ) -> str:
        """Convenience method for document analysis"""
        return await self.callAi(
            prompt=prompt,
            documents=documents,
            operation_type="document_analysis",
            priority=priority,
            compress_documents=True,
            process_documents_individually=False
        )

    async def callAiForReportGeneration(
        self,
        prompt: str,
        documents: List[ChatDocument],
        priority: str = "quality"
    ) -> str:
        """Convenience method for report generation"""
        return await self.callAi(
            prompt=prompt,
            documents=documents,
            operation_type="report_generation",
            priority=priority,
            compress_documents=True,
            process_documents_individually=True
        )

    async def callAiForEmailComposition(
        self,
        prompt: str,
        documents: List[ChatDocument] = None,
        priority: str = "speed"
    ) -> str:
        """Convenience method for email composition"""
        return await self.callAi(
            prompt=prompt,
            documents=documents,
            operation_type="email_composition",
            priority=priority,
            compress_prompt=True,
            compress_documents=True
        )

    async def callAiForTaskPlanning(
        self,
        prompt: str,
        documents: List[ChatDocument] = None,
        priority: str = "balanced"
    ) -> str:
        """Convenience method for task planning"""
        return await self.callAi(
            prompt=prompt,
            documents=documents,
            operation_type="task_planning",
            priority=priority,
            compress_prompt=False,
            compress_documents=True
        )
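Note that the legacy wrappers above only assemble a combined prompt and delegate to callAi with a priority preset. A minimal migration sketch for a call site (hypothetical `aiService` instance and `doc_text` variable):

# Before (legacy wrapper):
# summary = await aiService.callAiTextBasic("Summarize this", context=doc_text)
# After (centralized call with equivalent settings):
summary = await aiService.callAi(
    prompt=f"Context: {doc_text}\n\nUser Request: Summarize this",
    priority="speed",
    compress_prompt=True,
    compress_documents=False,
)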
30 modules/interfaces/interfaceAiModel.py Normal file

@@ -0,0 +1,30 @@
from typing import Optional
from pydantic import BaseModel, Field


class AiCallOptions(BaseModel):
    """Options for centralized AI processing (no document extraction here)."""

    operationType: str = Field(default="general", description="Type of operation")
    priority: str = Field(default="balanced", description="speed|quality|cost|balanced")
    compressPrompt: bool = Field(default=True, description="Whether to compress the prompt")
    compressContext: bool = Field(default=True, description="Whether to compress optional context")
    maxCost: Optional[float] = Field(default=None, description="Max cost budget")
    maxProcessingTime: Optional[int] = Field(default=None, description="Max processing time in seconds")


class AiCallRequest(BaseModel):
    """Centralized AI call request payload for interface use."""

    prompt: str = Field(description="The user prompt")
    context: Optional[str] = Field(default=None, description="Optional external context (e.g., extracted docs)")
    options: AiCallOptions = Field(default_factory=AiCallOptions)


class AiCallResponse(BaseModel):
    """Standardized AI call response."""

    content: str = Field(description="AI response content")
    modelName: str = Field(description="Selected model name")
    usedTokens: Optional[int] = Field(default=None, description="Estimated used tokens")
    costEstimate: Optional[float] = Field(default=None, description="Estimated cost of the call")
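For reference, a minimal sketch of how these models compose (illustrative values; assumes the imports above):

from modules.interfaces.interfaceAiModel import AiCallOptions, AiCallRequest

options = AiCallOptions(priority="cost", compressContext=False, maxCost=0.05)
request = AiCallRequest(
    prompt="Summarize the attached meeting notes",
    context="Extracted document text goes here",
    options=options,
)
# Pydantic fills the remaining fields with their defaults:
assert request.options.operationType == "general"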
117 modules/interfaces/interfaceAiObjects.py Normal file

@@ -0,0 +1,117 @@
import logging
from typing import Dict, Any, List

from modules.connectors.connectorAiOpenai import AiOpenai
from modules.connectors.connectorAiAnthropic import AiAnthropic
from modules.interfaces.interfaceAiModel import AiCallOptions, AiCallRequest, AiCallResponse


logger = logging.getLogger(__name__)


# Local model registry (connector specification) belongs in the interface layer, not the service
aiModels: Dict[str, Dict[str, Any]] = {
    "openai_gpt4o": {
        "connector": "openai",
        "contextLength": 128000,
        "costPer1kTokens": 0.03,
        "costPer1kTokensOutput": 0.06,
        "speedRating": 8,
        "qualityRating": 9,
    },
    "openai_gpt35": {
        "connector": "openai",
        "contextLength": 16000,
        "costPer1kTokens": 0.0015,
        "costPer1kTokensOutput": 0.002,
        "speedRating": 9,
        "qualityRating": 7,
    },
    "anthropic_claude": {
        "connector": "anthropic",
        "contextLength": 200000,
        "costPer1kTokens": 0.015,
        "costPer1kTokensOutput": 0.075,
        "speedRating": 7,
        "qualityRating": 10,
    },
}


class AiObjects:
    """Centralized AI interface: selects model and calls connector. No document handling."""

    def __init__(self):
        self.openaiService = AiOpenai()
        self.anthropicService = AiAnthropic()

    def _estimateCost(self, modelInfo: Dict[str, Any], contentSize: int) -> float:
        estimatedTokens = contentSize / 4
        inputCost = (estimatedTokens / 1000) * modelInfo["costPer1kTokens"]
        outputCost = (estimatedTokens / 1000) * modelInfo["costPer1kTokensOutput"] * 0.1
        return inputCost + outputCost

    def _selectModel(self, prompt: str, context: str, options: AiCallOptions) -> str:
        totalSize = len(prompt.encode("utf-8")) + len(context.encode("utf-8"))
        candidates: Dict[str, Dict[str, Any]] = {}
        for name, info in aiModels.items():
            if totalSize > info["contextLength"] * 0.8:
                continue
            if options.maxCost is not None:
                if self._estimateCost(info, totalSize) > options.maxCost:
                    continue
            candidates[name] = info
        if not candidates:
            return "openai_gpt35"
        if options.priority == "speed":
            return max(candidates, key=lambda k: candidates[k]["speedRating"])
        if options.priority == "quality":
            return max(candidates, key=lambda k: candidates[k]["qualityRating"])
        if options.priority == "cost":
            return min(candidates, key=lambda k: candidates[k]["costPer1kTokens"])

        def balancedScore(name: str) -> float:
            info = candidates[name]
            return info["qualityRating"] * 0.4 + info["speedRating"] * 0.3 + (10 - info["costPer1kTokens"] * 1000) * 0.3

        return max(candidates, key=balancedScore)

    def _connectorFor(self, modelName: str):
        return self.openaiService if aiModels[modelName]["connector"] == "openai" else self.anthropicService

    async def call(self, request: AiCallRequest) -> AiCallResponse:
        prompt = request.prompt
        context = request.context or ""
        options = request.options

        # Compress optionally (prompt/context) - simple truncation fallback kept here
        def maybeTruncate(text: str, limit: int) -> str:
            data = text.encode("utf-8")
            if len(data) <= limit:
                return text
            return data[:limit].decode("utf-8", errors="ignore") + "... [truncated]"

        if options.compressPrompt and len(prompt.encode("utf-8")) > 2000:
            prompt = maybeTruncate(prompt, 2000)
        if options.compressContext and len(context.encode("utf-8")) > 70000:
            context = maybeTruncate(context, 70000)

        modelName = self._selectModel(prompt, context, options)

        messages: List[Dict[str, Any]] = []
        if context:
            messages.append({"role": "system", "content": f"Context from documents:\n{context}"})
        messages.append({"role": "user", "content": prompt})

        connector = self._connectorFor(modelName)
        if aiModels[modelName]["connector"] == "openai":
            content = await connector.callAiBasic(messages)
        else:
            response = await connector.callAiBasic(messages)
            content = response["choices"][0]["message"]["content"]

        # Estimate cost/tokens
        totalSize = len((prompt + context).encode("utf-8"))
        cost = self._estimateCost(aiModels[modelName], totalSize)
        usedTokens = int(totalSize / 4)

        return AiCallResponse(content=content, modelName=modelName, usedTokens=usedTokens, costEstimate=cost)
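For orientation, a worked example of the selection and cost math above (numbers follow the registry; the call itself is a sketch inside an async context, assuming configured connectors):

# ~8 kB of prompt+context -> ~2,000 estimated tokens (size / 4)
# openai_gpt35: (2000/1000) * 0.0015 input + (2000/1000) * 0.002 * 0.1 output = 0.0034
# openai_gpt4o: 0.06 + 0.012 = 0.072; anthropic_claude: 0.03 + 0.015 = 0.045
# With maxCost=0.01 only openai_gpt35 survives the candidate filter.

ai = AiObjects()
response = await ai.call(AiCallRequest(
    prompt="Classify this ticket",
    context=extracted_text,  # hypothetical variable holding extracted document text
    options=AiCallOptions(priority="cost", maxCost=0.01),
))
print(response.modelName, response.costEstimate)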
@@ -201,7 +201,6 @@ class AppObjects:
        """
        return self.access.canModify(model_class, recordId)

    def getInitialId(self, model_class: type) -> Optional[str]:
        """Returns the initial ID for a table."""
        return self.db.getInitialId(model_class)
@@ -268,105 +267,6 @@ class AppObjects:
            logger.error(f"Error getting user by ID: {str(e)}")
            return None

    def getUserConnections(self, userId: str) -> List[UserConnection]:
        """Returns all connections for a user."""
        try:
            # Get connections for this user
            connections = self.db.getRecordset(UserConnection, recordFilter={"userId": userId})

            # Convert to UserConnection objects
            result = []
            for conn_dict in connections:
                try:
                    # Create UserConnection object
                    connection = UserConnection(
                        id=conn_dict["id"],
                        userId=conn_dict["userId"],
                        authority=conn_dict.get("authority"),
                        externalId=conn_dict.get("externalId", ""),
                        externalUsername=conn_dict.get("externalUsername", ""),
                        externalEmail=conn_dict.get("externalEmail"),
                        status=conn_dict.get("status", "pending"),
                        connectedAt=conn_dict.get("connectedAt"),
                        lastChecked=conn_dict.get("lastChecked"),
                        expiresAt=conn_dict.get("expiresAt")
                    )
                    result.append(connection)
                except Exception as e:
                    logger.error(f"Error converting connection dict to object: {str(e)}")
                    continue
            return result

        except Exception as e:
            logger.error(f"Error getting user connections: {str(e)}")
            return []

    def addUserConnection(self, userId: str, authority: AuthAuthority, externalId: str,
                          externalUsername: str, externalEmail: Optional[str] = None,
                          status: ConnectionStatus = ConnectionStatus.PENDING) -> UserConnection:
        """
        Adds a new connection for a user.

        Args:
            userId: The ID of the user
            authority: The authentication authority (e.g., MSFT, GOOGLE)
            externalId: The external ID from the authority
            externalUsername: The username from the authority
            externalEmail: Optional email from the authority
            status: The connection status (defaults to PENDING)

        Returns:
            The created UserConnection object
        """
        try:
            # Get the user
            user = self.getUser(userId)
            if not user:
                raise ValueError(f"User not found: {userId}")

            # Create new connection with all required fields
            connection = UserConnection(
                id=str(uuid.uuid4()),
                userId=userId,
                authority=authority,
                externalId=externalId,
                externalUsername=externalUsername,
                externalEmail=externalEmail,
                status=status,
                connectedAt=get_utc_timestamp(),
                lastChecked=get_utc_timestamp(),
                expiresAt=None  # Optional field, set to None by default
            )

            # Save to connections table
            self.db.recordCreate(UserConnection, connection)

            return connection

        except Exception as e:
            logger.error(f"Error adding user connection: {str(e)}")
            raise ValueError(f"Failed to add user connection: {str(e)}")

    def removeUserConnection(self, connectionId: str) -> None:
        """Remove a connection to an external service"""
        try:
            # Get connection
            connections = self.db.getRecordset(UserConnection, recordFilter={
                "id": connectionId
            })

            if not connections:
                raise ValueError(f"Connection {connectionId} not found")

            # Delete connection
            self.db.recordDelete(UserConnection, connectionId)

        except Exception as e:
            logger.error(f"Error removing user connection: {str(e)}")
            raise ValueError(f"Failed to remove user connection: {str(e)}")

    def authenticateLocalUser(self, username: str, password: str) -> Optional[User]:
        """Authenticates a user by username and password using local authentication."""
        # Clear the users table from cache and reload it
@@ -551,6 +451,154 @@ class AppObjects:
            logger.error(f"Error deleting user: {str(e)}")
            raise ValueError(f"Failed to delete user: {str(e)}")

    def _getInitialUser(self) -> Optional[Dict[str, Any]]:
        """Get the initial user record directly from database without access control."""
        try:
            initialUserId = self.getInitialId(UserInDB)
            if not initialUserId:
                return None

            users = self.db.getRecordset(UserInDB, recordFilter={"id": initialUserId})
            return users[0] if users else None
        except Exception as e:
            logger.error(f"Error getting initial user: {str(e)}")
            return None

    def checkUsernameAvailability(self, checkData: Dict[str, Any]) -> Dict[str, Any]:
        """Checks if a username is available for registration."""
        try:
            username = checkData.get("username")
            authenticationAuthority = checkData.get("authenticationAuthority", "local")

            if not username:
                return {
                    "available": False,
                    "message": "Username is required"
                }

            # Get user by username
            user = self.getUserByUsername(username)

            # Check if user exists (User model instance)
            if user is not None:
                return {
                    "available": False,
                    "message": "Username is already taken"
                }

            return {
                "available": True,
                "message": "Username is available"
            }

        except Exception as e:
            logger.error(f"Error checking username availability: {str(e)}")
            return {
                "available": False,
                "message": f"Error checking username availability: {str(e)}"
            }

    # Connection methods

    def getUserConnections(self, userId: str) -> List[UserConnection]:
        """Returns all connections for a user."""
        try:
            # Get connections for this user
            connections = self.db.getRecordset(UserConnection, recordFilter={"userId": userId})

            # Convert to UserConnection objects
            result = []
            for conn_dict in connections:
                try:
                    # Create UserConnection object
                    connection = UserConnection(
                        id=conn_dict["id"],
                        userId=conn_dict["userId"],
                        authority=conn_dict.get("authority"),
                        externalId=conn_dict.get("externalId", ""),
                        externalUsername=conn_dict.get("externalUsername", ""),
                        externalEmail=conn_dict.get("externalEmail"),
                        status=conn_dict.get("status", "pending"),
                        connectedAt=conn_dict.get("connectedAt"),
                        lastChecked=conn_dict.get("lastChecked"),
                        expiresAt=conn_dict.get("expiresAt")
                    )
                    result.append(connection)
                except Exception as e:
                    logger.error(f"Error converting connection dict to object: {str(e)}")
                    continue
            return result

        except Exception as e:
            logger.error(f"Error getting user connections: {str(e)}")
            return []

    def addUserConnection(self, userId: str, authority: AuthAuthority, externalId: str,
                          externalUsername: str, externalEmail: Optional[str] = None,
                          status: ConnectionStatus = ConnectionStatus.PENDING) -> UserConnection:
        """
        Adds a new connection for a user.

        Args:
            userId: The ID of the user
            authority: The authentication authority (e.g., MSFT, GOOGLE)
            externalId: The external ID from the authority
            externalUsername: The username from the authority
            externalEmail: Optional email from the authority
            status: The connection status (defaults to PENDING)

        Returns:
            The created UserConnection object
        """
        try:
            # Get the user
            user = self.getUser(userId)
            if not user:
                raise ValueError(f"User not found: {userId}")

            # Create new connection with all required fields
            connection = UserConnection(
                id=str(uuid.uuid4()),
                userId=userId,
                authority=authority,
                externalId=externalId,
                externalUsername=externalUsername,
                externalEmail=externalEmail,
                status=status,
                connectedAt=get_utc_timestamp(),
                lastChecked=get_utc_timestamp(),
                expiresAt=None  # Optional field, set to None by default
            )

            # Save to connections table
            self.db.recordCreate(UserConnection, connection)

            return connection

        except Exception as e:
            logger.error(f"Error adding user connection: {str(e)}")
            raise ValueError(f"Failed to add user connection: {str(e)}")

    def removeUserConnection(self, connectionId: str) -> None:
        """Remove a connection to an external service"""
        try:
            # Get connection
            connections = self.db.getRecordset(UserConnection, recordFilter={
                "id": connectionId
            })

            if not connections:
                raise ValueError(f"Connection {connectionId} not found")

            # Delete connection
            self.db.recordDelete(UserConnection, connectionId)

        except Exception as e:
            logger.error(f"Error removing user connection: {str(e)}")
            raise ValueError(f"Failed to remove user connection: {str(e)}")

    # Mandate methods

    def getAllMandates(self) -> List[Mandate]:
@@ -650,52 +698,7 @@ class AppObjects:
            logger.error(f"Error deleting mandate: {str(e)}")
            raise ValueError(f"Failed to delete mandate: {str(e)}")

    def _getInitialUser(self) -> Optional[Dict[str, Any]]:
        """Get the initial user record directly from database without access control."""
        try:
            initialUserId = self.getInitialId(UserInDB)
            if not initialUserId:
                return None

            users = self.db.getRecordset(UserInDB, recordFilter={"id": initialUserId})
            return users[0] if users else None
        except Exception as e:
            logger.error(f"Error getting initial user: {str(e)}")
            return None

    def checkUsernameAvailability(self, checkData: Dict[str, Any]) -> Dict[str, Any]:
        """Checks if a username is available for registration."""
        try:
            username = checkData.get("username")
            authenticationAuthority = checkData.get("authenticationAuthority", "local")

            if not username:
                return {
                    "available": False,
                    "message": "Username is required"
                }

            # Get user by username
            user = self.getUserByUsername(username)

            # Check if user exists (User model instance)
            if user is not None:
                return {
                    "available": False,
                    "message": "Username is already taken"
                }

            return {
                "available": True,
                "message": "Username is available"
            }

        except Exception as e:
            logger.error(f"Error checking username availability: {str(e)}")
            return {
                "available": False,
                "message": f"Error checking username availability: {str(e)}"
            }
    # Token methods

    def saveAccessToken(self, token: Token, replace_existing: bool = True) -> None:
        """Save an access token for the current user (must NOT have connectionId)"""
@@ -803,56 +806,8 @@ class AppObjects:
            logger.error(f"Error saving connection token: {str(e)}")
            raise

    def getAccessToken(self, authority: str, auto_refresh: bool = True) -> Optional[Token]:
        """Get the latest valid access token for the current user and authority, optionally auto-refresh if expired"""
        try:
            # Validate that we're not looking for connection tokens
            if not self.currentUser or not self.currentUser.id:
                raise ValueError("No valid user context available for token retrieval")

            # Get access tokens for this user and authority (must NOT have connectionId)
            tokens = self.db.getRecordset(Token, recordFilter={
                "userId": self.currentUser.id,
                "authority": authority,
                "connectionId": None  # Ensure we only get access tokens
            })

            if not tokens:
                return None

            # Sort by creation date and get the latest
            tokens.sort(key=lambda x: x.get("createdAt", ""), reverse=True)
            latest_token = Token(**tokens[0])

            # Check if token is expired
            if latest_token.expiresAt and latest_token.expiresAt < get_utc_timestamp():
                if auto_refresh:
                    # Import TokenManager here to avoid circular imports
                    from modules.security.tokenManager import TokenManager
                    token_manager = TokenManager()

                    # Try to refresh the token
                    refreshed_token = token_manager.refresh_token(latest_token)
                    if refreshed_token:
                        # Save the new token (which will automatically replace old ones)
                        self.saveAccessToken(refreshed_token)

                        return refreshed_token
                    else:
                        logger.warning(f"Failed to refresh expired access token for {authority}")
                        return None
                else:
                    logger.warning(f"Access token for {authority} is expired (expiresAt: {latest_token.expiresAt})")
                    return None

            return latest_token

        except Exception as e:
            logger.error(f"Error getting access token: {str(e)}")
            return None

    def getConnectionToken(self, connectionId: str, auto_refresh: bool = True) -> Optional[Token]:
        """Get the connection token for a specific connectionId, optionally auto-refresh if expired"""
    def getConnectionToken(self, connectionId: str) -> Optional[Token]:
        """Get the latest stored token for a specific connectionId (no refresh)."""
        try:
            # Validate connectionId
            if not connectionId:
@@ -873,31 +828,7 @@ class AppObjects:
            tokens.sort(key=lambda x: x.get("expiresAt", 0), reverse=True)
            latest_token = Token(**tokens[0])

            # Check if token is expired or expires within 30 minutes
            current_time = get_utc_timestamp()
            thirty_minutes = 30 * 60  # 30 minutes in seconds

            if latest_token.expiresAt and latest_token.expiresAt < (current_time + thirty_minutes):
                if auto_refresh:
                    # Import TokenManager here to avoid circular imports
                    from modules.security.tokenManager import TokenManager
                    token_manager = TokenManager()

                    # Try to refresh the token
                    refreshed_token = token_manager.refresh_token(latest_token)

                    if refreshed_token:
                        # Save the new token (which will automatically replace old ones)
                        self.saveConnectionToken(refreshed_token)

                        logger.info(f"Proactively refreshed connection token for connectionId {connectionId} (expired in {latest_token.expiresAt - current_time} seconds)")
                        return refreshed_token
                    else:
                        logger.warning(f"Token refresh failed for connectionId {connectionId}")
                        return None
                else:
                    logger.warning(f"Connection token for connectionId {connectionId} expires soon (expiresAt: {latest_token.expiresAt})")
                    return None
            # No auto-refresh here. Callers should use a higher-level service to refresh when needed.

            return latest_token
@@ -905,53 +836,6 @@ class AppObjects:
            logger.error(f"Error getting connection token for connectionId {connectionId}: {str(e)}")
            return None

    def deleteAccessToken(self, authority: str) -> None:
        """Delete all access tokens for the current user and authority"""
        try:
            # Validate user context
            if not self.currentUser or not self.currentUser.id:
                raise ValueError("No valid user context available for token deletion")

            # Get access tokens to delete (must NOT have connectionId)
            tokens = self.db.getRecordset(Token, recordFilter={
                "userId": self.currentUser.id,
                "authority": authority,
                "connectionId": None  # Ensure we only delete access tokens
            })

            # Delete each token
            for token in tokens:
                self.db.recordDelete(Token, token["id"])

        except Exception as e:
            logger.error(f"Error deleting access token: {str(e)}")
            raise

    def deleteConnectionTokenByConnectionId(self, connectionId: str) -> None:
        """Delete all connection tokens for a specific connectionId"""
        try:
            # Validate connectionId
            if not connectionId:
                raise ValueError("connectionId is required for deleteConnectionTokenByConnectionId")

            # Get connection tokens to delete
            tokens = self.db.getRecordset(Token, recordFilter={
                "connectionId": connectionId
            })

            # Delete each token
            for token in tokens:
                self.db.recordDelete(Token, token["id"])

        except Exception as e:
            logger.error(f"Error deleting connection token for connectionId {connectionId}: {str(e)}")
            raise

    # =====================
    # Token revocation (LOCAL gateway JWTs)
    # =====================
    def findActiveTokenById(self, tokenId: str, userId: str, authority: AuthAuthority, sessionId: str = None, mandateId: str = None) -> Optional[Token]:
        """Find an active access token by its id (jti) with optional session/tenant scoping."""
        try:
@@ -1088,7 +972,7 @@ class AppObjects:
            logger.error(f"Error during logout: {str(e)}")
            raise

    # Data Neutralization methods
    # Neutralization methods

    def getNeutralizationConfig(self) -> Optional[DataNeutraliserConfig]:
        """Get the data neutralization configuration for the current user's mandate"""
@@ -1138,98 +1022,6 @@ class AppObjects:
            logger.error(f"Error creating/updating neutralization config: {str(e)}")
            raise ValueError(f"Failed to create/update neutralization config: {str(e)}")

    def neutralizeText(self, text: str, file_id: Optional[str] = None) -> Dict[str, Any]:
        """Neutralize text content and store attribute mappings"""
        try:
            from modules.services.serviceNeutralization.neutralizer import DataAnonymizer

            # Get neutralization configuration to extract namesToParse
            config = self.getNeutralizationConfig()
            names_to_parse = []
            if config and hasattr(config, 'namesToParse') and config.namesToParse:
                # Split by newlines and filter out empty strings
                names_to_parse = [name.strip() for name in config.namesToParse.split('\n') if name.strip()]

            # Initialize anonymizer with custom names
            anonymizer = DataAnonymizer(names_to_parse=names_to_parse)

            # Process the text
            result = anonymizer.process_content(text, 'text')

            # Store attribute mappings in database
            stored_attributes = []
            for original_text, neutralized_text in result.mapping.items():
                # Extract pattern type and UUID from the neutralized text format [type.uuid]
                pattern_type = "unknown"
                placeholder_uuid = None

                if neutralized_text.startswith("[") and "." in neutralized_text and neutralized_text.endswith("]"):
                    # Extract type and UUID from [type.uuid] format
                    inner = neutralized_text[1:-1]  # Remove [ and ]
                    if "." in inner:
                        pattern_type, placeholder_uuid = inner.split(".", 1)

                # Check if this exact original text already has a placeholder in the database
                existing_attribute = self.getExistingPlaceholder(original_text)

                if existing_attribute:
                    # Reuse existing placeholder
                    existing_uuid = existing_attribute.id
                    existing_pattern_type = existing_attribute.patternType

                    # Update the neutralized text to use the existing UUID
                    result.data = result.data.replace(neutralized_text, f"[{existing_pattern_type}.{existing_uuid}]")
                    result.mapping[original_text] = f"[{existing_pattern_type}.{existing_uuid}]"

                    stored_attributes.append(existing_attribute)
                else:
                    # Create new attribute record with the UUID that the neutralizer generated
                    attribute_data = {
                        "id": placeholder_uuid,  # Use the UUID from the neutralizer
                        "mandateId": self.mandateId,
                        "userId": self.userId,
                        "originalText": original_text,
                        "fileId": file_id,
                        "patternType": pattern_type
                    }

                    attribute = DataNeutralizerAttributes.from_dict(attribute_data)
                    created_attribute = self.db.recordCreate(DataNeutralizerAttributes, attribute)
                    stored_attributes.append(created_attribute)

            # The neutralized text is already in the correct [type.uuid] format
            # No need to replace it, as it's already properly formatted

            return {
                "neutralized_text": result.data,
                "attributes": stored_attributes,
                "mapping": result.mapping,
                "replaced_fields": result.replaced_fields,
                "processed_info": result.processed_info
            }

        except Exception as e:
            logger.error(f"Error neutralizing text: {str(e)}")
            raise ValueError(f"Failed to neutralize text: {str(e)}")

    def getExistingPlaceholder(self, original_text: str) -> Optional[DataNeutralizerAttributes]:
        """Get existing placeholder for original text if it exists"""
        try:
            existing_attributes = self.db.getRecordset(DataNeutralizerAttributes, recordFilter={
                "mandateId": self.mandateId,
                "userId": self.userId,
                "originalText": original_text
            })

            if existing_attributes:
                return DataNeutralizerAttributes.from_dict(existing_attributes[0])
            return None

        except Exception as e:
            logger.error(f"Error getting existing placeholder: {str(e)}")
            return None

    def getNeutralizationAttributes(self, file_id: Optional[str] = None) -> List[DataNeutralizerAttributes]:
        """Get neutralization attributes, optionally filtered by file ID"""
        try:
@@ -1246,35 +1038,6 @@ class AppObjects:
            logger.error(f"Error getting neutralization attributes: {str(e)}")
            return []

    def resolveNeutralizedText(self, text: str) -> str:
        """Resolve UIDs in neutralized text back to original text"""
        try:
            # Find all placeholders in the new format [type.uuid]
            placeholder_pattern = r'\[([a-z]+)\.([a-f0-9-]{36})\]'
            matches = re.findall(placeholder_pattern, text)

            resolved_text = text
            for placeholder_type, uid in matches:
                # Find the attribute with this UID (which is the record ID)
                attributes = self.db.getRecordset(DataNeutralizerAttributes, recordFilter={
                    "mandateId": self.mandateId,
                    "id": uid
                })

                if attributes:
                    attribute = attributes[0]
                    # Replace placeholder with original text
                    placeholder = f"[{placeholder_type}.{uid}]"
                    resolved_text = resolved_text.replace(placeholder, attribute["originalText"])
                else:
                    logger.warning(f"No attribute found for UID {uid}")

            return resolved_text

        except Exception as e:
            logger.error(f"Error resolving neutralized text: {str(e)}")
            return text

    def deleteNeutralizationAttributes(self, file_id: str) -> bool:
        """Delete all neutralization attributes for a specific file"""
        try:
@@ -6,7 +6,7 @@ import pandas as pd
import openpyxl
from modules.shared.timezoneUtils import get_utc_now

from modules.connectors.connectorSharepoint import ConnectorSharepoint
from modules.services.serviceSharepoint.mainSharepoint import SharepointService

from modules.interfaces.interfaceTicketModel import TicketBase, Task

@@ -14,7 +14,7 @@ from modules.interfaces.interfaceTicketModel import TicketBase, Task
@dataclass(slots=True)
class TicketSharepointSyncInterface:
    connector_ticket: TicketBase
    connector_sharepoint: ConnectorSharepoint
    connector_sharepoint: SharepointService
    task_sync_definition: dict
    sync_folder: str
    sync_file: str

@@ -26,7 +26,7 @@ class TicketSharepointSyncInterface:
    async def create(
        cls,
        connector_ticket: TicketBase,
        connector_sharepoint: ConnectorSharepoint,
        connector_sharepoint: SharepointService,
        task_sync_definition: dict,
        sync_folder: str,
        sync_file: str,

@@ -700,7 +700,7 @@ class TicketSharepointSyncInterface:
    def _transform_tasks(
        self, tasks: list[Task], include_put: bool = False
    ) -> list[Task]:
        """Transforms tasks according to the task_sync_definition."""
        transformed_tasks = []
@@ -7,7 +7,7 @@ from modules.security.auth import limiter, getCurrentUser

# Import interfaces
from modules.interfaces.interfaceAppModel import User, DataNeutraliserConfig, DataNeutralizerAttributes
from modules.features.neutralizePlayground.mainNeutralizePlayground import NeutralizationService
from modules.features.neutralization.mainNeutralizationPlayground import NeutralizationService

# Configure logger
logger = logging.getLogger(__name__)
@@ -339,7 +339,7 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse
    )

    # Create JWT token data (like Microsoft does)
    from modules.security.auth import createAccessToken
    from modules.security.jwtService import createAccessToken
    jwt_token_data = {
        "sub": user.username,
        "mandateId": str(user.mandateId),
@@ -637,29 +637,19 @@ async def verify_token(
            detail="No Google connection found for current user"
        )

    # Get the current token
    current_token = appInterface.getConnectionToken(google_connection.id, auto_refresh=False)

    # Get a fresh token via TokenManager convenience method
    from modules.security.tokenManager import TokenManager
    current_token = TokenManager().getFreshToken(appInterface, google_connection.id)

    if not current_token:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="No Google token found for this connection"
        )

    # Verify the token
    # Verify the (fresh) token
    token_verification = await verify_google_token(current_token.tokenAccess)

    if not token_verification.get("valid"):
        # Try to refresh the token if verification failed
        from modules.security.tokenManager import TokenManager
        token_manager = TokenManager()
        refreshed_token = token_manager.refresh_token(current_token)

        if refreshed_token:
            appInterface.saveConnectionToken(refreshed_token)
            # Verify the refreshed token
            token_verification = await verify_google_token(refreshed_token.tokenAccess)

    return {
        "valid": token_verification.get("valid", False),
        "scopes": token_verification.get("scopes", []),
@@ -721,8 +711,9 @@ async def refresh_token(

    logger.debug(f"Found Google connection: {google_connection.id}, status={google_connection.status}")

    # Get the token for this specific connection using the new method
    current_token = appInterface.getConnectionToken(google_connection.id, auto_refresh=False)
    # Get the token for this specific connection (fresh if expiring soon)
    from modules.security.tokenManager import TokenManager
    current_token = TokenManager().getFreshToken(appInterface, google_connection.id)

    if not current_token:
        raise HTTPException(
@@ -731,38 +722,25 @@
        )

    # If we could not obtain a fresh token, report error
    if not current_token:
        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="Failed to refresh token")

    # Always attempt refresh (as per your requirement)
    from modules.security.tokenManager import TokenManager
    token_manager = TokenManager()
    # Update the connection status and timing
    google_connection.expiresAt = float(current_token.expiresAt) if current_token.expiresAt else google_connection.expiresAt
    google_connection.lastChecked = get_utc_timestamp()
    google_connection.status = ConnectionStatus.ACTIVE
    appInterface.db.recordModify(UserConnection, google_connection.id, google_connection.to_dict())

    refreshed_token = token_manager.refresh_token(current_token)
    if refreshed_token:
        # Save the new connection token (which will automatically replace old ones)
        appInterface.saveConnectionToken(refreshed_token)

        # Update the connection's expiration time
        google_connection.expiresAt = float(refreshed_token.expiresAt)
        google_connection.lastChecked = get_utc_timestamp()
        google_connection.status = ConnectionStatus.ACTIVE

        # Save updated connection
        appInterface.db.recordModify(UserConnection, google_connection.id, google_connection.to_dict())

        # Calculate time until expiration
        current_time = get_utc_timestamp()
        expires_in = int(refreshed_token.expiresAt - current_time)

        return {
            "message": "Token refreshed successfully",
            "expires_at": refreshed_token.expiresAt,
            "expires_in_seconds": expires_in
        }
    else:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Failed to refresh token"
        )
    # Calculate time until expiration
    current_time = get_utc_timestamp()
    expires_in = int(current_token.expiresAt - current_time) if current_token.expiresAt else 0

    return {
        "message": "Token refreshed successfully",
        "expires_at": current_token.expiresAt,
        "expires_in_seconds": expires_in
    }

    except HTTPException:
        raise
@@ -13,7 +13,8 @@ from jose import jwt
from pydantic import BaseModel

# Import auth modules
from modules.security.auth import createAccessToken, createAccessTokenWithCookie, setRefreshTokenCookie, getCurrentUser, limiter, SECRET_KEY, ALGORITHM
from modules.security.auth import getCurrentUser, limiter, SECRET_KEY, ALGORITHM
from modules.security.jwtService import createAccessToken, createRefreshToken, setAccessTokenCookie, setRefreshTokenCookie
from modules.interfaces.interfaceAppObjects import getInterface, getRootInterface
from modules.interfaces.interfaceAppModel import User, UserInDB, AuthAuthority, UserPrivilege, Token
from modules.shared.attributeUtils import ModelMixin
@@ -91,11 +92,13 @@ async def login(
    session_id = str(uuid.uuid4())
    token_data["sid"] = session_id

    # Create access token with httpOnly cookie
    access_token = createAccessTokenWithCookie(token_data, response)
    # Create access token + set cookie
    access_token, _access_expires = createAccessToken(token_data)
    setAccessTokenCookie(response, access_token)

    # Create refresh token with httpOnly cookie
    refresh_token = setRefreshTokenCookie(token_data, response)
    # Create refresh token + set cookie
    refresh_token, _refresh_expires = createRefreshToken(token_data)
    setRefreshTokenCookie(response, refresh_token)

    # Get expiration time for response
    try:
@@ -287,8 +290,9 @@ async def refresh_token(
        "authenticationAuthority": currentUser.authenticationAuthority
    }

    # Create new access token with cookie
    access_token = createAccessTokenWithCookie(token_data, response)
    # Create new access token + set cookie
    access_token, _expires = createAccessToken(token_data)
    setAccessTokenCookie(response, access_token)

    # Get expiration time
    try:
@@ -14,7 +14,8 @@ import httpx
from modules.shared.configuration import APP_CONFIG
from modules.interfaces.interfaceAppObjects import getInterface, getRootInterface
from modules.interfaces.interfaceAppModel import AuthAuthority, User, Token, ConnectionStatus, UserConnection
from modules.security.auth import getCurrentUser, limiter, createAccessToken
from modules.security.auth import getCurrentUser, limiter
from modules.security.jwtService import createAccessToken
from modules.shared.attributeUtils import ModelMixin
from modules.shared.timezoneUtils import get_utc_now, create_expiration_timestamp, get_utc_timestamp
@@ -559,9 +560,9 @@ async def refresh_token(

    logger.debug(f"Found Microsoft connection: {msft_connection.id}, status={msft_connection.status}")

    # Get the token for this specific connection using the new method
    # Enable auto-refresh to handle expired tokens gracefully
    current_token = appInterface.getConnectionToken(msft_connection.id, auto_refresh=True)
    # Get a fresh token via TokenManager convenience method
    from modules.security.tokenManager import TokenManager
    current_token = TokenManager().getFreshToken(appInterface, msft_connection.id)

    if not current_token:
        raise HTTPException(
@@ -54,106 +54,7 @@ limiter = Limiter(key_func=get_remote_address)
# Logger
logger = logging.getLogger(__name__)

def createAccessToken(data: dict, expiresDelta: Optional[timedelta] = None) -> Tuple[str, datetime]:
    """
    Creates a JWT Access Token.

    Args:
        data: Data to encode (usually user ID or username)
        expiresDelta: Validity duration of the token (optional)

    Returns:
        Tuple of (JWT Token as string, expiration datetime)
    """
    toEncode = data.copy()
    # Ensure a token id (jti) exists for revocation tracking (only required for local, harmless otherwise)
    if "jti" not in toEncode or not toEncode.get("jti"):
        toEncode["jti"] = str(uuid.uuid4())

    if expiresDelta:
        expire = get_utc_now() + expiresDelta
    else:
        expire = get_utc_now() + timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)

    toEncode.update({"exp": expire})
    encodedJwt = jwt.encode(toEncode, SECRET_KEY, algorithm=ALGORITHM)

    return encodedJwt, expire

def createAccessTokenWithCookie(data: dict, response: Response, expiresDelta: Optional[timedelta] = None) -> str:
    """
    Creates a JWT Access Token and sets it as an httpOnly cookie.

    Args:
        data: Data to encode (usually user ID or username)
        response: FastAPI Response object to set cookie
        expiresDelta: Validity duration of the token (optional)

    Returns:
        JWT Token as string
    """
    access_token, expires_at = createAccessToken(data, expiresDelta)

    # Set httpOnly cookie
    response.set_cookie(
        key="auth_token",
        value=access_token,
        httponly=True,
        secure=True,  # HTTPS only in production
        samesite="strict",
        max_age=int(expiresDelta.total_seconds()) if expiresDelta else ACCESS_TOKEN_EXPIRE_MINUTES * 60
    )

    return access_token

def createRefreshToken(data: dict) -> Tuple[str, datetime]:
    """
    Creates a JWT Refresh Token with longer expiration.

    Args:
        data: Data to encode (usually user ID or username)

    Returns:
        Tuple of (JWT Refresh Token as string, expiration datetime)
    """
    toEncode = data.copy()
    # Ensure a token id (jti) exists for revocation tracking
    if "jti" not in toEncode or not toEncode.get("jti"):
        toEncode["jti"] = str(uuid.uuid4())

    # Add refresh token type
    toEncode["type"] = "refresh"

    expire = get_utc_now() + timedelta(days=REFRESH_TOKEN_EXPIRE_DAYS)
    toEncode.update({"exp": expire})
    encodedJwt = jwt.encode(toEncode, SECRET_KEY, algorithm=ALGORITHM)

    return encodedJwt, expire

def setRefreshTokenCookie(data: dict, response: Response) -> str:
    """
    Creates a JWT Refresh Token and sets it as an httpOnly cookie.

    Args:
        data: Data to encode (usually user ID or username)
        response: FastAPI Response object to set cookie

    Returns:
        JWT Refresh Token as string
    """
    refresh_token, expires_at = createRefreshToken(data)

    # Set httpOnly cookie for refresh token
    response.set_cookie(
        key="refresh_token",
        value=refresh_token,
        httponly=True,
        secure=True,  # HTTPS only in production
        samesite="strict",
        max_age=REFRESH_TOKEN_EXPIRE_DAYS * 24 * 60 * 60  # Days to seconds
    )

    return refresh_token
# Note: JWT creation and cookie helpers moved to modules.security.jwtService

def _getUserBase(token: str = Depends(cookieAuth)) -> User:
    """
72 modules/security/jwtService.py Normal file

@@ -0,0 +1,72 @@
"""
JWT Service
Centralizes local JWT creation and cookie helpers.
"""

from datetime import datetime, timedelta
from typing import Optional, Tuple
from fastapi import Response
from jose import jwt

from modules.shared.configuration import APP_CONFIG
from modules.shared.timezoneUtils import get_utc_now

# Config
SECRET_KEY = APP_CONFIG.get("APP_JWT_KEY_SECRET")
ALGORITHM = APP_CONFIG.get("Auth_ALGORITHM")
ACCESS_TOKEN_EXPIRE_MINUTES = int(APP_CONFIG.get("APP_TOKEN_EXPIRY"))
REFRESH_TOKEN_EXPIRE_DAYS = int(APP_CONFIG.get("APP_REFRESH_TOKEN_EXPIRY", "7"))


def createAccessToken(data: dict, expiresDelta: Optional[timedelta] = None) -> Tuple[str, datetime]:
    """Create a JWT access token and return (token, expiresAt)."""
    toEncode = data.copy()
    if "jti" not in toEncode or not toEncode.get("jti"):
        import uuid
        toEncode["jti"] = str(uuid.uuid4())

    expire = get_utc_now() + (expiresDelta if expiresDelta else timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES))
    toEncode.update({"exp": expire})
    encodedJwt = jwt.encode(toEncode, SECRET_KEY, algorithm=ALGORITHM)
    return encodedJwt, expire


def createRefreshToken(data: dict) -> Tuple[str, datetime]:
    """Create a JWT refresh token and return (token, expiresAt)."""
    toEncode = data.copy()
    if "jti" not in toEncode or not toEncode.get("jti"):
        import uuid
        toEncode["jti"] = str(uuid.uuid4())
    toEncode["type"] = "refresh"

    expire = get_utc_now() + timedelta(days=REFRESH_TOKEN_EXPIRE_DAYS)
    toEncode.update({"exp": expire})
    encodedJwt = jwt.encode(toEncode, SECRET_KEY, algorithm=ALGORITHM)
    return encodedJwt, expire


def setAccessTokenCookie(response: Response, token: str, expiresDelta: Optional[timedelta] = None) -> None:
    """Set access token as httpOnly cookie."""
    maxAge = int(expiresDelta.total_seconds()) if expiresDelta else ACCESS_TOKEN_EXPIRE_MINUTES * 60
    response.set_cookie(
        key="auth_token",
        value=token,
        httponly=True,
        secure=True,
        samesite="strict",
        max_age=maxAge
    )


def setRefreshTokenCookie(response: Response, token: str) -> None:
    """Set refresh token as httpOnly cookie."""
    response.set_cookie(
        key="refresh_token",
        value=token,
        httponly=True,
        secure=True,
        samesite="strict",
        max_age=REFRESH_TOKEN_EXPIRE_DAYS * 24 * 60 * 60
    )
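A minimal usage sketch for the new service (hypothetical FastAPI helper; it mirrors how the login route combines these helpers):

from fastapi import Response
from modules.security.jwtService import createAccessToken, createRefreshToken, setAccessTokenCookie, setRefreshTokenCookie

def issue_session(response: Response, username: str) -> dict:
    token_data = {"sub": username}
    # Token creation and cookie setting are now separate, explicit steps
    access_token, access_expires = createAccessToken(token_data)
    setAccessTokenCookie(response, access_token)
    refresh_token, _ = createRefreshToken(token_data)
    setRefreshTokenCookie(response, refresh_token)
    return {"expires_at": access_expires.isoformat()}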
@@ -6,7 +6,7 @@ Handles all token operations including automatic refresh for backend services.
import logging
import httpx
from datetime import datetime
from typing import Optional, Dict, Any
from typing import Optional, Dict, Any, Callable

from modules.interfaces.interfaceAppModel import Token, AuthAuthority
from modules.shared.configuration import APP_CONFIG
@@ -198,4 +198,66 @@ class TokenManager:
        except Exception as e:
            logger.error(f"Error refreshing token: {str(e)}")
            return None

    def ensure_fresh_token(self, token: Token, *, seconds_before_expiry: int = 30 * 60, save_callback: Optional[Callable[[Token], None]] = None) -> Optional[Token]:
        """Ensure a token is fresh; refresh if expiring within threshold.

        Args:
            token: Existing token to validate/refresh.
            seconds_before_expiry: Threshold window to proactively refresh.
            save_callback: Optional function to persist a refreshed token.

        Returns:
            A fresh token (refreshed or original) or None if refresh failed.
        """
        try:
            if token is None:
                return None

            now_ts = get_utc_timestamp()
            expires_at = token.expiresAt or 0

            # If token expires within the threshold, try to refresh
            if expires_at and expires_at < (now_ts + seconds_before_expiry):
                logger.info(
                    f"ensure_fresh_token: Token for connection {token.connectionId} expiring soon "
                    f"(in {max(0, expires_at - now_ts)}s). Attempting proactive refresh."
                )
                refreshed = self.refresh_token(token)
                if refreshed:
                    if save_callback is not None:
                        try:
                            save_callback(refreshed)
                        except Exception as e:
                            logger.warning(f"ensure_fresh_token: Failed to persist refreshed token: {e}")
                    return refreshed
                else:
                    logger.warning("ensure_fresh_token: Token refresh failed")
                    return None

            # Token is sufficiently fresh
            return token
        except Exception as e:
            logger.error(f"ensure_fresh_token: Error ensuring fresh token: {e}")
            return None

    # Convenience wrapper to fetch and ensure fresh token for a connection via interface layer
    def getFreshToken(self, interfaceApp, connectionId: str, secondsBeforeExpiry: int = 30 * 60) -> Optional[Token]:
        """Return a fresh token for a connection, refreshing when expiring soon.

        Reads the latest stored token via interfaceApp.getConnectionToken, then
        uses ensure_fresh_token to refresh if needed and persists the refreshed
        token via interfaceApp.saveConnectionToken.
        """
        try:
            token = interfaceApp.getConnectionToken(connectionId)
            if not token:
                return None
            return self.ensure_fresh_token(
                token,
                seconds_before_expiry=secondsBeforeExpiry,
                save_callback=lambda t: interfaceApp.saveConnectionToken(t)
            )
        except Exception as e:
            logger.error(f"getFreshToken: Error fetching or refreshing token for connection {connectionId}: {e}")
            return None
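Callers can now replace the old auto_refresh flag with a single call (sketch; assumes an appInterface and connection obtained from the interface layer, and a stored_token already loaded):

from modules.security.tokenManager import TokenManager

token = TokenManager().getFreshToken(appInterface, connection.id)
if token is None:
    # Stored token missing, or refresh failed
    ...

# Equivalent explicit form, with a custom threshold and persistence hook:
token = TokenManager().ensure_fresh_token(
    stored_token,
    seconds_before_expiry=10 * 60,
    save_callback=appInterface.saveConnectionToken,
)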
|
@ -51,8 +51,8 @@ class TokenRefreshService:
|
|||
try:
|
||||
logger.debug(f"Refreshing Google token for connection {connection.id}")
|
||||
|
||||
# Get current token
|
||||
current_token = interface.getConnectionToken(connection.id, auto_refresh=False)
|
||||
# Get current token (no refresh in interface layer)
|
||||
current_token = interface.getConnectionToken(connection.id)
|
||||
if not current_token:
|
||||
logger.warning(f"No Google token found for connection {connection.id}")
|
||||
return False
|
||||
|
|
@ -100,8 +100,8 @@ class TokenRefreshService:
|
|||
try:
|
||||
logger.debug(f"Refreshing Microsoft token for connection {connection.id}")
|
||||
|
||||
# Get current token
|
||||
current_token = interface.getConnectionToken(connection.id, auto_refresh=False)
|
||||
# Get current token (no refresh in interface layer)
|
||||
current_token = interface.getConnectionToken(connection.id)
|
||||
if not current_token:
|
||||
logger.warning(f"No Microsoft token found for connection {connection.id}")
|
||||
return False
|
||||
|
|
|
|||
100
modules/services/__init__.py
Normal file
100
modules/services/__init__.py
Normal file
|
|
@ -0,0 +1,100 @@
|
|||
from typing import Any
|
||||
|
||||
from modules.interfaces.interfaceAppModel import User
|
||||
from modules.interfaces.interfaceChatModel import ChatWorkflow
|
||||
from modules.services.serviceWorkflows.mainServiceWorkflows import WorkflowService
|
||||
|
||||
class PublicService:
|
||||
"""Lightweight proxy exposing only public callable attributes of a target.
|
||||
|
||||
- Hides names starting with '_'
|
||||
- Optionally restricts to callables only
|
||||
- Optional name_filter predicate for allow-list patterns
|
||||
"""
|
||||
|
||||
def __init__(self, target: Any, functions_only: bool = True, name_filter=None):
|
||||
self._target = target
|
||||
self._functions_only = functions_only
|
||||
self._name_filter = name_filter
|
||||
|
||||
def __getattr__(self, name: str):
|
||||
if name.startswith('_'):
|
||||
raise AttributeError(f"'{type(self._target).__name__}' attribute '{name}' is private")
|
||||
if self._name_filter and not self._name_filter(name):
|
||||
raise AttributeError(f"'{name}' not exposed by policy")
|
||||
attr = getattr(self._target, name)
|
||||
if self._functions_only and not callable(attr):
|
||||
raise AttributeError(f"'{name}' is not a function")
|
||||
return attr
|
||||
|
||||
def __dir__(self):
|
||||
names = [
|
||||
n for n in dir(self._target)
|
||||
if not n.startswith('_')
|
||||
and (not self._functions_only or callable(getattr(self._target, n, None)))
|
||||
and (self._name_filter(n) if self._name_filter else True)
|
||||
]
|
||||
return sorted(names)
|
||||
|
||||
|
||||
class Services:

    def __init__(self, user: User, workflow: ChatWorkflow):
        self.user: User = user
        self.workflow: ChatWorkflow = workflow

        # Directly expose existing service modules.
        # Distinct attribute names are used so extraction and generation do not
        # overwrite each other, and so the ChatWorkflow stored in self.workflow
        # above is not shadowed by the workflow service proxy.
        from .serviceDocument.mainServiceDocumentExtraction import DocumentExtractionService
        self.documentExtraction = PublicService(DocumentExtractionService(self))

        from .serviceDocument.mainServiceDocumentGeneration import DocumentGenerationService
        self.documentGeneration = PublicService(DocumentGenerationService(self))

        from .serviceNeutralization.mainNeutralization import NeutralizationService
        self.neutralization = PublicService(NeutralizationService())

        from .serviceSharepoint.mainSharepoint import SharePointService
        self.sharepoint = PublicService(SharePointService(self))

        from .serviceAi.mainServiceAi import AiService
        self.ai = PublicService(AiService(self))

        from .serviceWorkflows.mainServiceWorkflows import WorkflowService
        self.workflows = PublicService(WorkflowService(self))

        # Initialize chat interface for workflow operations
        from modules.interfaces.interfaceChatObjects import getInterface as getChatInterface
        self.chatInterface = getChatInterface(user)

    # Chat interface wrapper methods
    def getWorkflow(self, workflowId: str):
        return self.chatInterface.getWorkflow(workflowId)

    def createWorkflow(self, workflowData: dict):
        return self.chatInterface.createWorkflow(workflowData)

    def updateWorkflow(self, workflowId: str, workflowData: dict):
        return self.chatInterface.updateWorkflow(workflowId, workflowData)

    def createMessage(self, messageData: dict):
        return self.chatInterface.createMessage(messageData)

    def updateMessage(self, messageId: str, messageData: dict):
        return self.chatInterface.updateMessage(messageId, messageData)

    def createLog(self, logData: dict):
        return self.chatInterface.createLog(logData)

    def updateWorkflowStats(self, workflowId: str, bytesSent: int = 0, bytesReceived: int = 0, tokenCount: int = 0):
        return self.chatInterface.updateWorkflowStats(workflowId, bytesSent, bytesReceived, tokenCount)

    @property
    def mandateId(self):
        return self.chatInterface.mandateId


def getInterface(user: User, workflow: ChatWorkflow) -> Services:
    return Services(user, workflow)
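# Entry-point sketch (illustrative; variable names are assumptions):
#
#     services = getInterface(currentUser, chatWorkflow)
#     neutralized = services.neutralization.processText("Jane Doe <jane@example.com>")
#     summary = await services.ai.callAi(prompt="Summarize ...", documents=None)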
137  modules/services/serviceAi/mainServiceAi.py  Normal file
@@ -0,0 +1,137 @@
import logging
from typing import Dict, Any, List, Optional, Tuple

from modules.interfaces.interfaceChatModel import ChatDocument
from modules.services.serviceDocument.documentExtraction import DocumentExtractionService
from modules.interfaces.interfaceAiModel import AiCallRequest, AiCallOptions
from modules.interfaces.interfaceAiObjects import AiObjects


logger = logging.getLogger(__name__)


# Model registry is now provided by interfaces via AiModels


class AiService:
    """Centralized AI service orchestrating documents, model selection and failover.

    The concrete connector instances (OpenAI/Anthropic) are injected by the interface layer.
    """

    def __init__(self, aiObjects: AiObjects | None = None) -> None:
        # Only depend on interfaces
        self.aiObjects = aiObjects or AiObjects()
        self.documentExtractor = DocumentExtractionService()

    async def callAi(
        self,
        prompt: str,
        documents: Optional[List[ChatDocument]] = None,
        processDocumentsIndividually: bool = False,
        options: Optional[AiCallOptions] = None,
    ) -> str:
        try:
            documentContent = ""
            if documents:
                documentContent = await self._processDocumentsForAi(
                    documents,
                    options.operationType if options else "general",
                    options.compressContext if options else True,
                    processDocumentsIndividually,
                )

            effectiveOptions = options or AiCallOptions()
            request = AiCallRequest(
                prompt=prompt,
                context=documentContent or None,
                options=effectiveOptions,
            )

            response = await self.aiObjects.call(request)
            return response.content
        except Exception as e:
            logger.error(f"Error in centralized AI call: {str(e)}")
            return f"Error: {str(e)}"
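    # Call sketch (illustrative). The signature above types `options` as
    # AiCallOptions and reads options.operationType / options.compressContext
    # as attributes, while several call sites in this commit pass a plain dict
    # with snake_case keys; a conforming call, assuming AiCallOptions exposes
    # these fields, would look like:
    #
    #     options = AiCallOptions(operationType="generate_content", compressContext=True)
    #     text = await AiService().callAi(prompt="...", documents=[chatDocument], options=options)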
    # Model selection now handled by interface AiObjects

    # Cost estimation handled by interface for model selection

    async def _processDocumentsForAi(
        self,
        documents: List[ChatDocument],
        operationType: str,
        compressDocuments: bool,
        processIndividually: bool,
    ) -> str:
        if not documents:
            return ""

        processedContents: List[str] = []
        for doc in documents:
            try:
                extracted = await self.documentExtractor.processFileData(
                    doc.fileData,
                    doc.fileName,
                    doc.mimeType,
                    prompt=f"Extract relevant content for {operationType}",
                    documentId=doc.id,
                    enableAI=True,
                )

                docContent: List[str] = []
                for contentItem in extracted.contents:
                    if contentItem.data and contentItem.data.strip():
                        docContent.append(contentItem.data)

                if docContent:
                    combinedDocContent = "\n\n".join(docContent)
                    if (
                        compressDocuments
                        and len(combinedDocContent.encode("utf-8")) > 10000
                    ):
                        combinedDocContent = await self._compressContent(
                            combinedDocContent, 10000, "document"
                        )
                    processedContents.append(
                        f"Document: {doc.fileName}\n{combinedDocContent}"
                    )
            except Exception as e:
                logger.warning(
                    f"Error processing document {doc.fileName}: {str(e)}"
                )
                processedContents.append(
                    f"Document: {doc.fileName}\n[Error processing document: {str(e)}]"
                )

        return "\n\n---\n\n".join(processedContents)

    # Prompt/context optimization (compression) handled by interface

    async def _compressContent(self, content: str, targetSize: int, contentType: str) -> str:
        if len(content.encode("utf-8")) <= targetSize:
            return content

        try:
            # Note: compressionPrompt is currently unused (see fallback below)
            compressionPrompt = f"""
Compress the following {contentType} to at most {targetSize} characters,
but keep all important information:

{content}

Return only the compressed content, without additional explanations.
"""

            # Service must not call connectors directly; use simple truncation fallback here
            data = content.encode("utf-8")
            return data[:targetSize].decode("utf-8", errors="ignore") + "... [truncated]"
        except Exception as e:
            logger.warning(f"AI compression failed, using truncation: {str(e)}")
            return content[:targetSize] + "... [truncated]"

    # Failover logic now centralized in interface via model selection; service delegates a single call

    # Fallback selection moved to interface; service doesn't select models directly
File diff suppressed because it is too large
@@ -22,7 +22,7 @@ from modules.interfaces.interfaceChatModel import (
    ContentItem,
    ContentMetadata
)
from modules.services.serviceNeutralization.neutralizer import DataAnonymizer
from modules.services.serviceNeutralization.mainNeutralization import NeutralizationService
from modules.shared.configuration import APP_CONFIG

logger = logging.getLogger(__name__)
@@ -36,13 +36,16 @@ class FileProcessingError(Exception):
    """Custom exception for file processing errors."""
    pass

class DocumentExtraction:
class DocumentExtractionService:
    """Processor for handling document operations and content extraction."""

    def __init__(self, serviceCenter=None):
        """Initialize the document processor."""
        self._neutralizer = DataAnonymizer() if APP_CONFIG.get("ENABLE_CONTENT_NEUTRALIZATION", False) else None
        self._neutralizer = NeutralizationService() if APP_CONFIG.get("ENABLE_CONTENT_NEUTRALIZATION", False) else None
        self._serviceCenter = serviceCenter
        # Centralized services interface (for AI); guard against the default
        # serviceCenter=None, which would otherwise raise AttributeError here
        from modules.services import getInterface as getServices
        self.services = getServices(serviceCenter.user, serviceCenter.workflow) if serviceCenter else None

        self.supportedTypes: Dict[str, Callable[[bytes, str, str], Awaitable[List[ContentItem]]]] = {
            # Text and data files
@@ -218,10 +221,6 @@ class DocumentExtraction:
        # This should never be reached, but just in case
        raise FileProcessingError(f"Failed to decode {fileName} with any encoding")

    def initialize(self) -> None:
        """Initialize the document processor."""
        pass

    def _loadPdfExtractor(self):
        """Loads PDF extraction libraries when needed"""
        global pdfExtractorLoaded
@@ -1132,18 +1131,18 @@ class DocumentExtraction:
            # Create a basic content item explaining the limitation
            info_content = f"""Legacy Word Document (.doc) - {fileName}

Note: This is a legacy .doc format file. For better content extraction,
consider converting to .docx format.

File size: {len(fileData)} bytes
Format: Microsoft Word 97-2003 Document

Content extraction from .doc files requires specialized tools like:
- antiword (Linux/Unix)
- catdoc (Linux/Unix)
- Microsoft Word (for conversion)

The raw binary content is available but not human-readable."""

            contentItems.append(ContentItem(
                label="info",
@@ -1183,18 +1182,18 @@ The raw binary content is available but not human-readable."""
            # Create a basic content item explaining the limitation
            info_content = f"""Legacy Excel Document (.xls) - {fileName}

Note: This is a legacy .xls format file. For better content extraction,
consider converting to .xlsx format.

File size: {len(fileData)} bytes
Format: Microsoft Excel 97-2003 Workbook

Content extraction from .xls files requires specialized tools like:
- xlrd (Python library)
- Microsoft Excel (for conversion)
- LibreOffice (for conversion)

The raw binary content is available but not human-readable."""

            contentItems.append(ContentItem(
                label="info",
@@ -1234,18 +1233,18 @@ The raw binary content is available but not human-readable."""
            # Create a basic content item explaining the limitation
            info_content = f"""Legacy PowerPoint Document (.ppt) - {fileName}

Note: This is a legacy .ppt format file. For better content extraction,
consider converting to .pptx format.

File size: {len(fileData)} bytes
Format: Microsoft PowerPoint 97-2003 Presentation

Content extraction from .ppt files requires specialized tools like:
- python-pptx (limited support for .ppt)
- Microsoft PowerPoint (for conversion)
- LibreOffice (for conversion)

The raw binary content is available but not human-readable."""

            contentItems.append(ContentItem(
                label="info",
@@ -1417,11 +1416,7 @@ The raw binary content is available but not human-readable."""
            # Process with AI based on content type
            try:
                if mimeType.startswith('image/') and mimeType != "image/svg+xml":
                    # For images (excluding SVG), extract meaningful content as text
                    # Use AI to analyze the image and extract relevant information

                    # Create a specific prompt for image content extraction
                    # For images (excluding SVG), analyze via centralized AI service
                    imagePrompt = f"""
Analyze this image and extract the actual content and information from it.
Focus on extracting text, data, charts, diagrams, or any meaningful content.

@@ -1430,8 +1425,19 @@ The raw binary content is available but not human-readable."""

Original prompt: {prompt}
"""
                    processedContent = await self._serviceCenter.callAiImageBasic(imagePrompt, chunk, mimeType)
                    from modules.interfaces.interfaceChatModel import ChatDocument
                    image_doc = ChatDocument(fileData=chunk, fileName="image", mimeType=mimeType)
                    processedContent = await self.services.ai.callAi(
                        prompt=imagePrompt,
                        documents=[image_doc],
                        options={
                            "process_type": "image",
                            "operation_type": "analyse_content",
                            "priority": "balanced",
                            "compress_documents": True,
                            "max_cost": 0.03
                        }
                    )
                else:
                    # For text content (including SVG), use text AI service
                    # Neutralize content if neutralizer is enabled (only for text)
@@ -1456,7 +1462,36 @@ The raw binary content is available but not human-readable."""
                        # For code files, preserve the complete content without AI processing
                        processedContent = contentToProcess
                    else:
                        processedContent = await self._serviceCenter.callAiTextBasic(aiPrompt, contentToProcess)
                        if self.services and hasattr(self.services, 'ai'):
                            processedContent = await self.services.ai.callAi(
                                prompt=aiPrompt,
                                documents=None,
                                options={
                                    "process_type": "text",
                                    "operation_type": "analyse_content",
                                    "priority": "balanced",
                                    "compress_prompt": True,
                                    "compress_documents": False,
                                    "processing_mode": "advanced",
                                    "max_cost": 0.05,
                                    "max_processing_time": 30
                                }
                            )
                        else:
                            # Fallback to basic AI processing with centralized service.
                            # Note: this branch still dereferences self.services.ai,
                            # so it cannot actually run when self.services is None.
                            processedContent = await self.services.ai.callAi(
                                prompt=aiPrompt,
                                documents=None,
                                options={
                                    "process_type": "text",
                                    "operation_type": "analyse_content",
                                    "priority": "speed",
                                    "compress_prompt": True,
                                    "compress_documents": False,
                                    "max_cost": 0.01,
                                    "max_processing_time": 15
                                }
                            )

                    chunkResults.append(processedContent)
                except Exception as aiError:
@@ -13,7 +13,7 @@ from modules.services.serviceDocument.documentUtility import (

logger = logging.getLogger(__name__)

class DocumentGenerator:
class DocumentGenerationService:
    def __init__(self, service):
        self.service = service
||||
206
modules/services/serviceNeutralization/mainNeutralization.py
Normal file
206
modules/services/serviceNeutralization/mainNeutralization.py
Normal file
|
|
@ -0,0 +1,206 @@
|
|||
"""
|
||||
Data Neutralization Service
|
||||
Handles file processing for data neutralization including SharePoint integration
|
||||
DSGVO-konformer Daten-Neutralisierer für KI-Agentensysteme
|
||||
Unterstützt TXT, JSON, CSV, Excel und Word-Dateien
|
||||
Mehrsprachig: DE, EN, FR, IT
|
||||
"""
|
||||
|
||||
import logging
|
||||
import re
|
||||
import os
|
||||
import uuid
|
||||
import json
|
||||
from typing import Dict, List, Any, Optional, Tuple
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
import mimetypes
|
||||
|
||||
from modules.interfaces.interfaceAppObjects import getInterface
|
||||
from modules.interfaces.interfaceAppModel import User, DataNeutraliserConfig, DataNeutralizerAttributes
|
||||
from modules.shared.timezoneUtils import get_utc_timestamp
|
||||
|
||||
# Import all necessary classes and functions for neutralization
|
||||
from modules.services.serviceNeutralization.subProcessCommon import ProcessResult, CommonUtils, NeutralizationResult, NeutralizationAttribute
|
||||
from modules.services.serviceNeutralization.subProcessText import TextProcessor, PlainText
|
||||
from modules.services.serviceNeutralization.subProcessList import ListProcessor, TableData
|
||||
from modules.services.serviceNeutralization.subProcessBinary import BinaryProcessor, BinaryData
|
||||
from modules.services.serviceNeutralization.subParseString import StringParser
|
||||
from modules.services.serviceNeutralization.subPatterns import Pattern, HeaderPatterns, DataPatterns, TextTablePatterns
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class NeutralizationService:
    """Service for handling data neutralization operations"""

    def __init__(self, current_user: User = None, names_to_parse: List[str] = None):
        """Initialize the service with user context and anonymization processors

        Args:
            current_user: User object for context (optional for basic neutralization)
            names_to_parse: List of names to parse and replace (case-insensitive)
        """
        self.current_user = current_user
        self.app_interface = getInterface(current_user) if current_user else None

        # Initialize anonymization processors
        self.names_to_parse = names_to_parse or []
        self.textProcessor = TextProcessor(names_to_parse)
        self.listProcessor = ListProcessor(names_to_parse)
        self.binaryProcessor = BinaryProcessor()
        self.commonUtils = CommonUtils()

    def getConfig(self) -> Optional[DataNeutraliserConfig]:
        """Get the neutralization configuration for the current user's mandate"""
        if not self.app_interface:
            return None
        return self.app_interface.getNeutralizationConfig()

    def saveConfig(self, config_data: Dict[str, Any]) -> DataNeutraliserConfig:
        """Save or update the neutralization configuration"""
        if not self.app_interface:
            raise ValueError("User context required for saving configuration")
        return self.app_interface.createOrUpdateNeutralizationConfig(config_data)

    # Public API: process text or file

    def processText(self, text: str) -> Dict[str, Any]:
        """Neutralize a raw text string and return a standard result dict."""
        return self._neutralizeText(text, 'text')

    def processFile(self, fileId: str) -> Dict[str, Any]:
        """Neutralize a file referenced by its fileId using app interface."""
        if not self.app_interface:
            raise ValueError("User context is required to process a file by fileId")
        # Fetch file data and metadata
        fileInfo = None
        try:
            # getFile returns an object; fallback to dict-like
            fileInfo = self.app_interface.getFile(fileId)
        except Exception:
            fileInfo = None
        fileName = getattr(fileInfo, 'fileName', None) if fileInfo else None
        mimeType = getattr(fileInfo, 'mimeType', None) if fileInfo else None
        fileData = self.app_interface.getFileData(fileId)
        if not fileData:
            raise ValueError(f"No file data found for fileId: {fileId}")

        # Determine textType from mime
        textType = self._getContentTypeFromMime(mimeType or '')

        # Decode to text
        try:
            textContent = fileData.decode('utf-8')
        except UnicodeDecodeError:
            decoded = None
            for enc in ['latin-1', 'cp1252', 'iso-8859-1']:
                try:
                    decoded = fileData.decode(enc)
                    break
                except UnicodeDecodeError:
                    continue
            if decoded is None:
                raise ValueError("Unable to decode file content")
            textContent = decoded

        result = self._neutralizeText(textContent, textType)
        # Add a reasonable output filename if original known
        if fileName:
            result['neutralized_file_name'] = f"neutralized_{fileName}"
        result['file_id'] = fileId
        return result

    def resolveText(self, text: str) -> str:
        if not self.app_interface:
            return text
        try:
            placeholder_pattern = r'\[([a-z]+)\.([a-f0-9-]{36})\]'
            matches = re.findall(placeholder_pattern, text)
            resolved_text = text
            for placeholder_type, uid in matches:
                attributes = self.app_interface.db.getRecordset(
                    DataNeutralizerAttributes,
                    recordFilter={
                        "mandateId": self.app_interface.mandateId,
                        "id": uid
                    }
                )
                if attributes:
                    attribute = attributes[0]
                    placeholder = f"[{placeholder_type}.{uid}]"
                    resolved_text = resolved_text.replace(placeholder, attribute["originalText"])
            return resolved_text
        except Exception:
            return text
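    # Round-trip sketch (illustrative; the placeholder values are made up,
    # the actual tags and UUIDs come from the pattern processors):
    #
    #     service = NeutralizationService(current_user=user, names_to_parse=["John Doe"])
    #     result = service.processText("Contact John Doe at john.doe@example.com")
    #     result["neutralized_text"]   # e.g. "Contact [name.<uuid>] at [email.<uuid>]"
    #     service.resolveText(result["neutralized_text"])   # restores the original text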
    # Helper functions

    def _neutralizeText(self, text: str, textType: str = None) -> Dict[str, Any]:
        """Process text and return unified dict for API consumption."""
        try:
            # Auto-detect content type if not provided
            if textType is None:
                textType = self.commonUtils.detect_content_type(text)

            # Check if content is binary data
            if self.binaryProcessor.is_binary_content(text):
                data, mapping, replaced_fields, processed_info = self.binaryProcessor.process_binary_content(text)
                neutralized_text = text if isinstance(data, str) else str(data)
                attributes = [NeutralizationAttribute(original=k, placeholder=v) for k, v in mapping.items()]
                return NeutralizationResult(
                    neutralized_text=neutralized_text,
                    mapping=mapping,
                    attributes=attributes,
                    processed_info=processed_info
                ).model_dump()

            # Inline former _processData routing
            if textType in ['csv', 'json', 'xml']:
                if textType == 'csv':
                    data, mapping, replaced_fields, processed_info = self.listProcessor.process_csv_content(text)
                elif textType == 'json':
                    data, mapping, replaced_fields, processed_info = self.listProcessor.process_json_content(text)
                else:  # xml
                    data, mapping, replaced_fields, processed_info = self.listProcessor.process_xml_content(text)
            else:
                data, mapping, replaced_fields, processed_info = self.textProcessor.process_text_content(text)
            # Stringify data consistently
            if textType == 'csv':
                try:
                    neutralized_text = data.to_csv(index=False)
                except Exception:
                    neutralized_text = str(data)
            elif textType == 'json':
                neutralized_text = json.dumps(data, ensure_ascii=False)
            elif textType == 'xml':
                neutralized_text = str(data)
            else:
                neutralized_text = str(data)

            attributes = [NeutralizationAttribute(original=k, placeholder=v) for k, v in mapping.items()]
            return NeutralizationResult(
                neutralized_text=neutralized_text,
                mapping=mapping,
                attributes=attributes,
                processed_info=processed_info
            ).model_dump()

        except Exception as e:
            logger.error(f"Error processing content: {str(e)}")
            return NeutralizationResult(
                neutralized_text='',
                mapping={},
                attributes=[],
                processed_info={'type': 'error', 'error': str(e)}
            ).model_dump()

    def _getContentTypeFromMime(self, mime_type: str) -> str:
        """Determine content type from MIME type for neutralization processing"""
        # Check specific types before the generic 'text/' prefix so that
        # text/csv and text/xml are not swallowed by the plain-text branch
        if mime_type in ['text/csv', 'application/csv']:
            return 'csv'
        elif mime_type in ['application/json', 'application/xml', 'text/xml']:
            return 'json' if 'json' in mime_type else 'xml'
        elif mime_type.startswith('text/'):
            return 'text'
        else:
            return 'text'  # Default to text processing
@@ -1,112 +0,0 @@
"""
GDPR-compliant data neutralizer for AI agent systems
Supports TXT, JSON, CSV, Excel and Word files
Multilingual: DE, EN, FR, IT
"""

import logging
from typing import Dict, List, Any

# Import all necessary classes and functions
from modules.services.serviceNeutralization.subProcessCommon import ProcessResult, CommonUtils
from modules.services.serviceNeutralization.subProcessText import TextProcessor, PlainText
from modules.services.serviceNeutralization.subProcessList import ListProcessor, TableData
from modules.services.serviceNeutralization.subProcessBinary import BinaryProcessor, BinaryData
from modules.services.serviceNeutralization.subParseString import StringParser
from modules.services.serviceNeutralization.subPatterns import Pattern, HeaderPatterns, DataPatterns, TextTablePatterns

# Configure logging
logger = logging.getLogger(__name__)

# Export all classes and functions for external use
__all__ = [
    'DataAnonymizer',
    'ProcessResult',
    'CommonUtils',
    'TextProcessor',
    'PlainText',
    'ListProcessor',
    'TableData',
    'BinaryProcessor',
    'BinaryData',
    'StringParser',
    'Pattern',
    'HeaderPatterns',
    'DataPatterns',
    'TextTablePatterns'
]

class DataAnonymizer:
    """Main class for data anonymization"""

    def __init__(self, names_to_parse: List[str] = None):
        """Initialize the anonymizer with specialized processors

        Args:
            names_to_parse: List of names to parse and replace (case-insensitive)
        """
        self.names_to_parse = names_to_parse or []

        # Initialize specialized processors
        self.text_processor = TextProcessor(names_to_parse)
        self.list_processor = ListProcessor(names_to_parse)
        self.binary_processor = BinaryProcessor()

        # Common utilities
        self.common_utils = CommonUtils()

    def process_content(self, content: str, content_type: str = None) -> ProcessResult:
        """
        Process content and return anonymized data

        Args:
            content: Content to process
            content_type: Type of content ('csv', 'json', 'xml', 'text', 'binary')
                          If None, will auto-detect

        Returns:
            ProcessResult: Contains anonymized data, mapping, replaced fields and processing info
        """
        try:
            # Auto-detect content type if not provided
            if content_type is None:
                content_type = self.common_utils.detect_content_type(content)

            # Check if content is binary data
            if self.binary_processor.is_binary_content(content):
                return self.binary_processor.process_binary_content(content)

            # Route to appropriate processor based on content type
            if content_type in ['csv', 'json', 'xml']:
                if content_type == 'csv':
                    result, mapping, replaced_fields, processed_info = self.list_processor.process_csv_content(content)
                elif content_type == 'json':
                    result, mapping, replaced_fields, processed_info = self.list_processor.process_json_content(content)
                else:  # xml
                    result, mapping, replaced_fields, processed_info = self.list_processor.process_xml_content(content)

                return ProcessResult(result, mapping, replaced_fields, processed_info)
            else:
                # Handle as text
                result, mapping, replaced_fields, processed_info = self.text_processor.process_text_content(content)
                return ProcessResult(result, mapping, replaced_fields, processed_info)

        except Exception as e:
            logger.error(f"Error processing content: {str(e)}")
            return ProcessResult(None, {}, [], {'type': 'error', 'error': str(e)})

    def get_mapping(self) -> Dict[str, str]:
        """
        Get the combined mapping from all processors

        Returns:
            Dict[str, str]: Combined mapping dictionary
        """
        text_mapping = self.text_processor.get_mapping()
        list_mapping = self.list_processor.get_mapping()
        return self.common_utils.merge_mappings(text_mapping, list_mapping)

    def clear_mapping(self):
        """Clear the mapping in all processors"""
        self.text_processor.clear_mapping()
        self.list_processor.clear_mapping()
@@ -1,91 +0,0 @@
# Neutralizer Module Structure

This module provides GDPR-compliant data anonymization for AI agent systems. The code has been refactored into specialized sub-modules for better maintainability and code reuse.

## Module Overview

### Core Module
- **`neutralizer.py`** - Main DataAnonymizer class that orchestrates all processing

### Specialized Processors
- **`subProcessText.py`** - Handles plain text processing without header information
- **`subProcessList.py`** - Handles structured data with headers (CSV, JSON, XML)
- **`subProcessBinary.py`** - Handles binary data types (images, audio, video, etc.)

### Utility Modules
- **`subParseString.py`** - String parsing and replacement utilities for emails, phones, addresses, IDs and names
- **`subProcessCommon.py`** - Common utilities and data structures shared across modules
- **`patterns.py`** - Pattern definitions for data anonymization

## Key Features

### 1. Modular Architecture
- **Separation of Concerns**: Each module handles a specific type of data processing
- **Code Reuse**: Common functionality is centralized in utility modules
- **Maintainability**: Easier to modify and extend individual components

### 2. Processing Order
1. **Pattern-based matches** (emails, phones, addresses, etc.) are processed FIRST
2. **Custom names** from the user list are processed SECOND
3. **Already anonymized content** (placeholders) is skipped

### 3. Supported Data Types
- **Text**: Plain text documents, emails, etc.
- **Structured Data**: CSV, JSON, XML with headers
- **Binary Data**: Images, audio, video (framework ready, implementation pending)

### 4. Placeholder Protection
- Prevents re-anonymization of already processed content
- Uses format `[tag.uuid]` for placeholders
- Validates placeholder format before processing

## Usage Example

```python
from modules.neutralizer import DataAnonymizer

# Initialize with custom names
anonymizer = DataAnonymizer(names_to_parse=['John Doe', 'Jane Smith'])

# Process content (auto-detects type)
result = anonymizer.process_content(content, content_type='text')

# Or specify content type explicitly
result = anonymizer.process_content(content, content_type='csv')

# Get mapping of original values to placeholders
mapping = anonymizer.get_mapping()
```

## Module Dependencies

```
neutralizer.py
├── subProcessCommon.py (ProcessResult, CommonUtils)
├── subProcessText.py (TextProcessor)
├── subProcessList.py (ListProcessor)
├── subProcessBinary.py (BinaryProcessor)
└── patterns.py (Pattern definitions)

subProcessText.py
└── subParseString.py (StringParser)

subProcessList.py
├── subParseString.py (StringParser)
└── patterns.py (HeaderPatterns)

subProcessBinary.py
└── (standalone)

subParseString.py
└── patterns.py (DataPatterns)
```

## Benefits of New Structure

1. **Single Responsibility**: Each module has one clear purpose
2. **DRY Principle**: No code duplication across modules
3. **Testability**: Individual modules can be tested in isolation
4. **Extensibility**: Easy to add new data types or processing methods
5. **Maintainability**: Changes to one module don't affect others
6. **Performance**: Specialized processors are optimized for their data types
@@ -5,6 +5,7 @@ Shared functions and data structures

import re
from typing import Dict, List, Any, Union, Optional
from pydantic import BaseModel
from dataclasses import dataclass

@dataclass

@@ -15,6 +16,19 @@ class ProcessResult:
    replaced_fields: List[str]
    processed_info: Dict[str, Any]  # Additional processing information

class NeutralizationAttribute(BaseModel):
    """Single attribute describing a replacement mapping."""
    original: str
    placeholder: str
    patternType: Optional[str] = None

class NeutralizationResult(BaseModel):
    """Unified result for all content types, suitable for API responses."""
    neutralized_text: str
    mapping: Dict[str, str]
    attributes: List[NeutralizationAttribute]
    processed_info: Dict[str, Any]

class CommonUtils:
    """Common utility functions for data processing"""
@@ -82,7 +82,8 @@ class TextProcessor:
        # Get processing information
        processed_info = {
            'type': 'text',
            'tables': [{'headers': t.headers, 'row_count': len(t.rows)} for t in tables] if hasattr(tables[0], 'headers') else []
            'tables': ([{'headers': t.headers, 'row_count': len(t.rows)} for t in tables]
                       if tables else [])
        }

        return result, self.string_parser.get_mapping(), [], processed_info
@@ -10,7 +10,7 @@ from datetime import datetime, UTC
logger = logging.getLogger(__name__)


class ConnectorSharepoint:
class SharePointService:
    """SharePoint connector using Microsoft Graph API for reliable authentication."""

    def __init__(self, access_token: str):
546  modules/services/serviceWorkflows/mainServiceWorkflows.py  Normal file
@@ -0,0 +1,546 @@
import logging
import uuid
from typing import Dict, Any, List, Optional
from modules.interfaces.interfaceAppModel import User, UserConnection
from modules.interfaces.interfaceChatModel import ChatDocument, ChatMessage, ExtractedContent
from modules.services.serviceDocument.documentExtraction import DocumentExtractionService
from modules.services.serviceDocument.documentUtility import getFileExtension, getMimeTypeFromExtension, detectContentTypeFromData
from modules.shared.timezoneUtils import get_utc_timestamp

logger = logging.getLogger(__name__)

class WorkflowService:
    """Service class containing methods for document processing, chat operations, and workflow management"""

    def __init__(self, service_center):
        self.service_center = service_center
        self.user = service_center.user
        self.workflow = service_center.workflow
        self.interfaceChat = service_center.interfaceChat
        self.interfaceComponent = service_center.interfaceComponent
        self.interfaceApp = service_center.interfaceApp
        self.documentProcessor = service_center.documentProcessor
        # Centralized services interface (for AI)
        from modules.services import getInterface as getServices
        self.services = getServices(self.user, self.workflow)
    async def summarizeChat(self, messages: List[ChatMessage]) -> str:
        """
        Summarize chat messages from last to first message with status="first"

        Args:
            messages: List of chat messages to summarize

        Returns:
            str: Summary of the chat in user's language
        """
        try:
            # Get messages from last to first, stopping at first message with status="first"
            relevantMessages = []
            for msg in reversed(messages):
                relevantMessages.append(msg)
                if msg.status == "first":
                    break

            # Create prompt for AI
            prompt = f"""You are an AI assistant providing a summary of a chat conversation.
Please respond in '{self.user.language}' language.

Chat History:
{chr(10).join(f"- {msg.message}" for msg in reversed(relevantMessages))}

Instructions:
1. Summarize the conversation's key points and outcomes
2. Be concise but informative
3. Use a professional but friendly tone
4. Focus on important decisions and next steps if any

Please provide a comprehensive summary of this conversation."""

            # Get summary using centralized AI (speed priority)
            return await self.services.ai.callAi(
                prompt=prompt,
                documents=None,
                options={
                    "process_type": "text",
                    "operation_type": "generate_content",
                    "priority": "speed",
                    "compress_prompt": True,
                    "compress_documents": False,
                    "max_cost": 0.01
                }
            )

        except Exception as e:
            logger.error(f"Error summarizing chat: {str(e)}")
            return f"Error summarizing chat: {str(e)}"
    def getChatDocumentsFromDocumentList(self, documentList: List[str]) -> List[ChatDocument]:
        """Get ChatDocuments from a list of document references using all three formats."""
        try:
            all_documents = []
            for doc_ref in documentList:
                if doc_ref.startswith("docItem:"):
                    # docItem:<id>:<filename> - extract ID and find document
                    parts = doc_ref.split(':')
                    if len(parts) >= 2:
                        doc_id = parts[1]
                        # Find the document by ID
                        for message in self.workflow.messages:
                            if message.documents:
                                for doc in message.documents:
                                    if doc.id == doc_id:
                                        doc_name = getattr(doc, 'fileName', 'unknown')
                                        logger.debug(f"Found docItem reference {doc_ref}: {doc_name}")
                                        all_documents.append(doc)
                                        break
                elif doc_ref.startswith("docList:"):
                    # docList:<messageId>:<label> or docList:<label> - extract message ID and find document list
                    parts = doc_ref.split(':')
                    if len(parts) >= 3:
                        # Format: docList:<messageId>:<label>
                        message_id = parts[1]
                        label = parts[2]
                        # Find the message by ID and get all its documents
                        for message in self.workflow.messages:
                            if str(message.id) == message_id:
                                if message.documents:
                                    doc_names = [doc.fileName for doc in message.documents if hasattr(doc, 'fileName')]
                                    logger.debug(f"Found docList reference {doc_ref}: {len(message.documents)} documents - {doc_names}")
                                    all_documents.extend(message.documents)
                                else:
                                    logger.debug(f"Found docList reference {doc_ref} but message has no documents")
                                break
                    elif len(parts) >= 2:
                        # Format: docList:<label> - find message by documentsLabel
                        label = parts[1]
                        logger.debug(f"Looking for message with documentsLabel: {label}")
                        # Find messages with matching documentsLabel
                        matching_messages = []
                        for message in self.workflow.messages:
                            # Check both attribute and raw data for documentsLabel
                            msg_label = getattr(message, 'documentsLabel', None)
                            if msg_label == label:
                                matching_messages.append(message)
                                logger.debug(f"Found message {message.id} with matching documentsLabel: {msg_label}")
                            else:
                                # Debug: show what labels we're comparing
                                logger.debug(f"Message {message.id} has documentsLabel: '{msg_label}' (looking for: '{label}')")

                        if matching_messages:
                            # Use the newest message (highest publishedAt)
                            matching_messages.sort(key=lambda msg: getattr(msg, 'publishedAt', 0), reverse=True)
                            newest_message = matching_messages[0]

                            if newest_message.documents:
                                doc_names = [doc.fileName for doc in newest_message.documents if hasattr(doc, 'fileName')]
                                logger.debug(f"Found docList reference {doc_ref}: {len(newest_message.documents)} documents - {doc_names}")
                                all_documents.extend(newest_message.documents)
                            else:
                                logger.debug(f"Found docList reference {doc_ref} but message has no documents")
                        else:
                            logger.debug(f"No messages found with documentsLabel: {label}")
                else:
                    # Direct label reference (round1_task2_action3_contextinfo)
                    # Search for messages with matching documentsLabel to find the actual documents
                    if doc_ref.startswith("round"):
                        # Parse round/task/action to find the corresponding document list
                        label_parts = doc_ref.split('_', 3)
                        if len(label_parts) >= 4:
                            round_num = int(label_parts[0].replace('round', ''))
                            task_num = int(label_parts[1].replace('task', ''))
                            action_num = int(label_parts[2].replace('action', ''))
                            context_info = label_parts[3]

                            logger.debug(f"Resolving round reference: round{round_num}_task{task_num}_action{action_num}_{context_info}")
                            logger.debug(f"Looking for messages with documentsLabel matching: {doc_ref}")

                            # Find messages with matching documentsLabel (this is the correct way!)
                            # In case of retries, we want the NEWEST message (most recent publishedAt)
                            matching_messages = []
                            for message in self.workflow.messages:
                                msg_documents_label = getattr(message, 'documentsLabel', '')

                                # Check if this message's documentsLabel matches our reference
                                if msg_documents_label == doc_ref:
                                    # Found a matching message, collect it for comparison
                                    matching_messages.append(message)
                                    logger.debug(f"Found message {message.id} with matching documentsLabel: {msg_documents_label}")

                            # If we found matching messages, take the newest one (highest publishedAt)
                            if matching_messages:
                                # Sort by publishedAt descending (newest first)
                                matching_messages.sort(key=lambda msg: getattr(msg, 'publishedAt', 0), reverse=True)
                                newest_message = matching_messages[0]

                                logger.debug(f"Found {len(matching_messages)} matching messages, using newest: {newest_message.id} (publishedAt: {getattr(newest_message, 'publishedAt', 'unknown')})")
                                logger.debug(f"Newest message has {len(newest_message.documents) if newest_message.documents else 0} documents")

                                if newest_message.documents:
                                    doc_names = [doc.fileName for doc in newest_message.documents if hasattr(doc, 'fileName')]
                                    logger.debug(f"Added {len(newest_message.documents)} documents from newest message {newest_message.id}: {doc_names}")
                                    all_documents.extend(newest_message.documents)
                                else:
                                    logger.debug(f"No documents found in newest message {newest_message.id}")
                            else:
                                logger.debug(f"No messages found with documentsLabel: {doc_ref}")
                                # Fallback: also check if any message has this documentsLabel as a prefix
                                logger.debug(f"Trying fallback search for messages with documentsLabel containing: {doc_ref}")
                                fallback_messages = []
                                for message in self.workflow.messages:
                                    msg_documents_label = getattr(message, 'documentsLabel', '')
                                    if msg_documents_label and msg_documents_label.startswith(doc_ref):
                                        fallback_messages.append(message)
                                        logger.debug(f"Found fallback message {message.id} with documentsLabel: {msg_documents_label}")

                                if fallback_messages:
                                    # Sort by publishedAt descending (newest first)
                                    fallback_messages.sort(key=lambda msg: getattr(msg, 'publishedAt', 0), reverse=True)
                                    newest_fallback = fallback_messages[0]

                                    logger.debug(f"Using fallback message {newest_fallback.id} with documentsLabel: {getattr(newest_fallback, 'documentsLabel', 'unknown')}")
                                    if newest_fallback.documents:
                                        doc_names = [doc.fileName for doc in newest_fallback.documents if hasattr(doc, 'fileName')]
                                        logger.debug(f"Added {len(newest_fallback.documents)} documents from fallback message {newest_fallback.id}: {doc_names}")
                                        all_documents.extend(newest_fallback.documents)
                                    else:
                                        logger.debug(f"No documents found in fallback message {newest_fallback.id}")
                                else:
                                    logger.debug("No fallback messages found either")

            logger.debug(f"Resolved {len(all_documents)} documents from document list: {documentList}")
            return all_documents
        except Exception as e:
            logger.error(f"Error getting documents from document list: {str(e)}")
            return []
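    # Reference format sketch (illustrative; the IDs and labels are made up):
    #
    #     documentList = [
    #         "docItem:3f9a7c12:report.pdf",        # single document by ID
    #         "docList:8842:round1_task2_output",   # all documents of message 8842
    #         "docList:round1_task2_output",        # newest message with this documentsLabel
    #         "round1_task2_action3_contextinfo",   # direct documentsLabel reference
    #     ]
    #     docs = workflowService.getChatDocumentsFromDocumentList(documentList)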
    def getConnectionReferenceFromUserConnection(self, connection: UserConnection) -> str:
        """Get connection reference from UserConnection with enhanced state information"""
        # Get token information to check if it's expired
        token = None
        token_status = "unknown"
        try:
            # Get a fresh token via TokenManager convenience method
            logger.debug(f"Getting fresh token for connection {connection.id}")
            from modules.security.tokenManager import TokenManager
            token = TokenManager().getFreshToken(self.interfaceApp, connection.id)
            if token:
                if hasattr(token, 'expiresAt') and token.expiresAt:
                    current_time = get_utc_timestamp()
                    logger.debug(f"getConnectionReferenceFromUserConnection: Current time: {current_time}")
                    logger.debug(f"getConnectionReferenceFromUserConnection: Token expires at: {token.expiresAt}")
                    if current_time > token.expiresAt:
                        token_status = "expired"
                    else:
                        # Check if this token was recently refreshed (within last 5 minutes)
                        time_since_creation = current_time - token.createdAt if hasattr(token, 'createdAt') else 0
                        if time_since_creation < 300:  # 5 minutes
                            token_status = "valid (refreshed)"
                        else:
                            token_status = "valid"
                else:
                    token_status = "no_expiration"
            else:
                token_status = "no_token"
        except Exception as e:
            token_status = f"error: {str(e)}"

        # Build enhanced reference with state information
        base_ref = f"connection:{connection.authority.value}:{connection.externalUsername}:{connection.id}"
        state_info = f" [status:{connection.status.value}, token:{token_status}]"

        logger.debug(f"getConnectionReferenceFromUserConnection: Built reference: {base_ref + state_info}")
        return base_ref + state_info

    def getUserConnectionFromConnectionReference(self, connectionReference: str) -> Optional[UserConnection]:
        """Get UserConnection from reference string (handles both old and enhanced formats)"""
        try:
            # Parse reference format: connection:{authority}:{username}:{id} [status:..., token:...]
            # Remove state information if present
            base_reference = connectionReference.split(' [')[0]

            parts = base_reference.split(':')
            if len(parts) != 4 or parts[0] != "connection":
                return None

            authority = parts[1]
            username = parts[2]
            conn_id = parts[3]

            # Get user connections through AppObjects interface
            user_connections = self.interfaceApp.getUserConnections(self.user.id)

            # Find matching connection
            for conn in user_connections:
                if str(conn.id) == conn_id and conn.authority.value == authority and conn.externalUsername == username:
                    return conn
            return None

        except Exception as e:
            logger.error(f"Error parsing connection reference: {str(e)}")
            return None
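    # Reference string sketch (illustrative; the authority, username, id and
    # status values are invented):
    #
    #     "connection:microsoft:alice@contoso.com:42 [status:active, token:valid]"
    #
    # The parser above strips the bracketed state via split(' [')[0] and then
    # matches authority, username and id against the user's stored connections.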
    def getFileInfo(self, fileId: str) -> Optional[Dict[str, Any]]:
        """Get file information, or None if the file is not found"""
        file_item = self.interfaceComponent.getFile(fileId)
        if file_item:
            return {
                "id": file_item.id,
                "fileName": file_item.fileName,
                "size": file_item.fileSize,
                "mimeType": file_item.mimeType,
                "fileHash": file_item.fileHash,
                "creationDate": file_item.creationDate
            }
        return None

    def getFileData(self, fileId: str) -> bytes:
        """Get file data by ID"""
        return self.interfaceComponent.getFileData(fileId)
    async def extractContentFromDocument(self, prompt: str, document: ChatDocument) -> ExtractedContent:
        """Extract content from ChatDocument using prompt"""
        try:
            # ChatDocument is just a reference, so we need to get file data using fileId
            if not hasattr(document, 'fileId') or not document.fileId:
                logger.error(f"Document {document.id} has no fileId")
                raise ValueError("Document has no fileId")

            # Get file data from service center using document's fileId
            fileData = self.getFileData(document.fileId)
            if not fileData:
                logger.error(f"No file data found for fileId: {document.fileId}")
                raise ValueError("No file data found for document")

            # Get fileName and mime type from document properties
            try:
                fileName = document.fileName
                mimeType = document.mimeType
            except Exception as e:
                # Try to diagnose and recover the issue
                diagnosis = self._diagnoseDocumentAccess(document)
                logger.error(f"Critical error: Cannot access document properties for document {document.id}. Diagnosis: {diagnosis}")

                # Attempt recovery
                if self._recoverDocumentAccess(document):
                    try:
                        fileName = document.fileName
                        mimeType = document.mimeType
                        logger.info(f"Document access recovered for {document.id} - proceeding with AI extraction")
                    except Exception as recovery_error:
                        logger.error(f"Recovery failed for document {document.id}: {str(recovery_error)}")
                        raise RuntimeError(f"Document {document.id} properties are permanently inaccessible after recovery attempt - cannot proceed with AI extraction: {str(recovery_error)}")
                else:
                    # Recovery failed - don't continue with invalid data
                    raise RuntimeError(f"Document {document.id} properties are inaccessible and recovery failed. Diagnosis: {diagnosis}")

            # Process with document processor directly
            extractedContent = await self.documentProcessor.processFileData(
                fileData=fileData,
                fileName=fileName,
                mimeType=mimeType,
                base64Encoded=False,
                prompt=prompt,
                documentId=document.id
            )

            # Note: ExtractedContent model only has 'id' and 'contents' fields
            # No need to set objectId or objectType as they don't exist in the model

            return extractedContent

        except Exception as e:
            logger.error(f"Error extracting from document: {str(e)}")
            raise
    def _diagnoseDocumentAccess(self, document: ChatDocument) -> Dict[str, Any]:
        """
        Diagnose document access issues and provide recovery information.
        This method helps identify why document properties are inaccessible.
        """
        try:
            diagnosis = {
                'document_id': document.id,
                'file_id': document.fileId,
                'has_component_interface': document._componentInterface is not None,
                'component_interface_type': type(document._componentInterface).__name__ if document._componentInterface else None,
                'file_exists': False,
                'file_info': None,
                'error_details': None
            }

            # Check if component interface is set
            if not document._componentInterface:
                diagnosis['error_details'] = "Component interface not set - document cannot access file system"
                return diagnosis

            # Try to access the file directly
            try:
                file_info = self.interfaceComponent.getFile(document.fileId)
                if file_info:
                    diagnosis['file_exists'] = True
                    diagnosis['file_info'] = {
                        'fileName': file_info.fileName if hasattr(file_info, 'fileName') else 'N/A',
                        'fileSize': file_info.fileSize if hasattr(file_info, 'fileSize') else 'N/A',
                        'mimeType': file_info.mimeType if hasattr(file_info, 'mimeType') else 'N/A'
                    }
                else:
                    diagnosis['error_details'] = f"File with ID {document.fileId} not found in component interface"
            except Exception as e:
                diagnosis['error_details'] = f"Error accessing file {document.fileId}: {str(e)}"

            return diagnosis

        except Exception as e:
            return {
                'document_id': document.id if hasattr(document, 'id') else 'unknown',
                'file_id': document.fileId if hasattr(document, 'fileId') else 'unknown',
                'error_details': f"Error during diagnosis: {str(e)}"
            }

    def _recoverDocumentAccess(self, document: ChatDocument) -> bool:
        """
        Attempt to recover document access by re-setting the component interface.
        Returns True if recovery was successful.
        """
        try:
            logger.info(f"Attempting to recover document access for document {document.id}")

            # Re-set the component interface
            document.setComponentInterface(self.interfaceComponent)

            # Test if we can now access the fileName
            try:
                test_fileName = document.fileName
                logger.info(f"Document access recovered for {document.id} -> {test_fileName}")
                return True
            except Exception as e:
                logger.error(f"Document access recovery failed for {document.id}: {str(e)}")
                return False

        except Exception as e:
            logger.error(f"Error during document access recovery for {document.id}: {str(e)}")
            return False
    def createDocument(self, fileName: str, mimeType: str, content: str, base64encoded: bool = True, messageId: str = None) -> ChatDocument:
        """Create document with file in one step - handles file creation internally"""
        # Convert content to bytes based on base64 flag
        if base64encoded:
            import base64
            content_bytes = base64.b64decode(content)
        else:
            content_bytes = content.encode('utf-8')

        # Create the file (hash and size are computed inside interfaceComponent)
        file_item = self.interfaceComponent.createFile(
            name=fileName,
            mimeType=mimeType,
            content=content_bytes
        )

        # Then store the file data
        self.interfaceComponent.createFileData(file_item.id, content_bytes)

        # Get file info to copy attributes
        file_info = self.getFileInfo(file_item.id)
        if not file_info:
            logger.error(f"Could not get file info for fileId: {file_item.id}")
            raise ValueError(f"File info not found for fileId: {file_item.id}")

        # Create document with all file attributes copied
        document = ChatDocument(
            id=str(uuid.uuid4()),
            messageId=messageId or "",  # Use provided messageId or empty string as fallback
            fileId=file_item.id,
            fileName=file_info.get("fileName", fileName),
            fileSize=file_info.get("size", 0),
            mimeType=file_info.get("mimeType", mimeType)
        )

        return document
    def calculateObjectSize(self, obj: Any) -> int:
        """
        Calculate the size of an object in bytes.

        Args:
            obj: Object to calculate size for

        Returns:
            int: Size in bytes
        """
        try:
            import json

            if obj is None:
                return 0

            # Convert object to JSON string and calculate size
            json_str = json.dumps(obj, ensure_ascii=False, default=str)
            return len(json_str.encode('utf-8'))

        except Exception as e:
            logger.error(f"Error calculating object size: {str(e)}")
            return 0
def getWorkflowContext(self) -> Dict[str, int]:
|
||||
"""Get current workflow context for document generation"""
|
||||
try:
|
||||
return {
|
||||
'currentRound': self.workflow.currentRound if hasattr(self.workflow, 'currentRound') else 0,
|
||||
'currentTask': self.workflow.currentTask if hasattr(self.workflow, 'currentTask') else 0,
|
||||
'currentAction': self.workflow.currentAction if hasattr(self.workflow, 'currentAction') else 0
|
||||
}
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting workflow context: {str(e)}")
|
||||
return {'currentRound': 0, 'currentTask': 0, 'currentAction': 0}
|
||||
|
||||
def setWorkflowContext(self, round_number: int = None, task_number: int = None, action_number: int = None):
|
||||
"""Set current workflow context for document generation and routing"""
|
||||
try:
|
||||
# Prepare update data
|
||||
update_data = {}
|
||||
|
||||
if round_number is not None:
|
||||
self.workflow.currentRound = round_number
|
||||
update_data["currentRound"] = round_number
|
||||
if task_number is not None:
|
||||
self.workflow.currentTask = task_number
|
||||
update_data["currentTask"] = task_number
|
||||
if action_number is not None:
|
||||
self.workflow.currentAction = action_number
|
||||
update_data["currentAction"] = action_number
|
||||
|
||||
# Persist changes to database if any updates were made
|
||||
if update_data:
|
||||
self.interfaceChat.updateWorkflow(self.workflow.id, update_data)
|
||||
|
||||
logger.debug(f"Updated workflow context: Round {self.workflow.currentRound if hasattr(self.workflow, 'currentRound') else 'N/A'}, Task {self.workflow.currentTask if hasattr(self.workflow, 'currentTask') else 'N/A'}, Action {self.workflow.currentAction if hasattr(self.workflow, 'currentAction') else 'N/A'}")
|
||||
except Exception as e:
|
||||
logger.error(f"Error setting workflow context: {str(e)}")
|
||||
|
||||
def getWorkflowStats(self) -> Dict[str, Any]:
|
||||
"""Get comprehensive workflow statistics including current context"""
|
||||
try:
|
||||
workflow_context = self.getWorkflowContext()
|
||||
return {
|
||||
'currentRound': workflow_context['currentRound'],
|
||||
'currentTask': workflow_context['currentTask'],
|
||||
'currentAction': workflow_context['currentAction'],
|
||||
'totalTasks': self.workflow.totalTasks if hasattr(self.workflow, 'totalTasks') else 0,
|
||||
'totalActions': self.workflow.totalActions if hasattr(self.workflow, 'totalActions') else 0,
|
||||
'workflowStatus': self.workflow.status if hasattr(self.workflow, 'status') else 'unknown',
|
||||
'workflowId': self.workflow.id if hasattr(self.workflow, 'id') else 'unknown'
|
||||
}
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting workflow stats: {str(e)}")
|
||||
return {
|
||||
'currentRound': 0,
|
||||
'currentTask': 0,
|
||||
'currentAction': 0,
|
||||
'totalTasks': 0,
|
||||
'totalActions': 0,
|
||||
'workflowStatus': 'unknown',
|
||||
'workflowId': 'unknown'
|
||||
}
|
||||
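For orientation, a minimal usage sketch of the createDocument flow above, assuming a `service` instance of the surrounding service class and a plain-text payload (the variable values and message id below are illustrative only):

# Hedged sketch: create a ChatDocument from raw text via the one-step helper above.
import base64

raw = "Quarterly summary ..."
encoded = base64.b64encode(raw.encode("utf-8")).decode("ascii")

# base64encoded=True exercises the b64decode branch of createDocument.
doc = service.createDocument(
    fileName="summary.txt",
    mimeType="text/plain",
    content=encoded,
    base64encoded=True,
    messageId="msg-123",  # hypothetical message id
)
print(doc.fileId, doc.fileSize)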
120
modules/shared/eventManagement.py
Normal file

@@ -0,0 +1,120 @@
import logging
from typing import Callable, Optional, Dict, Any

from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.triggers.cron import CronTrigger
from apscheduler.triggers.interval import IntervalTrigger
from zoneinfo import ZoneInfo


logger = logging.getLogger(__name__)


class EventManagement:
    """
    Generic event scheduler wrapper around APScheduler's AsyncIOScheduler.

    Features:
    - start/stop lifecycle
    - register timed events with either cron or interval style
    - remove events by id
    """

    def __init__(self, timezone: str = "Europe/Zurich"):
        self._timezone = ZoneInfo(timezone)
        self._scheduler: Optional[AsyncIOScheduler] = None

    @property
    def scheduler(self) -> AsyncIOScheduler:
        if self._scheduler is None:
            self._scheduler = AsyncIOScheduler(timezone=self._timezone)
        return self._scheduler

    def start(self) -> None:
        if not self.scheduler.running:
            self.scheduler.start()
            logger.info("EventManagement scheduler started")

    def stop(self) -> None:
        if self._scheduler and self._scheduler.running:
            try:
                self._scheduler.shutdown(wait=False)
                logger.info("EventManagement scheduler stopped")
            except Exception as exc:
                logger.error(f"Error stopping scheduler: {exc}")

    def register_cron(
        self,
        job_id: str,
        func: Callable,
        *,
        cron_kwargs: Optional[Dict[str, Any]] = None,
        replace_existing: bool = True,
        coalesce: bool = True,
        max_instances: int = 1,
        misfire_grace_time: int = 1800,
        **kwargs: Any,
    ) -> None:
        """
        Register a job using CronTrigger. Provide cron fields as keyword args, e.g.:
            cron_kwargs={"minute": "0,20,40"}
        """
        trigger = CronTrigger(timezone=self._timezone, **(cron_kwargs or {}))
        self.scheduler.add_job(
            func,
            trigger,
            id=job_id,
            replace_existing=replace_existing,
            coalesce=coalesce,
            max_instances=max_instances,
            misfire_grace_time=misfire_grace_time,
            **kwargs,
        )
        logger.info(f"Registered cron job '{job_id}' with args {cron_kwargs}")

    def register_interval(
        self,
        job_id: str,
        func: Callable,
        *,
        seconds: Optional[int] = None,
        minutes: Optional[int] = None,
        hours: Optional[int] = None,
        replace_existing: bool = True,
        coalesce: bool = True,
        max_instances: int = 1,
        misfire_grace_time: int = 1800,
        **kwargs: Any,
    ) -> None:
        """
        Register a job using IntervalTrigger.
        """
        trigger = IntervalTrigger(
            seconds=seconds, minutes=minutes, hours=hours, timezone=self._timezone
        )
        self.scheduler.add_job(
            func,
            trigger,
            id=job_id,
            replace_existing=replace_existing,
            coalesce=coalesce,
            max_instances=max_instances,
            misfire_grace_time=misfire_grace_time,
            **kwargs,
        )
        logger.info(
            f"Registered interval job '{job_id}' (h={hours}, m={minutes}, s={seconds})"
        )

    def remove(self, job_id: str) -> None:
        try:
            self.scheduler.remove_job(job_id)
            logger.info(f"Removed job '{job_id}'")
        except Exception as exc:
            logger.warning(f"Could not remove job '{job_id}': {exc}")


# Singleton instance for easy import and reuse
eventManager = EventManagement()
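A minimal usage sketch of the new wrapper, with a hypothetical job (the coroutine name below is illustrative and not part of this commit):

from modules.shared.eventManagement import eventManager

async def nightly_cleanup():  # hypothetical job for illustration
    ...

# Cron-style: run at minutes 00, 20 and 40 of every hour (Europe/Zurich timezone).
eventManager.register_cron(
    "nightly_cleanup_cron",
    nightly_cleanup,
    cron_kwargs={"minute": "0,20,40"},
)

# Interval-style alternative: every 20 minutes from scheduler start.
eventManager.register_interval("nightly_cleanup_interval", nightly_cleanup, minutes=20)

eventManager.start()  # jobs may be registered before or after start
eventManager.remove("nightly_cleanup_interval")
eventManager.stop()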
@@ -20,6 +20,9 @@ class MethodAi(MethodBase):
         super().__init__(service)
         self.name = "ai"
         self.description = "AI processing methods"
+        # Centralized services interface (for AI)
+        from modules.services import getInterface as getServices
+        self.services = getServices(self.service.user, self.service.workflow)
 
     def _format_timestamp_for_filename(self) -> str:
         """Format current timestamp as YYYYMMDD-hhmmss for filenames."""
@@ -177,10 +180,43 @@ class MethodAi(MethodBase):
             min_tokens_hint = "\n\nPlease ensure the response is substantial and complete."
             call_prompt = enhanced_prompt + min_tokens_hint
 
-            if processingMode in ["advanced", "detailed"]:
-                result = await self.service.callAiTextAdvanced(call_prompt, context)
-            else:
-                result = await self.service.callAiTextBasic(call_prompt, context)
+            # Centralized AI call with optional document context
+            documents = []
+            try:
+                if documentList:
+                    for d in (chatDocuments or []):
+                        try:
+                            file_data = self.service.getFileData(d.fileId)
+                            documents.append(
+                                ChatDocument(
+                                    fileData=file_data,
+                                    fileName=d.fileName,
+                                    mimeType=d.mimeType
+                                )
+                            )
+                        except Exception:
+                            continue
+            except Exception:
+                documents = None
+
+            output_format = output_extension.replace('.', '') or 'txt'
+            result = await self.services.ai.callAi(
+                prompt=call_prompt,
+                documents=documents or None,
+                options={
+                    "process_type": "text",
+                    "operation_type": "generate_content",
+                    "priority": "quality" if processingMode in ["advanced", "detailed"] else "speed",
+                    "compress_prompt": processingMode != "detailed",
+                    "compress_documents": True,
+                    "process_documents_individually": True,
+                    "processing_mode": processingMode,
+                    "result_format_requested": output_format,
+                    "include_metadata": includeMetadata,
+                    "max_cost": 0.05 if processingMode in ["advanced", "detailed"] else 0.02,
+                    "max_processing_time": 45 if processingMode in ["advanced", "detailed"] else 20
+                }
+            )
 
             # If expected JSON and too short/not JSON, retry with stricter JSON guardrails
             if output_extension == ".json":
@@ -207,7 +243,23 @@ class MethodAi(MethodBase):
                     "Include all requested fields with detailed content."
                 )
                 try:
-                    result = await self.service.callAiTextAdvanced(guardrail_prompt, context)
+                    result = await self.services.ai.callAi(
+                        prompt=guardrail_prompt,
+                        documents=context or None,
+                        options={
+                            "process_type": "text",
+                            "operation_type": "generate_content",
+                            "priority": "quality",
+                            "compress_prompt": False,
+                            "compress_documents": True,
+                            "process_documents_individually": True,
+                            "processing_mode": "detailed",
+                            "result_format_requested": "json",
+                            "include_metadata": False,
+                            "max_cost": 0.03,
+                            "max_processing_time": 30
+                        }
+                    )
                 except Exception:
                     result = cleaned  # fallback to first attempt
@@ -22,6 +22,9 @@ class MethodDocument(MethodBase):
         super().__init__(serviceCenter)
         self.name = "document"
         self.description = "Handle document operations like extraction and analysis"
+        # Centralized services interface (for AI)
+        from modules.services import getInterface as getServices
+        self.services = getServices(self.service.user, self.service.workflow)
 
     def _format_timestamp_for_filename(self) -> str:
         """Format current timestamp as YYYYMMDD-hhmmss for filenames."""
@@ -530,7 +533,18 @@ class MethodDocument(MethodBase):
 
             # Call AI to generate the formatted content
             logger.info(f"Calling AI for {extension} format conversion")
-            formatted_content = await self.service.callAiTextBasic(ai_prompt, content)
+            formatted_content = await self.services.ai.callAi(
+                prompt=ai_prompt,
+                documents=None,
+                options={
+                    "process_type": "text",
+                    "operation_type": "generate_content",
+                    "priority": "speed",
+                    "compress_prompt": True,
+                    "compress_documents": False,
+                    "max_cost": 0.02
+                }
+            )
 
             if not formatted_content or formatted_content.strip() == "":
                 logger.warning("AI format conversion failed, using fallback")
@@ -751,7 +765,36 @@ SOURCE DOCUMENT CONTENT:
 
         # Call AI to generate the report
         logger.info(f"Generating AI report for {len(validDocuments)} documents")
-        aiReport = await self.service.callAiTextAdvanced(aiPrompt, combinedContent)
+        # Build ChatDocument list from chatDocuments
+        documents = []
+        try:
+            from modules.interfaces.interfaceChatModel import ChatDocument as ChatDoc
+            for d in validDocuments:
+                try:
+                    data = self.service.getFileData(d.fileId) if hasattr(d, 'fileId') else None
+                    if data:
+                        documents.append(ChatDoc(fileData=data, fileName=d.fileName, mimeType=d.mimeType))
+                except Exception:
+                    continue
+        except Exception:
+            documents = None
+        aiReport = await self.services.ai.callAi(
+            prompt=aiPrompt,
+            documents=documents or None,
+            options={
+                "process_type": "text",
+                "operation_type": "report_generation",
+                "priority": "quality",
+                "compress_prompt": False,
+                "compress_documents": True,
+                "process_documents_individually": True,
+                "result_format_requested": "html",
+                "include_metadata": includeMetadata,
+                "processing_mode": "detailed",
+                "max_cost": 0.08,
+                "max_processing_time": 90
+            }
+        )
 
         # If AI call fails, return error - AI is crucial for report generation
         if not aiReport or aiReport.strip() == "":
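The hunks above and below all funnel through services.ai.callAi with a common options dictionary. The actual declaration is not part of this diff; a sketch of the interface as these call sites appear to assume it (an inference, not the real signature):

from typing import Any, Dict, List, Optional, Protocol

class AiService(Protocol):
    # Assumed shape, inferred from the call sites in this commit; the real
    # implementation lives in modules.services and is not shown here.
    async def callAi(
        self,
        prompt: str,
        documents: Optional[List[Any]] = None,    # ChatDocument items (fileData/fileName/mimeType)
        options: Optional[Dict[str, Any]] = None,  # process_type, operation_type, priority, cost/time budgets, ...
    ) -> str: ...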
@@ -96,6 +96,9 @@ class MethodOutlook(MethodBase):
         super().__init__(serviceCenter)
         self.name = "outlook"
         self.description = "Handle Microsoft Outlook email operations"
+        # Centralized services interface (for AI)
+        from modules.services import getInterface as getServices
+        self.services = getServices(self.service.user, self.service.workflow)
 
     def _format_timestamp_for_filename(self) -> str:
         """Format current timestamp as YYYYMMDD-hhmmss for filenames."""
@@ -116,8 +119,9 @@ class MethodOutlook(MethodBase):
 
         logger.debug(f"Found connection: {userConnection.id}, status: {userConnection.status.value}, authority: {userConnection.authority.value}")
 
-        # Get the token for this specific connection
-        token = self.service.interfaceApp.getConnectionToken(userConnection.id)
+        # Get a fresh token for this specific connection
+        from modules.security.tokenManager import TokenManager
+        token = TokenManager().getFreshToken(self.service.interfaceApp, userConnection.id)
         if not token:
             logger.error(f"Token not found for connection: {userConnection.id}")
             logger.debug(f"Connection details: {userConnection}")
@@ -1605,7 +1609,36 @@ class MethodOutlook(MethodBase):
 
         # Call AI to compose the email
         try:
-            composed_email = await self.service.interfaceAiCalls.callAiTextAdvanced(ai_prompt)
+            # Centralized AI call for email composition with document context
+            documents = []
+            try:
+                if composition_documents:
+                    from modules.interfaces.interfaceChatModel import ChatDocument as ChatDoc
+                    for d in composition_documents:
+                        try:
+                            data = self.service.getFileData(d.fileId) if hasattr(d, 'fileId') else None
+                            if data:
+                                documents.append(ChatDoc(fileData=data, fileName=d.fileName, mimeType=d.mimeType))
+                        except Exception:
+                            continue
+            except Exception:
+                documents = None
+
+            composed_email = await self.services.ai.callAi(
+                prompt=ai_prompt,
+                documents=documents or None,
+                options={
+                    "process_type": "text",
+                    "operation_type": "email_composition",
+                    "priority": "speed",
+                    "compress_prompt": True,
+                    "compress_documents": True,
+                    "process_documents_individually": False,
+                    "include_metadata": True,
+                    "max_cost": 0.02,
+                    "max_processing_time": 15
+                }
+            )
 
             # Parse the AI response to ensure it's valid JSON
             try:
@@ -48,8 +48,9 @@ class MethodSharepoint(MethodBase):
             logger.warning(f"Connection {userConnection.id} status is not active/pending: {userConnection.status.value}")
             return None
 
-        # Get the token for this specific connection
-        token = self.service.interfaceApp.getConnectionToken(userConnection.id)
+        # Get a fresh token for this specific connection
+        from modules.security.tokenManager import TokenManager
+        token = TokenManager().getFreshToken(self.service.interfaceApp, userConnection.id)
         if not token:
             logger.warning(f"No token found for connection {userConnection.id}")
             return None
@@ -22,6 +22,9 @@ class MethodWeb(MethodBase):
         super().__init__(serviceCenter)
         self.name = "web"
         self.description = "Web search, crawling, and scraping operations using Tavily"
+        # Centralized services interface (for AI)
+        from modules.services import getInterface as getServices
+        self.services = getServices(self.service.user, self.service.workflow)
 
     @action
     async def search(self, parameters: Dict[str, Any]) -> ActionResult:
@@ -274,7 +277,21 @@ class MethodWeb(MethodBase):
                 "Return only bullet points without any preface."
             )
             context = content[:4000]
-            summary = await self.service.callAiTextBasic(prompt, context)
+            # Centralized AI summary (balanced analyse_content)
+            summary = await self.services.ai.callAi(
+                prompt=prompt,
+                documents=None,
+                options={
+                    "process_type": "text",
+                    "operation_type": "analyse_content",
+                    "priority": "balanced",
+                    "compress_prompt": True,
+                    "compress_documents": False,
+                    "processing_mode": "advanced",
+                    "max_cost": 0.05,
+                    "max_processing_time": 30
+                }
+            )
             summary = summary.strip()
         except Exception:
             summary = ""
@@ -12,8 +12,8 @@ from modules.interfaces.interfaceChatModel import (
 )
 from modules.interfaces.interfaceAppObjects import getInterface as getAppObjects
 from modules.shared.timezoneUtils import get_utc_timestamp
-from modules.workflows._transfer.executionState import TaskExecutionState
-from modules.workflows._transfer.promptFactory import (
+from modules.workflows.processing.executionState import TaskExecutionState
+from modules.workflows.processing.promptFactory import (
     createTaskPlanningPrompt,
     createActionDefinitionPrompt,
     createResultReviewPrompt,
@@ -21,7 +21,8 @@ from modules.workflows._transfer.promptFactory import (
     createActionParameterPrompt,
     createRefinementPrompt
 )
-from modules.services.serviceDocument.documentGeneration import DocumentGenerator
+from modules.services.serviceDocument.mainServiceDocumentGeneration import DocumentGenerationService
+from modules.workflows.processing.promptFactory import methods
 import uuid
 
 logger = logging.getLogger(__name__)
@@ -31,13 +32,10 @@ class WorkflowStoppedException(Exception):
     pass
 
 class HandlingTasks:
-    def __init__(self, chatInterface, currentUser, workflow=None):
-        self.chatInterface = chatInterface
-        self.currentUser = currentUser
+    def __init__(self, services, workflow=None):
+        self.services = services
         self.workflow = workflow
-        from modules.services.serviceCenter import ServiceCenter
-        self.service = ServiceCenter(currentUser, workflow)
-        self.documentGenerator = DocumentGenerator(self.service)
+        self.documentGenerator = DocumentGenerationService(self.services.center)
 
     def _checkWorkflowStopped(self):
         """
@@ -46,7 +44,7 @@ class HandlingTasks:
         """
         try:
             # Get the current workflow status from the database to avoid stale data
-            current_workflow = self.chatInterface.getWorkflow(self.service.workflow.id)
+            current_workflow = services.chatInterface.getWorkflow(self.service.workflow.id)
             if current_workflow and current_workflow.status == "stopped":
                 logger.info("Workflow stopped by user, aborting execution")
                 raise WorkflowStoppedException("Workflow was stopped by user")
@@ -113,9 +111,23 @@ class HandlingTasks:
         # Log task planning prompt sent to AI
         logger.info("=== TASK PLANNING PROMPT SENT TO AI ===")
         # Trace task planning prompt
-        self.service.writeTraceLog("Task Plan Prompt", task_planning_prompt)
+        self.writeTraceLog("Task Plan Prompt", task_planning_prompt)
 
-        prompt = await self.service.callAiTextAdvanced(task_planning_prompt)
+        # Centralized AI call: Task planning (quality, detailed)
+        prompt = await self.services.ai.callAi(
+            prompt=task_planning_prompt,
+            documents=None,
+            options={
+                "process_type": "text",
+                "operation_type": "generate_plan",
+                "priority": "quality",
+                "compress_prompt": False,
+                "compress_documents": False,
+                "processing_mode": "detailed",
+                "max_cost": 0.10,
+                "max_processing_time": 30
+            }
+        )
 
         # Check if AI response is valid
         if not prompt:
@@ -125,7 +137,7 @@ class HandlingTasks:
         logger.info("=== TASK PLANNING AI RESPONSE RECEIVED ===")
         logger.info(f"Response length: {len(prompt) if prompt else 0}")
         # Trace task planning response
-        self.service.writeTraceLog("Task Plan Response", prompt)
+        self.writeTraceLog("Task Plan Response", prompt)
 
         # Inline _parseTaskPlanResponse logic
         try:
@@ -251,7 +263,7 @@ class HandlingTasks:
             "taskProgress": "pending"
         }
 
-        message = self.chatInterface.createMessage(message_data)
+        message = services.chatInterface.createMessage(message_data)
         if message:
             workflow.messages.append(message)
 
@@ -359,9 +371,23 @@ class HandlingTasks:
         # Generate the action definition prompt
         action_prompt = await createActionDefinitionPrompt(action_context, self.service)
         # Trace action planning prompt
-        self.service.writeTraceLog("Action Plan Prompt", action_prompt)
+        self.writeTraceLog("Action Plan Prompt", action_prompt)
 
-        prompt = await self.service.callAiTextAdvanced(action_prompt)
+        # Centralized AI call: Action planning (quality, detailed)
+        prompt = await self.services.ai.callAi(
+            prompt=action_prompt,
+            documents=None,
+            options={
+                "process_type": "text",
+                "operation_type": "generate_plan",
+                "priority": "quality",
+                "compress_prompt": False,
+                "compress_documents": False,
+                "processing_mode": "detailed",
+                "max_cost": 0.10,
+                "max_processing_time": 30
+            }
+        )
 
         # Check if AI response is valid
         if not prompt:
@@ -371,7 +397,7 @@ class HandlingTasks:
         logger.info("=== ACTION PLAN AI RESPONSE RECEIVED ===")
         logger.info(f"Response length: {len(prompt) if prompt else 0}")
         # Trace action planning response
-        self.service.writeTraceLog("Action Plan Response", prompt)
+        self.writeTraceLog("Action Plan Response", prompt)
 
         # Inline parseActionResponse logic here
         json_start = prompt.find('{')
@@ -438,9 +464,23 @@ class HandlingTasks:
     async def plan_select(self, context: TaskContext) -> Dict[str, Any]:
         """Plan: select exactly one action. Returns {"action": {method, name}}"""
         prompt = createActionSelectionPrompt(context, self.service)
-        self.service.writeTraceLog("React Plan Selection Prompt", prompt)
-        response = await self.service.callAiTextAdvanced(prompt)
-        self.service.writeTraceLog("React Plan Selection Response", response)
+        self.writeTraceLog("React Plan Selection Prompt", prompt)
+        # Centralized AI call for plan selection (use plan generation quality)
+        response = await self.services.ai.callAi(
+            prompt=prompt,
+            documents=None,
+            options={
+                "process_type": "text",
+                "operation_type": "generate_plan",
+                "priority": "quality",
+                "compress_prompt": False,
+                "compress_documents": False,
+                "processing_mode": "detailed",
+                "max_cost": 0.10,
+                "max_processing_time": 30
+            }
+        )
+        self.writeTraceLog("React Plan Selection Response", response)
         json_start = response.find('{') if response else -1
         json_end = response.rfind('}') + 1 if response else 0
         if json_start == -1 or json_end == 0:
@@ -454,9 +494,23 @@ class HandlingTasks:
         """Act: request minimal parameters then execute selected action."""
         action = selection.get('action', {})
         params_prompt = createActionParameterPrompt(context, action, self.service)
-        self.service.writeTraceLog("React Parameters Prompt", params_prompt)
-        params_resp = await self.service.callAiTextAdvanced(params_prompt)
-        self.service.writeTraceLog("React Parameters Response", params_resp)
+        self.writeTraceLog("React Parameters Prompt", params_prompt)
+        # Centralized AI call for parameter suggestion (balanced analysis)
+        params_resp = await self.services.ai.callAi(
+            prompt=params_prompt,
+            documents=None,
+            options={
+                "process_type": "text",
+                "operation_type": "analyse_content",
+                "priority": "balanced",
+                "compress_prompt": True,
+                "compress_documents": False,
+                "processing_mode": "advanced",
+                "max_cost": 0.05,
+                "max_processing_time": 30
+            }
+        )
+        self.writeTraceLog("React Parameters Response", params_resp)
         js = params_resp[params_resp.find('{'):params_resp.rfind('}')+1] if params_resp else '{}'
         try:
             param_obj = json.loads(js)
@@ -508,9 +562,23 @@ class HandlingTasks:
     async def refine_decide(self, context: TaskContext, observation: Dict[str, Any]) -> Dict[str, Any]:
         """Refine: decide continue or stop, with reason"""
         prompt = createRefinementPrompt(context, observation)
-        self.service.writeTraceLog("React Refinement Prompt", prompt)
-        resp = await self.service.callAiTextAdvanced(prompt)
-        self.service.writeTraceLog("React Refinement Response", resp)
+        self.writeTraceLog("React Refinement Prompt", prompt)
+        # Centralized AI call for refinement decision (balanced analysis)
+        resp = await self.services.ai.callAi(
+            prompt=prompt,
+            documents=None,
+            options={
+                "process_type": "text",
+                "operation_type": "analyse_content",
+                "priority": "balanced",
+                "compress_prompt": True,
+                "compress_documents": False,
+                "processing_mode": "advanced",
+                "max_cost": 0.05,
+                "max_processing_time": 30
+            }
+        )
+        self.writeTraceLog("React Refinement Response", resp)
        js = resp[resp.find('{'):resp.rfind('}')+1] if resp else '{}'
         try:
             decision = json.loads(js)
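The three methods rewired above drive one React round. Schematically, a simplified sketch of how they compose (not the verbatim loop body; `act` stands in for the parameter/execution step whose definition the hunk above truncates, and the loop scaffolding is assumed from the hunks that follow):

# Hedged sketch of a single React step inside the execution loop.
selection = await self.plan_select(context)                 # choose exactly one action
observation = await self.act(context, selection)            # fill minimal parameters, execute
decision = await self.refine_decide(context, observation)   # continue-or-stop decision
if not should_continue(observation, decision, step, state.max_steps):
    return  # the real loop breaks out; see the @@ -611 hunk below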
@@ -560,7 +628,7 @@ class HandlingTasks:
         if task_step.userMessage:
             task_start_message["message"] += f"\n\n💬 {task_step.userMessage}"
 
-        message = self.chatInterface.createMessage(task_start_message)
+        message = services.chatInterface.createMessage(task_start_message)
         if message:
             workflow.messages.append(message)
             logger.info(f"Task start message created for task {task_index}")
@@ -590,7 +658,7 @@ class HandlingTasks:
                     decision = await self.refine_decide(context, observation)
                     # Telemetry: simple duration per step
                     duration = time.time() - t0
-                    self.chatInterface.createLog({
+                    services.chatInterface.createLog({
                         "workflowId": workflow.id,
                         "message": f"react_step_duration_sec={duration:.3f}",
                         "type": "info"
@@ -611,12 +679,12 @@ class HandlingTasks:
                         "actionNumber": step,
                         "actionProgress": "success" if result.success else "fail"
                     }
-                    self.chatInterface.createMessage(msg)
+                    services.chatInterface.createMessage(msg)
                 except Exception as e:
                     logger.error(f"React step {step} error: {e}")
                     break
 
-                from modules.workflows._transfer.executionState import should_continue
+                from modules.workflows.processing.executionState import should_continue
                 if not should_continue(observation, last_review_dict, step, state.max_steps):
                     break
                 step += 1
@@ -709,7 +777,7 @@ class HandlingTasks:
             "actionNumber": action_number
         })
 
-        message = self.chatInterface.createMessage(action_start_message)
+        message = services.chatInterface.createMessage(action_start_message)
         if message:
             workflow.messages.append(message)
             logger.info(f"Action start message created for action {action_number}")
@@ -763,7 +831,7 @@ class HandlingTasks:
             "taskProgress": "success"
         }
 
-        message = self.chatInterface.createMessage(task_completion_message)
+        message = services.chatInterface.createMessage(task_completion_message)
         if message:
             workflow.messages.append(message)
             logger.info(f"Task completion message created for task {task_index}")
@@ -855,7 +923,7 @@ class HandlingTasks:
             "taskProgress": "retry"
         }
 
-        message = self.chatInterface.createMessage(retry_message)
+        message = services.chatInterface.createMessage(retry_message)
         if message:
             workflow.messages.append(message)
 
@@ -908,7 +976,7 @@ class HandlingTasks:
         }
 
         try:
-            message = self.chatInterface.createMessage(message_data)
+            message = services.chatInterface.createMessage(message_data)
             if message:
                 workflow.messages.append(message)
                 logger.info(f"Created user-facing retry message for failed task: {task_step.objective}")
@@ -962,7 +1030,7 @@ class HandlingTasks:
         }
 
         try:
-            message = self.chatInterface.createMessage(message_data)
+            message = services.chatInterface.createMessage(message_data)
             if message:
                 workflow.messages.append(message)
                 logger.info(f"Created user-facing error message for failed task: {task_step.objective}")
@@ -1024,15 +1092,29 @@ class HandlingTasks:
         logger.info(f"Action Results Count: {len(review_context.action_results) if review_context.action_results else 0}")
         logger.info(f"Task Actions Count: {len(review_context.task_actions) if review_context.task_actions else 0}")
         # Trace result review prompt
-        self.service.writeTraceLog("Result Review Prompt", prompt)
+        self.writeTraceLog("Result Review Prompt", prompt)
 
-        response = await self.service.callAiTextAdvanced(prompt)
+        # Centralized AI call: Result validation (balanced analysis)
+        response = await self.services.ai.callAi(
+            prompt=prompt,
+            documents=None,
+            options={
+                "process_type": "text",
+                "operation_type": "analyse_content",
+                "priority": "balanced",
+                "compress_prompt": True,
+                "compress_documents": False,
+                "processing_mode": "advanced",
+                "max_cost": 0.05,
+                "max_processing_time": 30
+            }
+        )
 
         # Log result review response received
         logger.info("=== RESULT REVIEW AI RESPONSE RECEIVED ===")
         logger.info(f"Response length: {len(response) if response else 0}")
         # Trace result review response
-        self.service.writeTraceLog("Result Review Response", response)
+        self.writeTraceLog("Result Review Response", response)
 
         # Inline parseReviewResponse logic here
         json_start = response.find('{')
@@ -1169,10 +1251,10 @@ class HandlingTasks:
             actionData["execParameters"] = {}
 
         # Use generic field separation based on TaskAction model
-        simple_fields, object_fields = self.chatInterface._separate_object_fields(TaskAction, actionData)
+        simple_fields, object_fields = services.chatInterface._separate_object_fields(TaskAction, actionData)
 
         # Create action in database
-        createdAction = self.chatInterface.db.recordCreate(TaskAction, simple_fields)
+        createdAction = services.chatInterface.db.recordCreate(TaskAction, simple_fields)
 
         # Convert to TaskAction model
         return TaskAction(
@@ -1229,7 +1311,7 @@ class HandlingTasks:
             # Check workflow status before executing the action
             self._checkWorkflowStopped()
 
-            result = await self.service.executeAction(
+            result = await self.executeAction(
                 methodName=action.execMethod,
                 actionName=action.execAction,
                 parameters=enhanced_parameters
@@ -1245,7 +1327,7 @@ class HandlingTasks:
                 "resultLabel": result_label,
                 "documentsCount": len(result.documents) if result.documents else 0
             }
-            self.service.writeTraceLog("Action Result", action_result_trace)
+            self.writeTraceLog("Action Result", action_result_trace)
 
             # Process documents from the action result
            created_documents = []
@@ -1276,7 +1358,7 @@ class HandlingTasks:
                 if created_documents:
                     message.documents = created_documents
                     # Update the message in the database
-                    self.chatInterface.updateMessage(message.id, {"documents": [doc.dict() for doc in created_documents]})
+                    services.chatInterface.updateMessage(message.id, {"documents": [doc.dict() for doc in created_documents]})
 
                 # Log action results
                 logger.info(f"Action completed successfully")
@@ -1302,7 +1384,7 @@ class HandlingTasks:
                 message = await self.createActionMessage(action, result, workflow, result_label, [], task_step, task_index)
 
                 # Create database log entry for action failure
-                self.chatInterface.createLog({
+                services.chatInterface.createLog({
                     "workflowId": workflow.id,
                     "message": f"❌ **Task {task_num}**\n\n❌ **Action {action_num}/{total_actions}** failed: {result.error}",
                     "type": "error"
@@ -1398,7 +1480,7 @@ class HandlingTasks:
         logger.info(f"Creating ERROR message: {message_text}")
         logger.info(f"Message data: {message_data}")
 
-        message = self.chatInterface.createMessage(message_data)
+        message = services.chatInterface.createMessage(message_data)
         if message:
             workflow.messages.append(message)
             logger.info(f"Message created: {action.execMethod}.{action.execAction}")
@@ -1558,7 +1640,7 @@ class HandlingTasks:
                 self.workflow.totalActions = 0
 
             # Update in database
-            self.chatInterface.updateWorkflow(self.workflow.id, update_data)
+            services.chatInterface.updateWorkflow(self.workflow.id, update_data)
             logger.info(f"Updated workflow {self.workflow.id} after task plan created: {update_data}")
 
         except Exception as e:
@@ -1582,7 +1664,7 @@ class HandlingTasks:
                 self.workflow.totalActions = 0
 
            # Update in database
-            self.chatInterface.updateWorkflow(self.workflow.id, update_data)
+            services.chatInterface.updateWorkflow(self.workflow.id, update_data)
             logger.info(f"Updated workflow {self.workflow.id} before executing task {task_number}: {update_data}")
 
         except Exception as e:
@@ -1602,7 +1684,7 @@ class HandlingTasks:
                 self.workflow.totalActions = total_actions
 
             # Update in database
-            self.chatInterface.updateWorkflow(self.workflow.id, update_data)
+            services.chatInterface.updateWorkflow(self.workflow.id, update_data)
             logger.info(f"Updated workflow {self.workflow.id} after action planning: {update_data}")
 
         except Exception as e:
@@ -1622,7 +1704,7 @@ class HandlingTasks:
                 self.workflow.currentAction = action_number
 
             # Update in database
-            self.chatInterface.updateWorkflow(self.workflow.id, update_data)
+            services.chatInterface.updateWorkflow(self.workflow.id, update_data)
             logger.info(f"Updated workflow {self.workflow.id} before executing action {action_number}: {update_data}")
 
         except Exception as e:
@@ -1643,7 +1725,7 @@ class HandlingTasks:
 
             # Update workflow object in database if we have changes
             if update_data:
-                self.chatInterface.updateWorkflow(self.workflow.id, update_data)
+                services.chatInterface.updateWorkflow(self.workflow.id, update_data)
                 logger.info(f"Updated workflow {self.workflow.id} totals in database: {update_data}")
 
             logger.debug(f"Updated workflow totals: Tasks {self.workflow.totalTasks if hasattr(self.workflow, 'totalTasks') else 'N/A'}, Actions {self.workflow.totalActions if hasattr(self.workflow, 'totalActions') else 'N/A'}")
@@ -1662,7 +1744,7 @@ class HandlingTasks:
             self.workflow.status = 'ready'
 
             # Update workflow object in database with reset values
-            self.chatInterface.updateWorkflow(self.workflow.id, {
+            services.chatInterface.updateWorkflow(self.workflow.id, {
                 "currentRound": 0,
                 "currentTask": 0,
                 "currentAction": 0,
@@ -1673,4 +1755,104 @@ class HandlingTasks:
 
             logger.info("Workflow reset for new session - all values set to initial state and updated in database")
         except Exception as e:
-            logger.error(f"Error resetting workflow for new session: {str(e)}")
+            logger.error(f"Error resetting workflow for new session: {str(e)}")
+
+    # ===== Functions moved from serviceCenter =====
+
+    async def executeAction(self, methodName: str, actionName: str, parameters: Dict[str, Any]) -> ActionResult:
+        """Execute a method action"""
+        try:
+            if methodName not in methods:
+                raise ValueError(f"Unknown method: {methodName}")
+
+            method = methods[methodName]
+            if actionName not in method['actions']:
+                raise ValueError(f"Unknown action: {actionName} for method {methodName}")
+
+            action = method['actions'][actionName]
+
+            # Execute the action
+            return await action['method'](parameters)
+
+        except Exception as e:
+            logger.error(f"Error executing method {methodName}.{actionName}: {str(e)}")
+            raise
+
+    def writeTraceLog(self, contextText: str, data: Any) -> None:
+        """Write trace data to configured trace file if in debug mode"""
+        try:
+            import logging
+            import os
+            from datetime import datetime, UTC
+            from modules.shared.configuration import APP_CONFIG
+
+            # Only write if logger is in debug mode
+            if logger.level > logging.DEBUG:
+                return
+
+            # Get log directory from configuration
+            logDir = APP_CONFIG.get("APP_LOGGING_LOG_DIR", "./")
+            if not os.path.isabs(logDir):
+                # If relative path, make it relative to the gateway directory
+                gatewayDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+                logDir = os.path.join(gatewayDir, logDir)
+
+            # Ensure log directory exists
+            os.makedirs(logDir, exist_ok=True)
+
+            # Create trace file path
+            trace_file = os.path.join(logDir, "log_trace.log")
+
+            # Format the trace entry
+            timestamp = datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
+            trace_entry = f"[{timestamp}] {contextText}\n"
+
+            # Add data if provided
+            if data is not None:
+                if isinstance(data, (dict, list)):
+                    import json
+                    trace_entry += f"Data: {json.dumps(data, indent=2, default=str)}\n"
+                else:
+                    trace_entry += f"Data: {str(data)}\n"
+
+            trace_entry += "-" * 80 + "\n\n"
+
+            # Write to trace file
+            with open(trace_file, "a", encoding="utf-8") as f:
+                f.write(trace_entry)
+
+        except Exception as e:
+            # Don't log trace errors to avoid recursion
+            pass
+
+    def clearTraceLog(self) -> None:
+        """Clear the trace log file"""
+        try:
+            import logging
+            import os
+            from modules.shared.configuration import APP_CONFIG
+
+            # Get log directory from configuration
+            logDir = APP_CONFIG.get("APP_LOGGING_LOG_DIR", "./")
+            if not os.path.isabs(logDir):
+                # If relative path, make it relative to the gateway directory
+                gatewayDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+                logDir = os.path.join(gatewayDir, logDir)
+
+            # Create trace file path
+            trace_file = os.path.join(logDir, "log_trace.log")
+
+            # Only clear if logger is in debug mode
+            if logger.level > logging.DEBUG:
+                # Delete file if not in debug mode
+                if os.path.exists(trace_file):
+                    os.remove(trace_file)
+                return
+
+            # Create empty file if in debug mode
+            with open(trace_file, "w", encoding="utf-8") as f:
+                f.write("")
+
+        except Exception as e:
+            # Don't log trace errors to avoid recursion
+            pass
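When debug logging is active, writeTraceLog appends entries shaped like the following to log_trace.log (timestamp and payload illustrative, derived from the formatting code above):

[2025-01-15 09:30:12.345] Task Plan Prompt
Data: "Plan the following objective ..."
--------------------------------------------------------------------------------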
@@ -3,13 +3,177 @@
 
 import json
 import logging
+import importlib
+import pkgutil
+import inspect
 from typing import Any, Dict, List
-from modules.interfaces.interfaceChatModel import TaskContext, ReviewContext
+from modules.interfaces.interfaceChatModel import TaskContext, ReviewContext, ChatDocument, DocumentExchange
 from modules.services.serviceDocument.documentUtility import getFileExtension
+from modules.workflows.methods.methodBase import MethodBase
 
 # Set up logger
 logger = logging.getLogger(__name__)
 
+# Global methods catalog - moved from serviceCenter
+methods = {}
+
+def _discoverMethods(service_center):
+    """Dynamically discover all method classes and their actions in modules methods package"""
+    try:
+        # Import the methods package
+        methodsPackage = importlib.import_module('modules.workflows.methods')
+
+        # Discover all modules in the package
+        for _, name, isPkg in pkgutil.iter_modules(methodsPackage.__path__):
+            if not isPkg and name.startswith('method'):
+                try:
+                    # Import the module
+                    module = importlib.import_module(f'modules.workflows.methods.{name}')
+
+                    # Find all classes in the module that inherit from MethodBase
+                    for itemName, item in inspect.getmembers(module):
+                        if (inspect.isclass(item) and
+                                issubclass(item, MethodBase) and
+                                item != MethodBase):
+                            # Instantiate the method
+                            methodInstance = item(service_center)
+
+                            # Discover actions from public methods
+                            actions = {}
+                            for methodName, method in inspect.getmembers(type(methodInstance), predicate=inspect.iscoroutinefunction):
+                                if not methodName.startswith('_'):
+                                    # Bind the method to the instance
+                                    bound_method = method.__get__(methodInstance, type(methodInstance))
+                                    sig = inspect.signature(method)
+                                    params = {}
+                                    for paramName, param in sig.parameters.items():
+                                        if paramName not in ['self']:
+                                            # Get parameter type
+                                            paramType = param.annotation if param.annotation != param.empty else Any
+
+                                            # Get parameter description from docstring or default
+                                            paramDesc = None
+                                            if param.default != param.empty and hasattr(param.default, '__doc__'):
+                                                paramDesc = param.default.__doc__
+
+                                            params[paramName] = {
+                                                'type': paramType,
+                                                'required': param.default == param.empty,
+                                                'description': paramDesc,
+                                                'default': param.default if param.default != param.empty else None
+                                            }
+
+                                    actions[methodName] = {
+                                        'description': method.__doc__ or '',
+                                        'parameters': params,
+                                        'method': bound_method
+                                    }
+
+                            # Add method instance with discovered actions
+                            methods[methodInstance.name] = {
+                                'instance': methodInstance,
+                                'description': methodInstance.description,
+                                'actions': actions
+                            }
+                            logger.info(f"Discovered method: {methodInstance.name} with {len(actions)} actions")
+
+                except Exception as e:
+                    logger.error(f"Error loading method module {name}: {str(e)}", exc_info=True)
+
+    except Exception as e:
+        logger.error(f"Error discovering methods: {str(e)}")
+
+def getMethodsList(service_center) -> List[str]:
+    """Get list of available methods with their signatures in the required format"""
+    # Initialize methods if not already done
+    if not methods:
+        _discoverMethods(service_center)
+
+    methodList = []
+    for methodName, method in methods.items():
+        methodInstance = method['instance']
+        for actionName, action in method['actions'].items():
+            # Use the new signature format from MethodBase
+            signature = methodInstance.getActionSignature(actionName)
+            if signature:
+                methodList.append(signature)
+    return methodList
+
+def getEnhancedDocumentContext(service_center) -> str:
+    """Get enhanced document context formatted for action planning prompts with proper docList and docItem references"""
+    try:
+        document_list = service_center.getDocumentReferenceList()
+
+        # Build technical context string for AI action planning
+        context = "AVAILABLE DOCUMENTS:\n\n"
+
+        # Process chat exchanges (current round)
+        if document_list["chat"]:
+            context += "CURRENT ROUND DOCUMENTS:\n"
+            for exchange in document_list["chat"]:
+                # Generate docList reference for the exchange (using message ID and label)
+                # Find the message that corresponds to this exchange
+                message_id = None
+                for message in service_center.workflow.messages:
+                    if hasattr(message, 'documentsLabel') and message.documentsLabel == exchange.documentsLabel:
+                        message_id = message.id
+                        break
+
+                if message_id:
+                    doc_list_ref = f"docList:{message_id}:{exchange.documentsLabel}"
+                else:
+                    # Fallback to label-only format if message ID not found
+                    doc_list_ref = f"docList:{exchange.documentsLabel}"
+
+                logger.debug(f"Using document label for action planning: {exchange.documentsLabel} (message_id: {message_id})")
+                context += f"- {doc_list_ref} contains:\n"
+                # Generate docItem references for each document in the list
+                for doc_ref in exchange.documents:
+                    if doc_ref.startswith("docItem:"):
+                        context += f"  - {doc_ref}\n"
+                    else:
+                        # Convert to proper docItem format if needed
+                        context += f"  - docItem:{doc_ref}\n"
+                context += "\n"
+
+        # Process history exchanges (previous rounds)
+        if document_list["history"]:
+            context += "WORKFLOW HISTORY DOCUMENTS:\n"
+            for exchange in document_list["history"]:
+                # Generate docList reference for the exchange (using message ID and label)
+                # Find the message that corresponds to this exchange
+                message_id = None
+                for message in service_center.workflow.messages:
+                    if hasattr(message, 'documentsLabel') and message.documentsLabel == exchange.documentsLabel:
+                        message_id = message.id
+                        break
+
+                if message_id:
+                    doc_list_ref = f"docList:{message_id}:{exchange.documentsLabel}"
+                else:
+                    # Fallback to label-only format if message ID not found
+                    doc_list_ref = f"docList:{exchange.documentsLabel}"
+
+                logger.debug(f"Using history document label for action planning: {exchange.documentsLabel} (message_id: {message_id})")
+                context += f"- {doc_list_ref} contains:\n"
+                # Generate docItem references for each document in the list
+                for doc_ref in exchange.documents:
+                    if doc_ref.startswith("docItem:"):
+                        context += f"  - {doc_ref}\n"
+                    else:
+                        # Convert to proper docItem format if needed
+                        context += f"  - docItem:{doc_ref}\n"
+                context += "\n"
+
+        if not document_list["chat"] and not document_list["history"]:
+            context += "NO DOCUMENTS AVAILABLE - This workflow has no documents to process.\n"
+
+        return context
+
+    except Exception as e:
+        logger.error(f"Error generating enhanced document context: {str(e)}")
+        return "NO DOCUMENTS AVAILABLE - Error generating document context."
+
 # Prompt creation helpers
 
 def _getAvailableDocuments(workflow) -> str:
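For context, a minimal sketch of a method module that the discovery pass above would register, assuming MethodBase provides name/description and that each public coroutine becomes an action (the module and class below are illustrative, not part of the commit):

# modules/workflows/methods/methodExample.py  (hypothetical file name)
from typing import Any, Dict
from modules.workflows.methods.methodBase import MethodBase

class MethodExample(MethodBase):
    def __init__(self, serviceCenter):
        super().__init__(serviceCenter)
        self.name = "example"
        self.description = "Illustrative method for the discovery sketch"

    async def echo(self, parameters: Dict[str, Any]):
        """Echo the given parameters back (discovered as action 'example.echo')."""
        return parameters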
@@ -275,7 +439,7 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
 
 async def createActionDefinitionPrompt(context: TaskContext, service) -> str:
     """Create enhanced prompt for action generation with user-friendly messages and enhanced document context"""
-    methodList = service.getMethodsList()
+    methodList = getMethodsList(service)
     method_actions = {}
     for sig in methodList:
         if '.' in sig:
@@ -283,10 +447,10 @@ async def createActionDefinitionPrompt(context: TaskContext, service) -> str:
             action = rest.split('(')[0]
             method_actions.setdefault(method, []).append((action, sig))
 
-    messageSummary = await service.summarizeChat(context.workflow.messages) if context.workflow else ""
+    messageSummary = await service.methodService.summarizeChat(context.workflow.messages) if context.workflow else ""
 
     # Get enhanced document context using the new method
-    available_documents_str = service.getEnhancedDocumentContext()
+    available_documents_str = getEnhancedDocumentContext(service)
 
     # Get available documents and connections using generic functions
     available_docs_summary = _getAvailableDocuments(context.workflow)
@@ -299,7 +463,7 @@ async def createActionDefinitionPrompt(context: TaskContext, service) -> str:
     for method, actions in method_actions.items():
         available_methods_json[method] = {}
         # Get the method instance for accessing docstrings
-        method_instance = service.methods.get(method, {}).get('instance') if hasattr(service, 'methods') else None
+        method_instance = methods.get(method, {}).get('instance') if methods else None
 
         for action, sig in actions:
             # Parse the signature to extract parameters
@@ -405,7 +569,7 @@ Previous review feedback:
     user_language = service.user.language if service and service.user else 'en'
 
     # Get current workflow context for dynamic examples
-    workflow_context = service.getWorkflowContext()
+    workflow_context = service.methodService.getWorkflowContext()
     current_round = workflow_context.get('currentRound', 0)
     current_task = workflow_context.get('currentTask', 1)
 
@@ -730,7 +894,7 @@ def createResultReviewPrompt(context: ReviewContext, service) -> str:
         document_validation_summary += f"  - No documents produced\n"
 
     # Get enhanced document context using the new method
-    document_context = service.getEnhancedDocumentContext()
+    document_context = getEnhancedDocumentContext(service)
 
     # Get user language from service
     user_language = service.user.language if service and service.user else 'en'
@@ -837,7 +1001,7 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
 def _build_tiny_catalog(service) -> str:
     """Return minimal tool catalog: method -> { action -> [paramNames] }"""
     try:
-        method_signatures = service.getMethodsList()
+        method_signatures = getMethodsList(service)
     except Exception:
         method_signatures = []
     catalog: Dict[str, Dict[str, List[str]]] = {}
@@ -890,8 +1054,8 @@ def createActionParameterPrompt(context: TaskContext, selected_action: Dict[str,
 
     # Get action signature from service center
     action_signature = ""
-    if service and hasattr(service, 'methods') and method in service.methods:
-        method_instance = service.methods[method]['instance']
+    if service and method in methods:
+        method_instance = methods[method]['instance']
         action_signature = method_instance.getActionSignature(name)
 
     return f"""Provide only the required parameters for this action.
@@ -6,20 +6,20 @@ import asyncio
 
 from modules.interfaces.interfaceAppObjects import User
 
-from modules.interfaces.interfaceChatModel import (UserInputRequest, ChatMessage, ChatWorkflow, TaskItem, TaskStatus)
+from modules.interfaces.interfaceChatModel import (UserInputRequest, ChatMessage, ChatWorkflow, TaskItem, TaskStatus, ChatDocument)
 from modules.interfaces.interfaceChatObjects import ChatObjects
-from modules.workflows._transfer.handlingTasks import HandlingTasks, WorkflowStoppedException
+from modules.workflows.processing.handlingTasks import HandlingTasks, WorkflowStoppedException
 from modules.interfaces.interfaceChatModel import WorkflowResult
 from modules.shared.timezoneUtils import get_utc_timestamp
 import uuid
 
 logger = logging.getLogger(__name__)
 
 class WorkflowManager:
     """Manager for workflow processing and coordination"""
 
-    def __init__(self, chatInterface: ChatObjects, currentUser: User):
-        self.chatInterface = chatInterface
-        self.currentUser = currentUser
+    def __init__(self, services):
+        self.services = services
         self.handlingTasks = None
 
     # Exported functions
@@ -32,19 +32,22 @@ class WorkflowManager:
         currentTime = get_utc_timestamp()
 
         if workflowId:
-            workflow = self.chatInterface.getWorkflow(workflowId)
+            workflow = self.services.getWorkflow(workflowId)
             if not workflow:
                 raise ValueError(f"Workflow {workflowId} not found")
 
+            # Add workflow to services
+            self.services.workflow = workflow
+
             if workflow.status == "running":
                 logger.info(f"Stopping running workflow {workflowId} before processing new prompt")
                 workflow.status = "stopped"
                 workflow.lastActivity = currentTime
-                self.chatInterface.updateWorkflow(workflowId, {
+                self.services.updateWorkflow(workflowId, {
                     "status": "stopped",
                     "lastActivity": currentTime
                 })
-                self.chatInterface.createLog({
+                self.services.createLog({
                     "workflowId": workflowId,
                     "message": "Workflow stopped for new prompt",
                     "type": "info",
@@ -54,17 +57,17 @@ class WorkflowManager:
                 await asyncio.sleep(0.1)
 
             newRound = workflow.currentRound + 1
-            self.chatInterface.updateWorkflow(workflowId, {
+            self.services.updateWorkflow(workflowId, {
                 "status": "running",
                 "lastActivity": currentTime,
                 "currentRound": newRound
            })
 
-            workflow = self.chatInterface.getWorkflow(workflowId)
+            workflow = self.services.getWorkflow(workflowId)
             if not workflow:
                 raise ValueError(f"Failed to reload workflow {workflowId} after update")
 
-            self.chatInterface.createLog({
+            self.services.createLog({
                 "workflowId": workflowId,
                 "message": f"Workflow resumed (round {workflow.currentRound})",
                 "type": "info",
@@ -82,7 +85,7 @@ class WorkflowManager:
                 "currentAction": 0,
                 "totalTasks": 0,
                 "totalActions": 0,
-                "mandateId": self.chatInterface.mandateId,
+                "mandateId": self.services.mandateId,
                 "messageIds": [],
                 "workflowMode": workflowMode,
                 "maxSteps": 5 if workflowMode == "React" else 1,  # Set maxSteps for React mode
@@ -96,12 +99,15 @@ class WorkflowManager:
                 }
             }
 
-            workflow = self.chatInterface.createWorkflow(workflowData)
+            workflow = self.services.createWorkflow(workflowData)
             logger.info(f"Created workflow with mode: {getattr(workflow, 'workflowMode', 'NOT_SET')}")
             logger.info(f"Workflow data passed: {workflowData.get('workflowMode', 'NOT_IN_DATA')}")
             workflow.currentRound = 1
-            self.chatInterface.updateWorkflow(workflow.id, {"currentRound": 1})
-            self.chatInterface.updateWorkflowStats(workflow.id, bytesSent=0, bytesReceived=0)
+            self.services.updateWorkflow(workflow.id, {"currentRound": 1})
+            self.services.updateWorkflowStats(workflow.id, bytesSent=0, bytesReceived=0)
+
+            # Add workflow to services
+            self.services.workflow = workflow
 
         # Start workflow processing asynchronously
         asyncio.create_task(self._workflowProcess(userInput, workflow))
@@ -114,17 +120,17 @@ class WorkflowManager:
     async def workflowStop(self, workflowId: str) -> ChatWorkflow:
         """Stops a running workflow."""
         try:
-            workflow = self.chatInterface.getWorkflow(workflowId)
+            workflow = self.services.getWorkflow(workflowId)
             if not workflow:
                 raise ValueError(f"Workflow {workflowId} not found")
 
             workflow.status = "stopped"
             workflow.lastActivity = get_utc_timestamp()
-            self.chatInterface.updateWorkflow(workflowId, {
+            self.services.updateWorkflow(workflowId, {
                 "status": "stopped",
                 "lastActivity": workflow.lastActivity
             })
-            self.chatInterface.createLog({
+            self.services.createLog({
                 "workflowId": workflowId,
                 "message": "Workflow stopped",
                 "type": "warning",
@@ -141,8 +147,7 @@ class WorkflowManager:
     async def _workflowProcess(self, userInput: UserInputRequest, workflow: ChatWorkflow) -> None:
         """Process a workflow with user input"""
         try:
-            self.handlingTasks = HandlingTasks(self.chatInterface, self.currentUser, workflow)
-            self.handlingTasks.service.setUserLanguage(userInput.userLanguage)
+            self.handlingTasks = HandlingTasks(self.services, workflow)
             message = await self._sendFirstMessage(userInput, workflow)
             task_plan = await self._planTasks(userInput, workflow)
             workflow_result = await self._executeTasks(task_plan, workflow)
@@ -187,20 +192,20 @@ class WorkflowManager:
         }

         # Create message first to get messageId
-        message = self.chatInterface.createMessage(messageData)
+        message = self.services.createMessage(messageData)
         if message:
             workflow.messages.append(message)

             # Clear trace log for new workflow session
-            self.handlingTasks.service.clearTraceLog()
+            self.handlingTasks.clearTraceLog()

             # Add documents if any, now with messageId
             if userInput.listFileId:
                 # Process file IDs and add to message data
-                documents = await self.handlingTasks.service.processFileIds(userInput.listFileId, message.id)
+                documents = await self._processFileIds(userInput.listFileId, message.id)
                 message.documents = documents
                 # Update the message with documents in database
-                self.chatInterface.updateMessage(message.id, {"documents": [doc.to_dict() for doc in documents]})
+                self.services.updateMessage(message.id, {"documents": [doc.to_dict() for doc in documents]})

             return message
         else:
@@ -302,14 +307,14 @@ class WorkflowManager:
             "taskProgress": "stopped",
             "actionProgress": "stopped"
         }
-        message = self.chatInterface.createMessage(stopped_message)
+        message = self.services.createMessage(stopped_message)
         if message:
             workflow.messages.append(message)

         # Update workflow status to stopped
         workflow.status = "stopped"
         workflow.lastActivity = get_utc_timestamp()
-        self.chatInterface.updateWorkflow(workflow.id, {
+        self.services.updateWorkflow(workflow.id, {
             "status": "stopped",
             "lastActivity": workflow.lastActivity
         })
@@ -334,14 +339,14 @@ class WorkflowManager:
             "taskProgress": "stopped",
             "actionProgress": "stopped"
         }
-        message = self.chatInterface.createMessage(stopped_message)
+        message = self.services.createMessage(stopped_message)
         if message:
             workflow.messages.append(message)

         # Update workflow status to stopped
         workflow.status = "stopped"
         workflow.lastActivity = get_utc_timestamp()
-        self.chatInterface.updateWorkflow(workflow.id, {
+        self.services.updateWorkflow(workflow.id, {
             "status": "stopped",
             "lastActivity": workflow.lastActivity,
             "totalTasks": workflow.totalTasks,
@@ -349,7 +354,7 @@ class WorkflowManager:
         })

         # Add stopped log entry
-        self.chatInterface.createLog({
+        self.services.createLog({
             "workflowId": workflow.id,
             "message": "Workflow stopped by user",
             "type": "warning",
@@ -376,14 +381,14 @@ class WorkflowManager:
             "taskProgress": "fail",
             "actionProgress": "fail"
         }
-        message = self.chatInterface.createMessage(error_message)
+        message = self.services.createMessage(error_message)
         if message:
             workflow.messages.append(message)

         # Update workflow status to failed
         workflow.status = "failed"
         workflow.lastActivity = get_utc_timestamp()
-        self.chatInterface.updateWorkflow(workflow.id, {
+        self.services.updateWorkflow(workflow.id, {
             "status": "failed",
             "lastActivity": workflow.lastActivity,
             "totalTasks": workflow.totalTasks,
@@ -391,7 +396,7 @@ class WorkflowManager:
         })

         # Add failed log entry
-        self.chatInterface.createLog({
+        self.services.createLog({
             "workflowId": workflow.id,
             "message": f"Workflow failed: {workflow_result.error or 'Unknown error'}",
             "type": "error",
@@ -423,14 +428,14 @@ class WorkflowManager:
             "taskProgress": "fail",
             "actionProgress": "fail"
         }
-        message = self.chatInterface.createMessage(error_message)
+        message = self.services.createMessage(error_message)
         if message:
             workflow.messages.append(message)

         # Update workflow status to failed
         workflow.status = "failed"
         workflow.lastActivity = get_utc_timestamp()
-        self.chatInterface.updateWorkflow(workflow.id, {
+        self.services.updateWorkflow(workflow.id, {
             "status": "failed",
             "lastActivity": workflow.lastActivity,
             "totalTasks": workflow.totalTasks,
@@ -468,7 +473,7 @@ class WorkflowManager:
         }

         # Create message using interface
-        message = self.chatInterface.createMessage(messageData)
+        message = self.services.createMessage(messageData)
         if message:
             workflow.messages.append(message)
@@ -477,13 +482,13 @@ class WorkflowManager:
         workflow.lastActivity = get_utc_timestamp()

         # Update workflow in database
-        self.chatInterface.updateWorkflow(workflow.id, {
+        self.services.updateWorkflow(workflow.id, {
             "status": "completed",
             "lastActivity": workflow.lastActivity
         })

         # Add completion log entry
-        self.chatInterface.createLog({
+        self.services.createLog({
             "workflowId": workflow.id,
             "message": "Workflow completed",
             "type": "success",
@@ -529,7 +534,7 @@ class WorkflowManager:
         # Update workflow status to stopped
         workflow.status = "stopped"
         workflow.lastActivity = get_utc_timestamp()
-        self.chatInterface.updateWorkflow(workflow.id, {
+        self.services.updateWorkflow(workflow.id, {
             "status": "stopped",
             "lastActivity": workflow.lastActivity,
             "totalTasks": workflow.totalTasks,
@@ -554,12 +559,12 @@ class WorkflowManager:
             "taskProgress": "pending",
             "actionProgress": "pending"
         }
-        message = self.chatInterface.createMessage(stopped_message)
+        message = self.services.createMessage(stopped_message)
         if message:
             workflow.messages.append(message)

         # Add log entry
-        self.chatInterface.createLog({
+        self.services.createLog({
             "workflowId": workflow.id,
             "message": "Workflow stopped by user",
             "type": "warning",
@@ -574,7 +579,7 @@ class WorkflowManager:
         # Update workflow status to failed
         workflow.status = "failed"
         workflow.lastActivity = get_utc_timestamp()
-        self.chatInterface.updateWorkflow(workflow.id, {
+        self.services.updateWorkflow(workflow.id, {
             "status": "failed",
             "lastActivity": workflow.lastActivity,
             "totalTasks": workflow.totalTasks,
@@ -599,12 +604,12 @@ class WorkflowManager:
             "taskProgress": "fail",
             "actionProgress": "fail"
         }
-        message = self.chatInterface.createMessage(error_message)
+        message = self.services.createMessage(error_message)
         if message:
             workflow.messages.append(message)

         # Add error log entry
-        self.chatInterface.createLog({
+        self.services.createLog({
             "workflowId": workflow.id,
             "message": f"Workflow failed: {str(error)}",
             "type": "error",
@@ -613,3 +618,32 @@ class WorkflowManager:
             })

         raise
+
+    async def _processFileIds(self, fileIds: List[str], messageId: str = None) -> List[ChatDocument]:
+        """Process file IDs from existing files and return ChatDocument objects"""
+        documents = []
+        for fileId in fileIds:
+            try:
+                # Get file info from service
+                fileInfo = self.handlingTasks.service.methodService.getFileInfo(fileId)
+                if fileInfo:
+                    # Create document directly with all file attributes
+                    document = ChatDocument(
+                        id=str(uuid.uuid4()),
+                        messageId=messageId or "",  # Use provided messageId or empty string as fallback
+                        fileId=fileId,
+                        fileName=fileInfo.get("fileName", "unknown"),
+                        fileSize=fileInfo.get("size", 0),
+                        mimeType=fileInfo.get("mimeType", "application/octet-stream")
+                    )
+                    documents.append(document)
+                    logger.info(f"Processed file ID {fileId} -> {document.fileName}")
+                else:
+                    logger.warning(f"No file info found for file ID {fileId}")
+            except Exception as e:
+                logger.error(f"Error processing file ID {fileId}: {str(e)}")
+        return documents
+
+    def _setUserLanguage(self, language: str) -> None:
+        """Set user language for the service center"""
+        self.handlingTasks.service.user.language = language
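The new `_processFileIds` helper builds `ChatDocument` objects from whatever `getFileInfo` returns. The model itself is defined elsewhere in the repo; purely as a reading aid, here is a hypothetical minimal definition consistent with the constructor fields above and with the `doc.to_dict()` call in `_sendFirstMessage` (the actual class may carry more attributes):

# Hypothetical minimal ChatDocument matching the fields used above;
# the repository's real model may differ.
from dataclasses import dataclass, asdict

@dataclass
class ChatDocument:
    id: str
    messageId: str
    fileId: str
    fileName: str
    fileSize: int
    mimeType: str

    def to_dict(self) -> dict:
        # _sendFirstMessage persists documents via to_dict()
        return asdict(self)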
@@ -13,7 +13,7 @@ def parse_line(line: str) -> Tuple[Optional[str], Optional[str], Optional[datetime]]:
     Extract (logger, function, timestamp) from a log line.

     Expected format examples (single line):
-    2025-09-18 16:35:04 - INFO - modules.workflows._transfer.handlingTasks - Task 1 - Starting action 3/4 - D:\\Athi\\...\\handlingTasks.py:572 - executeTask
+    2025-09-18 16:35:04 - INFO - modules.workflows.processing.handlingTasks - Task 1 - Starting action 3/4 - D:\\Athi\\...\\handlingTasks.py:572 - executeTask

     Returns (logger, function, timestamp_dt) or (None, None, None) if not matched.
     """
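This docstring change only renames the module path in the example line (`_transfer` → `processing`); the parser logic is untouched by this hunk. For reference, a sketch of a pattern that would match the documented format and yield the `(logger, function, timestamp)` triple; the regex, group names, and demo path are assumptions, since the real implementation is not shown here:

# Hypothetical matcher for the documented log line format; the real
# parse_line in this commit may use a different pattern.
import re
from datetime import datetime

LOG_RE = re.compile(
    r"^(?P<ts>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}) - \w+ - "
    r"(?P<logger>[\w.]+) - .* - .*:\d+ - (?P<func>\w+)$"
)

def parse_line_sketch(line: str):
    m = LOG_RE.match(line)
    if not m:
        return None, None, None
    ts = datetime.strptime(m.group("ts"), "%Y-%m-%d %H:%M:%S")
    return m.group("logger"), m.group("func"), ts

# Demo with a made-up path in place of the elided one from the docstring:
print(parse_line_sketch(
    "2025-09-18 16:35:04 - INFO - modules.workflows.processing.handlingTasks"
    " - Task 1 - Starting action 3/4 - D:\\Athi\\x\\handlingTasks.py:572 - executeTask"
))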