Refactor full workflow engine 3.0
This commit is contained in:
parent
1019cb7a65
commit
472353fea0
40 changed files with 2605 additions and 3235 deletions
49
app.py
49
app.py
|
|
@ -4,7 +4,7 @@ os.environ["NUMEXPR_MAX_THREADS"] = "12"
|
||||||
from fastapi import FastAPI, HTTPException, Depends, Body, status, Response
|
from fastapi import FastAPI, HTTPException, Depends, Body, status, Response
|
||||||
from fastapi.middleware.cors import CORSMiddleware
|
from fastapi.middleware.cors import CORSMiddleware
|
||||||
from contextlib import asynccontextmanager
|
from contextlib import asynccontextmanager
|
||||||
from zoneinfo import ZoneInfo
|
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from logging.handlers import RotatingFileHandler
|
from logging.handlers import RotatingFileHandler
|
||||||
|
|
@ -12,8 +12,7 @@ from datetime import timedelta, datetime
|
||||||
import pathlib
|
import pathlib
|
||||||
|
|
||||||
from modules.shared.configuration import APP_CONFIG
|
from modules.shared.configuration import APP_CONFIG
|
||||||
from apscheduler.schedulers.asyncio import AsyncIOScheduler
|
from modules.shared.eventManagement import eventManager
|
||||||
from apscheduler.triggers.cron import CronTrigger
|
|
||||||
|
|
||||||
|
|
||||||
class DailyRotatingFileHandler(RotatingFileHandler):
|
class DailyRotatingFileHandler(RotatingFileHandler):
|
||||||
|
|
@ -202,46 +201,15 @@ instanceLabel = APP_CONFIG.get("APP_ENV_LABEL")
|
||||||
# Define lifespan context manager for application startup/shutdown events
|
# Define lifespan context manager for application startup/shutdown events
|
||||||
@asynccontextmanager
|
@asynccontextmanager
|
||||||
async def lifespan(app: FastAPI):
|
async def lifespan(app: FastAPI):
|
||||||
# Startup logic
|
|
||||||
logger.info("Application is starting up")
|
logger.info("Application is starting up")
|
||||||
|
eventManager.start()
|
||||||
# Setup APScheduler for JIRA sync
|
|
||||||
scheduler = AsyncIOScheduler(timezone=ZoneInfo("Europe/Zurich"))
|
|
||||||
try:
|
|
||||||
from modules.features.syncDelta.mainSyncDelta import perform_sync_jira_delta_group
|
|
||||||
# Schedule sync every 20 minutes (at minutes 00, 20, 40)
|
|
||||||
scheduler.add_job(
|
|
||||||
perform_sync_jira_delta_group,
|
|
||||||
CronTrigger(minute="0,20,40"),
|
|
||||||
id="jira_delta_group_sync",
|
|
||||||
replace_existing=True,
|
|
||||||
coalesce=True,
|
|
||||||
max_instances=1,
|
|
||||||
misfire_grace_time=1800,
|
|
||||||
)
|
|
||||||
scheduler.start()
|
|
||||||
logger.info("APScheduler started (jira_delta_group_sync every 20 minutes at 00, 20, 40)")
|
|
||||||
|
|
||||||
# Run initial sync on startup (non-blocking failure)
|
|
||||||
try:
|
|
||||||
logger.info("Running initial JIRA sync on app startup...")
|
|
||||||
await perform_sync_jira_delta_group()
|
|
||||||
logger.info("Initial JIRA sync completed successfully")
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Initial JIRA sync failed: {str(e)}")
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Failed to initialize scheduler or JIRA sync: {str(e)}")
|
|
||||||
|
|
||||||
yield
|
yield
|
||||||
|
eventManager.stop()
|
||||||
# Shutdown logic
|
|
||||||
logger.info("Application has been shut down")
|
logger.info("Application has been shut down")
|
||||||
try:
|
|
||||||
if 'scheduler' in locals() and scheduler.running:
|
|
||||||
scheduler.shutdown(wait=False)
|
|
||||||
logger.info("APScheduler stopped")
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error shutting down scheduler: {str(e)}")
|
|
||||||
|
|
||||||
# START APP
|
# START APP
|
||||||
app = FastAPI(
|
app = FastAPI(
|
||||||
|
|
@ -250,7 +218,6 @@ app = FastAPI(
|
||||||
lifespan=lifespan
|
lifespan=lifespan
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
# Parse CORS origins from environment variable
|
# Parse CORS origins from environment variable
|
||||||
def get_allowed_origins():
|
def get_allowed_origins():
|
||||||
origins_str = APP_CONFIG.get("APP_ALLOWED_ORIGINS", "http://localhost:8080")
|
origins_str = APP_CONFIG.get("APP_ALLOWED_ORIGINS", "http://localhost:8080")
|
||||||
|
|
|
||||||
|
|
@ -24,8 +24,11 @@ async def chatStart(interfaceChat, currentUser: User, userInput: UserInputReques
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
from modules.workflows.workflowManager import WorkflowManager
|
from modules.workflows.workflowManager import WorkflowManager
|
||||||
workflowManager = WorkflowManager(interfaceChat, currentUser)
|
from modules.services import getInterface as getServices
|
||||||
return await workflowManager.workflowStart(userInput, workflowId, workflowMode)
|
services = getServices(currentUser, None)
|
||||||
|
workflowManager = WorkflowManager(services)
|
||||||
|
workflow = await workflowManager.workflowStart(userInput, workflowId, workflowMode)
|
||||||
|
return workflow
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error starting chat: {str(e)}")
|
logger.error(f"Error starting chat: {str(e)}")
|
||||||
raise
|
raise
|
||||||
|
|
@ -34,7 +37,9 @@ async def chatStop(interfaceChat, currentUser: User, workflowId: str) -> ChatWor
|
||||||
"""Stops a running chat."""
|
"""Stops a running chat."""
|
||||||
try:
|
try:
|
||||||
from modules.workflows.workflowManager import WorkflowManager
|
from modules.workflows.workflowManager import WorkflowManager
|
||||||
workflowManager = WorkflowManager(interfaceChat, currentUser)
|
from modules.services import getInterface as getServices
|
||||||
|
services = getServices(currentUser, None)
|
||||||
|
workflowManager = WorkflowManager(services)
|
||||||
return await workflowManager.workflowStop(workflowId)
|
return await workflowManager.workflowStop(workflowId)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error stopping chat: {str(e)}")
|
logger.error(f"Error stopping chat: {str(e)}")
|
||||||
|
|
|
||||||
|
|
@ -1,587 +1,285 @@
|
||||||
"""
|
|
||||||
Data Neutralization Service
|
|
||||||
Handles file processing for data neutralization including SharePoint integration
|
|
||||||
"""
|
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import os
|
from typing import Any, Dict, List, Optional
|
||||||
import uuid
|
|
||||||
from typing import Dict, List, Any, Optional, Tuple
|
|
||||||
from datetime import datetime
|
|
||||||
from pathlib import Path
|
|
||||||
import mimetypes
|
|
||||||
|
|
||||||
from modules.interfaces.interfaceAppObjects import getInterface
|
from modules.interfaces.interfaceAppModel import User
|
||||||
from modules.interfaces.interfaceAppModel import User, DataNeutraliserConfig, DataNeutralizerAttributes
|
from modules.services.serviceNeutralization.mainNeutralization import NeutralizationService
|
||||||
from modules.services.serviceNeutralization.neutralizer import DataAnonymizer
|
|
||||||
from modules.shared.timezoneUtils import get_utc_timestamp
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
class NeutralizationService:
|
|
||||||
"""Service for handling data neutralization operations"""
|
|
||||||
|
|
||||||
def __init__(self, current_user: User):
|
class NeutralizationPlayground:
|
||||||
"""Initialize the service with user context"""
|
"""Feature/UI wrapper around NeutralizationService for playground & routes."""
|
||||||
self.current_user = current_user
|
|
||||||
self.app_interface = getInterface(current_user)
|
|
||||||
|
|
||||||
def get_config(self) -> Optional[DataNeutraliserConfig]:
|
def __init__(self, currentUser: User):
|
||||||
"""Get the neutralization configuration for the current user's mandate"""
|
self.currentUser = currentUser
|
||||||
return self.app_interface.getNeutralizationConfig()
|
self.service = NeutralizationService(currentUser)
|
||||||
|
|
||||||
def save_config(self, config_data: Dict[str, Any]) -> DataNeutraliserConfig:
|
def processText(self, text: str) -> Dict[str, Any]:
|
||||||
"""Save or update the neutralization configuration"""
|
return self.service.processText(text)
|
||||||
return self.app_interface.createOrUpdateNeutralizationConfig(config_data)
|
|
||||||
|
|
||||||
def neutralize_text(self, text: str, file_id: Optional[str] = None) -> Dict[str, Any]:
|
def processFiles(self, fileIds: List[str]) -> Dict[str, Any]:
|
||||||
"""Neutralize text content and return results with attribute mappings"""
|
results: List[Dict[str, Any]] = []
|
||||||
return self.app_interface.neutralizeText(text, file_id)
|
errors: List[str] = []
|
||||||
|
for fileId in fileIds:
|
||||||
def get_attributes(self, file_id: Optional[str] = None) -> List[DataNeutralizerAttributes]:
|
|
||||||
"""Get neutralization attributes, optionally filtered by file ID"""
|
|
||||||
return self.app_interface.getNeutralizationAttributes(file_id)
|
|
||||||
|
|
||||||
def resolve_text(self, text: str) -> str:
|
|
||||||
"""Resolve UIDs in neutralized text back to original text"""
|
|
||||||
return self.app_interface.resolveNeutralizedText(text)
|
|
||||||
|
|
||||||
async def process_sharepoint_files(self, source_path: str, target_path: str) -> Dict[str, Any]:
|
|
||||||
"""
|
|
||||||
Process files from SharePoint source path, neutralize them, and store in target path
|
|
||||||
|
|
||||||
Args:
|
|
||||||
source_path: SharePoint path to read files from
|
|
||||||
target_path: SharePoint path to store neutralized files
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Dictionary with processing results
|
|
||||||
"""
|
|
||||||
try:
|
try:
|
||||||
logger.info(f"Processing SharePoint files from {source_path} to {target_path}")
|
res = self.service.processFile(fileId)
|
||||||
|
results.append({
|
||||||
# Get user's SharePoint connection that matches the source path
|
'file_id': fileId,
|
||||||
sharepoint_connection = await self._get_sharepoint_connection(source_path)
|
'neutralized_file_name': res.get('neutralized_file_name'),
|
||||||
if not sharepoint_connection:
|
'attributes_count': len(res.get('attributes', []))
|
||||||
|
})
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error processing file {fileId}: {str(e)}")
|
||||||
|
errors.append(f"{fileId}: {str(e)}")
|
||||||
return {
|
return {
|
||||||
"success": False,
|
'success': len(errors) == 0,
|
||||||
"message": "No SharePoint connection found for user",
|
'total_files': len(fileIds),
|
||||||
"processed_files": 0,
|
'successful_files': len(results),
|
||||||
"errors": ["No SharePoint connection found"]
|
'failed_files': len(errors),
|
||||||
|
'results': results,
|
||||||
|
'errors': errors,
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.info(f"Using SharePoint connection: {sharepoint_connection.get('id')} for path: {source_path}")
|
async def processSharepointFiles(self, sourcePath: str, targetPath: str) -> Dict[str, Any]:
|
||||||
|
from modules.features.neutralizePlayground.sharepoint import SharepointProcessor
|
||||||
|
processor = SharepointProcessor(self.currentUser, self.service)
|
||||||
|
return await processor.processSharepointFiles(sourcePath, targetPath)
|
||||||
|
|
||||||
# Get SharePoint access token
|
# Cleanup attributes
|
||||||
sharepoint_token = self.app_interface.getConnectionToken(sharepoint_connection["id"])
|
def cleanAttributes(self, fileId: str) -> bool:
|
||||||
if not sharepoint_token:
|
if not self.service.app_interface:
|
||||||
|
return False
|
||||||
|
return self.service.app_interface.deleteNeutralizationAttributes(fileId)
|
||||||
|
|
||||||
|
# Stats
|
||||||
|
def getStats(self) -> Dict[str, Any]:
|
||||||
|
try:
|
||||||
|
allAttributes = self.service._getAttributes()
|
||||||
|
patternCounts: Dict[str, int] = {}
|
||||||
|
for attr in allAttributes:
|
||||||
|
patternType = attr.patternType
|
||||||
|
patternCounts[patternType] = patternCounts.get(patternType, 0) + 1
|
||||||
|
uniqueFiles = set(attr.fileId for attr in allAttributes if attr.fileId)
|
||||||
return {
|
return {
|
||||||
"success": False,
|
'total_attributes': len(allAttributes),
|
||||||
"message": "No SharePoint access token found",
|
'unique_files': len(uniqueFiles),
|
||||||
"processed_files": 0,
|
'pattern_counts': patternCounts,
|
||||||
"errors": ["No SharePoint access token found"]
|
'mandate_id': self.currentUser.mandateId if self.currentUser else None,
|
||||||
|
}
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error getting stats: {str(e)}")
|
||||||
|
return {
|
||||||
|
'total_attributes': 0,
|
||||||
|
'unique_files': 0,
|
||||||
|
'pattern_counts': {},
|
||||||
|
'error': str(e),
|
||||||
}
|
}
|
||||||
|
|
||||||
# Process files asynchronously
|
|
||||||
return await self._process_sharepoint_files_async(
|
|
||||||
source_path, target_path, sharepoint_token.tokenAccess
|
|
||||||
)
|
|
||||||
|
|
||||||
|
# Internal SharePoint helper module separated to keep feature logic tidy
|
||||||
|
class SharepointProcessor:
|
||||||
|
def __init__(self, currentUser: User, service: NeutralizationService):
|
||||||
|
self.currentUser = currentUser
|
||||||
|
self.service = service
|
||||||
|
|
||||||
|
async def processSharepointFiles(self, sourcePath: str, targetPath: str) -> Dict[str, Any]:
|
||||||
|
try:
|
||||||
|
logger.info(f"Processing SharePoint files from {sourcePath} to {targetPath}")
|
||||||
|
connection = await self._getSharepointConnection(sourcePath)
|
||||||
|
if not connection:
|
||||||
|
return {
|
||||||
|
'success': False,
|
||||||
|
'message': 'No SharePoint connection found for user',
|
||||||
|
'processed_files': 0,
|
||||||
|
'errors': ['No SharePoint connection found'],
|
||||||
|
}
|
||||||
|
from modules.security.tokenManager import TokenManager
|
||||||
|
token = TokenManager().getFreshToken(self.service.app_interface, connection['id'])
|
||||||
|
if not token:
|
||||||
|
return {
|
||||||
|
'success': False,
|
||||||
|
'message': 'No SharePoint access token found',
|
||||||
|
'processed_files': 0,
|
||||||
|
'errors': ['No SharePoint access token found'],
|
||||||
|
}
|
||||||
|
return await self._processSharepointFilesAsync(sourcePath, targetPath, token.tokenAccess)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error processing SharePoint files: {str(e)}")
|
logger.error(f"Error processing SharePoint files: {str(e)}")
|
||||||
return {
|
return {
|
||||||
"success": False,
|
'success': False,
|
||||||
"message": f"Error processing SharePoint files: {str(e)}",
|
'message': f'Error processing SharePoint files: {str(e)}',
|
||||||
"processed_files": 0,
|
'processed_files': 0,
|
||||||
"errors": [str(e)]
|
'errors': [str(e)],
|
||||||
}
|
}
|
||||||
|
|
||||||
async def _get_sharepoint_connection(self, sharepoint_path: str = None):
|
async def _getSharepointConnection(self, sharepointPath: str = None):
|
||||||
"""Get user's SharePoint connection that matches the given path"""
|
|
||||||
try:
|
try:
|
||||||
# Get all user connections
|
|
||||||
from modules.interfaces.interfaceAppModel import UserConnection
|
from modules.interfaces.interfaceAppModel import UserConnection
|
||||||
connections = self.app_interface.db.getRecordset(
|
connections = self.service.app_interface.db.getRecordset(
|
||||||
UserConnection,
|
UserConnection,
|
||||||
recordFilter={"userId": self.app_interface.userId}
|
recordFilter={"userId": self.service.app_interface.userId}
|
||||||
)
|
)
|
||||||
|
msftConnections = [c for c in connections if c.get('authority') == 'msft']
|
||||||
# Find all Microsoft connections
|
if not msftConnections:
|
||||||
msft_connections = [conn for conn in connections if conn.get("authority") == "msft"]
|
logger.warning('No Microsoft connections found for user')
|
||||||
|
return None
|
||||||
if not msft_connections:
|
if len(msftConnections) == 1:
|
||||||
logger.warning("No Microsoft connections found for user")
|
logger.info(f"Found single Microsoft connection: {msftConnections[0].get('id')}")
|
||||||
|
return msftConnections[0]
|
||||||
|
if sharepointPath:
|
||||||
|
return await self._matchConnectionToPath(msftConnections, sharepointPath)
|
||||||
|
logger.info(f"Multiple Microsoft connections found, using first one: {msftConnections[0].get('id')}")
|
||||||
|
return msftConnections[0]
|
||||||
|
except Exception:
|
||||||
|
logger.error('Error getting SharePoint connection')
|
||||||
return None
|
return None
|
||||||
|
|
||||||
if len(msft_connections) == 1:
|
async def _matchConnectionToPath(self, connections: list, sharepointPath: str):
|
||||||
logger.info(f"Found single Microsoft connection: {msft_connections[0].get('id')}")
|
|
||||||
return msft_connections[0]
|
|
||||||
|
|
||||||
# If multiple connections and we have a path, try to match
|
|
||||||
if sharepoint_path:
|
|
||||||
return await self._match_connection_to_path(msft_connections, sharepoint_path)
|
|
||||||
|
|
||||||
# If no path provided, return the first one
|
|
||||||
logger.info(f"Multiple Microsoft connections found, using first one: {msft_connections[0].get('id')}")
|
|
||||||
return msft_connections[0]
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error getting SharePoint connection: {str(e)}")
|
|
||||||
return None
|
|
||||||
|
|
||||||
async def _match_connection_to_path(self, connections: list, sharepoint_path: str):
|
|
||||||
"""Match a connection to the SharePoint path by testing access"""
|
|
||||||
try:
|
try:
|
||||||
# Extract domain from the path
|
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
parsed_url = urlparse(sharepoint_path)
|
targetDomain = urlparse(sharepointPath).netloc.lower()
|
||||||
target_domain = parsed_url.netloc.lower()
|
logger.info(f"Looking for connection matching domain: {targetDomain}")
|
||||||
|
from modules.security.tokenManager import TokenManager
|
||||||
logger.info(f"Looking for connection matching domain: {target_domain}")
|
|
||||||
|
|
||||||
# Try each connection to see which one can access the site
|
|
||||||
for connection in connections:
|
for connection in connections:
|
||||||
try:
|
try:
|
||||||
# Get token for this connection
|
token = TokenManager().getFreshToken(self.service.app_interface, connection['id'])
|
||||||
token = self.app_interface.getConnectionToken(connection["id"])
|
|
||||||
if not token:
|
if not token:
|
||||||
continue
|
continue
|
||||||
|
if await self._testSharepointAccess(token.tokenAccess, sharepointPath):
|
||||||
# Test if this connection can access the SharePoint site
|
logger.info(f"Found matching connection for domain {targetDomain}: {connection.get('id')}")
|
||||||
if await self._test_sharepoint_access(token.tokenAccess, sharepoint_path):
|
|
||||||
logger.info(f"Found matching connection for domain {target_domain}: {connection.get('id')}")
|
|
||||||
return connection
|
return connection
|
||||||
|
except Exception:
|
||||||
except Exception as e:
|
|
||||||
continue
|
continue
|
||||||
|
logger.warning(f"No specific connection match found for {targetDomain}, using first available")
|
||||||
# If no specific match found, return the first connection
|
|
||||||
logger.warning(f"No specific connection match found for {target_domain}, using first available")
|
|
||||||
return connections[0]
|
return connections[0]
|
||||||
|
except Exception:
|
||||||
except Exception as e:
|
logger.error('Error matching connection to path')
|
||||||
logger.error(f"Error matching connection to path: {str(e)}")
|
|
||||||
return connections[0] if connections else None
|
return connections[0] if connections else None
|
||||||
|
|
||||||
async def _test_sharepoint_access(self, access_token: str, sharepoint_path: str) -> bool:
|
async def _testSharepointAccess(self, accessToken: str, sharepointPath: str) -> bool:
|
||||||
"""Test if the access token can access the given SharePoint path"""
|
|
||||||
try:
|
try:
|
||||||
return await self._test_sharepoint_access_async(access_token, sharepoint_path)
|
return await self._testSharepointAccessAsync(accessToken, sharepointPath)
|
||||||
except Exception as e:
|
except Exception:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
async def _test_sharepoint_access_async(self, access_token: str, sharepoint_path: str) -> bool:
|
async def _testSharepointAccessAsync(self, accessToken: str, sharepointPath: str) -> bool:
|
||||||
"""Async test for SharePoint access"""
|
|
||||||
try:
|
try:
|
||||||
from modules.connectors.connectorSharepoint import ConnectorSharepoint
|
from modules.services.serviceSharepoint.mainSharepoint import SharepointService
|
||||||
|
connector = SharepointService(access_token=accessToken)
|
||||||
connector = ConnectorSharepoint(access_token=access_token)
|
siteUrl, _ = self._parseSharepointPath(sharepointPath)
|
||||||
|
if not siteUrl:
|
||||||
# Parse the path to get site URL
|
return False
|
||||||
site_url, _ = self._parse_sharepoint_path(sharepoint_path)
|
siteInfo = await connector.find_site_by_web_url(siteUrl)
|
||||||
if not site_url:
|
return siteInfo is not None
|
||||||
|
except Exception:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# Try to find the site
|
async def _processSharepointFilesAsync(self, sourcePath: str, targetPath: str, accessToken: str) -> Dict[str, Any]:
|
||||||
site_info = await connector.find_site_by_web_url(site_url)
|
|
||||||
return site_info is not None
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
return False
|
|
||||||
|
|
||||||
async def _process_sharepoint_files_async(self, source_path: str, target_path: str, access_token: str) -> Dict[str, Any]:
|
|
||||||
"""Process SharePoint files asynchronously"""
|
|
||||||
try:
|
try:
|
||||||
import asyncio
|
import asyncio
|
||||||
from modules.connectors.connectorSharepoint import ConnectorSharepoint
|
from modules.services.serviceSharepoint.mainSharepoint import SharepointService
|
||||||
|
connector = SharepointService(access_token=accessToken)
|
||||||
# Initialize SharePoint connector
|
sourceSite, sourceFolder = self._parseSharepointPath(sourcePath)
|
||||||
connector = ConnectorSharepoint(access_token=access_token)
|
targetSite, targetFolder = self._parseSharepointPath(targetPath)
|
||||||
|
if not sourceSite or not targetSite:
|
||||||
# Parse source and target paths to extract site and folder info
|
return {'success': False, 'message': 'Invalid SharePoint path format', 'processed_files': 0, 'errors': ['Invalid SharePoint path format']}
|
||||||
source_site, source_folder = self._parse_sharepoint_path(source_path)
|
sourceSiteInfo = await connector.find_site_by_web_url(sourceSite)
|
||||||
target_site, target_folder = self._parse_sharepoint_path(target_path)
|
if not sourceSiteInfo:
|
||||||
|
return {'success': False, 'message': f'Source site not found: {sourceSite}', 'processed_files': 0, 'errors': [f'Source site not found: {sourceSite}']}
|
||||||
if not source_site or not target_site:
|
targetSiteInfo = await connector.find_site_by_web_url(targetSite)
|
||||||
return {
|
if not targetSiteInfo:
|
||||||
"success": False,
|
return {'success': False, 'message': f'Target site not found: {targetSite}', 'processed_files': 0, 'errors': [f'Target site not found: {targetSite}']}
|
||||||
"message": "Invalid SharePoint path format",
|
logger.info(f"Listing files in folder: {sourceFolder} for site: {sourceSiteInfo['id']}")
|
||||||
"processed_files": 0,
|
files = await connector.list_folder_contents(sourceSiteInfo['id'], sourceFolder)
|
||||||
"errors": ["Invalid SharePoint path format"]
|
|
||||||
}
|
|
||||||
|
|
||||||
# Find source site
|
|
||||||
source_site_info = await connector.find_site_by_web_url(source_site)
|
|
||||||
if not source_site_info:
|
|
||||||
return {
|
|
||||||
"success": False,
|
|
||||||
"message": f"Source site not found: {source_site}",
|
|
||||||
"processed_files": 0,
|
|
||||||
"errors": [f"Source site not found: {source_site}"]
|
|
||||||
}
|
|
||||||
|
|
||||||
# Find target site
|
|
||||||
target_site_info = await connector.find_site_by_web_url(target_site)
|
|
||||||
if not target_site_info:
|
|
||||||
return {
|
|
||||||
"success": False,
|
|
||||||
"message": f"Target site not found: {target_site}",
|
|
||||||
"processed_files": 0,
|
|
||||||
"errors": [f"Target site not found: {target_site}"]
|
|
||||||
}
|
|
||||||
|
|
||||||
# List files in source folder
|
|
||||||
logger.info(f"Listing files in folder: {source_folder} for site: {source_site_info['id']}")
|
|
||||||
files = await connector.list_folder_contents(source_site_info["id"], source_folder)
|
|
||||||
|
|
||||||
# If no files found, try listing the root folder to see what's available
|
|
||||||
if not files:
|
if not files:
|
||||||
logger.warning(f"No files found in folder '{source_folder}', trying root folder")
|
logger.warning(f"No files found in folder '{sourceFolder}', trying root folder")
|
||||||
files = await connector.list_folder_contents(source_site_info["id"], "")
|
files = await connector.list_folder_contents(sourceSiteInfo['id'], '')
|
||||||
|
|
||||||
if files:
|
if files:
|
||||||
# List available folders for debugging
|
folders = [f for f in files if f.get('type') == 'folder']
|
||||||
folders = [f for f in files if f.get("type") == "folder"]
|
folderNames = [f.get('name') for f in folders]
|
||||||
folder_names = [f.get('name') for f in folders]
|
logger.info(f"Available folders in root: {folderNames}")
|
||||||
logger.info(f"Available folders in root: {folder_names}")
|
folderList = ", ".join(folderNames) if folderNames else "None"
|
||||||
|
|
||||||
# Format folder list for better UI display
|
|
||||||
folder_list = ", ".join(folder_names) if folder_names else "None"
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"success": False,
|
'success': False,
|
||||||
"message": f"Folder '{source_folder}' not found. Available folders in root: {folder_list}",
|
'message': f"Folder '{sourceFolder}' not found. Available folders in root: {folderList}",
|
||||||
"processed_files": 0,
|
'processed_files': 0,
|
||||||
"errors": [f"Folder '{source_folder}' not found. Available folders: {folder_list}"],
|
'errors': [f"Folder '{sourceFolder}' not found. Available folders: {folderList}"],
|
||||||
"available_folders": folder_names
|
'available_folders': folderNames,
|
||||||
}
|
}
|
||||||
else:
|
else:
|
||||||
return {
|
return {'success': False, 'message': f'No files found in source folder: {sourceFolder}', 'processed_files': 0, 'errors': [f'No files found in source folder: {sourceFolder}']}
|
||||||
"success": False,
|
|
||||||
"message": f"No files found in source folder: {source_folder}",
|
|
||||||
"processed_files": 0,
|
|
||||||
"errors": [f"No files found in source folder: {source_folder}"]
|
|
||||||
}
|
|
||||||
|
|
||||||
# Filter for text files only
|
textFiles = [f for f in files if f.get('type') == 'file']
|
||||||
text_files = [f for f in files if f.get("type") == "file" and self._is_text_file(f.get("name", ""))]
|
processed: List[Dict[str, Any]] = []
|
||||||
|
errors: List[str] = []
|
||||||
|
|
||||||
if not text_files:
|
async def _processSingle(fileInfo: Dict[str, Any]):
|
||||||
return {
|
|
||||||
"success": False,
|
|
||||||
"message": "No text files found in source folder",
|
|
||||||
"processed_files": 0,
|
|
||||||
"errors": ["No text files found in source folder"]
|
|
||||||
}
|
|
||||||
|
|
||||||
# Process files in parallel for better performance
|
|
||||||
processed_files = []
|
|
||||||
errors = []
|
|
||||||
|
|
||||||
# Create tasks for parallel processing
|
|
||||||
async def process_single_file(file_info):
|
|
||||||
"""Process a single file - download, neutralize, upload"""
|
|
||||||
try:
|
try:
|
||||||
# Download file
|
fileContent = await connector.download_file(sourceSiteInfo['id'], fileInfo['id'])
|
||||||
file_content = await connector.download_file(source_site_info["id"], file_info["id"])
|
if not fileContent:
|
||||||
if not file_content:
|
return {'error': f"Failed to download file: {fileInfo['name']}"}
|
||||||
return {"error": f"Failed to download file: {file_info['name']}"}
|
|
||||||
|
|
||||||
# Convert to text
|
|
||||||
try:
|
try:
|
||||||
text_content = file_content.decode('utf-8')
|
textContent = fileContent.decode('utf-8')
|
||||||
except UnicodeDecodeError:
|
except UnicodeDecodeError:
|
||||||
text_content = file_content.decode('latin-1')
|
textContent = fileContent.decode('latin-1')
|
||||||
|
result = self.service._neutralizeText(textContent, 'text')
|
||||||
# Neutralize the text
|
neutralizedFilename = f"neutralized_{fileInfo['name']}"
|
||||||
neutralization_result = self.app_interface.neutralizeText(text_content, file_info["id"])
|
uploadResult = await connector.upload_file(targetSiteInfo['id'], targetFolder, neutralizedFilename, result['neutralized_text'].encode('utf-8'))
|
||||||
|
if 'error' in uploadResult:
|
||||||
# Create neutralized filename
|
return {'error': f"Failed to upload neutralized file: {neutralizedFilename} - {uploadResult['error']}"}
|
||||||
neutralized_filename = f"neutralized_{file_info['name']}"
|
|
||||||
|
|
||||||
# Upload neutralized file
|
|
||||||
neutralized_content = neutralization_result["neutralized_text"].encode('utf-8')
|
|
||||||
upload_result = await connector.upload_file(
|
|
||||||
target_site_info["id"],
|
|
||||||
target_folder,
|
|
||||||
neutralized_filename,
|
|
||||||
neutralized_content
|
|
||||||
)
|
|
||||||
|
|
||||||
if "error" in upload_result:
|
|
||||||
return {"error": f"Failed to upload neutralized file: {neutralized_filename} - {upload_result['error']}"}
|
|
||||||
else:
|
|
||||||
return {
|
return {
|
||||||
"success": True,
|
'success': True,
|
||||||
"original_name": file_info["name"],
|
'original_name': fileInfo['name'],
|
||||||
"neutralized_name": neutralized_filename,
|
'neutralized_name': neutralizedFilename,
|
||||||
"attributes_count": len(neutralization_result.get("attributes", []))
|
'attributes_count': len(result.get('attributes', [])),
|
||||||
}
|
}
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
error_msg = f"Error processing file {file_info['name']}: {str(e)}"
|
return {'error': f"Error processing file {fileInfo['name']}: {str(e)}"}
|
||||||
logger.error(error_msg)
|
|
||||||
return {"error": error_msg}
|
|
||||||
|
|
||||||
# Process all files in parallel
|
tasks = [ _processSingle(f) for f in textFiles ]
|
||||||
logger.info(f"Processing {len(text_files)} files in parallel...")
|
|
||||||
tasks = [process_single_file(file_info) for file_info in text_files]
|
|
||||||
results = await asyncio.gather(*tasks, return_exceptions=True)
|
results = await asyncio.gather(*tasks, return_exceptions=True)
|
||||||
|
for i, r in enumerate(results):
|
||||||
# Process results
|
if isinstance(r, Exception):
|
||||||
for i, result in enumerate(results):
|
errors.append(f"Exception processing file {textFiles[i]['name']}: {str(r)}")
|
||||||
if isinstance(result, Exception):
|
elif isinstance(r, dict) and 'error' in r:
|
||||||
error_msg = f"Exception processing file {text_files[i]['name']}: {str(result)}"
|
errors.append(r['error'])
|
||||||
errors.append(error_msg)
|
elif isinstance(r, dict) and r.get('success'):
|
||||||
logger.error(error_msg)
|
processed.append({
|
||||||
elif isinstance(result, dict) and "error" in result:
|
'original_name': r['original_name'],
|
||||||
errors.append(result["error"])
|
'neutralized_name': r['neutralized_name'],
|
||||||
elif isinstance(result, dict) and result.get("success"):
|
'attributes_count': r['attributes_count'],
|
||||||
processed_files.append({
|
|
||||||
"original_name": result["original_name"],
|
|
||||||
"neutralized_name": result["neutralized_name"],
|
|
||||||
"attributes_count": result["attributes_count"]
|
|
||||||
})
|
})
|
||||||
logger.info(f"Successfully processed file: {result['original_name']} -> {result['neutralized_name']}")
|
|
||||||
else:
|
else:
|
||||||
error_msg = f"Unknown result processing file {text_files[i]['name']}: {result}"
|
errors.append(f"Unknown result processing file {textFiles[i]['name']}: {r}")
|
||||||
errors.append(error_msg)
|
|
||||||
logger.error(error_msg)
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"success": len(processed_files) > 0,
|
'success': len(processed) > 0,
|
||||||
"message": f"Processed {len(processed_files)} files successfully",
|
'message': f"Processed {len(processed)} files successfully",
|
||||||
"processed_files": len(processed_files),
|
'processed_files': len(processed),
|
||||||
"files": processed_files,
|
'files': processed,
|
||||||
"errors": errors
|
'errors': errors,
|
||||||
}
|
}
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error in async SharePoint processing: {str(e)}")
|
logger.error(f"Error in async SharePoint processing: {str(e)}")
|
||||||
return {
|
return {'success': False, 'message': f'Error in async SharePoint processing: {str(e)}', 'processed_files': 0, 'errors': [str(e)]}
|
||||||
"success": False,
|
|
||||||
"message": f"Error in async SharePoint processing: {str(e)}",
|
|
||||||
"processed_files": 0,
|
|
||||||
"errors": [str(e)]
|
|
||||||
}
|
|
||||||
|
|
||||||
def _parse_sharepoint_path(self, path: str) -> tuple[str, str]:
|
def _parseSharepointPath(self, path: str) -> tuple[str, str]:
|
||||||
"""Parse SharePoint path to extract site URL and folder path"""
|
|
||||||
try:
|
try:
|
||||||
# Expected format: https://domain.sharepoint.com/sites/sitename/folder/path
|
if not path.startswith('https://'):
|
||||||
if not path.startswith("https://"):
|
|
||||||
return None, None
|
return None, None
|
||||||
|
if '?' in path:
|
||||||
# Remove query parameters
|
path = path.split('?')[0]
|
||||||
if "?" in path:
|
if '/sites/' not in path:
|
||||||
path = path.split("?")[0]
|
|
||||||
|
|
||||||
# Split by /sites/
|
|
||||||
if "/sites/" not in path:
|
|
||||||
return None, None
|
return None, None
|
||||||
|
parts = path.split('/sites/', 1)
|
||||||
parts = path.split("/sites/", 1)
|
|
||||||
if len(parts) != 2:
|
if len(parts) != 2:
|
||||||
return None, None
|
return None, None
|
||||||
|
domain = parts[0].replace('https://', '')
|
||||||
# Extract domain and site name
|
siteName = parts[1].split('/')[0]
|
||||||
domain = parts[0].replace("https://", "")
|
siteUrl = f"https://{domain}/sites/{siteName}"
|
||||||
site_name = parts[1].split("/")[0]
|
folderParts = parts[1].split('/')[1:]
|
||||||
|
|
||||||
# Create proper site URL for Graph API
|
|
||||||
site_url = f"https://{domain}/sites/{site_name}"
|
|
||||||
|
|
||||||
# Extract folder path (everything after the site name)
|
|
||||||
folder_parts = parts[1].split("/")[1:]
|
|
||||||
folder_path = "/".join(folder_parts) if folder_parts else ""
|
|
||||||
|
|
||||||
# URL decode the folder path
|
|
||||||
from urllib.parse import unquote
|
from urllib.parse import unquote
|
||||||
folder_path = unquote(folder_path)
|
folderPath = unquote('/'.join(folderParts) if folderParts else '')
|
||||||
|
return siteUrl, folderPath
|
||||||
|
except Exception:
|
||||||
return site_url, folder_path
|
logger.error(f"Error parsing SharePoint path '{path}'")
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error parsing SharePoint path '{path}': {str(e)}")
|
|
||||||
return None, None
|
return None, None
|
||||||
|
|
||||||
def _is_text_file(self, filename: str) -> bool:
|
|
||||||
"""Check if file is a text file based on extension"""
|
|
||||||
text_extensions = [
|
|
||||||
'.txt', '.csv', '.json', '.xml', '.md', '.log',
|
|
||||||
'.doc', '.docx', '.rtf', '.odt', # Document formats
|
|
||||||
'.html', '.htm', '.css', '.js', '.ts', '.py', '.java', '.cpp', '.c', '.h', # Code files
|
|
||||||
'.ini', '.cfg', '.conf', '.properties', # Config files
|
|
||||||
'.sql', '.yaml', '.yml', '.toml', # Data/config files
|
|
||||||
'.ps1', '.bat', '.sh', '.bash' # Script files
|
|
||||||
]
|
|
||||||
return any(filename.lower().endswith(ext) for ext in text_extensions)
|
|
||||||
|
|
||||||
def process_file_content(self, file_content: bytes, file_name: str, mime_type: str) -> Dict[str, Any]:
|
|
||||||
"""
|
|
||||||
Process file content for neutralization
|
|
||||||
|
|
||||||
Args:
|
|
||||||
file_content: Binary file content
|
|
||||||
file_name: Name of the file
|
|
||||||
mime_type: MIME type of the file
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Dictionary with neutralization results
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
# Determine content type based on MIME type
|
|
||||||
content_type = self._get_content_type_from_mime(mime_type)
|
|
||||||
|
|
||||||
# Decode content to text
|
|
||||||
try:
|
|
||||||
text_content = file_content.decode('utf-8')
|
|
||||||
except UnicodeDecodeError:
|
|
||||||
# Try with different encodings
|
|
||||||
for encoding in ['latin-1', 'cp1252', 'iso-8859-1']:
|
|
||||||
try:
|
|
||||||
text_content = file_content.decode(encoding)
|
|
||||||
break
|
|
||||||
except UnicodeDecodeError:
|
|
||||||
continue
|
|
||||||
else:
|
|
||||||
raise ValueError("Unable to decode file content")
|
|
||||||
|
|
||||||
# Generate a temporary file ID for tracking
|
|
||||||
temp_file_id = str(uuid.uuid4())
|
|
||||||
|
|
||||||
# Neutralize the content
|
|
||||||
neutralization_result = self.neutralize_text(text_content, temp_file_id)
|
|
||||||
|
|
||||||
# Encode the neutralized content back to bytes
|
|
||||||
neutralized_content = neutralization_result["neutralized_text"].encode('utf-8')
|
|
||||||
|
|
||||||
# Generate neutralized file name
|
|
||||||
neutralized_file_name = f"neutralized_{file_name}"
|
|
||||||
|
|
||||||
return {
|
|
||||||
"success": True,
|
|
||||||
"original_content": text_content,
|
|
||||||
"neutralized_content": neutralization_result["neutralized_text"],
|
|
||||||
"neutralized_file_name": neutralized_file_name,
|
|
||||||
"attributes": neutralization_result["attributes"],
|
|
||||||
"mapping": neutralization_result["mapping"],
|
|
||||||
"file_id": temp_file_id
|
|
||||||
}
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error processing file content: {str(e)}")
|
|
||||||
return {
|
|
||||||
"success": False,
|
|
||||||
"error": str(e),
|
|
||||||
"original_content": None,
|
|
||||||
"neutralized_content": None
|
|
||||||
}
|
|
||||||
|
|
||||||
def _get_content_type_from_mime(self, mime_type: str) -> str:
|
|
||||||
"""Determine content type from MIME type for neutralization processing"""
|
|
||||||
if mime_type.startswith('text/'):
|
|
||||||
return 'text'
|
|
||||||
elif mime_type in ['application/json', 'application/xml', 'text/xml']:
|
|
||||||
return 'json' if 'json' in mime_type else 'xml'
|
|
||||||
elif mime_type in ['text/csv', 'application/csv']:
|
|
||||||
return 'csv'
|
|
||||||
else:
|
|
||||||
return 'text' # Default to text processing
|
|
||||||
|
|
||||||
def batch_neutralize_files(self, files_data: List[Dict[str, Any]]) -> Dict[str, Any]:
|
|
||||||
"""
|
|
||||||
Process multiple files for neutralization
|
|
||||||
|
|
||||||
Args:
|
|
||||||
files_data: List of dictionaries containing file information
|
|
||||||
Each dict should have: content, name, mime_type
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Dictionary with batch processing results
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
results = []
|
|
||||||
total_files = len(files_data)
|
|
||||||
successful_files = 0
|
|
||||||
errors = []
|
|
||||||
|
|
||||||
for file_data in files_data:
|
|
||||||
try:
|
|
||||||
result = self.process_file_content(
|
|
||||||
file_data['content'],
|
|
||||||
file_data['name'],
|
|
||||||
file_data['mime_type']
|
|
||||||
)
|
|
||||||
|
|
||||||
if result['success']:
|
|
||||||
successful_files += 1
|
|
||||||
results.append({
|
|
||||||
'file_name': file_data['name'],
|
|
||||||
'neutralized_file_name': result['neutralized_file_name'],
|
|
||||||
'file_id': result['file_id'],
|
|
||||||
'attributes_count': len(result['attributes'])
|
|
||||||
})
|
|
||||||
else:
|
|
||||||
errors.append(f"Failed to process {file_data['name']}: {result['error']}")
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
error_msg = f"Error processing {file_data['name']}: {str(e)}"
|
|
||||||
errors.append(error_msg)
|
|
||||||
logger.error(error_msg)
|
|
||||||
|
|
||||||
return {
|
|
||||||
"success": len(errors) == 0,
|
|
||||||
"total_files": total_files,
|
|
||||||
"successful_files": successful_files,
|
|
||||||
"failed_files": len(errors),
|
|
||||||
"results": results,
|
|
||||||
"errors": errors
|
|
||||||
}
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error in batch neutralization: {str(e)}")
|
|
||||||
return {
|
|
||||||
"success": False,
|
|
||||||
"total_files": len(files_data),
|
|
||||||
"successful_files": 0,
|
|
||||||
"failed_files": len(files_data),
|
|
||||||
"results": [],
|
|
||||||
"errors": [str(e)]
|
|
||||||
}
|
|
||||||
|
|
||||||
def cleanup_file_attributes(self, file_id: str) -> bool:
|
|
||||||
"""Clean up neutralization attributes for a specific file"""
|
|
||||||
return self.app_interface.deleteNeutralizationAttributes(file_id)
|
|
||||||
|
|
||||||
def get_processing_stats(self) -> Dict[str, Any]:
|
|
||||||
"""Get statistics about neutralization processing"""
|
|
||||||
try:
|
|
||||||
# Get all attributes for the current mandate
|
|
||||||
all_attributes = self.get_attributes()
|
|
||||||
|
|
||||||
# Group by pattern type
|
|
||||||
pattern_counts = {}
|
|
||||||
for attr in all_attributes:
|
|
||||||
pattern_type = attr.patternType
|
|
||||||
pattern_counts[pattern_type] = pattern_counts.get(pattern_type, 0) + 1
|
|
||||||
|
|
||||||
# Get unique files
|
|
||||||
unique_files = set(attr.fileId for attr in all_attributes if attr.fileId)
|
|
||||||
|
|
||||||
return {
|
|
||||||
"total_attributes": len(all_attributes),
|
|
||||||
"unique_files": len(unique_files),
|
|
||||||
"pattern_counts": pattern_counts,
|
|
||||||
"mandate_id": self.current_user.mandateId
|
|
||||||
}
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error getting processing stats: {str(e)}")
|
|
||||||
return {
|
|
||||||
"total_attributes": 0,
|
|
||||||
"unique_files": 0,
|
|
||||||
"pattern_counts": {},
|
|
||||||
"error": str(e)
|
|
||||||
}
|
|
||||||
|
|
|
||||||
|
|
@ -11,7 +11,7 @@ import csv
|
||||||
import io
|
import io
|
||||||
from datetime import datetime, UTC
|
from datetime import datetime, UTC
|
||||||
from typing import Dict, Any, List, Optional
|
from typing import Dict, Any, List, Optional
|
||||||
from modules.connectors.connectorSharepoint import ConnectorSharepoint
|
from modules.services.serviceSharepoint.mainSharepoint import SharepointService
|
||||||
from modules.connectors.connectorTicketJira import ConnectorTicketJira
|
from modules.connectors.connectorTicketJira import ConnectorTicketJira
|
||||||
from modules.interfaces.interfaceAppObjects import getRootInterface
|
from modules.interfaces.interfaceAppObjects import getRootInterface
|
||||||
from modules.interfaces.interfaceAppModel import UserInDB
|
from modules.interfaces.interfaceAppModel import UserInDB
|
||||||
|
|
@ -232,6 +232,10 @@ class ManagerSyncDelta:
|
||||||
self.jira_connector = None
|
self.jira_connector = None
|
||||||
self.sharepoint_connector = None
|
self.sharepoint_connector = None
|
||||||
self.target_site = None
|
self.target_site = None
|
||||||
|
# Initialize centralized services with root user
|
||||||
|
from modules.services import getInterface as getServices
|
||||||
|
root_user = self.root_interface.getUserByUsername("admin")
|
||||||
|
self.services = getServices(root_user, None)
|
||||||
|
|
||||||
def get_sync_file_name(self) -> str:
|
def get_sync_file_name(self) -> str:
|
||||||
"""Get the appropriate sync file name based on the sync mode."""
|
"""Get the appropriate sync file name based on the sync mode."""
|
||||||
|
|
@ -294,8 +298,9 @@ class ManagerSyncDelta:
|
||||||
|
|
||||||
logger.info(f"Found SharePoint connection: {sharepoint_connection.id}")
|
logger.info(f"Found SharePoint connection: {sharepoint_connection.id}")
|
||||||
|
|
||||||
# Get SharePoint token for this connection
|
# Get fresh SharePoint token for this connection
|
||||||
sharepoint_token = self.root_interface.getConnectionToken(sharepoint_connection.id)
|
from modules.security.tokenManager import TokenManager
|
||||||
|
sharepoint_token = TokenManager().getFreshToken(self.root_interface, sharepoint_connection.id)
|
||||||
if not sharepoint_token:
|
if not sharepoint_token:
|
||||||
logger.error("No SharePoint token found for Delta Group user connection")
|
logger.error("No SharePoint token found for Delta Group user connection")
|
||||||
return False
|
return False
|
||||||
|
|
@ -303,7 +308,7 @@ class ManagerSyncDelta:
|
||||||
logger.info(f"Found SharePoint token: {sharepoint_token.id}")
|
logger.info(f"Found SharePoint token: {sharepoint_token.id}")
|
||||||
|
|
||||||
# Initialize SharePoint connector with Graph API
|
# Initialize SharePoint connector with Graph API
|
||||||
self.sharepoint_connector = ConnectorSharepoint(access_token=sharepoint_token.tokenAccess)
|
self.sharepoint_connector = SharepointService(access_token=sharepoint_token.tokenAccess)
|
||||||
|
|
||||||
# Resolve the site by hostname + site path to get the real site ID
|
# Resolve the site by hostname + site path to get the real site ID
|
||||||
logger.info(
|
logger.info(
|
||||||
|
|
@ -552,3 +557,21 @@ async def perform_sync_jira_delta_group() -> bool:
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error in perform_sync_jira_delta_group: {str(e)}")
|
logger.error(f"Error in perform_sync_jira_delta_group: {str(e)}")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
# Register scheduled job on import using the shared event manager
|
||||||
|
try:
|
||||||
|
from modules.shared.eventManagement import eventManager
|
||||||
|
|
||||||
|
# Schedule sync every 20 minutes (at minutes 00, 20, 40)
|
||||||
|
eventManager.register_cron(
|
||||||
|
job_id="jira_delta_group_sync",
|
||||||
|
func=perform_sync_jira_delta_group,
|
||||||
|
cron_kwargs={"minute": "0,20,40"},
|
||||||
|
replace_existing=True,
|
||||||
|
coalesce=True,
|
||||||
|
max_instances=1,
|
||||||
|
misfire_grace_time=1800,
|
||||||
|
)
|
||||||
|
logger.info("Registered jira_delta_group_sync via EventManagement (every 20 minutes)")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Failed to register jira_delta_group_sync: {str(e)}")
|
||||||
|
|
@ -1,527 +0,0 @@
|
||||||
import logging
|
|
||||||
from typing import Dict, Any, List, Union, Optional
|
|
||||||
from modules.connectors.connectorAiOpenai import AiOpenai, ContextLengthExceededException
|
|
||||||
from modules.connectors.connectorAiAnthropic import AiAnthropic
|
|
||||||
from modules.services.serviceDocument.documentExtraction import DocumentExtraction
|
|
||||||
from modules.interfaces.interfaceChatModel import ChatDocument
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
# AI Model Registry with Performance Data
|
|
||||||
AI_MODELS = {
|
|
||||||
"openai_gpt4o": {
|
|
||||||
"connector": "openai",
|
|
||||||
"max_tokens": 128000,
|
|
||||||
"cost_per_1k_tokens": 0.03, # Input
|
|
||||||
"cost_per_1k_tokens_output": 0.06, # Output
|
|
||||||
"speed_rating": 8, # 1-10
|
|
||||||
"quality_rating": 9, # 1-10
|
|
||||||
"supports_images": True,
|
|
||||||
"supports_documents": True,
|
|
||||||
"context_length": 128000,
|
|
||||||
"model_name": "gpt-4o"
|
|
||||||
},
|
|
||||||
"openai_gpt35": {
|
|
||||||
"connector": "openai",
|
|
||||||
"max_tokens": 16000,
|
|
||||||
"cost_per_1k_tokens": 0.0015,
|
|
||||||
"cost_per_1k_tokens_output": 0.002,
|
|
||||||
"speed_rating": 9,
|
|
||||||
"quality_rating": 7,
|
|
||||||
"supports_images": False,
|
|
||||||
"supports_documents": True,
|
|
||||||
"context_length": 16000,
|
|
||||||
"model_name": "gpt-3.5-turbo"
|
|
||||||
},
|
|
||||||
"anthropic_claude": {
|
|
||||||
"connector": "anthropic",
|
|
||||||
"max_tokens": 200000,
|
|
||||||
"cost_per_1k_tokens": 0.015,
|
|
||||||
"cost_per_1k_tokens_output": 0.075,
|
|
||||||
"speed_rating": 7,
|
|
||||||
"quality_rating": 10,
|
|
||||||
"supports_images": True,
|
|
||||||
"supports_documents": True,
|
|
||||||
"context_length": 200000,
|
|
||||||
"model_name": "claude-3-sonnet-20240229"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
class AiCalls:
|
|
||||||
"""Interface for AI service interactions with centralized call method"""
|
|
||||||
|
|
||||||
def __init__(self):
|
|
||||||
self.openaiService = AiOpenai()
|
|
||||||
self.anthropicService = AiAnthropic()
|
|
||||||
self.document_extractor = DocumentExtraction()
|
|
||||||
|
|
||||||
async def callAi(
|
|
||||||
self,
|
|
||||||
prompt: str,
|
|
||||||
documents: List[ChatDocument] = None,
|
|
||||||
operation_type: str = "general",
|
|
||||||
priority: str = "balanced", # "speed", "quality", "cost", "balanced"
|
|
||||||
compress_prompt: bool = True,
|
|
||||||
compress_documents: bool = True,
|
|
||||||
process_documents_individually: bool = False,
|
|
||||||
max_cost: float = None,
|
|
||||||
max_processing_time: int = None
|
|
||||||
) -> str:
|
|
||||||
"""
|
|
||||||
Zentrale AI Call Methode mit intelligenter Modell-Auswahl und Content-Verarbeitung.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
prompt: Der Hauptprompt für die AI
|
|
||||||
documents: Liste von Dokumenten zur Verarbeitung
|
|
||||||
operation_type: Art der Operation ("general", "document_analysis", "image_analysis", etc.)
|
|
||||||
priority: Priorität für Modell-Auswahl ("speed", "quality", "cost", "balanced")
|
|
||||||
compress_prompt: Ob der Prompt komprimiert werden soll
|
|
||||||
compress_documents: Ob Dokumente komprimiert werden sollen
|
|
||||||
process_documents_individually: Ob Dokumente einzeln verarbeitet werden sollen
|
|
||||||
max_cost: Maximale Kosten für den Call
|
|
||||||
max_processing_time: Maximale Verarbeitungszeit in Sekunden
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
AI Response als String
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
# 1. Dokumente verarbeiten falls vorhanden
|
|
||||||
document_content = ""
|
|
||||||
if documents:
|
|
||||||
document_content = await self._process_documents_for_ai(
|
|
||||||
documents,
|
|
||||||
operation_type,
|
|
||||||
compress_documents,
|
|
||||||
process_documents_individually
|
|
||||||
)
|
|
||||||
|
|
||||||
# 2. Bestes Modell basierend auf Priorität und Content auswählen
|
|
||||||
selected_model = self._select_optimal_model(
|
|
||||||
prompt,
|
|
||||||
document_content,
|
|
||||||
priority,
|
|
||||||
operation_type,
|
|
||||||
max_cost,
|
|
||||||
max_processing_time
|
|
||||||
)
|
|
||||||
|
|
||||||
# 3. Content für das gewählte Modell optimieren
|
|
||||||
optimized_prompt, optimized_content = await self._optimize_content_for_model(
|
|
||||||
prompt,
|
|
||||||
document_content,
|
|
||||||
selected_model,
|
|
||||||
compress_prompt,
|
|
||||||
compress_documents
|
|
||||||
)
|
|
||||||
|
|
||||||
# 4. AI Call mit Failover ausführen
|
|
||||||
return await self._execute_ai_call_with_failover(
|
|
||||||
selected_model,
|
|
||||||
optimized_prompt,
|
|
||||||
optimized_content
|
|
||||||
)
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error in centralized AI call: {str(e)}")
|
|
||||||
return f"Error: {str(e)}"
|
|
||||||
|
|
||||||
def _select_optimal_model(
|
|
||||||
self,
|
|
||||||
prompt: str,
|
|
||||||
document_content: str,
|
|
||||||
priority: str,
|
|
||||||
operation_type: str,
|
|
||||||
max_cost: float = None,
|
|
||||||
max_processing_time: int = None
|
|
||||||
) -> str:
|
|
||||||
"""Wählt das optimale Modell basierend auf Priorität und Content aus"""
|
|
||||||
|
|
||||||
# Content-Größe berechnen
|
|
||||||
total_content_size = len(prompt.encode('utf-8')) + len(document_content.encode('utf-8'))
|
|
||||||
|
|
||||||
# Verfügbare Modelle filtern
|
|
||||||
available_models = {}
|
|
||||||
for model_name, model_info in AI_MODELS.items():
|
|
||||||
# Prüfe ob Modell für Content-Größe geeignet ist
|
|
||||||
if total_content_size > model_info["context_length"] * 0.8: # 80% für Content
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Prüfe Kosten-Limit
|
|
||||||
if max_cost:
|
|
||||||
estimated_cost = self._estimate_cost(model_info, total_content_size)
|
|
||||||
if estimated_cost > max_cost:
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Prüfe Operation-Type Kompatibilität
|
|
||||||
if operation_type == "image_analysis" and not model_info["supports_images"]:
|
|
||||||
continue
|
|
||||||
|
|
||||||
available_models[model_name] = model_info
|
|
||||||
|
|
||||||
if not available_models:
|
|
||||||
# Fallback zum kleinsten Modell
|
|
||||||
return "openai_gpt35"
|
|
||||||
|
|
||||||
# Modell basierend auf Priorität auswählen
|
|
||||||
if priority == "speed":
|
|
||||||
return max(available_models.keys(), key=lambda x: available_models[x]["speed_rating"])
|
|
||||||
elif priority == "quality":
|
|
||||||
return max(available_models.keys(), key=lambda x: available_models[x]["quality_rating"])
|
|
||||||
elif priority == "cost":
|
|
||||||
return min(available_models.keys(), key=lambda x: available_models[x]["cost_per_1k_tokens"])
|
|
||||||
else: # balanced
|
|
||||||
# Gewichtete Bewertung: 40% Qualität, 30% Geschwindigkeit, 30% Kosten
|
|
||||||
def balanced_score(model_name):
|
|
||||||
model_info = available_models[model_name]
|
|
||||||
quality_score = model_info["quality_rating"] * 0.4
|
|
||||||
speed_score = model_info["speed_rating"] * 0.3
|
|
||||||
cost_score = (10 - (model_info["cost_per_1k_tokens"] * 1000)) * 0.3 # Niedrigere Kosten = höherer Score
|
|
||||||
return quality_score + speed_score + cost_score
|
|
||||||
|
|
||||||
return max(available_models.keys(), key=balanced_score)
|
|
||||||
|
|
||||||
def _estimate_cost(self, model_info: Dict, content_size: int) -> float:
|
|
||||||
"""Schätzt die Kosten für einen AI Call"""
|
|
||||||
# Grobe Schätzung: 1 Token ≈ 4 Zeichen
|
|
||||||
estimated_tokens = content_size / 4
|
|
||||||
input_cost = (estimated_tokens / 1000) * model_info["cost_per_1k_tokens"]
|
|
||||||
output_cost = (estimated_tokens / 1000) * model_info["cost_per_1k_tokens_output"] * 0.1 # 10% für Output
|
|
||||||
return input_cost + output_cost
|
|
||||||
|
|
||||||
async def _process_documents_for_ai(
|
|
||||||
self,
|
|
||||||
documents: List[ChatDocument],
|
|
||||||
operation_type: str,
|
|
||||||
compress_documents: bool,
|
|
||||||
process_individually: bool
|
|
||||||
) -> str:
|
|
||||||
"""Verarbeitet Dokumente für AI Call mit documentExtraction.py"""
|
|
||||||
|
|
||||||
if not documents:
|
|
||||||
return ""
|
|
||||||
|
|
||||||
processed_contents = []
|
|
||||||
|
|
||||||
for doc in documents:
|
|
||||||
try:
|
|
||||||
# Extrahiere Content mit documentExtraction.py
|
|
||||||
extracted = await self.document_extractor.processFileData(
|
|
||||||
doc.fileData,
|
|
||||||
doc.fileName,
|
|
||||||
doc.mimeType,
|
|
||||||
prompt=f"Extract relevant content for {operation_type}",
|
|
||||||
documentId=doc.id,
|
|
||||||
enableAI=True
|
|
||||||
)
|
|
||||||
|
|
||||||
# Kombiniere alle Content-Items
|
|
||||||
doc_content = []
|
|
||||||
for content_item in extracted.contents:
|
|
||||||
if content_item.data and content_item.data.strip():
|
|
||||||
doc_content.append(content_item.data)
|
|
||||||
|
|
||||||
if doc_content:
|
|
||||||
combined_doc_content = "\n\n".join(doc_content)
|
|
||||||
|
|
||||||
# Komprimiere falls gewünscht
|
|
||||||
if compress_documents and len(combined_doc_content.encode('utf-8')) > 10000: # 10KB Limit
|
|
||||||
combined_doc_content = await self._compress_content(
|
|
||||||
combined_doc_content,
|
|
||||||
10000,
|
|
||||||
"document"
|
|
||||||
)
|
|
||||||
|
|
||||||
processed_contents.append(f"Document: {doc.fileName}\n{combined_doc_content}")
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning(f"Error processing document {doc.fileName}: {str(e)}")
|
|
||||||
processed_contents.append(f"Document: {doc.fileName}\n[Error processing document: {str(e)}]")
|
|
||||||
|
|
||||||
return "\n\n---\n\n".join(processed_contents)
|
|
||||||
|
|
||||||
async def _optimize_content_for_model(
|
|
||||||
self,
|
|
||||||
prompt: str,
|
|
||||||
document_content: str,
|
|
||||||
model_name: str,
|
|
||||||
compress_prompt: bool,
|
|
||||||
compress_documents: bool
|
|
||||||
) -> tuple[str, str]:
|
|
||||||
"""Optimiert Content für das gewählte Modell"""
|
|
||||||
|
|
||||||
model_info = AI_MODELS[model_name]
|
|
||||||
max_content_size = model_info["context_length"] * 0.7 # 70% für Content
|
|
||||||
|
|
||||||
optimized_prompt = prompt
|
|
||||||
optimized_content = document_content
|
|
||||||
|
|
||||||
# Prompt komprimieren falls gewünscht
|
|
||||||
if compress_prompt and len(prompt.encode('utf-8')) > 2000: # 2KB Limit für Prompt
|
|
||||||
optimized_prompt = await self._compress_content(prompt, 2000, "prompt")
|
|
||||||
|
|
||||||
# Dokument-Content komprimieren falls gewünscht
|
|
||||||
if compress_documents and document_content:
|
|
||||||
content_size = len(document_content.encode('utf-8'))
|
|
||||||
if content_size > max_content_size:
|
|
||||||
optimized_content = await self._compress_content(
|
|
||||||
document_content,
|
|
||||||
int(max_content_size),
|
|
||||||
"document"
|
|
||||||
)
|
|
||||||
|
|
||||||
return optimized_prompt, optimized_content
|
|
||||||
|
|
||||||
async def _compress_content(self, content: str, target_size: int, content_type: str) -> str:
|
|
||||||
"""Komprimiert Content intelligent basierend auf Typ"""
|
|
||||||
|
|
||||||
if len(content.encode('utf-8')) <= target_size:
|
|
||||||
return content
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Verwende AI für intelligente Kompression
|
|
||||||
compression_prompt = f"""
|
|
||||||
Komprimiere den folgenden {content_type} auf maximal {target_size} Zeichen,
|
|
||||||
behalte aber alle wichtigen Informationen bei:
|
|
||||||
|
|
||||||
{content}
|
|
||||||
|
|
||||||
Gib nur den komprimierten Inhalt zurück, ohne zusätzliche Erklärungen.
|
|
||||||
"""
|
|
||||||
|
|
||||||
# Verwende das schnellste verfügbare Modell für Kompression
|
|
||||||
compression_model = "openai_gpt35"
|
|
||||||
model_info = AI_MODELS[compression_model]
|
|
||||||
connector = getattr(self, f"{model_info['connector']}Service")
|
|
||||||
|
|
||||||
messages = [{"role": "user", "content": compression_prompt}]
|
|
||||||
|
|
||||||
if model_info["connector"] == "openai":
|
|
||||||
compressed = await connector.callAiBasic(messages)
|
|
||||||
else:
|
|
||||||
response = await connector.callAiBasic(messages)
|
|
||||||
compressed = response["choices"][0]["message"]["content"]
|
|
||||||
|
|
||||||
return compressed
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning(f"AI compression failed, using truncation: {str(e)}")
|
|
||||||
# Fallback: Einfache Truncation
|
|
||||||
return content[:target_size] + "... [truncated]"
|
|
||||||
|
|
||||||
async def _execute_ai_call_with_failover(
|
|
||||||
self,
|
|
||||||
model_name: str,
|
|
||||||
prompt: str,
|
|
||||||
document_content: str
|
|
||||||
) -> str:
|
|
||||||
"""Führt AI Call mit automatischem Failover aus"""
|
|
||||||
|
|
||||||
try:
|
|
||||||
model_info = AI_MODELS[model_name]
|
|
||||||
connector = getattr(self, f"{model_info['connector']}Service")
|
|
||||||
|
|
||||||
# Messages vorbereiten
|
|
||||||
messages = []
|
|
||||||
if document_content:
|
|
||||||
messages.append({
|
|
||||||
"role": "system",
|
|
||||||
"content": f"Context from documents:\n{document_content}"
|
|
||||||
})
|
|
||||||
|
|
||||||
messages.append({
|
|
||||||
"role": "user",
|
|
||||||
"content": prompt
|
|
||||||
})
|
|
||||||
|
|
||||||
# AI Call ausführen
|
|
||||||
if model_info["connector"] == "openai":
|
|
||||||
return await connector.callAiBasic(messages)
|
|
||||||
else: # anthropic
|
|
||||||
response = await connector.callAiBasic(messages)
|
|
||||||
return response["choices"][0]["message"]["content"]
|
|
||||||
|
|
||||||
except ContextLengthExceededException:
|
|
||||||
logger.warning(f"Context length exceeded for {model_name}, trying fallback")
|
|
||||||
# Fallback zu Modell mit größerem Context
|
|
||||||
fallback_model = self._find_fallback_model(model_name)
|
|
||||||
if fallback_model:
|
|
||||||
return await self._execute_ai_call_with_failover(fallback_model, prompt, document_content)
|
|
||||||
else:
|
|
||||||
# Letzter Ausweg: Content weiter komprimieren
|
|
||||||
compressed_prompt = await self._compress_content(prompt, 1000, "prompt")
|
|
||||||
compressed_content = await self._compress_content(document_content, 5000, "document")
|
|
||||||
return await self._execute_ai_call_with_failover("openai_gpt35", compressed_prompt, compressed_content)
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning(f"AI call failed with {model_name}: {e}")
|
|
||||||
# Allgemeiner Fallback
|
|
||||||
return await self._execute_ai_call_with_failover("openai_gpt35", prompt, document_content)
|
|
||||||
|
|
||||||
def _find_fallback_model(self, current_model: str) -> Optional[str]:
|
|
||||||
"""Findet ein Fallback-Modell mit größerem Context"""
|
|
||||||
current_context = AI_MODELS[current_model]["context_length"]
|
|
||||||
|
|
||||||
# Suche Modell mit größerem Context
|
|
||||||
for model_name, model_info in AI_MODELS.items():
|
|
||||||
if model_info["context_length"] > current_context:
|
|
||||||
return model_name
|
|
||||||
|
|
||||||
return None
|
|
||||||
|
|
||||||
# Legacy methods
|
|
||||||
|
|
||||||
async def callAiTextBasic(self, prompt: str, context: Optional[str] = None) -> str:
|
|
||||||
"""
|
|
||||||
Basic text processing - now uses centralized AI call method.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
prompt: The user prompt to process
|
|
||||||
context: Optional system context/prompt
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
The AI response as text
|
|
||||||
"""
|
|
||||||
# Combine context with prompt if provided
|
|
||||||
full_prompt = prompt
|
|
||||||
if context:
|
|
||||||
full_prompt = f"Context: {context}\n\nUser Request: {prompt}"
|
|
||||||
|
|
||||||
# Use centralized AI call with speed priority for basic calls
|
|
||||||
return await self.callAi(
|
|
||||||
prompt=full_prompt,
|
|
||||||
priority="speed",
|
|
||||||
compress_prompt=True,
|
|
||||||
compress_documents=False
|
|
||||||
)
|
|
||||||
|
|
||||||
async def callAiTextAdvanced(self, prompt: str, context: Optional[str] = None, _is_fallback: bool = False) -> str:
|
|
||||||
"""
|
|
||||||
Advanced text processing - now uses centralized AI call method.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
prompt: The user prompt to process
|
|
||||||
context: Optional system context/prompt
|
|
||||||
_is_fallback: Internal flag (kept for compatibility)
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
The AI response as text
|
|
||||||
"""
|
|
||||||
# Combine context with prompt if provided
|
|
||||||
full_prompt = prompt
|
|
||||||
if context:
|
|
||||||
full_prompt = f"Context: {context}\n\nUser Request: {prompt}"
|
|
||||||
|
|
||||||
# Use centralized AI call with quality priority for advanced calls
|
|
||||||
return await self.callAi(
|
|
||||||
prompt=full_prompt,
|
|
||||||
priority="quality",
|
|
||||||
compress_prompt=False,
|
|
||||||
compress_documents=False
|
|
||||||
)
|
|
||||||
|
|
||||||
async def callAiImageBasic(self, prompt: str, imageData: Union[str, bytes], mimeType: str = None) -> str:
|
|
||||||
"""
|
|
||||||
Basic image processing - now uses centralized AI call method.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
prompt: The prompt for image analysis
|
|
||||||
imageData: The image data (file path or bytes)
|
|
||||||
mimeType: Optional MIME type of the image
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
The AI response as text
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
# For image processing, use the original connector directly
|
|
||||||
# as the centralized method doesn't handle images yet
|
|
||||||
return await self.openaiService.callAiImage(prompt, imageData, mimeType)
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error in OpenAI image call: {str(e)}")
|
|
||||||
return f"Error: {str(e)}"
|
|
||||||
|
|
||||||
async def callAiImageAdvanced(self, prompt: str, imageData: Union[str, bytes], mimeType: str = None) -> str:
|
|
||||||
"""
|
|
||||||
Advanced image processing - now uses centralized AI call method.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
prompt: The prompt for image analysis
|
|
||||||
imageData: The image data (file path or bytes)
|
|
||||||
mimeType: Optional MIME type of the image
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
The AI response as text
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
# For image processing, use the original connector directly
|
|
||||||
# as the centralized method doesn't handle images yet
|
|
||||||
return await self.anthropicService.callAiImage(prompt, imageData, mimeType)
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error in Anthropic image call: {str(e)}")
|
|
||||||
return f"Error: {str(e)}"
|
|
||||||
|
|
||||||
# Convenience methods for common use cases
|
|
||||||
|
|
||||||
async def callAiForDocumentAnalysis(
    self,
    prompt: str,
    documents: List[ChatDocument],
    priority: str = "balanced"
) -> str:
    """Convenience wrapper: analyze documents via the centralized AI entry point.

    Documents are compressed and passed as a single batch (not individually).
    """
    call_kwargs = {
        "prompt": prompt,
        "documents": documents,
        "operation_type": "document_analysis",
        "priority": priority,
        "compress_documents": True,
        "process_documents_individually": False,
    }
    return await self.callAi(**call_kwargs)
|
|
||||||
|
|
||||||
async def callAiForReportGeneration(
    self,
    prompt: str,
    documents: List[ChatDocument],
    priority: str = "quality"
) -> str:
    """Convenience wrapper: generate a report via the centralized AI entry point.

    Documents are compressed and processed one by one (per-document passes).
    """
    call_kwargs = {
        "prompt": prompt,
        "documents": documents,
        "operation_type": "report_generation",
        "priority": priority,
        "compress_documents": True,
        "process_documents_individually": True,
    }
    return await self.callAi(**call_kwargs)
|
|
||||||
|
|
||||||
async def callAiForEmailComposition(
    self,
    prompt: str,
    documents: List[ChatDocument] = None,
    priority: str = "speed"
) -> str:
    """Convenience wrapper: compose an email via the centralized AI entry point.

    Both the prompt and any attached documents are compressed; defaults to
    the "speed" priority.
    """
    call_kwargs = {
        "prompt": prompt,
        "documents": documents,
        "operation_type": "email_composition",
        "priority": priority,
        "compress_prompt": True,
        "compress_documents": True,
    }
    return await self.callAi(**call_kwargs)
|
|
||||||
|
|
||||||
async def callAiForTaskPlanning(
    self,
    prompt: str,
    documents: List[ChatDocument] = None,
    priority: str = "balanced"
) -> str:
    """Convenience wrapper: plan tasks via the centralized AI entry point.

    The prompt is kept uncompressed (planning instructions stay intact);
    documents, if any, are compressed.
    """
    call_kwargs = {
        "prompt": prompt,
        "documents": documents,
        "operation_type": "task_planning",
        "priority": priority,
        "compress_prompt": False,
        "compress_documents": True,
    }
    return await self.callAi(**call_kwargs)
|
|
||||||
|
|
||||||
30
modules/interfaces/interfaceAiModel.py
Normal file
30
modules/interfaces/interfaceAiModel.py
Normal file
|
|
@ -0,0 +1,30 @@
|
||||||
|
from typing import Optional
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
|
||||||
|
class AiCallOptions(BaseModel):
    """Options for centralized AI processing (no document extraction here)."""

    # Free-form operation label (e.g. "general"); carried through for callers.
    operationType: str = Field(default="general", description="Type of operation")
    # Model-selection priority: one of speed | quality | cost | balanced.
    priority: str = Field(default="balanced", description="speed|quality|cost|balanced")
    # When True, oversized prompts may be truncated before the call.
    compressPrompt: bool = Field(default=True, description="Whether to compress the prompt")
    # When True, oversized external context may be truncated before the call.
    compressContext: bool = Field(default=True, description="Whether to compress optional context")
    # Optional budget ceiling; models whose estimated cost exceeds it are excluded.
    maxCost: Optional[float] = Field(default=None, description="Max cost budget")
    # Optional wall-clock budget in seconds — presumably enforced by the caller; verify.
    maxProcessingTime: Optional[int] = Field(default=None, description="Max processing time in seconds")
|
||||||
|
|
||||||
|
|
||||||
|
class AiCallRequest(BaseModel):
    """Centralized AI call request payload for interface use."""

    # The user-facing instruction for the model.
    prompt: str = Field(description="The user prompt")
    # Pre-extracted supporting text (document extraction happens elsewhere).
    context: Optional[str] = Field(default=None, description="Optional external context (e.g., extracted docs)")
    # Tuning knobs; defaults to a fresh AiCallOptions instance per request.
    options: AiCallOptions = Field(default_factory=AiCallOptions)
|
||||||
|
|
||||||
|
|
||||||
|
class AiCallResponse(BaseModel):
    """Standardized AI call response."""

    # Text returned by the selected model.
    content: str = Field(description="AI response content")
    # Registry key of the model that served the call (e.g. "openai_gpt4o").
    modelName: str = Field(description="Selected model name")
    # Rough token count (size-based heuristic, not a provider-reported figure).
    usedTokens: Optional[int] = Field(default=None, description="Estimated used tokens")
    # Rough cost estimate derived from the registry's per-1k-token rates.
    costEstimate: Optional[float] = Field(default=None, description="Estimated cost of the call")
|
||||||
117
modules/interfaces/interfaceAiObjects.py
Normal file
117
modules/interfaces/interfaceAiObjects.py
Normal file
|
|
@ -0,0 +1,117 @@
|
||||||
|
import logging
|
||||||
|
from typing import Dict, Any, List
|
||||||
|
|
||||||
|
from modules.connectors.connectorAiOpenai import AiOpenai
|
||||||
|
from modules.connectors.connectorAiAnthropic import AiAnthropic
|
||||||
|
from modules.interfaces.interfaceAiModel import AiCallOptions, AiCallRequest, AiCallResponse
|
||||||
|
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
# Local model registry (connectors specification) belongs in interface layer, not service
# NOTE: insertion order matters — _selectModel iterates this dict and Python's
# max()/min() keep the first of tied candidates. Costs are per 1k tokens (USD);
# speed/quality ratings are relative scores on a 1-10 scale used for ranking.
aiModels: Dict[str, Dict[str, Any]] = {
    "openai_gpt4o": {
        "connector": "openai",
        "contextLength": 128000,
        "costPer1kTokens": 0.03,
        "costPer1kTokensOutput": 0.06,
        "speedRating": 8,
        "qualityRating": 9,
    },
    "openai_gpt35": {
        "connector": "openai",
        "contextLength": 16000,
        "costPer1kTokens": 0.0015,
        "costPer1kTokensOutput": 0.002,
        "speedRating": 9,
        "qualityRating": 7,
    },
    "anthropic_claude": {
        "connector": "anthropic",
        "contextLength": 200000,
        "costPer1kTokens": 0.015,
        "costPer1kTokensOutput": 0.075,
        "speedRating": 7,
        "qualityRating": 10,
    },
}
|
||||||
|
|
||||||
|
|
||||||
|
class AiObjects:
    """Centralized AI interface: selects model and calls connector. No document handling."""

    def __init__(self):
        # One connector instance per provider, reused across calls.
        self.openaiService = AiOpenai()
        self.anthropicService = AiAnthropic()

    def _estimateCost(self, modelInfo: Dict[str, Any], contentSize: int) -> float:
        """Rough cost estimate for a call of `contentSize` bytes against `modelInfo`.

        Uses the ~4 bytes/token heuristic; output cost assumes the response is
        about 10% the size of the input.
        """
        estimatedTokens = contentSize / 4
        inputCost = (estimatedTokens / 1000) * modelInfo["costPer1kTokens"]
        outputCost = (estimatedTokens / 1000) * modelInfo["costPer1kTokensOutput"] * 0.1
        return inputCost + outputCost

    def _selectModel(self, prompt: str, context: str, options: AiCallOptions) -> str:
        """Pick a model name from aiModels that fits the payload and options.

        Filters out models whose context window (with 20% headroom) or cost
        budget would be exceeded, then ranks survivors by options.priority.
        Falls back to "openai_gpt35" when nothing qualifies.
        """
        totalSize = len(prompt.encode("utf-8")) + len(context.encode("utf-8"))
        candidates: Dict[str, Dict[str, Any]] = {}
        for name, info in aiModels.items():
            # Keep 20% headroom below the advertised context length.
            if totalSize > info["contextLength"] * 0.8:
                continue
            if options.maxCost is not None:
                if self._estimateCost(info, totalSize) > options.maxCost:
                    continue
            candidates[name] = info
        if not candidates:
            # Fallback even if it violated the filters above (cheapest default).
            return "openai_gpt35"
        if options.priority == "speed":
            return max(candidates, key=lambda k: candidates[k]["speedRating"])
        if options.priority == "quality":
            return max(candidates, key=lambda k: candidates[k]["qualityRating"])
        if options.priority == "cost":
            return min(candidates, key=lambda k: candidates[k]["costPer1kTokens"])

        # "balanced" (default): weighted blend of quality, speed, and cheapness.
        def balancedScore(name: str) -> float:
            info = candidates[name]
            return info["qualityRating"] * 0.4 + info["speedRating"] * 0.3 + (10 - info["costPer1kTokens"] * 1000) * 0.3

        return max(candidates, key=balancedScore)

    def _connectorFor(self, modelName: str):
        """Return the connector instance that serves the given registry model."""
        return self.openaiService if aiModels[modelName]["connector"] == "openai" else self.anthropicService

    async def call(self, request: AiCallRequest) -> AiCallResponse:
        """Run one centralized AI call: truncate, select model, dispatch, wrap result."""
        prompt = request.prompt
        context = request.context or ""
        options = request.options

        # Compress optionally (prompt/context) - simple truncation fallback kept here
        def maybeTruncate(text: str, limit: int) -> str:
            # Truncate on byte length; errors="ignore" drops a split multi-byte char.
            data = text.encode("utf-8")
            if len(data) <= limit:
                return text
            return data[:limit].decode("utf-8", errors="ignore") + "... [truncated]"

        if options.compressPrompt and len(prompt.encode("utf-8")) > 2000:
            prompt = maybeTruncate(prompt, 2000)
        if options.compressContext and len(context.encode("utf-8")) > 70000:
            context = maybeTruncate(context, 70000)

        modelName = self._selectModel(prompt, context, options)

        # Chat-style message list: optional system context, then the user prompt.
        messages: List[Dict[str, Any]] = []
        if context:
            messages.append({"role": "system", "content": f"Context from documents:\n{context}"})
        messages.append({"role": "user", "content": prompt})

        connector = self._connectorFor(modelName)
        if aiModels[modelName]["connector"] == "openai":
            # NOTE(review): OpenAI connector appears to return plain text here.
            content = await connector.callAiBasic(messages)
        else:
            # NOTE(review): assumes the Anthropic connector returns an
            # OpenAI-style dict (choices[0].message.content) — confirm.
            response = await connector.callAiBasic(messages)
            content = response["choices"][0]["message"]["content"]

        # Estimate cost/tokens from the (possibly truncated) payload size.
        totalSize = len((prompt + context).encode("utf-8"))
        cost = self._estimateCost(aiModels[modelName], totalSize)
        usedTokens = int(totalSize / 4)

        return AiCallResponse(content=content, modelName=modelName, usedTokens=usedTokens, costEstimate=cost)
|
||||||
|
|
||||||
|
|
@ -201,7 +201,6 @@ class AppObjects:
|
||||||
"""
|
"""
|
||||||
return self.access.canModify(model_class, recordId)
|
return self.access.canModify(model_class, recordId)
|
||||||
|
|
||||||
|
|
||||||
def getInitialId(self, model_class: type) -> Optional[str]:
|
def getInitialId(self, model_class: type) -> Optional[str]:
|
||||||
"""Returns the initial ID for a table."""
|
"""Returns the initial ID for a table."""
|
||||||
return self.db.getInitialId(model_class)
|
return self.db.getInitialId(model_class)
|
||||||
|
|
@ -268,105 +267,6 @@ class AppObjects:
|
||||||
logger.error(f"Error getting user by ID: {str(e)}")
|
logger.error(f"Error getting user by ID: {str(e)}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def getUserConnections(self, userId: str) -> List[UserConnection]:
|
|
||||||
"""Returns all connections for a user."""
|
|
||||||
try:
|
|
||||||
# Get connections for this user
|
|
||||||
connections = self.db.getRecordset(UserConnection, recordFilter={"userId": userId})
|
|
||||||
|
|
||||||
# Convert to UserConnection objects
|
|
||||||
result = []
|
|
||||||
for conn_dict in connections:
|
|
||||||
try:
|
|
||||||
# Create UserConnection object
|
|
||||||
connection = UserConnection(
|
|
||||||
id=conn_dict["id"],
|
|
||||||
userId=conn_dict["userId"],
|
|
||||||
authority=conn_dict.get("authority"),
|
|
||||||
externalId=conn_dict.get("externalId", ""),
|
|
||||||
externalUsername=conn_dict.get("externalUsername", ""),
|
|
||||||
externalEmail=conn_dict.get("externalEmail"),
|
|
||||||
status=conn_dict.get("status", "pending"),
|
|
||||||
connectedAt=conn_dict.get("connectedAt"),
|
|
||||||
lastChecked=conn_dict.get("lastChecked"),
|
|
||||||
expiresAt=conn_dict.get("expiresAt")
|
|
||||||
)
|
|
||||||
result.append(connection)
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error converting connection dict to object: {str(e)}")
|
|
||||||
continue
|
|
||||||
return result
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error getting user connections: {str(e)}")
|
|
||||||
return []
|
|
||||||
|
|
||||||
def addUserConnection(self, userId: str, authority: AuthAuthority, externalId: str,
|
|
||||||
externalUsername: str, externalEmail: Optional[str] = None,
|
|
||||||
status: ConnectionStatus = ConnectionStatus.PENDING) -> UserConnection:
|
|
||||||
"""
|
|
||||||
Adds a new connection for a user.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
userId: The ID of the user
|
|
||||||
authority: The authentication authority (e.g., MSFT, GOOGLE)
|
|
||||||
externalId: The external ID from the authority
|
|
||||||
externalUsername: The username from the authority
|
|
||||||
externalEmail: Optional email from the authority
|
|
||||||
status: The connection status (defaults to PENDING)
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
The created UserConnection object
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
# Get the user
|
|
||||||
user = self.getUser(userId)
|
|
||||||
if not user:
|
|
||||||
raise ValueError(f"User not found: {userId}")
|
|
||||||
|
|
||||||
# Create new connection with all required fields
|
|
||||||
connection = UserConnection(
|
|
||||||
id=str(uuid.uuid4()),
|
|
||||||
userId=userId,
|
|
||||||
authority=authority,
|
|
||||||
externalId=externalId,
|
|
||||||
externalUsername=externalUsername,
|
|
||||||
externalEmail=externalEmail,
|
|
||||||
status=status,
|
|
||||||
connectedAt=get_utc_timestamp(),
|
|
||||||
lastChecked=get_utc_timestamp(),
|
|
||||||
expiresAt=None # Optional field, set to None by default
|
|
||||||
)
|
|
||||||
|
|
||||||
# Save to connections table
|
|
||||||
self.db.recordCreate(UserConnection, connection)
|
|
||||||
|
|
||||||
|
|
||||||
return connection
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error adding user connection: {str(e)}")
|
|
||||||
raise ValueError(f"Failed to add user connection: {str(e)}")
|
|
||||||
|
|
||||||
def removeUserConnection(self, connectionId: str) -> None:
|
|
||||||
"""Remove a connection to an external service"""
|
|
||||||
try:
|
|
||||||
# Get connection
|
|
||||||
connections = self.db.getRecordset(UserConnection, recordFilter={
|
|
||||||
"id": connectionId
|
|
||||||
})
|
|
||||||
|
|
||||||
if not connections:
|
|
||||||
raise ValueError(f"Connection {connectionId} not found")
|
|
||||||
|
|
||||||
# Delete connection
|
|
||||||
self.db.recordDelete(UserConnection, connectionId)
|
|
||||||
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error removing user connection: {str(e)}")
|
|
||||||
raise ValueError(f"Failed to remove user connection: {str(e)}")
|
|
||||||
|
|
||||||
def authenticateLocalUser(self, username: str, password: str) -> Optional[User]:
|
def authenticateLocalUser(self, username: str, password: str) -> Optional[User]:
|
||||||
"""Authenticates a user by username and password using local authentication."""
|
"""Authenticates a user by username and password using local authentication."""
|
||||||
# Clear the users table from cache and reload it
|
# Clear the users table from cache and reload it
|
||||||
|
|
@ -551,6 +451,154 @@ class AppObjects:
|
||||||
logger.error(f"Error deleting user: {str(e)}")
|
logger.error(f"Error deleting user: {str(e)}")
|
||||||
raise ValueError(f"Failed to delete user: {str(e)}")
|
raise ValueError(f"Failed to delete user: {str(e)}")
|
||||||
|
|
||||||
|
def _getInitialUser(self) -> Optional[Dict[str, Any]]:
    """Fetch the initial user row straight from the DB, bypassing access control.

    Returns:
        The raw user record dict, or None when no initial user exists or on error.
    """
    try:
        firstUserId = self.getInitialId(UserInDB)
        if not firstUserId:
            return None
        matches = self.db.getRecordset(UserInDB, recordFilter={"id": firstUserId})
        if matches:
            return matches[0]
        return None
    except Exception as e:
        logger.error(f"Error getting initial user: {str(e)}")
        return None
|
||||||
|
|
||||||
|
def checkUsernameAvailability(self, checkData: Dict[str, Any]) -> Dict[str, Any]:
    """Checks if a username is available for registration.

    Args:
        checkData: Payload with a "username" key. (An
            "authenticationAuthority" key is accepted but currently unused —
            the previous code read it into a dead local, now removed.)

    Returns:
        Dict with "available" (bool) and a human-readable "message".
    """
    try:
        username = checkData.get("username")

        # Reject missing/empty usernames up front.
        if not username:
            return {
                "available": False,
                "message": "Username is required"
            }

        # A username is taken when any existing user record matches it.
        user = self.getUserByUsername(username)
        if user is not None:
            return {
                "available": False,
                "message": "Username is already taken"
            }

        return {
            "available": True,
            "message": "Username is available"
        }

    except Exception as e:
        logger.error(f"Error checking username availability: {str(e)}")
        # Fail closed: report unavailable rather than risking a duplicate.
        return {
            "available": False,
            "message": f"Error checking username availability: {str(e)}"
        }
|
||||||
|
|
||||||
|
# Connection methods
|
||||||
|
|
||||||
|
def getUserConnections(self, userId: str) -> List[UserConnection]:
    """Returns all connections for a user.

    Malformed rows are logged and skipped; any lookup failure yields [].
    """
    try:
        rows = self.db.getRecordset(UserConnection, recordFilter={"userId": userId})

        result: List[UserConnection] = []
        for row in rows:
            try:
                result.append(
                    UserConnection(
                        id=row["id"],
                        userId=row["userId"],
                        authority=row.get("authority"),
                        externalId=row.get("externalId", ""),
                        externalUsername=row.get("externalUsername", ""),
                        externalEmail=row.get("externalEmail"),
                        status=row.get("status", "pending"),
                        connectedAt=row.get("connectedAt"),
                        lastChecked=row.get("lastChecked"),
                        expiresAt=row.get("expiresAt"),
                    )
                )
            except Exception as e:
                # Skip the bad row instead of failing the whole lookup.
                logger.error(f"Error converting connection dict to object: {str(e)}")
        return result

    except Exception as e:
        logger.error(f"Error getting user connections: {str(e)}")
        return []
|
||||||
|
|
||||||
|
def addUserConnection(self, userId: str, authority: AuthAuthority, externalId: str,
                      externalUsername: str, externalEmail: Optional[str] = None,
                      status: ConnectionStatus = ConnectionStatus.PENDING) -> UserConnection:
    """
    Adds a new connection for a user.

    Args:
        userId: The ID of the user
        authority: The authentication authority (e.g., MSFT, GOOGLE)
        externalId: The external ID from the authority
        externalUsername: The username from the authority
        externalEmail: Optional email from the authority
        status: The connection status (defaults to PENDING)

    Returns:
        The created UserConnection object

    Raises:
        ValueError: When the user does not exist or persistence fails.
    """
    try:
        # The user must exist before a connection can be attached to it.
        if not self.getUser(userId):
            raise ValueError(f"User not found: {userId}")

        newConnection = UserConnection(
            id=str(uuid.uuid4()),
            userId=userId,
            authority=authority,
            externalId=externalId,
            externalUsername=externalUsername,
            externalEmail=externalEmail,
            status=status,
            connectedAt=get_utc_timestamp(),
            lastChecked=get_utc_timestamp(),
            expiresAt=None,  # Optional field, set to None by default
        )

        # Persist to the connections table.
        self.db.recordCreate(UserConnection, newConnection)
        return newConnection

    except Exception as e:
        # Note: this also re-wraps the "User not found" ValueError above.
        logger.error(f"Error adding user connection: {str(e)}")
        raise ValueError(f"Failed to add user connection: {str(e)}")
|
||||||
|
|
||||||
|
def removeUserConnection(self, connectionId: str) -> None:
    """Remove a connection to an external service.

    Raises:
        ValueError: When the connection does not exist or deletion fails.
    """
    try:
        # Verify the connection exists before attempting deletion.
        matches = self.db.getRecordset(UserConnection, recordFilter={
            "id": connectionId
        })
        if not matches:
            raise ValueError(f"Connection {connectionId} not found")

        self.db.recordDelete(UserConnection, connectionId)

    except Exception as e:
        # Note: this also re-wraps the "not found" ValueError above.
        logger.error(f"Error removing user connection: {str(e)}")
        raise ValueError(f"Failed to remove user connection: {str(e)}")
|
||||||
|
|
||||||
# Mandate methods
|
# Mandate methods
|
||||||
|
|
||||||
def getAllMandates(self) -> List[Mandate]:
|
def getAllMandates(self) -> List[Mandate]:
|
||||||
|
|
@ -650,52 +698,7 @@ class AppObjects:
|
||||||
logger.error(f"Error deleting mandate: {str(e)}")
|
logger.error(f"Error deleting mandate: {str(e)}")
|
||||||
raise ValueError(f"Failed to delete mandate: {str(e)}")
|
raise ValueError(f"Failed to delete mandate: {str(e)}")
|
||||||
|
|
||||||
def _getInitialUser(self) -> Optional[Dict[str, Any]]:
|
# Token methods
|
||||||
"""Get the initial user record directly from database without access control."""
|
|
||||||
try:
|
|
||||||
initialUserId = self.getInitialId(UserInDB)
|
|
||||||
if not initialUserId:
|
|
||||||
return None
|
|
||||||
|
|
||||||
users = self.db.getRecordset(UserInDB, recordFilter={"id": initialUserId})
|
|
||||||
return users[0] if users else None
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error getting initial user: {str(e)}")
|
|
||||||
return None
|
|
||||||
|
|
||||||
def checkUsernameAvailability(self, checkData: Dict[str, Any]) -> Dict[str, Any]:
|
|
||||||
"""Checks if a username is available for registration."""
|
|
||||||
try:
|
|
||||||
username = checkData.get("username")
|
|
||||||
authenticationAuthority = checkData.get("authenticationAuthority", "local")
|
|
||||||
|
|
||||||
if not username:
|
|
||||||
return {
|
|
||||||
"available": False,
|
|
||||||
"message": "Username is required"
|
|
||||||
}
|
|
||||||
|
|
||||||
# Get user by username
|
|
||||||
user = self.getUserByUsername(username)
|
|
||||||
|
|
||||||
# Check if user exists (User model instance)
|
|
||||||
if user is not None:
|
|
||||||
return {
|
|
||||||
"available": False,
|
|
||||||
"message": "Username is already taken"
|
|
||||||
}
|
|
||||||
|
|
||||||
return {
|
|
||||||
"available": True,
|
|
||||||
"message": "Username is available"
|
|
||||||
}
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error checking username availability: {str(e)}")
|
|
||||||
return {
|
|
||||||
"available": False,
|
|
||||||
"message": f"Error checking username availability: {str(e)}"
|
|
||||||
}
|
|
||||||
|
|
||||||
def saveAccessToken(self, token: Token, replace_existing: bool = True) -> None:
|
def saveAccessToken(self, token: Token, replace_existing: bool = True) -> None:
|
||||||
"""Save an access token for the current user (must NOT have connectionId)"""
|
"""Save an access token for the current user (must NOT have connectionId)"""
|
||||||
|
|
@ -803,56 +806,8 @@ class AppObjects:
|
||||||
logger.error(f"Error saving connection token: {str(e)}")
|
logger.error(f"Error saving connection token: {str(e)}")
|
||||||
raise
|
raise
|
||||||
|
|
||||||
def getAccessToken(self, authority: str, auto_refresh: bool = True) -> Optional[Token]:
|
def getConnectionToken(self, connectionId: str) -> Optional[Token]:
|
||||||
"""Get the latest valid access token for the current user and authority, optionally auto-refresh if expired"""
|
"""Get the latest stored token for a specific connectionId (no refresh)."""
|
||||||
try:
|
|
||||||
# Validate that we're not looking for connection tokens
|
|
||||||
if not self.currentUser or not self.currentUser.id:
|
|
||||||
raise ValueError("No valid user context available for token retrieval")
|
|
||||||
|
|
||||||
# Get access tokens for this user and authority (must NOT have connectionId)
|
|
||||||
tokens = self.db.getRecordset(Token, recordFilter={
|
|
||||||
"userId": self.currentUser.id,
|
|
||||||
"authority": authority,
|
|
||||||
"connectionId": None # Ensure we only get access tokens
|
|
||||||
})
|
|
||||||
|
|
||||||
if not tokens:
|
|
||||||
return None
|
|
||||||
|
|
||||||
# Sort by creation date and get the latest
|
|
||||||
tokens.sort(key=lambda x: x.get("createdAt", ""), reverse=True)
|
|
||||||
latest_token = Token(**tokens[0])
|
|
||||||
|
|
||||||
# Check if token is expired
|
|
||||||
if latest_token.expiresAt and latest_token.expiresAt < get_utc_timestamp():
|
|
||||||
if auto_refresh:
|
|
||||||
# Import TokenManager here to avoid circular imports
|
|
||||||
from modules.security.tokenManager import TokenManager
|
|
||||||
token_manager = TokenManager()
|
|
||||||
|
|
||||||
# Try to refresh the token
|
|
||||||
refreshed_token = token_manager.refresh_token(latest_token)
|
|
||||||
if refreshed_token:
|
|
||||||
# Save the new token (which will automatically replace old ones)
|
|
||||||
self.saveAccessToken(refreshed_token)
|
|
||||||
|
|
||||||
return refreshed_token
|
|
||||||
else:
|
|
||||||
logger.warning(f"Failed to refresh expired access token for {authority}")
|
|
||||||
return None
|
|
||||||
else:
|
|
||||||
logger.warning(f"Access token for {authority} is expired (expiresAt: {latest_token.expiresAt})")
|
|
||||||
return None
|
|
||||||
|
|
||||||
return latest_token
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error getting access token: {str(e)}")
|
|
||||||
return None
|
|
||||||
|
|
||||||
def getConnectionToken(self, connectionId: str, auto_refresh: bool = True) -> Optional[Token]:
|
|
||||||
"""Get the connection token for a specific connectionId, optionally auto-refresh if expired"""
|
|
||||||
try:
|
try:
|
||||||
# Validate connectionId
|
# Validate connectionId
|
||||||
if not connectionId:
|
if not connectionId:
|
||||||
|
|
@ -873,31 +828,7 @@ class AppObjects:
|
||||||
tokens.sort(key=lambda x: x.get("expiresAt", 0), reverse=True)
|
tokens.sort(key=lambda x: x.get("expiresAt", 0), reverse=True)
|
||||||
latest_token = Token(**tokens[0])
|
latest_token = Token(**tokens[0])
|
||||||
|
|
||||||
# Check if token is expired or expires within 30 minutes
|
# No auto-refresh here. Callers should use a higher-level service to refresh when needed.
|
||||||
current_time = get_utc_timestamp()
|
|
||||||
thirty_minutes = 30 * 60 # 30 minutes in seconds
|
|
||||||
|
|
||||||
if latest_token.expiresAt and latest_token.expiresAt < (current_time + thirty_minutes):
|
|
||||||
if auto_refresh:
|
|
||||||
# Import TokenManager here to avoid circular imports
|
|
||||||
from modules.security.tokenManager import TokenManager
|
|
||||||
token_manager = TokenManager()
|
|
||||||
|
|
||||||
# Try to refresh the token
|
|
||||||
refreshed_token = token_manager.refresh_token(latest_token)
|
|
||||||
|
|
||||||
if refreshed_token:
|
|
||||||
# Save the new token (which will automatically replace old ones)
|
|
||||||
self.saveConnectionToken(refreshed_token)
|
|
||||||
|
|
||||||
logger.info(f"Proactively refreshed connection token for connectionId {connectionId} (expired in {latest_token.expiresAt - current_time} seconds)")
|
|
||||||
return refreshed_token
|
|
||||||
else:
|
|
||||||
logger.warning(f"Token refresh failed for connectionId {connectionId}")
|
|
||||||
return None
|
|
||||||
else:
|
|
||||||
logger.warning(f"Connection token for connectionId {connectionId} expires soon (expiresAt: {latest_token.expiresAt})")
|
|
||||||
return None
|
|
||||||
|
|
||||||
return latest_token
|
return latest_token
|
||||||
|
|
||||||
|
|
@ -905,53 +836,6 @@ class AppObjects:
|
||||||
logger.error(f"Error getting connection token for connectionId {connectionId}: {str(e)}")
|
logger.error(f"Error getting connection token for connectionId {connectionId}: {str(e)}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def deleteAccessToken(self, authority: str) -> None:
|
|
||||||
"""Delete all access tokens for the current user and authority"""
|
|
||||||
try:
|
|
||||||
# Validate user context
|
|
||||||
if not self.currentUser or not self.currentUser.id:
|
|
||||||
raise ValueError("No valid user context available for token deletion")
|
|
||||||
|
|
||||||
# Get access tokens to delete (must NOT have connectionId)
|
|
||||||
tokens = self.db.getRecordset(Token, recordFilter={
|
|
||||||
"userId": self.currentUser.id,
|
|
||||||
"authority": authority,
|
|
||||||
"connectionId": None # Ensure we only delete access tokens
|
|
||||||
})
|
|
||||||
|
|
||||||
# Delete each token
|
|
||||||
for token in tokens:
|
|
||||||
self.db.recordDelete(Token, token["id"])
|
|
||||||
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error deleting access token: {str(e)}")
|
|
||||||
raise
|
|
||||||
|
|
||||||
def deleteConnectionTokenByConnectionId(self, connectionId: str) -> None:
|
|
||||||
"""Delete all connection tokens for a specific connectionId"""
|
|
||||||
try:
|
|
||||||
# Validate connectionId
|
|
||||||
if not connectionId:
|
|
||||||
raise ValueError("connectionId is required for deleteConnectionTokenByConnectionId")
|
|
||||||
|
|
||||||
# Get connection tokens to delete
|
|
||||||
tokens = self.db.getRecordset(Token, recordFilter={
|
|
||||||
"connectionId": connectionId
|
|
||||||
})
|
|
||||||
|
|
||||||
# Delete each token
|
|
||||||
for token in tokens:
|
|
||||||
self.db.recordDelete(Token, token["id"])
|
|
||||||
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error deleting connection token for connectionId {connectionId}: {str(e)}")
|
|
||||||
raise
|
|
||||||
|
|
||||||
# =====================
|
|
||||||
# Token revocation (LOCAL gateway JWTs)
|
|
||||||
# =====================
|
|
||||||
def findActiveTokenById(self, tokenId: str, userId: str, authority: AuthAuthority, sessionId: str = None, mandateId: str = None) -> Optional[Token]:
|
def findActiveTokenById(self, tokenId: str, userId: str, authority: AuthAuthority, sessionId: str = None, mandateId: str = None) -> Optional[Token]:
|
||||||
"""Find an active access token by its id (jti) with optional session/tenant scoping."""
|
"""Find an active access token by its id (jti) with optional session/tenant scoping."""
|
||||||
try:
|
try:
|
||||||
|
|
@ -1088,7 +972,7 @@ class AppObjects:
|
||||||
logger.error(f"Error during logout: {str(e)}")
|
logger.error(f"Error during logout: {str(e)}")
|
||||||
raise
|
raise
|
||||||
|
|
||||||
# Data Neutralization methods
|
# Neutralization methods
|
||||||
|
|
||||||
def getNeutralizationConfig(self) -> Optional[DataNeutraliserConfig]:
|
def getNeutralizationConfig(self) -> Optional[DataNeutraliserConfig]:
|
||||||
"""Get the data neutralization configuration for the current user's mandate"""
|
"""Get the data neutralization configuration for the current user's mandate"""
|
||||||
|
|
@ -1138,98 +1022,6 @@ class AppObjects:
|
||||||
logger.error(f"Error creating/updating neutralization config: {str(e)}")
|
logger.error(f"Error creating/updating neutralization config: {str(e)}")
|
||||||
raise ValueError(f"Failed to create/update neutralization config: {str(e)}")
|
raise ValueError(f"Failed to create/update neutralization config: {str(e)}")
|
||||||
|
|
||||||
def neutralizeText(self, text: str, file_id: Optional[str] = None) -> Dict[str, Any]:
|
|
||||||
"""Neutralize text content and store attribute mappings"""
|
|
||||||
try:
|
|
||||||
from modules.services.serviceNeutralization.neutralizer import DataAnonymizer
|
|
||||||
|
|
||||||
# Get neutralization configuration to extract namesToParse
|
|
||||||
config = self.getNeutralizationConfig()
|
|
||||||
names_to_parse = []
|
|
||||||
if config and hasattr(config, 'namesToParse') and config.namesToParse:
|
|
||||||
# Split by newlines and filter out empty strings
|
|
||||||
names_to_parse = [name.strip() for name in config.namesToParse.split('\n') if name.strip()]
|
|
||||||
|
|
||||||
# Initialize anonymizer with custom names
|
|
||||||
anonymizer = DataAnonymizer(names_to_parse=names_to_parse)
|
|
||||||
|
|
||||||
# Process the text
|
|
||||||
result = anonymizer.process_content(text, 'text')
|
|
||||||
|
|
||||||
# Store attribute mappings in database
|
|
||||||
stored_attributes = []
|
|
||||||
for original_text, neutralized_text in result.mapping.items():
|
|
||||||
# Extract pattern type and UUID from the neutralized text format [type.uuid]
|
|
||||||
pattern_type = "unknown"
|
|
||||||
placeholder_uuid = None
|
|
||||||
|
|
||||||
if neutralized_text.startswith("[") and "." in neutralized_text and neutralized_text.endswith("]"):
|
|
||||||
# Extract type and UUID from [type.uuid] format
|
|
||||||
inner = neutralized_text[1:-1] # Remove [ and ]
|
|
||||||
if "." in inner:
|
|
||||||
pattern_type, placeholder_uuid = inner.split(".", 1)
|
|
||||||
|
|
||||||
# Check if this exact original text already has a placeholder in the database
|
|
||||||
existing_attribute = self.getExistingPlaceholder(original_text)
|
|
||||||
|
|
||||||
if existing_attribute:
|
|
||||||
# Reuse existing placeholder
|
|
||||||
existing_uuid = existing_attribute.id
|
|
||||||
existing_pattern_type = existing_attribute.patternType
|
|
||||||
|
|
||||||
# Update the neutralized text to use the existing UUID
|
|
||||||
result.data = result.data.replace(neutralized_text, f"[{existing_pattern_type}.{existing_uuid}]")
|
|
||||||
result.mapping[original_text] = f"[{existing_pattern_type}.{existing_uuid}]"
|
|
||||||
|
|
||||||
stored_attributes.append(existing_attribute)
|
|
||||||
else:
|
|
||||||
# Create new attribute record with the UUID that the neutralizer generated
|
|
||||||
attribute_data = {
|
|
||||||
"id": placeholder_uuid, # Use the UUID from the neutralizer
|
|
||||||
"mandateId": self.mandateId,
|
|
||||||
"userId": self.userId,
|
|
||||||
"originalText": original_text,
|
|
||||||
"fileId": file_id,
|
|
||||||
"patternType": pattern_type
|
|
||||||
}
|
|
||||||
|
|
||||||
attribute = DataNeutralizerAttributes.from_dict(attribute_data)
|
|
||||||
created_attribute = self.db.recordCreate(DataNeutralizerAttributes, attribute)
|
|
||||||
stored_attributes.append(created_attribute)
|
|
||||||
|
|
||||||
|
|
||||||
# The neutralized text is already in the correct [type.uuid] format
|
|
||||||
# No need to replace it, as it's already properly formatted
|
|
||||||
|
|
||||||
return {
|
|
||||||
"neutralized_text": result.data,
|
|
||||||
"attributes": stored_attributes,
|
|
||||||
"mapping": result.mapping,
|
|
||||||
"replaced_fields": result.replaced_fields,
|
|
||||||
"processed_info": result.processed_info
|
|
||||||
}
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error neutralizing text: {str(e)}")
|
|
||||||
raise ValueError(f"Failed to neutralize text: {str(e)}")
|
|
||||||
|
|
||||||
def getExistingPlaceholder(self, original_text: str) -> Optional[DataNeutralizerAttributes]:
|
|
||||||
"""Get existing placeholder for original text if it exists"""
|
|
||||||
try:
|
|
||||||
existing_attributes = self.db.getRecordset(DataNeutralizerAttributes, recordFilter={
|
|
||||||
"mandateId": self.mandateId,
|
|
||||||
"userId": self.userId,
|
|
||||||
"originalText": original_text
|
|
||||||
})
|
|
||||||
|
|
||||||
if existing_attributes:
|
|
||||||
return DataNeutralizerAttributes.from_dict(existing_attributes[0])
|
|
||||||
return None
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error getting existing placeholder: {str(e)}")
|
|
||||||
return None
|
|
||||||
|
|
||||||
def getNeutralizationAttributes(self, file_id: Optional[str] = None) -> List[DataNeutralizerAttributes]:
|
def getNeutralizationAttributes(self, file_id: Optional[str] = None) -> List[DataNeutralizerAttributes]:
|
||||||
"""Get neutralization attributes, optionally filtered by file ID"""
|
"""Get neutralization attributes, optionally filtered by file ID"""
|
||||||
try:
|
try:
|
||||||
|
|
@ -1246,35 +1038,6 @@ class AppObjects:
|
||||||
logger.error(f"Error getting neutralization attributes: {str(e)}")
|
logger.error(f"Error getting neutralization attributes: {str(e)}")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
def resolveNeutralizedText(self, text: str) -> str:
|
|
||||||
"""Resolve UIDs in neutralized text back to original text"""
|
|
||||||
try:
|
|
||||||
# Find all placeholders in the new format [type.uuid]
|
|
||||||
placeholder_pattern = r'\[([a-z]+)\.([a-f0-9-]{36})\]'
|
|
||||||
matches = re.findall(placeholder_pattern, text)
|
|
||||||
|
|
||||||
resolved_text = text
|
|
||||||
for placeholder_type, uid in matches:
|
|
||||||
# Find the attribute with this UID (which is the record ID)
|
|
||||||
attributes = self.db.getRecordset(DataNeutralizerAttributes, recordFilter={
|
|
||||||
"mandateId": self.mandateId,
|
|
||||||
"id": uid
|
|
||||||
})
|
|
||||||
|
|
||||||
if attributes:
|
|
||||||
attribute = attributes[0]
|
|
||||||
# Replace placeholder with original text
|
|
||||||
placeholder = f"[{placeholder_type}.{uid}]"
|
|
||||||
resolved_text = resolved_text.replace(placeholder, attribute["originalText"])
|
|
||||||
else:
|
|
||||||
logger.warning(f"No attribute found for UID {uid}")
|
|
||||||
|
|
||||||
return resolved_text
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error resolving neutralized text: {str(e)}")
|
|
||||||
return text
|
|
||||||
|
|
||||||
def deleteNeutralizationAttributes(self, file_id: str) -> bool:
|
def deleteNeutralizationAttributes(self, file_id: str) -> bool:
|
||||||
"""Delete all neutralization attributes for a specific file"""
|
"""Delete all neutralization attributes for a specific file"""
|
||||||
try:
|
try:
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,7 @@ import pandas as pd
|
||||||
import openpyxl
|
import openpyxl
|
||||||
from modules.shared.timezoneUtils import get_utc_now
|
from modules.shared.timezoneUtils import get_utc_now
|
||||||
|
|
||||||
from modules.connectors.connectorSharepoint import ConnectorSharepoint
|
from modules.services.serviceSharepoint.mainSharepoint import SharepointService
|
||||||
|
|
||||||
from modules.interfaces.interfaceTicketModel import TicketBase, Task
|
from modules.interfaces.interfaceTicketModel import TicketBase, Task
|
||||||
|
|
||||||
|
|
@ -14,7 +14,7 @@ from modules.interfaces.interfaceTicketModel import TicketBase, Task
|
||||||
@dataclass(slots=True)
|
@dataclass(slots=True)
|
||||||
class TicketSharepointSyncInterface:
|
class TicketSharepointSyncInterface:
|
||||||
connector_ticket: TicketBase
|
connector_ticket: TicketBase
|
||||||
connector_sharepoint: ConnectorSharepoint
|
connector_sharepoint: SharepointService
|
||||||
task_sync_definition: dict
|
task_sync_definition: dict
|
||||||
sync_folder: str
|
sync_folder: str
|
||||||
sync_file: str
|
sync_file: str
|
||||||
|
|
@ -26,7 +26,7 @@ class TicketSharepointSyncInterface:
|
||||||
async def create(
|
async def create(
|
||||||
cls,
|
cls,
|
||||||
connector_ticket: TicketBase,
|
connector_ticket: TicketBase,
|
||||||
connector_sharepoint: ConnectorSharepoint,
|
connector_sharepoint: SharepointService,
|
||||||
task_sync_definition: dict,
|
task_sync_definition: dict,
|
||||||
sync_folder: str,
|
sync_folder: str,
|
||||||
sync_file: str,
|
sync_file: str,
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,7 @@ from modules.security.auth import limiter, getCurrentUser
|
||||||
|
|
||||||
# Import interfaces
|
# Import interfaces
|
||||||
from modules.interfaces.interfaceAppModel import User, DataNeutraliserConfig, DataNeutralizerAttributes
|
from modules.interfaces.interfaceAppModel import User, DataNeutraliserConfig, DataNeutralizerAttributes
|
||||||
from modules.features.neutralizePlayground.mainNeutralizePlayground import NeutralizationService
|
from modules.features.neutralization.mainNeutralizationPlayground import NeutralizationService
|
||||||
|
|
||||||
# Configure logger
|
# Configure logger
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
|
||||||
|
|
@ -339,7 +339,7 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse
|
||||||
)
|
)
|
||||||
|
|
||||||
# Create JWT token data (like Microsoft does)
|
# Create JWT token data (like Microsoft does)
|
||||||
from modules.security.auth import createAccessToken
|
from modules.security.jwtService import createAccessToken
|
||||||
jwt_token_data = {
|
jwt_token_data = {
|
||||||
"sub": user.username,
|
"sub": user.username,
|
||||||
"mandateId": str(user.mandateId),
|
"mandateId": str(user.mandateId),
|
||||||
|
|
@ -637,8 +637,9 @@ async def verify_token(
|
||||||
detail="No Google connection found for current user"
|
detail="No Google connection found for current user"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Get the current token
|
# Get a fresh token via TokenManager convenience method
|
||||||
current_token = appInterface.getConnectionToken(google_connection.id, auto_refresh=False)
|
from modules.security.tokenManager import TokenManager
|
||||||
|
current_token = TokenManager().getFreshToken(appInterface, google_connection.id)
|
||||||
|
|
||||||
if not current_token:
|
if not current_token:
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
|
|
@ -646,20 +647,9 @@ async def verify_token(
|
||||||
detail="No Google token found for this connection"
|
detail="No Google token found for this connection"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Verify the token
|
# Verify the (fresh) token
|
||||||
token_verification = await verify_google_token(current_token.tokenAccess)
|
token_verification = await verify_google_token(current_token.tokenAccess)
|
||||||
|
|
||||||
if not token_verification.get("valid"):
|
|
||||||
# Try to refresh the token if verification failed
|
|
||||||
from modules.security.tokenManager import TokenManager
|
|
||||||
token_manager = TokenManager()
|
|
||||||
refreshed_token = token_manager.refresh_token(current_token)
|
|
||||||
|
|
||||||
if refreshed_token:
|
|
||||||
appInterface.saveConnectionToken(refreshed_token)
|
|
||||||
# Verify the refreshed token
|
|
||||||
token_verification = await verify_google_token(refreshed_token.tokenAccess)
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"valid": token_verification.get("valid", False),
|
"valid": token_verification.get("valid", False),
|
||||||
"scopes": token_verification.get("scopes", []),
|
"scopes": token_verification.get("scopes", []),
|
||||||
|
|
@ -721,8 +711,9 @@ async def refresh_token(
|
||||||
|
|
||||||
logger.debug(f"Found Google connection: {google_connection.id}, status={google_connection.status}")
|
logger.debug(f"Found Google connection: {google_connection.id}, status={google_connection.status}")
|
||||||
|
|
||||||
# Get the token for this specific connection using the new method
|
# Get the token for this specific connection (fresh if expiring soon)
|
||||||
current_token = appInterface.getConnectionToken(google_connection.id, auto_refresh=False)
|
from modules.security.tokenManager import TokenManager
|
||||||
|
current_token = TokenManager().getFreshToken(appInterface, google_connection.id)
|
||||||
|
|
||||||
if not current_token:
|
if not current_token:
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
|
|
@ -731,38 +722,25 @@ async def refresh_token(
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# If we could not obtain a fresh token, report error
|
||||||
|
if not current_token:
|
||||||
|
raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="Failed to refresh token")
|
||||||
|
|
||||||
# Always attempt refresh (as per your requirement)
|
# Update the connection status and timing
|
||||||
from modules.security.tokenManager import TokenManager
|
google_connection.expiresAt = float(current_token.expiresAt) if current_token.expiresAt else google_connection.expiresAt
|
||||||
token_manager = TokenManager()
|
|
||||||
|
|
||||||
refreshed_token = token_manager.refresh_token(current_token)
|
|
||||||
if refreshed_token:
|
|
||||||
# Save the new connection token (which will automatically replace old ones)
|
|
||||||
appInterface.saveConnectionToken(refreshed_token)
|
|
||||||
|
|
||||||
# Update the connection's expiration time
|
|
||||||
google_connection.expiresAt = float(refreshed_token.expiresAt)
|
|
||||||
google_connection.lastChecked = get_utc_timestamp()
|
google_connection.lastChecked = get_utc_timestamp()
|
||||||
google_connection.status = ConnectionStatus.ACTIVE
|
google_connection.status = ConnectionStatus.ACTIVE
|
||||||
|
|
||||||
# Save updated connection
|
|
||||||
appInterface.db.recordModify(UserConnection, google_connection.id, google_connection.to_dict())
|
appInterface.db.recordModify(UserConnection, google_connection.id, google_connection.to_dict())
|
||||||
|
|
||||||
# Calculate time until expiration
|
# Calculate time until expiration
|
||||||
current_time = get_utc_timestamp()
|
current_time = get_utc_timestamp()
|
||||||
expires_in = int(refreshed_token.expiresAt - current_time)
|
expires_in = int(current_token.expiresAt - current_time) if current_token.expiresAt else 0
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"message": "Token refreshed successfully",
|
"message": "Token refreshed successfully",
|
||||||
"expires_at": refreshed_token.expiresAt,
|
"expires_at": current_token.expiresAt,
|
||||||
"expires_in_seconds": expires_in
|
"expires_in_seconds": expires_in
|
||||||
}
|
}
|
||||||
else:
|
|
||||||
raise HTTPException(
|
|
||||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
|
||||||
detail="Failed to refresh token"
|
|
||||||
)
|
|
||||||
|
|
||||||
except HTTPException:
|
except HTTPException:
|
||||||
raise
|
raise
|
||||||
|
|
|
||||||
|
|
@ -13,7 +13,8 @@ from jose import jwt
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
# Import auth modules
|
# Import auth modules
|
||||||
from modules.security.auth import createAccessToken, createAccessTokenWithCookie, setRefreshTokenCookie, getCurrentUser, limiter, SECRET_KEY, ALGORITHM
|
from modules.security.auth import getCurrentUser, limiter, SECRET_KEY, ALGORITHM
|
||||||
|
from modules.security.jwtService import createAccessToken, createRefreshToken, setAccessTokenCookie, setRefreshTokenCookie
|
||||||
from modules.interfaces.interfaceAppObjects import getInterface, getRootInterface
|
from modules.interfaces.interfaceAppObjects import getInterface, getRootInterface
|
||||||
from modules.interfaces.interfaceAppModel import User, UserInDB, AuthAuthority, UserPrivilege, Token
|
from modules.interfaces.interfaceAppModel import User, UserInDB, AuthAuthority, UserPrivilege, Token
|
||||||
from modules.shared.attributeUtils import ModelMixin
|
from modules.shared.attributeUtils import ModelMixin
|
||||||
|
|
@ -91,11 +92,13 @@ async def login(
|
||||||
session_id = str(uuid.uuid4())
|
session_id = str(uuid.uuid4())
|
||||||
token_data["sid"] = session_id
|
token_data["sid"] = session_id
|
||||||
|
|
||||||
# Create access token with httpOnly cookie
|
# Create access token + set cookie
|
||||||
access_token = createAccessTokenWithCookie(token_data, response)
|
access_token, _access_expires = createAccessToken(token_data)
|
||||||
|
setAccessTokenCookie(response, access_token)
|
||||||
|
|
||||||
# Create refresh token with httpOnly cookie
|
# Create refresh token + set cookie
|
||||||
refresh_token = setRefreshTokenCookie(token_data, response)
|
refresh_token, _refresh_expires = createRefreshToken(token_data)
|
||||||
|
setRefreshTokenCookie(response, refresh_token)
|
||||||
|
|
||||||
# Get expiration time for response
|
# Get expiration time for response
|
||||||
try:
|
try:
|
||||||
|
|
@ -287,8 +290,9 @@ async def refresh_token(
|
||||||
"authenticationAuthority": currentUser.authenticationAuthority
|
"authenticationAuthority": currentUser.authenticationAuthority
|
||||||
}
|
}
|
||||||
|
|
||||||
# Create new access token with cookie
|
# Create new access token + set cookie
|
||||||
access_token = createAccessTokenWithCookie(token_data, response)
|
access_token, _expires = createAccessToken(token_data)
|
||||||
|
setAccessTokenCookie(response, access_token)
|
||||||
|
|
||||||
# Get expiration time
|
# Get expiration time
|
||||||
try:
|
try:
|
||||||
|
|
|
||||||
|
|
@ -14,7 +14,8 @@ import httpx
|
||||||
from modules.shared.configuration import APP_CONFIG
|
from modules.shared.configuration import APP_CONFIG
|
||||||
from modules.interfaces.interfaceAppObjects import getInterface, getRootInterface
|
from modules.interfaces.interfaceAppObjects import getInterface, getRootInterface
|
||||||
from modules.interfaces.interfaceAppModel import AuthAuthority, User, Token, ConnectionStatus, UserConnection
|
from modules.interfaces.interfaceAppModel import AuthAuthority, User, Token, ConnectionStatus, UserConnection
|
||||||
from modules.security.auth import getCurrentUser, limiter, createAccessToken
|
from modules.security.auth import getCurrentUser, limiter
|
||||||
|
from modules.security.jwtService import createAccessToken
|
||||||
from modules.shared.attributeUtils import ModelMixin
|
from modules.shared.attributeUtils import ModelMixin
|
||||||
from modules.shared.timezoneUtils import get_utc_now, create_expiration_timestamp, get_utc_timestamp
|
from modules.shared.timezoneUtils import get_utc_now, create_expiration_timestamp, get_utc_timestamp
|
||||||
|
|
||||||
|
|
@ -559,9 +560,9 @@ async def refresh_token(
|
||||||
|
|
||||||
logger.debug(f"Found Microsoft connection: {msft_connection.id}, status={msft_connection.status}")
|
logger.debug(f"Found Microsoft connection: {msft_connection.id}, status={msft_connection.status}")
|
||||||
|
|
||||||
# Get the token for this specific connection using the new method
|
# Get a fresh token via TokenManager convenience method
|
||||||
# Enable auto-refresh to handle expired tokens gracefully
|
from modules.security.tokenManager import TokenManager
|
||||||
current_token = appInterface.getConnectionToken(msft_connection.id, auto_refresh=True)
|
current_token = TokenManager().getFreshToken(appInterface, msft_connection.id)
|
||||||
|
|
||||||
if not current_token:
|
if not current_token:
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
|
|
|
||||||
|
|
@ -54,106 +54,7 @@ limiter = Limiter(key_func=get_remote_address)
|
||||||
# Logger
|
# Logger
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
def createAccessToken(data: dict, expiresDelta: Optional[timedelta] = None) -> Tuple[str, datetime]:
|
# Note: JWT creation and cookie helpers moved to modules.security.jwtService
|
||||||
"""
|
|
||||||
Creates a JWT Access Token.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
data: Data to encode (usually user ID or username)
|
|
||||||
expiresDelta: Validity duration of the token (optional)
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Tuple of (JWT Token as string, expiration datetime)
|
|
||||||
"""
|
|
||||||
toEncode = data.copy()
|
|
||||||
# Ensure a token id (jti) exists for revocation tracking (only required for local, harmless otherwise)
|
|
||||||
if "jti" not in toEncode or not toEncode.get("jti"):
|
|
||||||
toEncode["jti"] = str(uuid.uuid4())
|
|
||||||
|
|
||||||
if expiresDelta:
|
|
||||||
expire = get_utc_now() + expiresDelta
|
|
||||||
else:
|
|
||||||
expire = get_utc_now() + timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
|
|
||||||
|
|
||||||
toEncode.update({"exp": expire})
|
|
||||||
encodedJwt = jwt.encode(toEncode, SECRET_KEY, algorithm=ALGORITHM)
|
|
||||||
|
|
||||||
return encodedJwt, expire
|
|
||||||
|
|
||||||
def createAccessTokenWithCookie(data: dict, response: Response, expiresDelta: Optional[timedelta] = None) -> str:
|
|
||||||
"""
|
|
||||||
Creates a JWT Access Token and sets it as an httpOnly cookie.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
data: Data to encode (usually user ID or username)
|
|
||||||
response: FastAPI Response object to set cookie
|
|
||||||
expiresDelta: Validity duration of the token (optional)
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
JWT Token as string
|
|
||||||
"""
|
|
||||||
access_token, expires_at = createAccessToken(data, expiresDelta)
|
|
||||||
|
|
||||||
# Set httpOnly cookie
|
|
||||||
response.set_cookie(
|
|
||||||
key="auth_token",
|
|
||||||
value=access_token,
|
|
||||||
httponly=True,
|
|
||||||
secure=True, # HTTPS only in production
|
|
||||||
samesite="strict",
|
|
||||||
max_age=int(expiresDelta.total_seconds()) if expiresDelta else ACCESS_TOKEN_EXPIRE_MINUTES * 60
|
|
||||||
)
|
|
||||||
|
|
||||||
return access_token
|
|
||||||
|
|
||||||
def createRefreshToken(data: dict) -> Tuple[str, datetime]:
|
|
||||||
"""
|
|
||||||
Creates a JWT Refresh Token with longer expiration.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
data: Data to encode (usually user ID or username)
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Tuple of (JWT Refresh Token as string, expiration datetime)
|
|
||||||
"""
|
|
||||||
toEncode = data.copy()
|
|
||||||
# Ensure a token id (jti) exists for revocation tracking
|
|
||||||
if "jti" not in toEncode or not toEncode.get("jti"):
|
|
||||||
toEncode["jti"] = str(uuid.uuid4())
|
|
||||||
|
|
||||||
# Add refresh token type
|
|
||||||
toEncode["type"] = "refresh"
|
|
||||||
|
|
||||||
expire = get_utc_now() + timedelta(days=REFRESH_TOKEN_EXPIRE_DAYS)
|
|
||||||
toEncode.update({"exp": expire})
|
|
||||||
encodedJwt = jwt.encode(toEncode, SECRET_KEY, algorithm=ALGORITHM)
|
|
||||||
|
|
||||||
return encodedJwt, expire
|
|
||||||
|
|
||||||
def setRefreshTokenCookie(data: dict, response: Response) -> str:
|
|
||||||
"""
|
|
||||||
Creates a JWT Refresh Token and sets it as an httpOnly cookie.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
data: Data to encode (usually user ID or username)
|
|
||||||
response: FastAPI Response object to set cookie
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
JWT Refresh Token as string
|
|
||||||
"""
|
|
||||||
refresh_token, expires_at = createRefreshToken(data)
|
|
||||||
|
|
||||||
# Set httpOnly cookie for refresh token
|
|
||||||
response.set_cookie(
|
|
||||||
key="refresh_token",
|
|
||||||
value=refresh_token,
|
|
||||||
httponly=True,
|
|
||||||
secure=True, # HTTPS only in production
|
|
||||||
samesite="strict",
|
|
||||||
max_age=REFRESH_TOKEN_EXPIRE_DAYS * 24 * 60 * 60 # Days to seconds
|
|
||||||
)
|
|
||||||
|
|
||||||
return refresh_token
|
|
||||||
|
|
||||||
def _getUserBase(token: str = Depends(cookieAuth)) -> User:
|
def _getUserBase(token: str = Depends(cookieAuth)) -> User:
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
72
modules/security/jwtService.py
Normal file
72
modules/security/jwtService.py
Normal file
|
|
@ -0,0 +1,72 @@
|
||||||
|
"""
|
||||||
|
JWT Service
|
||||||
|
Centralizes local JWT creation and cookie helpers.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from datetime import timedelta
|
||||||
|
from typing import Optional, Tuple
|
||||||
|
from fastapi import Response
|
||||||
|
from jose import jwt
|
||||||
|
|
||||||
|
from modules.shared.configuration import APP_CONFIG
|
||||||
|
from modules.shared.timezoneUtils import get_utc_now
|
||||||
|
|
||||||
|
# Config
|
||||||
|
SECRET_KEY = APP_CONFIG.get("APP_JWT_KEY_SECRET")
|
||||||
|
ALGORITHM = APP_CONFIG.get("Auth_ALGORITHM")
|
||||||
|
ACCESS_TOKEN_EXPIRE_MINUTES = int(APP_CONFIG.get("APP_TOKEN_EXPIRY"))
|
||||||
|
REFRESH_TOKEN_EXPIRE_DAYS = int(APP_CONFIG.get("APP_REFRESH_TOKEN_EXPIRY", "7"))
|
||||||
|
|
||||||
|
|
||||||
|
def createAccessToken(data: dict, expiresDelta: Optional[timedelta] = None) -> Tuple[str, "datetime"]:
|
||||||
|
"""Create a JWT access token and return (token, expiresAt)."""
|
||||||
|
toEncode = data.copy()
|
||||||
|
if "jti" not in toEncode or not toEncode.get("jti"):
|
||||||
|
import uuid
|
||||||
|
toEncode["jti"] = str(uuid.uuid4())
|
||||||
|
|
||||||
|
expire = get_utc_now() + (expiresDelta if expiresDelta else timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES))
|
||||||
|
toEncode.update({"exp": expire})
|
||||||
|
encodedJwt = jwt.encode(toEncode, SECRET_KEY, algorithm=ALGORITHM)
|
||||||
|
return encodedJwt, expire
|
||||||
|
|
||||||
|
|
||||||
|
def createRefreshToken(data: dict) -> Tuple[str, "datetime"]:
|
||||||
|
"""Create a JWT refresh token and return (token, expiresAt)."""
|
||||||
|
toEncode = data.copy()
|
||||||
|
if "jti" not in toEncode or not toEncode.get("jti"):
|
||||||
|
import uuid
|
||||||
|
toEncode["jti"] = str(uuid.uuid4())
|
||||||
|
toEncode["type"] = "refresh"
|
||||||
|
|
||||||
|
expire = get_utc_now() + timedelta(days=REFRESH_TOKEN_EXPIRE_DAYS)
|
||||||
|
toEncode.update({"exp": expire})
|
||||||
|
encodedJwt = jwt.encode(toEncode, SECRET_KEY, algorithm=ALGORITHM)
|
||||||
|
return encodedJwt, expire
|
||||||
|
|
||||||
|
|
||||||
|
def setAccessTokenCookie(response: Response, token: str, expiresDelta: Optional[timedelta] = None) -> None:
|
||||||
|
"""Set access token as httpOnly cookie."""
|
||||||
|
maxAge = int(expiresDelta.total_seconds()) if expiresDelta else ACCESS_TOKEN_EXPIRE_MINUTES * 60
|
||||||
|
response.set_cookie(
|
||||||
|
key="auth_token",
|
||||||
|
value=token,
|
||||||
|
httponly=True,
|
||||||
|
secure=True,
|
||||||
|
samesite="strict",
|
||||||
|
max_age=maxAge
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def setRefreshTokenCookie(response: Response, token: str) -> None:
|
||||||
|
"""Set refresh token as httpOnly cookie."""
|
||||||
|
response.set_cookie(
|
||||||
|
key="refresh_token",
|
||||||
|
value=token,
|
||||||
|
httponly=True,
|
||||||
|
secure=True,
|
||||||
|
samesite="strict",
|
||||||
|
max_age=REFRESH_TOKEN_EXPIRE_DAYS * 24 * 60 * 60
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -6,7 +6,7 @@ Handles all token operations including automatic refresh for backend services.
|
||||||
import logging
|
import logging
|
||||||
import httpx
|
import httpx
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from typing import Optional, Dict, Any
|
from typing import Optional, Dict, Any, Callable
|
||||||
|
|
||||||
from modules.interfaces.interfaceAppModel import Token, AuthAuthority
|
from modules.interfaces.interfaceAppModel import Token, AuthAuthority
|
||||||
from modules.shared.configuration import APP_CONFIG
|
from modules.shared.configuration import APP_CONFIG
|
||||||
|
|
@ -199,3 +199,65 @@ class TokenManager:
|
||||||
logger.error(f"Error refreshing token: {str(e)}")
|
logger.error(f"Error refreshing token: {str(e)}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
def ensure_fresh_token(self, token: Token, *, seconds_before_expiry: int = 30 * 60, save_callback: Optional[Callable[[Token], None]] = None) -> Optional[Token]:
|
||||||
|
"""Ensure a token is fresh; refresh if expiring within threshold.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
token: Existing token to validate/refresh.
|
||||||
|
seconds_before_expiry: Threshold window to proactively refresh.
|
||||||
|
save_callback: Optional function to persist a refreshed token.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
A fresh token (refreshed or original) or None if refresh failed.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
if token is None:
|
||||||
|
return None
|
||||||
|
|
||||||
|
now_ts = get_utc_timestamp()
|
||||||
|
expires_at = token.expiresAt or 0
|
||||||
|
|
||||||
|
# If token expires within the threshold, try to refresh
|
||||||
|
if expires_at and expires_at < (now_ts + seconds_before_expiry):
|
||||||
|
logger.info(
|
||||||
|
f"ensure_fresh_token: Token for connection {token.connectionId} expiring soon "
|
||||||
|
f"(in {max(0, expires_at - now_ts)}s). Attempting proactive refresh."
|
||||||
|
)
|
||||||
|
refreshed = self.refresh_token(token)
|
||||||
|
if refreshed:
|
||||||
|
if save_callback is not None:
|
||||||
|
try:
|
||||||
|
save_callback(refreshed)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"ensure_fresh_token: Failed to persist refreshed token: {e}")
|
||||||
|
return refreshed
|
||||||
|
else:
|
||||||
|
logger.warning("ensure_fresh_token: Token refresh failed")
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Token is sufficiently fresh
|
||||||
|
return token
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"ensure_fresh_token: Error ensuring fresh token: {e}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Convenience wrapper to fetch and ensure fresh token for a connection via interface layer
|
||||||
|
def getFreshToken(self, interfaceApp, connectionId: str, secondsBeforeExpiry: int = 30 * 60) -> Optional[Token]:
|
||||||
|
"""Return a fresh token for a connection, refreshing when expiring soon.
|
||||||
|
|
||||||
|
Reads the latest stored token via interfaceApp.getConnectionToken, then
|
||||||
|
uses ensure_fresh_token to refresh if needed and persists the refreshed
|
||||||
|
token via interfaceApp.saveConnectionToken.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
token = interfaceApp.getConnectionToken(connectionId)
|
||||||
|
if not token:
|
||||||
|
return None
|
||||||
|
return self.ensure_fresh_token(
|
||||||
|
token,
|
||||||
|
seconds_before_expiry=secondsBeforeExpiry,
|
||||||
|
save_callback=lambda t: interfaceApp.saveConnectionToken(t)
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"getFreshToken: Error fetching or refreshing token for connection {connectionId}: {e}")
|
||||||
|
return None
|
||||||
|
|
@ -51,8 +51,8 @@ class TokenRefreshService:
|
||||||
try:
|
try:
|
||||||
logger.debug(f"Refreshing Google token for connection {connection.id}")
|
logger.debug(f"Refreshing Google token for connection {connection.id}")
|
||||||
|
|
||||||
# Get current token
|
# Get current token (no refresh in interface layer)
|
||||||
current_token = interface.getConnectionToken(connection.id, auto_refresh=False)
|
current_token = interface.getConnectionToken(connection.id)
|
||||||
if not current_token:
|
if not current_token:
|
||||||
logger.warning(f"No Google token found for connection {connection.id}")
|
logger.warning(f"No Google token found for connection {connection.id}")
|
||||||
return False
|
return False
|
||||||
|
|
@ -100,8 +100,8 @@ class TokenRefreshService:
|
||||||
try:
|
try:
|
||||||
logger.debug(f"Refreshing Microsoft token for connection {connection.id}")
|
logger.debug(f"Refreshing Microsoft token for connection {connection.id}")
|
||||||
|
|
||||||
# Get current token
|
# Get current token (no refresh in interface layer)
|
||||||
current_token = interface.getConnectionToken(connection.id, auto_refresh=False)
|
current_token = interface.getConnectionToken(connection.id)
|
||||||
if not current_token:
|
if not current_token:
|
||||||
logger.warning(f"No Microsoft token found for connection {connection.id}")
|
logger.warning(f"No Microsoft token found for connection {connection.id}")
|
||||||
return False
|
return False
|
||||||
|
|
|
||||||
100
modules/services/__init__.py
Normal file
100
modules/services/__init__.py
Normal file
|
|
@ -0,0 +1,100 @@
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from modules.interfaces.interfaceAppModel import User
|
||||||
|
from modules.interfaces.interfaceChatModel import ChatWorkflow
|
||||||
|
from modules.services.serviceWorkflows.mainServiceWorkflows import WorkflowService
|
||||||
|
|
||||||
|
class PublicService:
|
||||||
|
"""Lightweight proxy exposing only public callable attributes of a target.
|
||||||
|
|
||||||
|
- Hides names starting with '_'
|
||||||
|
- Optionally restricts to callables only
|
||||||
|
- Optional name_filter predicate for allow-list patterns
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, target: Any, functions_only: bool = True, name_filter=None):
|
||||||
|
self._target = target
|
||||||
|
self._functions_only = functions_only
|
||||||
|
self._name_filter = name_filter
|
||||||
|
|
||||||
|
def __getattr__(self, name: str):
|
||||||
|
if name.startswith('_'):
|
||||||
|
raise AttributeError(f"'{type(self._target).__name__}' attribute '{name}' is private")
|
||||||
|
if self._name_filter and not self._name_filter(name):
|
||||||
|
raise AttributeError(f"'{name}' not exposed by policy")
|
||||||
|
attr = getattr(self._target, name)
|
||||||
|
if self._functions_only and not callable(attr):
|
||||||
|
raise AttributeError(f"'{name}' is not a function")
|
||||||
|
return attr
|
||||||
|
|
||||||
|
def __dir__(self):
|
||||||
|
names = [
|
||||||
|
n for n in dir(self._target)
|
||||||
|
if not n.startswith('_')
|
||||||
|
and (not self._functions_only or callable(getattr(self._target, n, None)))
|
||||||
|
and (self._name_filter(n) if self._name_filter else True)
|
||||||
|
]
|
||||||
|
return sorted(names)
|
||||||
|
|
||||||
|
|
||||||
|
class Services:
|
||||||
|
|
||||||
|
def __init__(self, user: User, workflow: ChatWorkflow):
|
||||||
|
self.user: User = user
|
||||||
|
self.workflow: ChatWorkflow = workflow
|
||||||
|
|
||||||
|
# Directly expose existing service modules
|
||||||
|
|
||||||
|
from .serviceDocument.mainServiceDocumentExtraction import DocumentExtractionService
|
||||||
|
self.document = PublicService(DocumentExtractionService(self))
|
||||||
|
|
||||||
|
from .serviceDocument.mainServiceDocumentGeneration import DocumentGenerationService
|
||||||
|
self.document = PublicService(DocumentGenerationService(self))
|
||||||
|
|
||||||
|
from .serviceNeutralization.mainNeutralization import NeutralizationService
|
||||||
|
self.neutralization = PublicService(NeutralizationService())
|
||||||
|
|
||||||
|
from .serviceSharepoint.mainSharepoint import SharePointService
|
||||||
|
self.sharepoint = PublicService(SharePointService(self))
|
||||||
|
|
||||||
|
from .serviceAi.mainServiceAi import AiService
|
||||||
|
self.ai = PublicService(AiService(self))
|
||||||
|
|
||||||
|
from .serviceWorkflows.mainServiceWorkflows import WorkflowService
|
||||||
|
self.workflow = PublicService(WorkflowService(self))
|
||||||
|
|
||||||
|
# Initialize chat interface for workflow operations
|
||||||
|
from modules.interfaces.interfaceChatObjects import getInterface as getChatInterface
|
||||||
|
self.chatInterface = getChatInterface(user)
|
||||||
|
|
||||||
|
# Chat interface wrapper methods
|
||||||
|
def getWorkflow(self, workflowId: str):
|
||||||
|
return self.chatInterface.getWorkflow(workflowId)
|
||||||
|
|
||||||
|
def createWorkflow(self, workflowData: dict):
|
||||||
|
return self.chatInterface.createWorkflow(workflowData)
|
||||||
|
|
||||||
|
def updateWorkflow(self, workflowId: str, workflowData: dict):
|
||||||
|
return self.chatInterface.updateWorkflow(workflowId, workflowData)
|
||||||
|
|
||||||
|
def createMessage(self, messageData: dict):
|
||||||
|
return self.chatInterface.createMessage(messageData)
|
||||||
|
|
||||||
|
def updateMessage(self, messageId: str, messageData: dict):
|
||||||
|
return self.chatInterface.updateMessage(messageId, messageData)
|
||||||
|
|
||||||
|
def createLog(self, logData: dict):
|
||||||
|
return self.chatInterface.createLog(logData)
|
||||||
|
|
||||||
|
def updateWorkflowStats(self, workflowId: str, bytesSent: int = 0, bytesReceived: int = 0, tokenCount: int = 0):
|
||||||
|
return self.chatInterface.updateWorkflowStats(workflowId, bytesSent, bytesReceived, tokenCount)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def mandateId(self):
|
||||||
|
return self.chatInterface.mandateId
|
||||||
|
|
||||||
|
|
||||||
|
def getInterface(user: User, workflow: ChatWorkflow) -> Services:
|
||||||
|
return Services(user, workflow)
|
||||||
|
|
||||||
|
|
||||||
137
modules/services/serviceAi/mainServiceAi.py
Normal file
137
modules/services/serviceAi/mainServiceAi.py
Normal file
|
|
@ -0,0 +1,137 @@
|
||||||
|
import logging
|
||||||
|
from typing import Dict, Any, List, Optional, Tuple
|
||||||
|
|
||||||
|
from modules.interfaces.interfaceChatModel import ChatDocument
|
||||||
|
from modules.services.serviceDocument.documentExtraction import DocumentExtractionService
|
||||||
|
from modules.interfaces.interfaceAiModel import AiCallRequest, AiCallOptions
|
||||||
|
from modules.interfaces.interfaceAiObjects import AiObjects
|
||||||
|
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
# Model registry is now provided by interfaces via AiModels
|
||||||
|
|
||||||
|
|
||||||
|
class AiService:
|
||||||
|
"""Centralized AI service orchestrating documents, model selection and failover.
|
||||||
|
|
||||||
|
The concrete connector instances (OpenAI/Anthropic) are injected by the interface layer.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, aiObjects: AiObjects | None = None) -> None:
|
||||||
|
# Only depend on interfaces
|
||||||
|
self.aiObjects = aiObjects or AiObjects()
|
||||||
|
self.documentExtractor = DocumentExtractionService()
|
||||||
|
|
||||||
|
async def callAi(
|
||||||
|
self,
|
||||||
|
prompt: str,
|
||||||
|
documents: Optional[List[ChatDocument]] = None,
|
||||||
|
processDocumentsIndividually: bool = False,
|
||||||
|
options: Optional[AiCallOptions] = None,
|
||||||
|
) -> str:
|
||||||
|
try:
|
||||||
|
documentContent = ""
|
||||||
|
if documents:
|
||||||
|
documentContent = await self._processDocumentsForAi(
|
||||||
|
documents,
|
||||||
|
options.operationType if options else "general",
|
||||||
|
options.compressContext if options else True,
|
||||||
|
processDocumentsIndividually,
|
||||||
|
)
|
||||||
|
|
||||||
|
effectiveOptions = options or AiCallOptions()
|
||||||
|
request = AiCallRequest(
|
||||||
|
prompt=prompt,
|
||||||
|
context=documentContent or None,
|
||||||
|
options=effectiveOptions,
|
||||||
|
)
|
||||||
|
|
||||||
|
response = await self.aiObjects.call(request)
|
||||||
|
return response.content
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error in centralized AI call: {str(e)}")
|
||||||
|
return f"Error: {str(e)}"
|
||||||
|
|
||||||
|
# Model selection now handled by interface AiObjects
|
||||||
|
|
||||||
|
# Cost estimation handled by interface for model selection
|
||||||
|
|
||||||
|
async def _processDocumentsForAi(
|
||||||
|
self,
|
||||||
|
documents: List[ChatDocument],
|
||||||
|
operationType: str,
|
||||||
|
compressDocuments: bool,
|
||||||
|
processIndividually: bool,
|
||||||
|
) -> str:
|
||||||
|
if not documents:
|
||||||
|
return ""
|
||||||
|
|
||||||
|
processedContents: List[str] = []
|
||||||
|
for doc in documents:
|
||||||
|
try:
|
||||||
|
extracted = await self.documentExtractor.processFileData(
|
||||||
|
doc.fileData,
|
||||||
|
doc.fileName,
|
||||||
|
doc.mimeType,
|
||||||
|
prompt=f"Extract relevant content for {operationType}",
|
||||||
|
documentId=doc.id,
|
||||||
|
enableAI=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
docContent: List[str] = []
|
||||||
|
for contentItem in extracted.contents:
|
||||||
|
if contentItem.data and contentItem.data.strip():
|
||||||
|
docContent.append(contentItem.data)
|
||||||
|
|
||||||
|
if docContent:
|
||||||
|
combinedDocContent = "\n\n".join(docContent)
|
||||||
|
if (
|
||||||
|
compressDocuments
|
||||||
|
and len(combinedDocContent.encode("utf-8")) > 10000
|
||||||
|
):
|
||||||
|
combinedDocContent = await self._compressContent(
|
||||||
|
combinedDocContent, 10000, "document"
|
||||||
|
)
|
||||||
|
processedContents.append(
|
||||||
|
f"Document: {doc.fileName}\n{combinedDocContent}"
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(
|
||||||
|
f"Error processing document {doc.fileName}: {str(e)}"
|
||||||
|
)
|
||||||
|
processedContents.append(
|
||||||
|
f"Document: {doc.fileName}\n[Error processing document: {str(e)}]"
|
||||||
|
)
|
||||||
|
|
||||||
|
return "\n\n---\n\n".join(processedContents)
|
||||||
|
|
||||||
|
# Prompt/context optimization (compression) handled by interface
|
||||||
|
|
||||||
|
async def _compressContent(self, content: str, targetSize: int, contentType: str) -> str:
|
||||||
|
if len(content.encode("utf-8")) <= targetSize:
|
||||||
|
return content
|
||||||
|
|
||||||
|
try:
|
||||||
|
compressionPrompt = f"""
|
||||||
|
Komprimiere den folgenden {contentType} auf maximal {targetSize} Zeichen,
|
||||||
|
behalte aber alle wichtigen Informationen bei:
|
||||||
|
|
||||||
|
{content}
|
||||||
|
|
||||||
|
Gib nur den komprimierten Inhalt zurück, ohne zusätzliche Erklärungen.
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Service must not call connectors directly; use simple truncation fallback here
|
||||||
|
data = content.encode("utf-8")
|
||||||
|
return data[:targetSize].decode("utf-8", errors="ignore") + "... [truncated]"
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"AI compression failed, using truncation: {str(e)}")
|
||||||
|
return content[:targetSize] + "... [truncated]"
|
||||||
|
|
||||||
|
# Failover logic now centralized in interface via model selection; service delegates a single call
|
||||||
|
|
||||||
|
# Fallback selection moved to interface; service doesn't select models directly
|
||||||
|
|
||||||
|
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -22,7 +22,7 @@ from modules.interfaces.interfaceChatModel import (
|
||||||
ContentItem,
|
ContentItem,
|
||||||
ContentMetadata
|
ContentMetadata
|
||||||
)
|
)
|
||||||
from modules.services.serviceNeutralization.neutralizer import DataAnonymizer
|
from modules.services.serviceNeutralization.mainNeutralization import NeutralizationService
|
||||||
from modules.shared.configuration import APP_CONFIG
|
from modules.shared.configuration import APP_CONFIG
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
@ -36,13 +36,16 @@ class FileProcessingError(Exception):
|
||||||
"""Custom exception for file processing errors."""
|
"""Custom exception for file processing errors."""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
class DocumentExtraction:
|
class DocumentExtractionService:
|
||||||
"""Processor for handling document operations and content extraction."""
|
"""Processor for handling document operations and content extraction."""
|
||||||
|
|
||||||
def __init__(self, serviceCenter=None):
|
def __init__(self, serviceCenter=None):
|
||||||
"""Initialize the document processor."""
|
"""Initialize the document processor."""
|
||||||
self._neutralizer = DataAnonymizer() if APP_CONFIG.get("ENABLE_CONTENT_NEUTRALIZATION", False) else None
|
self._neutralizer = NeutralizationService() if APP_CONFIG.get("ENABLE_CONTENT_NEUTRALIZATION", False) else None
|
||||||
self._serviceCenter = serviceCenter
|
self._serviceCenter = serviceCenter
|
||||||
|
# Centralized services interface (for AI)
|
||||||
|
from modules.services import getInterface as getServices
|
||||||
|
self.services = getServices(serviceCenter.user, serviceCenter.workflow)
|
||||||
|
|
||||||
self.supportedTypes: Dict[str, Callable[[bytes, str, str], Awaitable[List[ContentItem]]]] = {
|
self.supportedTypes: Dict[str, Callable[[bytes, str, str], Awaitable[List[ContentItem]]]] = {
|
||||||
# Text and data files
|
# Text and data files
|
||||||
|
|
@ -218,10 +221,6 @@ class DocumentExtraction:
|
||||||
# This should never be reached, but just in case
|
# This should never be reached, but just in case
|
||||||
raise FileProcessingError(f"Failed to decode {fileName} with any encoding")
|
raise FileProcessingError(f"Failed to decode {fileName} with any encoding")
|
||||||
|
|
||||||
def initialize(self) -> None:
|
|
||||||
"""Initialize the document processor."""
|
|
||||||
pass
|
|
||||||
|
|
||||||
def _loadPdfExtractor(self):
|
def _loadPdfExtractor(self):
|
||||||
"""Loads PDF extraction libraries when needed"""
|
"""Loads PDF extraction libraries when needed"""
|
||||||
global pdfExtractorLoaded
|
global pdfExtractorLoaded
|
||||||
|
|
@ -1132,18 +1131,18 @@ class DocumentExtraction:
|
||||||
# Create a basic content item explaining the limitation
|
# Create a basic content item explaining the limitation
|
||||||
info_content = f"""Legacy Word Document (.doc) - {fileName}
|
info_content = f"""Legacy Word Document (.doc) - {fileName}
|
||||||
|
|
||||||
Note: This is a legacy .doc format file. For better content extraction,
|
Note: This is a legacy .doc format file. For better content extraction,
|
||||||
consider converting to .docx format.
|
consider converting to .docx format.
|
||||||
|
|
||||||
File size: {len(fileData)} bytes
|
File size: {len(fileData)} bytes
|
||||||
Format: Microsoft Word 97-2003 Document
|
Format: Microsoft Word 97-2003 Document
|
||||||
|
|
||||||
Content extraction from .doc files requires specialized tools like:
|
Content extraction from .doc files requires specialized tools like:
|
||||||
- antiword (Linux/Unix)
|
- antiword (Linux/Unix)
|
||||||
- catdoc (Linux/Unix)
|
- catdoc (Linux/Unix)
|
||||||
- Microsoft Word (for conversion)
|
- Microsoft Word (for conversion)
|
||||||
|
|
||||||
The raw binary content is available but not human-readable."""
|
The raw binary content is available but not human-readable."""
|
||||||
|
|
||||||
contentItems.append(ContentItem(
|
contentItems.append(ContentItem(
|
||||||
label="info",
|
label="info",
|
||||||
|
|
@ -1183,18 +1182,18 @@ The raw binary content is available but not human-readable."""
|
||||||
# Create a basic content item explaining the limitation
|
# Create a basic content item explaining the limitation
|
||||||
info_content = f"""Legacy Excel Document (.xls) - {fileName}
|
info_content = f"""Legacy Excel Document (.xls) - {fileName}
|
||||||
|
|
||||||
Note: This is a legacy .xls format file. For better content extraction,
|
Note: This is a legacy .xls format file. For better content extraction,
|
||||||
consider converting to .xlsx format.
|
consider converting to .xlsx format.
|
||||||
|
|
||||||
File size: {len(fileData)} bytes
|
File size: {len(fileData)} bytes
|
||||||
Format: Microsoft Excel 97-2003 Workbook
|
Format: Microsoft Excel 97-2003 Workbook
|
||||||
|
|
||||||
Content extraction from .xls files requires specialized tools like:
|
Content extraction from .xls files requires specialized tools like:
|
||||||
- xlrd (Python library)
|
- xlrd (Python library)
|
||||||
- Microsoft Excel (for conversion)
|
- Microsoft Excel (for conversion)
|
||||||
- LibreOffice (for conversion)
|
- LibreOffice (for conversion)
|
||||||
|
|
||||||
The raw binary content is available but not human-readable."""
|
The raw binary content is available but not human-readable."""
|
||||||
|
|
||||||
contentItems.append(ContentItem(
|
contentItems.append(ContentItem(
|
||||||
label="info",
|
label="info",
|
||||||
|
|
@ -1234,18 +1233,18 @@ The raw binary content is available but not human-readable."""
|
||||||
# Create a basic content item explaining the limitation
|
# Create a basic content item explaining the limitation
|
||||||
info_content = f"""Legacy PowerPoint Document (.ppt) - {fileName}
|
info_content = f"""Legacy PowerPoint Document (.ppt) - {fileName}
|
||||||
|
|
||||||
Note: This is a legacy .ppt format file. For better content extraction,
|
Note: This is a legacy .ppt format file. For better content extraction,
|
||||||
consider converting to .pptx format.
|
consider converting to .pptx format.
|
||||||
|
|
||||||
File size: {len(fileData)} bytes
|
File size: {len(fileData)} bytes
|
||||||
Format: Microsoft PowerPoint 97-2003 Presentation
|
Format: Microsoft PowerPoint 97-2003 Presentation
|
||||||
|
|
||||||
Content extraction from .ppt files requires specialized tools like:
|
Content extraction from .ppt files requires specialized tools like:
|
||||||
- python-pptx (limited support for .ppt)
|
- python-pptx (limited support for .ppt)
|
||||||
- Microsoft PowerPoint (for conversion)
|
- Microsoft PowerPoint (for conversion)
|
||||||
- LibreOffice (for conversion)
|
- LibreOffice (for conversion)
|
||||||
|
|
||||||
The raw binary content is available but not human-readable."""
|
The raw binary content is available but not human-readable."""
|
||||||
|
|
||||||
contentItems.append(ContentItem(
|
contentItems.append(ContentItem(
|
||||||
label="info",
|
label="info",
|
||||||
|
|
@ -1417,11 +1416,7 @@ The raw binary content is available but not human-readable."""
|
||||||
# Process with AI based on content type
|
# Process with AI based on content type
|
||||||
try:
|
try:
|
||||||
if mimeType.startswith('image/') and mimeType != "image/svg+xml":
|
if mimeType.startswith('image/') and mimeType != "image/svg+xml":
|
||||||
# For images (excluding SVG), extract meaningful content as text
|
# For images (excluding SVG), analyze via centralized AI service
|
||||||
# Use AI to analyze the image and extract relevant information
|
|
||||||
|
|
||||||
|
|
||||||
# Create a specific prompt for image content extraction
|
|
||||||
imagePrompt = f"""
|
imagePrompt = f"""
|
||||||
Analyze this image and extract the actual content and information from it.
|
Analyze this image and extract the actual content and information from it.
|
||||||
Focus on extracting text, data, charts, diagrams, or any meaningful content.
|
Focus on extracting text, data, charts, diagrams, or any meaningful content.
|
||||||
|
|
@ -1430,8 +1425,19 @@ The raw binary content is available but not human-readable."""
|
||||||
|
|
||||||
Original prompt: {prompt}
|
Original prompt: {prompt}
|
||||||
"""
|
"""
|
||||||
|
from modules.interfaces.interfaceChatModel import ChatDocument
|
||||||
processedContent = await self._serviceCenter.callAiImageBasic(imagePrompt, chunk, mimeType)
|
image_doc = ChatDocument(fileData=chunk, fileName="image", mimeType=mimeType)
|
||||||
|
processedContent = await self.services.ai.callAi(
|
||||||
|
prompt=imagePrompt,
|
||||||
|
documents=[image_doc],
|
||||||
|
options={
|
||||||
|
"process_type": "image",
|
||||||
|
"operation_type": "analyse_content",
|
||||||
|
"priority": "balanced",
|
||||||
|
"compress_documents": True,
|
||||||
|
"max_cost": 0.03
|
||||||
|
}
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
# For text content (including SVG), use text AI service
|
# For text content (including SVG), use text AI service
|
||||||
# Neutralize content if neutralizer is enabled (only for text)
|
# Neutralize content if neutralizer is enabled (only for text)
|
||||||
|
|
@ -1456,7 +1462,36 @@ The raw binary content is available but not human-readable."""
|
||||||
# For code files, preserve the complete content without AI processing
|
# For code files, preserve the complete content without AI processing
|
||||||
processedContent = contentToProcess
|
processedContent = contentToProcess
|
||||||
else:
|
else:
|
||||||
processedContent = await self._serviceCenter.callAiTextBasic(aiPrompt, contentToProcess)
|
if self.services and hasattr(self.services, 'ai'):
|
||||||
|
processedContent = await self.services.ai.callAi(
|
||||||
|
prompt=aiPrompt,
|
||||||
|
documents=None,
|
||||||
|
options={
|
||||||
|
"process_type": "text",
|
||||||
|
"operation_type": "analyse_content",
|
||||||
|
"priority": "balanced",
|
||||||
|
"compress_prompt": True,
|
||||||
|
"compress_documents": False,
|
||||||
|
"processing_mode": "advanced",
|
||||||
|
"max_cost": 0.05,
|
||||||
|
"max_processing_time": 30
|
||||||
|
}
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
# Fallback to basic AI processing with centralized service
|
||||||
|
processedContent = await self.services.ai.callAi(
|
||||||
|
prompt=aiPrompt,
|
||||||
|
documents=None,
|
||||||
|
options={
|
||||||
|
"process_type": "text",
|
||||||
|
"operation_type": "analyse_content",
|
||||||
|
"priority": "speed",
|
||||||
|
"compress_prompt": True,
|
||||||
|
"compress_documents": False,
|
||||||
|
"max_cost": 0.01,
|
||||||
|
"max_processing_time": 15
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
chunkResults.append(processedContent)
|
chunkResults.append(processedContent)
|
||||||
except Exception as aiError:
|
except Exception as aiError:
|
||||||
|
|
@ -13,7 +13,7 @@ from modules.services.serviceDocument.documentUtility import (
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
class DocumentGenerator:
|
class DocumentGenerationService:
|
||||||
def __init__(self, service):
|
def __init__(self, service):
|
||||||
self.service = service
|
self.service = service
|
||||||
|
|
||||||
206
modules/services/serviceNeutralization/mainNeutralization.py
Normal file
206
modules/services/serviceNeutralization/mainNeutralization.py
Normal file
|
|
@ -0,0 +1,206 @@
|
||||||
|
"""
|
||||||
|
Data Neutralization Service
|
||||||
|
Handles file processing for data neutralization including SharePoint integration
|
||||||
|
DSGVO-konformer Daten-Neutralisierer für KI-Agentensysteme
|
||||||
|
Unterstützt TXT, JSON, CSV, Excel und Word-Dateien
|
||||||
|
Mehrsprachig: DE, EN, FR, IT
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
import os
|
||||||
|
import uuid
|
||||||
|
import json
|
||||||
|
from typing import Dict, List, Any, Optional, Tuple
|
||||||
|
from datetime import datetime
|
||||||
|
from pathlib import Path
|
||||||
|
import mimetypes
|
||||||
|
|
||||||
|
from modules.interfaces.interfaceAppObjects import getInterface
|
||||||
|
from modules.interfaces.interfaceAppModel import User, DataNeutraliserConfig, DataNeutralizerAttributes
|
||||||
|
from modules.shared.timezoneUtils import get_utc_timestamp
|
||||||
|
|
||||||
|
# Import all necessary classes and functions for neutralization
|
||||||
|
from modules.services.serviceNeutralization.subProcessCommon import ProcessResult, CommonUtils, NeutralizationResult, NeutralizationAttribute
|
||||||
|
from modules.services.serviceNeutralization.subProcessText import TextProcessor, PlainText
|
||||||
|
from modules.services.serviceNeutralization.subProcessList import ListProcessor, TableData
|
||||||
|
from modules.services.serviceNeutralization.subProcessBinary import BinaryProcessor, BinaryData
|
||||||
|
from modules.services.serviceNeutralization.subParseString import StringParser
|
||||||
|
from modules.services.serviceNeutralization.subPatterns import Pattern, HeaderPatterns, DataPatterns, TextTablePatterns
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
class NeutralizationService:
|
||||||
|
"""Service for handling data neutralization operations"""
|
||||||
|
|
||||||
|
def __init__(self, current_user: User = None, names_to_parse: List[str] = None):
|
||||||
|
"""Initialize the service with user context and anonymization processors
|
||||||
|
|
||||||
|
Args:
|
||||||
|
current_user: User object for context (optional for basic neutralization)
|
||||||
|
names_to_parse: List of names to parse and replace (case-insensitive)
|
||||||
|
"""
|
||||||
|
self.current_user = current_user
|
||||||
|
self.app_interface = getInterface(current_user) if current_user else None
|
||||||
|
|
||||||
|
# Initialize anonymization processors
|
||||||
|
self.names_to_parse = names_to_parse or []
|
||||||
|
self.textProcessor = TextProcessor(names_to_parse)
|
||||||
|
self.listProcessor = ListProcessor(names_to_parse)
|
||||||
|
self.binaryProcessor = BinaryProcessor()
|
||||||
|
self.commonUtils = CommonUtils()
|
||||||
|
|
||||||
|
def getConfig(self) -> Optional[DataNeutraliserConfig]:
|
||||||
|
"""Get the neutralization configuration for the current user's mandate"""
|
||||||
|
if not self.app_interface:
|
||||||
|
return None
|
||||||
|
return self.app_interface.getNeutralizationConfig()
|
||||||
|
|
||||||
|
def saveConfig(self, config_data: Dict[str, Any]) -> DataNeutraliserConfig:
|
||||||
|
"""Save or update the neutralization configuration"""
|
||||||
|
if not self.app_interface:
|
||||||
|
raise ValueError("User context required for saving configuration")
|
||||||
|
return self.app_interface.createOrUpdateNeutralizationConfig(config_data)
|
||||||
|
|
||||||
|
# Public API: process text or file
|
||||||
|
|
||||||
|
def processText(self, text: str) -> Dict[str, Any]:
|
||||||
|
"""Neutralize a raw text string and return a standard result dict."""
|
||||||
|
return self._neutralizeText(text, 'text')
|
||||||
|
|
||||||
|
def processFile(self, fileId: str) -> Dict[str, Any]:
|
||||||
|
"""Neutralize a file referenced by its fileId using app interface."""
|
||||||
|
if not self.app_interface:
|
||||||
|
raise ValueError("User context is required to process a file by fileId")
|
||||||
|
# Fetch file data and metadata
|
||||||
|
fileInfo = None
|
||||||
|
try:
|
||||||
|
# getFile returns an object; fallback to dict-like
|
||||||
|
fileInfo = self.app_interface.getFile(fileId)
|
||||||
|
except Exception:
|
||||||
|
fileInfo = None
|
||||||
|
fileName = getattr(fileInfo, 'fileName', None) if fileInfo else None
|
||||||
|
mimeType = getattr(fileInfo, 'mimeType', None) if fileInfo else None
|
||||||
|
fileData = self.app_interface.getFileData(fileId)
|
||||||
|
if not fileData:
|
||||||
|
raise ValueError(f"No file data found for fileId: {fileId}")
|
||||||
|
|
||||||
|
# Determine textType from mime
|
||||||
|
textType = self._getContentTypeFromMime(mimeType or '')
|
||||||
|
|
||||||
|
# Decode to text
|
||||||
|
try:
|
||||||
|
textContent = fileData.decode('utf-8')
|
||||||
|
except UnicodeDecodeError:
|
||||||
|
decoded = None
|
||||||
|
for enc in ['latin-1', 'cp1252', 'iso-8859-1']:
|
||||||
|
try:
|
||||||
|
decoded = fileData.decode(enc)
|
||||||
|
break
|
||||||
|
except UnicodeDecodeError:
|
||||||
|
continue
|
||||||
|
if decoded is None:
|
||||||
|
raise ValueError("Unable to decode file content")
|
||||||
|
textContent = decoded
|
||||||
|
|
||||||
|
result = self._neutralizeText(textContent, textType)
|
||||||
|
# Add a reasonable output filename if original known
|
||||||
|
if fileName:
|
||||||
|
result['neutralized_file_name'] = f"neutralized_{fileName}"
|
||||||
|
result['file_id'] = fileId
|
||||||
|
return result
|
||||||
|
|
||||||
|
def resolveText(self, text: str) -> str:
|
||||||
|
if not self.app_interface:
|
||||||
|
return text
|
||||||
|
try:
|
||||||
|
placeholder_pattern = r'\[([a-z]+)\.([a-f0-9-]{36})\]'
|
||||||
|
matches = re.findall(placeholder_pattern, text)
|
||||||
|
resolved_text = text
|
||||||
|
for placeholder_type, uid in matches:
|
||||||
|
attributes = self.app_interface.db.getRecordset(
|
||||||
|
DataNeutralizerAttributes,
|
||||||
|
recordFilter={
|
||||||
|
"mandateId": self.app_interface.mandateId,
|
||||||
|
"id": uid
|
||||||
|
}
|
||||||
|
)
|
||||||
|
if attributes:
|
||||||
|
attribute = attributes[0]
|
||||||
|
placeholder = f"[{placeholder_type}.{uid}]"
|
||||||
|
resolved_text = resolved_text.replace(placeholder, attribute["originalText"])
|
||||||
|
return resolved_text
|
||||||
|
except Exception:
|
||||||
|
return text
|
||||||
|
|
||||||
|
# Helper functions
|
||||||
|
|
||||||
|
def _neutralizeText(self, text: str, textType: str = None) -> Dict[str, Any]:
|
||||||
|
"""Process text and return unified dict for API consumption."""
|
||||||
|
try:
|
||||||
|
# Auto-detect content type if not provided
|
||||||
|
if textType is None:
|
||||||
|
textType = self.commonUtils.detect_content_type(text)
|
||||||
|
|
||||||
|
# Check if content is binary data
|
||||||
|
if self.binaryProcessor.is_binary_content(text):
|
||||||
|
data, mapping, replaced_fields, processed_info = self.binaryProcessor.process_binary_content(text)
|
||||||
|
neutralized_text = text if isinstance(data, str) else str(data)
|
||||||
|
attributes = [NeutralizationAttribute(original=k, placeholder=v) for k, v in mapping.items()]
|
||||||
|
return NeutralizationResult(
|
||||||
|
neutralized_text=neutralized_text,
|
||||||
|
mapping=mapping,
|
||||||
|
attributes=attributes,
|
||||||
|
processed_info=processed_info
|
||||||
|
).model_dump()
|
||||||
|
|
||||||
|
# Inline former _processData routing
|
||||||
|
if textType in ['csv', 'json', 'xml']:
|
||||||
|
if textType == 'csv':
|
||||||
|
data, mapping, replaced_fields, processed_info = self.listProcessor.process_csv_content(text)
|
||||||
|
elif textType == 'json':
|
||||||
|
data, mapping, replaced_fields, processed_info = self.listProcessor.process_json_content(text)
|
||||||
|
else: # xml
|
||||||
|
data, mapping, replaced_fields, processed_info = self.listProcessor.process_xml_content(text)
|
||||||
|
else:
|
||||||
|
data, mapping, replaced_fields, processed_info = self.textProcessor.process_text_content(text)
|
||||||
|
# Stringify data consistently
|
||||||
|
if textType == 'csv':
|
||||||
|
try:
|
||||||
|
neutralized_text = data.to_csv(index=False)
|
||||||
|
except Exception:
|
||||||
|
neutralized_text = str(data)
|
||||||
|
elif textType == 'json':
|
||||||
|
neutralized_text = json.dumps(data, ensure_ascii=False)
|
||||||
|
elif textType == 'xml':
|
||||||
|
neutralized_text = str(data)
|
||||||
|
else:
|
||||||
|
neutralized_text = str(data)
|
||||||
|
|
||||||
|
attributes = [NeutralizationAttribute(original=k, placeholder=v) for k, v in mapping.items()]
|
||||||
|
return NeutralizationResult(
|
||||||
|
neutralized_text=neutralized_text,
|
||||||
|
mapping=mapping,
|
||||||
|
attributes=attributes,
|
||||||
|
processed_info=processed_info
|
||||||
|
).model_dump()
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error processing content: {str(e)}")
|
||||||
|
return NeutralizationResult(
|
||||||
|
neutralized_text='',
|
||||||
|
mapping={},
|
||||||
|
attributes=[],
|
||||||
|
processed_info={'type': 'error', 'error': str(e)}
|
||||||
|
).model_dump()
|
||||||
|
|
||||||
|
def _getContentTypeFromMime(self, mime_type: str) -> str:
|
||||||
|
"""Determine content type from MIME type for neutralization processing"""
|
||||||
|
if mime_type.startswith('text/'):
|
||||||
|
return 'text'
|
||||||
|
elif mime_type in ['application/json', 'application/xml', 'text/xml']:
|
||||||
|
return 'json' if 'json' in mime_type else 'xml'
|
||||||
|
elif mime_type in ['text/csv', 'application/csv']:
|
||||||
|
return 'csv'
|
||||||
|
else:
|
||||||
|
return 'text' # Default to text processing
|
||||||
|
|
@ -1,112 +0,0 @@
|
||||||
"""
|
|
||||||
DSGVO-konformer Daten-Neutralisierer für KI-Agentensysteme
|
|
||||||
Unterstützt TXT, JSON, CSV, Excel und Word-Dateien
|
|
||||||
Mehrsprachig: DE, EN, FR, IT
|
|
||||||
"""
|
|
||||||
|
|
||||||
import logging
|
|
||||||
from typing import Dict, List, Any
|
|
||||||
|
|
||||||
# Import all necessary classes and functions
|
|
||||||
from modules.services.serviceNeutralization.subProcessCommon import ProcessResult, CommonUtils
|
|
||||||
from modules.services.serviceNeutralization.subProcessText import TextProcessor, PlainText
|
|
||||||
from modules.services.serviceNeutralization.subProcessList import ListProcessor, TableData
|
|
||||||
from modules.services.serviceNeutralization.subProcessBinary import BinaryProcessor, BinaryData
|
|
||||||
from modules.services.serviceNeutralization.subParseString import StringParser
|
|
||||||
from modules.services.serviceNeutralization.subPatterns import Pattern, HeaderPatterns, DataPatterns, TextTablePatterns
|
|
||||||
|
|
||||||
# Configure logging
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
# Export all classes and functions for external use
|
|
||||||
__all__ = [
|
|
||||||
'DataAnonymizer',
|
|
||||||
'ProcessResult',
|
|
||||||
'CommonUtils',
|
|
||||||
'TextProcessor',
|
|
||||||
'PlainText',
|
|
||||||
'ListProcessor',
|
|
||||||
'TableData',
|
|
||||||
'BinaryProcessor',
|
|
||||||
'BinaryData',
|
|
||||||
'StringParser',
|
|
||||||
'Pattern',
|
|
||||||
'HeaderPatterns',
|
|
||||||
'DataPatterns',
|
|
||||||
'TextTablePatterns'
|
|
||||||
]
|
|
||||||
|
|
||||||
class DataAnonymizer:
    """Main entry point for data anonymization.

    Delegates the actual work to specialized processors: plain text,
    structured data with headers (CSV/JSON/XML) and binary payloads.
    """

    def __init__(self, names_to_parse: List[str] = None):
        """Initialize the anonymizer with specialized processors.

        Args:
            names_to_parse: List of names to parse and replace
                (case-insensitive). ``None`` is treated as an empty list.
        """
        self.names_to_parse = names_to_parse or []

        # All processors receive the same name list so replacements agree.
        self.text_processor = TextProcessor(names_to_parse)
        self.list_processor = ListProcessor(names_to_parse)
        self.binary_processor = BinaryProcessor()

        # Shared helpers (content-type detection, mapping merge).
        self.common_utils = CommonUtils()

    def process_content(self, content: str, content_type: str = None) -> ProcessResult:
        """Process content and return anonymized data.

        Args:
            content: Content to process.
            content_type: One of 'csv', 'json', 'xml', 'text', 'binary';
                auto-detected when None.

        Returns:
            ProcessResult: Anonymized data, mapping, replaced fields and
            processing info. Errors do not raise; they are reported via an
            'error' entry in processed_info.
        """
        try:
            # Auto-detect content type if not provided.
            if content_type is None:
                content_type = self.common_utils.detect_content_type(content)

            # Binary payloads bypass the text/structured pipeline entirely.
            if self.binary_processor.is_binary_content(content):
                return self.binary_processor.process_binary_content(content)

            # Dispatch table replaces the previous if/elif chain; any type
            # that is not structured data falls back to text processing.
            structured_handlers = {
                'csv': self.list_processor.process_csv_content,
                'json': self.list_processor.process_json_content,
                'xml': self.list_processor.process_xml_content,
            }
            handler = structured_handlers.get(
                content_type, self.text_processor.process_text_content)
            result, mapping, replaced_fields, processed_info = handler(content)
            return ProcessResult(result, mapping, replaced_fields, processed_info)

        except Exception as e:
            logger.error(f"Error processing content: {str(e)}")
            return ProcessResult(None, {}, [], {'type': 'error', 'error': str(e)})

    def get_mapping(self) -> Dict[str, str]:
        """Return the combined mapping from the text and list processors.

        Returns:
            Dict[str, str]: Combined mapping dictionary.
        """
        text_mapping = self.text_processor.get_mapping()
        list_mapping = self.list_processor.get_mapping()
        return self.common_utils.merge_mappings(text_mapping, list_mapping)

    def clear_mapping(self):
        """Clear the mapping in all processors."""
        self.text_processor.clear_mapping()
        self.list_processor.clear_mapping()
|
|
||||||
|
|
@ -1,91 +0,0 @@
|
||||||
# Neutralizer Module Structure
|
|
||||||
|
|
||||||
This module provides DSGVO-compliant data anonymization for AI agent systems. The code has been refactored into specialized sub-modules for better maintainability and code reuse.
|
|
||||||
|
|
||||||
## Module Overview
|
|
||||||
|
|
||||||
### Core Module
|
|
||||||
- **`neutralizer.py`** - Main DataAnonymizer class that orchestrates all processing
|
|
||||||
|
|
||||||
### Specialized Processors
|
|
||||||
- **`subProcessText.py`** - Handles plain text processing without header information
|
|
||||||
- **`subProcessList.py`** - Handles structured data with headers (CSV, JSON, XML)
|
|
||||||
- **`subProcessBinary.py`** - Handles binary data types (images, audio, video, etc.)
|
|
||||||
|
|
||||||
### Utility Modules
|
|
||||||
- **`subParseString.py`** - String parsing and replacement utilities for emails, phones, addresses, IDs and names
|
|
||||||
- **`subProcessCommon.py`** - Common utilities and data structures shared across modules
|
|
||||||
- **`subPatterns.py`** - Pattern definitions for data anonymization
|
|
||||||
|
|
||||||
## Key Features
|
|
||||||
|
|
||||||
### 1. Modular Architecture
|
|
||||||
- **Separation of Concerns**: Each module handles a specific type of data processing
|
|
||||||
- **Code Reuse**: Common functionality is centralized in utility modules
|
|
||||||
- **Maintainability**: Easier to modify and extend individual components
|
|
||||||
|
|
||||||
### 2. Processing Order
|
|
||||||
1. **Pattern-based matches** (emails, phones, addresses, etc.) are processed FIRST
|
|
||||||
2. **Custom names** from the user list are processed SECOND
|
|
||||||
3. **Already anonymized content** (placeholders) is skipped
|
|
||||||
|
|
||||||
### 3. Supported Data Types
|
|
||||||
- **Text**: Plain text documents, emails, etc.
|
|
||||||
- **Structured Data**: CSV, JSON, XML with headers
|
|
||||||
- **Binary Data**: Images, audio, video (framework ready, implementation pending)
|
|
||||||
|
|
||||||
### 4. Placeholder Protection
|
|
||||||
- Prevents re-anonymization of already processed content
|
|
||||||
- Uses format `[tag.uuid]` for placeholders
|
|
||||||
- Validates placeholder format before processing
|
|
||||||
|
|
||||||
## Usage Example
|
|
||||||
|
|
||||||
```python
|
|
||||||
from modules.neutralizer import DataAnonymizer
|
|
||||||
|
|
||||||
# Initialize with custom names
|
|
||||||
anonymizer = DataAnonymizer(names_to_parse=['John Doe', 'Jane Smith'])
|
|
||||||
|
|
||||||
# Process content (auto-detects type)
|
|
||||||
result = anonymizer.process_content(content, content_type='text')
|
|
||||||
|
|
||||||
# Or specify content type explicitly
|
|
||||||
result = anonymizer.process_content(content, content_type='csv')
|
|
||||||
|
|
||||||
# Get mapping of original values to placeholders
|
|
||||||
mapping = anonymizer.get_mapping()
|
|
||||||
```
|
|
||||||
|
|
||||||
## Module Dependencies
|
|
||||||
|
|
||||||
```
|
|
||||||
neutralizer.py
|
|
||||||
├── subProcessCommon.py (ProcessResult, CommonUtils)
|
|
||||||
├── subProcessText.py (TextProcessor)
|
|
||||||
├── subProcessList.py (ListProcessor)
|
|
||||||
├── subProcessBinary.py (BinaryProcessor)
|
|
||||||
└── subPatterns.py (Pattern definitions)
|
|
||||||
|
|
||||||
subProcessText.py
|
|
||||||
└── subParseString.py (StringParser)
|
|
||||||
|
|
||||||
subProcessList.py
|
|
||||||
├── subParseString.py (StringParser)
|
|
||||||
└── subPatterns.py (HeaderPatterns)
|
|
||||||
|
|
||||||
subProcessBinary.py
|
|
||||||
└── (standalone)
|
|
||||||
|
|
||||||
subParseString.py
|
|
||||||
└── subPatterns.py (DataPatterns)
|
|
||||||
```
|
|
||||||
|
|
||||||
## Benefits of New Structure
|
|
||||||
|
|
||||||
1. **Single Responsibility**: Each module has one clear purpose
|
|
||||||
2. **DRY Principle**: No code duplication across modules
|
|
||||||
3. **Testability**: Individual modules can be tested in isolation
|
|
||||||
4. **Extensibility**: Easy to add new data types or processing methods
|
|
||||||
5. **Maintainability**: Changes to one module don't affect others
|
|
||||||
6. **Performance**: Specialized processors are optimized for their data types
|
|
||||||
|
|
@ -5,6 +5,7 @@ Shared functions and data structures
|
||||||
|
|
||||||
import re
|
import re
|
||||||
from typing import Dict, List, Any, Union, Optional
|
from typing import Dict, List, Any, Union, Optional
|
||||||
|
from pydantic import BaseModel
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
|
|
@ -15,6 +16,19 @@ class ProcessResult:
|
||||||
replaced_fields: List[str]
|
replaced_fields: List[str]
|
||||||
processed_info: Dict[str, Any] # Additional processing information
|
processed_info: Dict[str, Any] # Additional processing information
|
||||||
|
|
||||||
|
class NeutralizationAttribute(BaseModel):
    """Single attribute describing a replacement mapping."""
    # Value found in the source content before replacement.
    original: str
    # Placeholder the original value was replaced with.
    placeholder: str
    # Kind of pattern that produced the match, when known — TODO confirm the
    # set of values against the pattern modules.
    patternType: Optional[str] = None
|
||||||
|
|
||||||
|
class NeutralizationResult(BaseModel):
    """Unified result for all content types, suitable for API responses."""
    # Content after all replacements were applied.
    neutralized_text: str
    # Placeholder mapping produced during neutralization.
    mapping: Dict[str, str]
    # Per-replacement detail records (one per mapping entry).
    attributes: List[NeutralizationAttribute]
    # Additional processing metadata (type, tables, errors, ...).
    processed_info: Dict[str, Any]
|
||||||
|
|
||||||
class CommonUtils:
|
class CommonUtils:
|
||||||
"""Common utility functions for data processing"""
|
"""Common utility functions for data processing"""
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -82,7 +82,8 @@ class TextProcessor:
|
||||||
# Get processing information
|
# Get processing information
|
||||||
processed_info = {
|
processed_info = {
|
||||||
'type': 'text',
|
'type': 'text',
|
||||||
'tables': [{'headers': t.headers, 'row_count': len(t.rows)} for t in tables] if hasattr(tables[0], 'headers') else []
|
'tables': ([{'headers': t.headers, 'row_count': len(t.rows)} for t in tables]
|
||||||
|
if tables else [])
|
||||||
}
|
}
|
||||||
|
|
||||||
return result, self.string_parser.get_mapping(), [], processed_info
|
return result, self.string_parser.get_mapping(), [], processed_info
|
||||||
|
|
|
||||||
|
|
@ -10,7 +10,7 @@ from datetime import datetime, UTC
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class ConnectorSharepoint:
|
class SharepointService:
|
||||||
"""SharePoint connector using Microsoft Graph API for reliable authentication."""
|
"""SharePoint connector using Microsoft Graph API for reliable authentication."""
|
||||||
|
|
||||||
def __init__(self, access_token: str):
|
def __init__(self, access_token: str):
|
||||||
546
modules/services/serviceWorkflows/mainServiceWorkflows.py
Normal file
546
modules/services/serviceWorkflows/mainServiceWorkflows.py
Normal file
|
|
@ -0,0 +1,546 @@
|
||||||
|
import logging
|
||||||
|
import uuid
|
||||||
|
from typing import Dict, Any, List, Optional
|
||||||
|
from modules.interfaces.interfaceAppModel import User, UserConnection
|
||||||
|
from modules.interfaces.interfaceChatModel import ChatDocument, ChatMessage, ExtractedContent
|
||||||
|
from modules.services.serviceDocument.documentExtraction import DocumentExtractionService
|
||||||
|
from modules.services.serviceDocument.documentUtility import getFileExtension, getMimeTypeFromExtension, detectContentTypeFromData
|
||||||
|
from modules.shared.timezoneUtils import get_utc_timestamp
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
class WorkflowService:
|
||||||
|
"""Service class containing methods for document processing, chat operations, and workflow management"""
|
||||||
|
|
||||||
|
def __init__(self, service_center):
|
||||||
|
self.service_center = service_center
|
||||||
|
self.user = service_center.user
|
||||||
|
self.workflow = service_center.workflow
|
||||||
|
self.interfaceChat = service_center.interfaceChat
|
||||||
|
self.interfaceComponent = service_center.interfaceComponent
|
||||||
|
self.interfaceApp = service_center.interfaceApp
|
||||||
|
self.documentProcessor = service_center.documentProcessor
|
||||||
|
# Centralized services interface (for AI)
|
||||||
|
from modules.services import getInterface as getServices
|
||||||
|
self.services = getServices(self.user, self.workflow)
|
||||||
|
|
||||||
|
async def summarizeChat(self, messages: List[ChatMessage]) -> str:
|
||||||
|
"""
|
||||||
|
Summarize chat messages from last to first message with status="first"
|
||||||
|
|
||||||
|
Args:
|
||||||
|
messages: List of chat messages to summarize
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
str: Summary of the chat in user's language
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# Get messages from last to first, stopping at first message with status="first"
|
||||||
|
relevantMessages = []
|
||||||
|
for msg in reversed(messages):
|
||||||
|
relevantMessages.append(msg)
|
||||||
|
if msg.status == "first":
|
||||||
|
break
|
||||||
|
|
||||||
|
# Create prompt for AI
|
||||||
|
prompt = f"""You are an AI assistant providing a summary of a chat conversation.
|
||||||
|
Please respond in '{self.user.language}' language.
|
||||||
|
|
||||||
|
Chat History:
|
||||||
|
{chr(10).join(f"- {msg.message}" for msg in reversed(relevantMessages))}
|
||||||
|
|
||||||
|
Instructions:
|
||||||
|
1. Summarize the conversation's key points and outcomes
|
||||||
|
2. Be concise but informative
|
||||||
|
3. Use a professional but friendly tone
|
||||||
|
4. Focus on important decisions and next steps if any
|
||||||
|
|
||||||
|
Please provide a comprehensive summary of this conversation."""
|
||||||
|
|
||||||
|
# Get summary using centralized AI (speed priority)
|
||||||
|
return await self.services.ai.callAi(
|
||||||
|
prompt=prompt,
|
||||||
|
documents=None,
|
||||||
|
options={
|
||||||
|
"process_type": "text",
|
||||||
|
"operation_type": "generate_content",
|
||||||
|
"priority": "speed",
|
||||||
|
"compress_prompt": True,
|
||||||
|
"compress_documents": False,
|
||||||
|
"max_cost": 0.01
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error summarizing chat: {str(e)}")
|
||||||
|
return f"Error summarizing chat: {str(e)}"
|
||||||
|
|
||||||
|
def getChatDocumentsFromDocumentList(self, documentList: List[str]) -> List[ChatDocument]:
|
||||||
|
"""Get ChatDocuments from a list of document references using all three formats."""
|
||||||
|
try:
|
||||||
|
all_documents = []
|
||||||
|
for doc_ref in documentList:
|
||||||
|
if doc_ref.startswith("docItem:"):
|
||||||
|
# docItem:<id>:<filename> - extract ID and find document
|
||||||
|
parts = doc_ref.split(':')
|
||||||
|
if len(parts) >= 2:
|
||||||
|
doc_id = parts[1]
|
||||||
|
# Find the document by ID
|
||||||
|
for message in self.workflow.messages:
|
||||||
|
if message.documents:
|
||||||
|
for doc in message.documents:
|
||||||
|
if doc.id == doc_id:
|
||||||
|
doc_name = getattr(doc, 'fileName', 'unknown')
|
||||||
|
logger.debug(f"Found docItem reference {doc_ref}: {doc_name}")
|
||||||
|
all_documents.append(doc)
|
||||||
|
break
|
||||||
|
elif doc_ref.startswith("docList:"):
|
||||||
|
# docList:<messageId>:<label> or docList:<label> - extract message ID and find document list
|
||||||
|
parts = doc_ref.split(':')
|
||||||
|
if len(parts) >= 3:
|
||||||
|
# Format: docList:<messageId>:<label>
|
||||||
|
message_id = parts[1]
|
||||||
|
label = parts[2]
|
||||||
|
# Find the message by ID and get all its documents
|
||||||
|
for message in self.workflow.messages:
|
||||||
|
if str(message.id) == message_id:
|
||||||
|
if message.documents:
|
||||||
|
doc_names = [doc.fileName for doc in message.documents if hasattr(doc, 'fileName')]
|
||||||
|
logger.debug(f"Found docList reference {doc_ref}: {len(message.documents)} documents - {doc_names}")
|
||||||
|
all_documents.extend(message.documents)
|
||||||
|
else:
|
||||||
|
logger.debug(f"Found docList reference {doc_ref} but message has no documents")
|
||||||
|
break
|
||||||
|
elif len(parts) >= 2:
|
||||||
|
# Format: docList:<label> - find message by documentsLabel
|
||||||
|
label = parts[1]
|
||||||
|
logger.debug(f"Looking for message with documentsLabel: {label}")
|
||||||
|
# Find messages with matching documentsLabel
|
||||||
|
matching_messages = []
|
||||||
|
for message in self.workflow.messages:
|
||||||
|
# Check both attribute and raw data for documentsLabel
|
||||||
|
msg_label = getattr(message, 'documentsLabel', None)
|
||||||
|
if msg_label == label:
|
||||||
|
matching_messages.append(message)
|
||||||
|
logger.debug(f"Found message {message.id} with matching documentsLabel: {msg_label}")
|
||||||
|
else:
|
||||||
|
# Debug: show what labels we're comparing
|
||||||
|
logger.debug(f"Message {message.id} has documentsLabel: '{msg_label}' (looking for: '{label}')")
|
||||||
|
|
||||||
|
if matching_messages:
|
||||||
|
# Use the newest message (highest publishedAt)
|
||||||
|
matching_messages.sort(key=lambda msg: getattr(msg, 'publishedAt', 0), reverse=True)
|
||||||
|
newest_message = matching_messages[0]
|
||||||
|
|
||||||
|
if newest_message.documents:
|
||||||
|
doc_names = [doc.fileName for doc in newest_message.documents if hasattr(doc, 'fileName')]
|
||||||
|
logger.debug(f"Found docList reference {doc_ref}: {len(newest_message.documents)} documents - {doc_names}")
|
||||||
|
all_documents.extend(newest_message.documents)
|
||||||
|
else:
|
||||||
|
logger.debug(f"Found docList reference {doc_ref} but message has no documents")
|
||||||
|
else:
|
||||||
|
logger.debug(f"No messages found with documentsLabel: {label}")
|
||||||
|
else:
|
||||||
|
# Direct label reference (round1_task2_action3_contextinfo)
|
||||||
|
# Search for messages with matching documentsLabel to find the actual documents
|
||||||
|
if doc_ref.startswith("round"):
|
||||||
|
# Parse round/task/action to find the corresponding document list
|
||||||
|
label_parts = doc_ref.split('_', 3)
|
||||||
|
if len(label_parts) >= 4:
|
||||||
|
round_num = int(label_parts[0].replace('round', ''))
|
||||||
|
task_num = int(label_parts[1].replace('task', ''))
|
||||||
|
action_num = int(label_parts[2].replace('action', ''))
|
||||||
|
context_info = label_parts[3]
|
||||||
|
|
||||||
|
logger.debug(f"Resolving round reference: round{round_num}_task{task_num}_action{action_num}_{context_info}")
|
||||||
|
logger.debug(f"Looking for messages with documentsLabel matching: {doc_ref}")
|
||||||
|
|
||||||
|
# Find messages with matching documentsLabel (this is the correct way!)
|
||||||
|
# In case of retries, we want the NEWEST message (most recent publishedAt)
|
||||||
|
matching_messages = []
|
||||||
|
for message in self.workflow.messages:
|
||||||
|
msg_documents_label = getattr(message, 'documentsLabel', '')
|
||||||
|
|
||||||
|
# Check if this message's documentsLabel matches our reference
|
||||||
|
if msg_documents_label == doc_ref:
|
||||||
|
# Found a matching message, collect it for comparison
|
||||||
|
matching_messages.append(message)
|
||||||
|
logger.debug(f"Found message {message.id} with matching documentsLabel: {msg_documents_label}")
|
||||||
|
|
||||||
|
# If we found matching messages, take the newest one (highest publishedAt)
|
||||||
|
if matching_messages:
|
||||||
|
# Sort by publishedAt descending (newest first)
|
||||||
|
matching_messages.sort(key=lambda msg: getattr(msg, 'publishedAt', 0), reverse=True)
|
||||||
|
newest_message = matching_messages[0]
|
||||||
|
|
||||||
|
logger.debug(f"Found {len(matching_messages)} matching messages, using newest: {newest_message.id} (publishedAt: {getattr(newest_message, 'publishedAt', 'unknown')})")
|
||||||
|
logger.debug(f"Newest message has {len(newest_message.documents) if newest_message.documents else 0} documents")
|
||||||
|
|
||||||
|
if newest_message.documents:
|
||||||
|
doc_names = [doc.fileName for doc in newest_message.documents if hasattr(doc, 'fileName')]
|
||||||
|
logger.debug(f"Added {len(newest_message.documents)} documents from newest message {newest_message.id}: {doc_names}")
|
||||||
|
all_documents.extend(newest_message.documents)
|
||||||
|
else:
|
||||||
|
logger.debug(f"No documents found in newest message {newest_message.id}")
|
||||||
|
else:
|
||||||
|
logger.debug(f"No messages found with documentsLabel: {doc_ref}")
|
||||||
|
# Fallback: also check if any message has this documentsLabel as a prefix
|
||||||
|
logger.debug(f"Trying fallback search for messages with documentsLabel containing: {doc_ref}")
|
||||||
|
fallback_messages = []
|
||||||
|
for message in self.workflow.messages:
|
||||||
|
msg_documents_label = getattr(message, 'documentsLabel', '')
|
||||||
|
if msg_documents_label and msg_documents_label.startswith(doc_ref):
|
||||||
|
fallback_messages.append(message)
|
||||||
|
logger.debug(f"Found fallback message {message.id} with documentsLabel: {msg_documents_label}")
|
||||||
|
|
||||||
|
if fallback_messages:
|
||||||
|
# Sort by publishedAt descending (newest first)
|
||||||
|
fallback_messages.sort(key=lambda msg: getattr(msg, 'publishedAt', 0), reverse=True)
|
||||||
|
newest_fallback = fallback_messages[0]
|
||||||
|
|
||||||
|
logger.debug(f"Using fallback message {newest_fallback.id} with documentsLabel: {getattr(newest_fallback, 'documentsLabel', 'unknown')}")
|
||||||
|
if newest_fallback.documents:
|
||||||
|
doc_names = [doc.fileName for doc in newest_fallback.documents if hasattr(doc, 'fileName')]
|
||||||
|
logger.debug(f"Added {len(newest_fallback.documents)} documents from fallback message {newest_fallback.id}: {doc_names}")
|
||||||
|
all_documents.extend(newest_fallback.documents)
|
||||||
|
else:
|
||||||
|
logger.debug(f"No documents found in fallback message {newest_fallback.id}")
|
||||||
|
else:
|
||||||
|
logger.debug(f"No fallback messages found either")
|
||||||
|
|
||||||
|
logger.debug(f"Resolved {len(all_documents)} documents from document list: {documentList}")
|
||||||
|
return all_documents
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error getting documents from document list: {str(e)}")
|
||||||
|
return []
|
||||||
|
|
||||||
|
def getConnectionReferenceFromUserConnection(self, connection: UserConnection) -> str:
|
||||||
|
"""Get connection reference from UserConnection with enhanced state information"""
|
||||||
|
# Get token information to check if it's expired
|
||||||
|
token = None
|
||||||
|
token_status = "unknown"
|
||||||
|
try:
|
||||||
|
# Get a fresh token via TokenManager convenience method
|
||||||
|
logger.debug(f"Getting fresh token for connection {connection.id}")
|
||||||
|
from modules.security.tokenManager import TokenManager
|
||||||
|
token = TokenManager().getFreshToken(self.interfaceApp, connection.id)
|
||||||
|
if token:
|
||||||
|
if hasattr(token, 'expiresAt') and token.expiresAt:
|
||||||
|
current_time = get_utc_timestamp()
|
||||||
|
logger.debug(f"getConnectionReferenceFromUserConnection: Current time: {current_time}")
|
||||||
|
logger.debug(f"getConnectionReferenceFromUserConnection: Token expires at: {token.expiresAt}")
|
||||||
|
if current_time > token.expiresAt:
|
||||||
|
token_status = "expired"
|
||||||
|
else:
|
||||||
|
# Check if this token was recently refreshed (within last 5 minutes)
|
||||||
|
time_since_creation = current_time - token.createdAt if hasattr(token, 'createdAt') else 0
|
||||||
|
if time_since_creation < 300: # 5 minutes
|
||||||
|
token_status = "valid (refreshed)"
|
||||||
|
else:
|
||||||
|
token_status = "valid"
|
||||||
|
else:
|
||||||
|
token_status = "no_expiration"
|
||||||
|
else:
|
||||||
|
token_status = "no_token"
|
||||||
|
except Exception as e:
|
||||||
|
token_status = f"error: {str(e)}"
|
||||||
|
|
||||||
|
# Build enhanced reference with state information
|
||||||
|
base_ref = f"connection:{connection.authority.value}:{connection.externalUsername}:{connection.id}"
|
||||||
|
state_info = f" [status:{connection.status.value}, token:{token_status}]"
|
||||||
|
|
||||||
|
logger.debug(f"getConnectionReferenceFromUserConnection: Built reference: {base_ref + state_info}")
|
||||||
|
return base_ref + state_info
|
||||||
|
|
||||||
|
def getUserConnectionFromConnectionReference(self, connectionReference: str) -> Optional[UserConnection]:
|
||||||
|
"""Get UserConnection from reference string (handles both old and enhanced formats)"""
|
||||||
|
try:
|
||||||
|
# Parse reference format: connection:{authority}:{username}:{id} [status:..., token:...]
|
||||||
|
# Remove state information if present
|
||||||
|
base_reference = connectionReference.split(' [')[0]
|
||||||
|
|
||||||
|
parts = base_reference.split(':')
|
||||||
|
if len(parts) != 4 or parts[0] != "connection":
|
||||||
|
return None
|
||||||
|
|
||||||
|
authority = parts[1]
|
||||||
|
username = parts[2]
|
||||||
|
conn_id = parts[3]
|
||||||
|
|
||||||
|
# Get user connections through AppObjects interface
|
||||||
|
user_connections = self.interfaceApp.getUserConnections(self.user.id)
|
||||||
|
|
||||||
|
# Find matching connection
|
||||||
|
for conn in user_connections:
|
||||||
|
if str(conn.id) == conn_id and conn.authority.value == authority and conn.externalUsername == username:
|
||||||
|
return conn
|
||||||
|
return None
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error parsing connection reference: {str(e)}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
def getFileInfo(self, fileId: str) -> Dict[str, Any]:
|
||||||
|
"""Get file information"""
|
||||||
|
file_item = self.interfaceComponent.getFile(fileId)
|
||||||
|
if file_item:
|
||||||
|
return {
|
||||||
|
"id": file_item.id,
|
||||||
|
"fileName": file_item.fileName,
|
||||||
|
"size": file_item.fileSize,
|
||||||
|
"mimeType": file_item.mimeType,
|
||||||
|
"fileHash": file_item.fileHash,
|
||||||
|
"creationDate": file_item.creationDate
|
||||||
|
}
|
||||||
|
return None
|
||||||
|
|
||||||
|
def getFileData(self, fileId: str) -> bytes:
|
||||||
|
"""Get file data by ID"""
|
||||||
|
return self.interfaceComponent.getFileData(fileId)
|
||||||
|
|
||||||
|
async def extractContentFromDocument(self, prompt: str, document: ChatDocument) -> ExtractedContent:
|
||||||
|
"""Extract content from ChatDocument using prompt"""
|
||||||
|
try:
|
||||||
|
# ChatDocument is just a reference, so we need to get file data using fileId
|
||||||
|
if not hasattr(document, 'fileId') or not document.fileId:
|
||||||
|
logger.error(f"Document {document.id} has no fileId")
|
||||||
|
raise ValueError("Document has no fileId")
|
||||||
|
|
||||||
|
# Get file data from service center using document's fileId
|
||||||
|
fileData = self.getFileData(document.fileId)
|
||||||
|
if not fileData:
|
||||||
|
logger.error(f"No file data found for fileId: {document.fileId}")
|
||||||
|
raise ValueError("No file data found for document")
|
||||||
|
|
||||||
|
# Get fileName and mime type from document properties
|
||||||
|
try:
|
||||||
|
fileName = document.fileName
|
||||||
|
mimeType = document.mimeType
|
||||||
|
except Exception as e:
|
||||||
|
# Try to diagnose and recover the issue
|
||||||
|
diagnosis = self._diagnoseDocumentAccess(document)
|
||||||
|
logger.error(f"Critical error: Cannot access document properties for document {document.id}. Diagnosis: {diagnosis}")
|
||||||
|
|
||||||
|
# Attempt recovery
|
||||||
|
if self._recoverDocumentAccess(document):
|
||||||
|
try:
|
||||||
|
fileName = document.fileName
|
||||||
|
mimeType = document.mimeType
|
||||||
|
logger.info(f"Document access recovered for {document.id} - proceeding with AI extraction")
|
||||||
|
except Exception as recovery_error:
|
||||||
|
logger.error(f"Recovery failed for document {document.id}: {str(recovery_error)}")
|
||||||
|
raise RuntimeError(f"Document {document.id} properties are permanently inaccessible after recovery attempt - cannot proceed with AI extraction: {str(recovery_error)}")
|
||||||
|
else:
|
||||||
|
# Recovery failed - don't continue with invalid data
|
||||||
|
raise RuntimeError(f"Document {document.id} properties are inaccessible and recovery failed. Diagnosis: {diagnosis}")
|
||||||
|
|
||||||
|
# Process with document processor directly
|
||||||
|
extractedContent = await self.documentProcessor.processFileData(
|
||||||
|
fileData=fileData,
|
||||||
|
fileName=fileName,
|
||||||
|
mimeType=mimeType,
|
||||||
|
base64Encoded=False,
|
||||||
|
prompt=prompt,
|
||||||
|
documentId=document.id
|
||||||
|
)
|
||||||
|
|
||||||
|
# Note: ExtractedContent model only has 'id' and 'contents' fields
|
||||||
|
# No need to set objectId or objectType as they don't exist in the model
|
||||||
|
|
||||||
|
return extractedContent
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error extracting from document: {str(e)}")
|
||||||
|
raise
|
||||||
|
|
||||||
|
def _diagnoseDocumentAccess(self, document: ChatDocument) -> Dict[str, Any]:
|
||||||
|
"""
|
||||||
|
Diagnose document access issues and provide recovery information.
|
||||||
|
This method helps identify why document properties are inaccessible.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
diagnosis = {
|
||||||
|
'document_id': document.id,
|
||||||
|
'file_id': document.fileId,
|
||||||
|
'has_component_interface': document._componentInterface is not None,
|
||||||
|
'component_interface_type': type(document._componentInterface).__name__ if document._componentInterface else None,
|
||||||
|
'file_exists': False,
|
||||||
|
'file_info': None,
|
||||||
|
'error_details': None
|
||||||
|
}
|
||||||
|
|
||||||
|
# Check if component interface is set
|
||||||
|
if not document._componentInterface:
|
||||||
|
diagnosis['error_details'] = "Component interface not set - document cannot access file system"
|
||||||
|
return diagnosis
|
||||||
|
|
||||||
|
# Try to access the file directly
|
||||||
|
try:
|
||||||
|
file_info = self.interfaceComponent.getFile(document.fileId)
|
||||||
|
if file_info:
|
||||||
|
diagnosis['file_exists'] = True
|
||||||
|
diagnosis['file_info'] = {
|
||||||
|
'fileName': file_info.fileName if hasattr(file_info, 'fileName') else 'N/A',
|
||||||
|
'fileSize': file_info.fileSize if hasattr(file_info, 'fileSize') else 'N/A',
|
||||||
|
'mimeType': file_info.mimeType if hasattr(file_info, 'mimeType') else 'N/A'
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
diagnosis['error_details'] = f"File with ID {document.fileId} not found in component interface"
|
||||||
|
except Exception as e:
|
||||||
|
diagnosis['error_details'] = f"Error accessing file {document.fileId}: {str(e)}"
|
||||||
|
|
||||||
|
return diagnosis
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
return {
|
||||||
|
'document_id': document.id if hasattr(document, 'id') else 'unknown',
|
||||||
|
'file_id': document.fileId if hasattr(document, 'fileId') else 'unknown',
|
||||||
|
'error_details': f"Error during diagnosis: {str(e)}"
|
||||||
|
}
|
||||||
|
|
||||||
|
def _recoverDocumentAccess(self, document: ChatDocument) -> bool:
|
||||||
|
"""
|
||||||
|
Attempt to recover document access by re-setting the component interface.
|
||||||
|
Returns True if recovery was successful.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
logger.info(f"Attempting to recover document access for document {document.id}")
|
||||||
|
|
||||||
|
# Re-set the component interface
|
||||||
|
document.setComponentInterface(self.interfaceComponent)
|
||||||
|
|
||||||
|
# Test if we can now access the fileName
|
||||||
|
try:
|
||||||
|
test_fileName = document.fileName
|
||||||
|
logger.info(f"Document access recovered for {document.id} -> {test_fileName}")
|
||||||
|
return True
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Document access recovery failed for {document.id}: {str(e)}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error during document access recovery for {document.id}: {str(e)}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
def createDocument(self, fileName: str, mimeType: str, content: str, base64encoded: bool = True, messageId: str = None) -> ChatDocument:
|
||||||
|
"""Create document with file in one step - handles file creation internally"""
|
||||||
|
# Convert content to bytes based on base64 flag
|
||||||
|
if base64encoded:
|
||||||
|
import base64
|
||||||
|
content_bytes = base64.b64decode(content)
|
||||||
|
else:
|
||||||
|
content_bytes = content.encode('utf-8')
|
||||||
|
|
||||||
|
# Create the file (hash and size are computed inside interfaceComponent)
|
||||||
|
file_item = self.interfaceComponent.createFile(
|
||||||
|
name=fileName,
|
||||||
|
mimeType=mimeType,
|
||||||
|
content=content_bytes
|
||||||
|
)
|
||||||
|
|
||||||
|
# Then store the file data
|
||||||
|
self.interfaceComponent.createFileData(file_item.id, content_bytes)
|
||||||
|
|
||||||
|
# Get file info to copy attributes
|
||||||
|
file_info = self.getFileInfo(file_item.id)
|
||||||
|
if not file_info:
|
||||||
|
logger.error(f"Could not get file info for fileId: {file_item.id}")
|
||||||
|
raise ValueError(f"File info not found for fileId: {file_item.id}")
|
||||||
|
|
||||||
|
# Create document with all file attributes copied
|
||||||
|
document = ChatDocument(
|
||||||
|
id=str(uuid.uuid4()),
|
||||||
|
messageId=messageId or "", # Use provided messageId or empty string as fallback
|
||||||
|
fileId=file_item.id,
|
||||||
|
fileName=file_info.get("fileName", fileName),
|
||||||
|
fileSize=file_info.get("size", 0),
|
||||||
|
mimeType=file_info.get("mimeType", mimeType)
|
||||||
|
)
|
||||||
|
|
||||||
|
return document
|
||||||
|
|
||||||
|
def calculateObjectSize(self, obj: Any) -> int:
|
||||||
|
"""
|
||||||
|
Calculate the size of an object in bytes.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
obj: Object to calculate size for
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
int: Size in bytes
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
import json
|
||||||
|
import sys
|
||||||
|
|
||||||
|
if obj is None:
|
||||||
|
return 0
|
||||||
|
|
||||||
|
# Convert object to JSON string and calculate size
|
||||||
|
json_str = json.dumps(obj, ensure_ascii=False, default=str)
|
||||||
|
return len(json_str.encode('utf-8'))
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error calculating object size: {str(e)}")
|
||||||
|
return 0
|
||||||
|
|
||||||
|
def getWorkflowContext(self) -> Dict[str, int]:
|
||||||
|
"""Get current workflow context for document generation"""
|
||||||
|
try:
|
||||||
|
return {
|
||||||
|
'currentRound': self.workflow.currentRound if hasattr(self.workflow, 'currentRound') else 0,
|
||||||
|
'currentTask': self.workflow.currentTask if hasattr(self.workflow, 'currentTask') else 0,
|
||||||
|
'currentAction': self.workflow.currentAction if hasattr(self.workflow, 'currentAction') else 0
|
||||||
|
}
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error getting workflow context: {str(e)}")
|
||||||
|
return {'currentRound': 0, 'currentTask': 0, 'currentAction': 0}
|
||||||
|
|
||||||
|
def setWorkflowContext(self, round_number: int = None, task_number: int = None, action_number: int = None):
|
||||||
|
"""Set current workflow context for document generation and routing"""
|
||||||
|
try:
|
||||||
|
# Prepare update data
|
||||||
|
update_data = {}
|
||||||
|
|
||||||
|
if round_number is not None:
|
||||||
|
self.workflow.currentRound = round_number
|
||||||
|
update_data["currentRound"] = round_number
|
||||||
|
if task_number is not None:
|
||||||
|
self.workflow.currentTask = task_number
|
||||||
|
update_data["currentTask"] = task_number
|
||||||
|
if action_number is not None:
|
||||||
|
self.workflow.currentAction = action_number
|
||||||
|
update_data["currentAction"] = action_number
|
||||||
|
|
||||||
|
# Persist changes to database if any updates were made
|
||||||
|
if update_data:
|
||||||
|
self.interfaceChat.updateWorkflow(self.workflow.id, update_data)
|
||||||
|
|
||||||
|
logger.debug(f"Updated workflow context: Round {self.workflow.currentRound if hasattr(self.workflow, 'currentRound') else 'N/A'}, Task {self.workflow.currentTask if hasattr(self.workflow, 'currentTask') else 'N/A'}, Action {self.workflow.currentAction if hasattr(self.workflow, 'currentAction') else 'N/A'}")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error setting workflow context: {str(e)}")
|
||||||
|
|
||||||
|
def getWorkflowStats(self) -> Dict[str, Any]:
|
||||||
|
"""Get comprehensive workflow statistics including current context"""
|
||||||
|
try:
|
||||||
|
workflow_context = self.getWorkflowContext()
|
||||||
|
return {
|
||||||
|
'currentRound': workflow_context['currentRound'],
|
||||||
|
'currentTask': workflow_context['currentTask'],
|
||||||
|
'currentAction': workflow_context['currentAction'],
|
||||||
|
'totalTasks': self.workflow.totalTasks if hasattr(self.workflow, 'totalTasks') else 0,
|
||||||
|
'totalActions': self.workflow.totalActions if hasattr(self.workflow, 'totalActions') else 0,
|
||||||
|
'workflowStatus': self.workflow.status if hasattr(self.workflow, 'status') else 'unknown',
|
||||||
|
'workflowId': self.workflow.id if hasattr(self.workflow, 'id') else 'unknown'
|
||||||
|
}
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error getting workflow stats: {str(e)}")
|
||||||
|
return {
|
||||||
|
'currentRound': 0,
|
||||||
|
'currentTask': 0,
|
||||||
|
'currentAction': 0,
|
||||||
|
'totalTasks': 0,
|
||||||
|
'totalActions': 0,
|
||||||
|
'workflowStatus': 'unknown',
|
||||||
|
'workflowId': 'unknown'
|
||||||
|
}
|
||||||
120
modules/shared/eventManagement.py
Normal file
120
modules/shared/eventManagement.py
Normal file
|
|
@ -0,0 +1,120 @@
|
||||||
|
import logging
|
||||||
|
from typing import Callable, Optional, Dict, Any
|
||||||
|
|
||||||
|
from apscheduler.schedulers.asyncio import AsyncIOScheduler
|
||||||
|
from apscheduler.triggers.cron import CronTrigger
|
||||||
|
from apscheduler.triggers.interval import IntervalTrigger
|
||||||
|
from zoneinfo import ZoneInfo
|
||||||
|
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class EventManagement:
|
||||||
|
"""
|
||||||
|
Generic event scheduler wrapper around APScheduler's AsyncIOScheduler.
|
||||||
|
|
||||||
|
Features:
|
||||||
|
- start/stop lifecycle
|
||||||
|
- register timed events with either cron or interval style
|
||||||
|
- remove events by id
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, timezone: str = "Europe/Zurich"):
|
||||||
|
self._timezone = ZoneInfo(timezone)
|
||||||
|
self._scheduler: Optional[AsyncIOScheduler] = None
|
||||||
|
|
||||||
|
@property
|
||||||
|
def scheduler(self) -> AsyncIOScheduler:
|
||||||
|
if self._scheduler is None:
|
||||||
|
self._scheduler = AsyncIOScheduler(timezone=self._timezone)
|
||||||
|
return self._scheduler
|
||||||
|
|
||||||
|
def start(self) -> None:
|
||||||
|
if not self.scheduler.running:
|
||||||
|
self.scheduler.start()
|
||||||
|
logger.info("EventManagement scheduler started")
|
||||||
|
|
||||||
|
def stop(self) -> None:
|
||||||
|
if self._scheduler and self._scheduler.running:
|
||||||
|
try:
|
||||||
|
self._scheduler.shutdown(wait=False)
|
||||||
|
logger.info("EventManagement scheduler stopped")
|
||||||
|
except Exception as exc:
|
||||||
|
logger.error(f"Error stopping scheduler: {exc}")
|
||||||
|
|
||||||
|
def register_cron(
|
||||||
|
self,
|
||||||
|
job_id: str,
|
||||||
|
func: Callable,
|
||||||
|
*,
|
||||||
|
cron_kwargs: Optional[Dict[str, Any]] = None,
|
||||||
|
replace_existing: bool = True,
|
||||||
|
coalesce: bool = True,
|
||||||
|
max_instances: int = 1,
|
||||||
|
misfire_grace_time: int = 1800,
|
||||||
|
**kwargs: Any,
|
||||||
|
) -> None:
|
||||||
|
"""
|
||||||
|
Register a job using CronTrigger. Provide cron fields as keyword args, e.g.:
|
||||||
|
cron_kwargs={"minute": "0,20,40"}
|
||||||
|
"""
|
||||||
|
trigger = CronTrigger(timezone=self._timezone, **(cron_kwargs or {}))
|
||||||
|
self.scheduler.add_job(
|
||||||
|
func,
|
||||||
|
trigger,
|
||||||
|
id=job_id,
|
||||||
|
replace_existing=replace_existing,
|
||||||
|
coalesce=coalesce,
|
||||||
|
max_instances=max_instances,
|
||||||
|
misfire_grace_time=misfire_grace_time,
|
||||||
|
**kwargs,
|
||||||
|
)
|
||||||
|
logger.info(f"Registered cron job '{job_id}' with args {cron_kwargs}")
|
||||||
|
|
||||||
|
def register_interval(
|
||||||
|
self,
|
||||||
|
job_id: str,
|
||||||
|
func: Callable,
|
||||||
|
*,
|
||||||
|
seconds: Optional[int] = None,
|
||||||
|
minutes: Optional[int] = None,
|
||||||
|
hours: Optional[int] = None,
|
||||||
|
replace_existing: bool = True,
|
||||||
|
coalesce: bool = True,
|
||||||
|
max_instances: int = 1,
|
||||||
|
misfire_grace_time: int = 1800,
|
||||||
|
**kwargs: Any,
|
||||||
|
) -> None:
|
||||||
|
"""
|
||||||
|
Register a job using IntervalTrigger.
|
||||||
|
"""
|
||||||
|
trigger = IntervalTrigger(
|
||||||
|
seconds=seconds, minutes=minutes, hours=hours, timezone=self._timezone
|
||||||
|
)
|
||||||
|
self.scheduler.add_job(
|
||||||
|
func,
|
||||||
|
trigger,
|
||||||
|
id=job_id,
|
||||||
|
replace_existing=replace_existing,
|
||||||
|
coalesce=coalesce,
|
||||||
|
max_instances=max_instances,
|
||||||
|
misfire_grace_time=misfire_grace_time,
|
||||||
|
**kwargs,
|
||||||
|
)
|
||||||
|
logger.info(
|
||||||
|
f"Registered interval job '{job_id}' (h={hours}, m={minutes}, s={seconds})"
|
||||||
|
)
|
||||||
|
|
||||||
|
def remove(self, job_id: str) -> None:
|
||||||
|
try:
|
||||||
|
self.scheduler.remove_job(job_id)
|
||||||
|
logger.info(f"Removed job '{job_id}'")
|
||||||
|
except Exception as exc:
|
||||||
|
logger.warning(f"Could not remove job '{job_id}': {exc}")
|
||||||
|
|
||||||
|
|
||||||
|
# Singleton instance for easy import and reuse
|
||||||
|
eventManager = EventManagement()
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -20,6 +20,9 @@ class MethodAi(MethodBase):
|
||||||
super().__init__(service)
|
super().__init__(service)
|
||||||
self.name = "ai"
|
self.name = "ai"
|
||||||
self.description = "AI processing methods"
|
self.description = "AI processing methods"
|
||||||
|
# Centralized services interface (for AI)
|
||||||
|
from modules.services import getInterface as getServices
|
||||||
|
self.services = getServices(self.service.user, self.service.workflow)
|
||||||
|
|
||||||
def _format_timestamp_for_filename(self) -> str:
|
def _format_timestamp_for_filename(self) -> str:
|
||||||
"""Format current timestamp as YYYYMMDD-hhmmss for filenames."""
|
"""Format current timestamp as YYYYMMDD-hhmmss for filenames."""
|
||||||
|
|
@ -177,10 +180,43 @@ class MethodAi(MethodBase):
|
||||||
min_tokens_hint = "\n\nPlease ensure the response is substantial and complete."
|
min_tokens_hint = "\n\nPlease ensure the response is substantial and complete."
|
||||||
call_prompt = enhanced_prompt + min_tokens_hint
|
call_prompt = enhanced_prompt + min_tokens_hint
|
||||||
|
|
||||||
if processingMode in ["advanced", "detailed"]:
|
# Centralized AI call with optional document context
|
||||||
result = await self.service.callAiTextAdvanced(call_prompt, context)
|
documents = []
|
||||||
else:
|
try:
|
||||||
result = await self.service.callAiTextBasic(call_prompt, context)
|
if documentList:
|
||||||
|
for d in (chatDocuments or []):
|
||||||
|
try:
|
||||||
|
file_data = self.service.getFileData(d.fileId)
|
||||||
|
documents.append(
|
||||||
|
ChatDocument(
|
||||||
|
fileData=file_data,
|
||||||
|
fileName=d.fileName,
|
||||||
|
mimeType=d.mimeType
|
||||||
|
)
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
except Exception:
|
||||||
|
documents = None
|
||||||
|
|
||||||
|
output_format = output_extension.replace('.', '') or 'txt'
|
||||||
|
result = await self.services.ai.callAi(
|
||||||
|
prompt=call_prompt,
|
||||||
|
documents=documents or None,
|
||||||
|
options={
|
||||||
|
"process_type": "text",
|
||||||
|
"operation_type": "generate_content",
|
||||||
|
"priority": "quality" if processingMode in ["advanced", "detailed"] else "speed",
|
||||||
|
"compress_prompt": processingMode != "detailed",
|
||||||
|
"compress_documents": True,
|
||||||
|
"process_documents_individually": True,
|
||||||
|
"processing_mode": processingMode,
|
||||||
|
"result_format_requested": output_format,
|
||||||
|
"include_metadata": includeMetadata,
|
||||||
|
"max_cost": 0.05 if processingMode in ["advanced", "detailed"] else 0.02,
|
||||||
|
"max_processing_time": 45 if processingMode in ["advanced", "detailed"] else 20
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
# If expected JSON and too short/not JSON, retry with stricter JSON guardrails
|
# If expected JSON and too short/not JSON, retry with stricter JSON guardrails
|
||||||
if output_extension == ".json":
|
if output_extension == ".json":
|
||||||
|
|
@ -207,7 +243,23 @@ class MethodAi(MethodBase):
|
||||||
"Include all requested fields with detailed content."
|
"Include all requested fields with detailed content."
|
||||||
)
|
)
|
||||||
try:
|
try:
|
||||||
result = await self.service.callAiTextAdvanced(guardrail_prompt, context)
|
result = await self.services.ai.callAi(
|
||||||
|
prompt=guardrail_prompt,
|
||||||
|
documents=context or None,
|
||||||
|
options={
|
||||||
|
"process_type": "text",
|
||||||
|
"operation_type": "generate_content",
|
||||||
|
"priority": "quality",
|
||||||
|
"compress_prompt": False,
|
||||||
|
"compress_documents": True,
|
||||||
|
"process_documents_individually": True,
|
||||||
|
"processing_mode": "detailed",
|
||||||
|
"result_format_requested": "json",
|
||||||
|
"include_metadata": False,
|
||||||
|
"max_cost": 0.03,
|
||||||
|
"max_processing_time": 30
|
||||||
|
}
|
||||||
|
)
|
||||||
except Exception:
|
except Exception:
|
||||||
result = cleaned # fallback to first attempt
|
result = cleaned # fallback to first attempt
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -22,6 +22,9 @@ class MethodDocument(MethodBase):
|
||||||
super().__init__(serviceCenter)
|
super().__init__(serviceCenter)
|
||||||
self.name = "document"
|
self.name = "document"
|
||||||
self.description = "Handle document operations like extraction and analysis"
|
self.description = "Handle document operations like extraction and analysis"
|
||||||
|
# Centralized services interface (for AI)
|
||||||
|
from modules.services import getInterface as getServices
|
||||||
|
self.services = getServices(self.service.user, self.service.workflow)
|
||||||
|
|
||||||
def _format_timestamp_for_filename(self) -> str:
|
def _format_timestamp_for_filename(self) -> str:
|
||||||
"""Format current timestamp as YYYYMMDD-hhmmss for filenames."""
|
"""Format current timestamp as YYYYMMDD-hhmmss for filenames."""
|
||||||
|
|
@ -530,7 +533,18 @@ class MethodDocument(MethodBase):
|
||||||
|
|
||||||
# Call AI to generate the formatted content
|
# Call AI to generate the formatted content
|
||||||
logger.info(f"Calling AI for {extension} format conversion")
|
logger.info(f"Calling AI for {extension} format conversion")
|
||||||
formatted_content = await self.service.callAiTextBasic(ai_prompt, content)
|
formatted_content = await self.services.ai.callAi(
|
||||||
|
prompt=ai_prompt,
|
||||||
|
documents=None,
|
||||||
|
options={
|
||||||
|
"process_type": "text",
|
||||||
|
"operation_type": "generate_content",
|
||||||
|
"priority": "speed",
|
||||||
|
"compress_prompt": True,
|
||||||
|
"compress_documents": False,
|
||||||
|
"max_cost": 0.02
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
if not formatted_content or formatted_content.strip() == "":
|
if not formatted_content or formatted_content.strip() == "":
|
||||||
logger.warning("AI format conversion failed, using fallback")
|
logger.warning("AI format conversion failed, using fallback")
|
||||||
|
|
@ -751,7 +765,36 @@ SOURCE DOCUMENT CONTENT:
|
||||||
|
|
||||||
# Call AI to generate the report
|
# Call AI to generate the report
|
||||||
logger.info(f"Generating AI report for {len(validDocuments)} documents")
|
logger.info(f"Generating AI report for {len(validDocuments)} documents")
|
||||||
aiReport = await self.service.callAiTextAdvanced(aiPrompt, combinedContent)
|
# Build ChatDocument list from chatDocuments
|
||||||
|
documents = []
|
||||||
|
try:
|
||||||
|
from modules.interfaces.interfaceChatModel import ChatDocument as ChatDoc
|
||||||
|
for d in validDocuments:
|
||||||
|
try:
|
||||||
|
data = self.service.getFileData(d.fileId) if hasattr(d, 'fileId') else None
|
||||||
|
if data:
|
||||||
|
documents.append(ChatDoc(fileData=data, fileName=d.fileName, mimeType=d.mimeType))
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
except Exception:
|
||||||
|
documents = None
|
||||||
|
aiReport = await self.services.ai.callAi(
|
||||||
|
prompt=aiPrompt,
|
||||||
|
documents=documents or None,
|
||||||
|
options={
|
||||||
|
"process_type": "text",
|
||||||
|
"operation_type": "report_generation",
|
||||||
|
"priority": "quality",
|
||||||
|
"compress_prompt": False,
|
||||||
|
"compress_documents": True,
|
||||||
|
"process_documents_individually": True,
|
||||||
|
"result_format_requested": "html",
|
||||||
|
"include_metadata": includeMetadata,
|
||||||
|
"processing_mode": "detailed",
|
||||||
|
"max_cost": 0.08,
|
||||||
|
"max_processing_time": 90
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
# If AI call fails, return error - AI is crucial for report generation
|
# If AI call fails, return error - AI is crucial for report generation
|
||||||
if not aiReport or aiReport.strip() == "":
|
if not aiReport or aiReport.strip() == "":
|
||||||
|
|
|
||||||
|
|
@ -96,6 +96,9 @@ class MethodOutlook(MethodBase):
|
||||||
super().__init__(serviceCenter)
|
super().__init__(serviceCenter)
|
||||||
self.name = "outlook"
|
self.name = "outlook"
|
||||||
self.description = "Handle Microsoft Outlook email operations"
|
self.description = "Handle Microsoft Outlook email operations"
|
||||||
|
# Centralized services interface (for AI)
|
||||||
|
from modules.services import getInterface as getServices
|
||||||
|
self.services = getServices(self.service.user, self.service.workflow)
|
||||||
|
|
||||||
def _format_timestamp_for_filename(self) -> str:
|
def _format_timestamp_for_filename(self) -> str:
|
||||||
"""Format current timestamp as YYYYMMDD-hhmmss for filenames."""
|
"""Format current timestamp as YYYYMMDD-hhmmss for filenames."""
|
||||||
|
|
@ -116,8 +119,9 @@ class MethodOutlook(MethodBase):
|
||||||
|
|
||||||
logger.debug(f"Found connection: {userConnection.id}, status: {userConnection.status.value}, authority: {userConnection.authority.value}")
|
logger.debug(f"Found connection: {userConnection.id}, status: {userConnection.status.value}, authority: {userConnection.authority.value}")
|
||||||
|
|
||||||
# Get the token for this specific connection
|
# Get a fresh token for this specific connection
|
||||||
token = self.service.interfaceApp.getConnectionToken(userConnection.id)
|
from modules.security.tokenManager import TokenManager
|
||||||
|
token = TokenManager().getFreshToken(self.service.interfaceApp, userConnection.id)
|
||||||
if not token:
|
if not token:
|
||||||
logger.error(f"Token not found for connection: {userConnection.id}")
|
logger.error(f"Token not found for connection: {userConnection.id}")
|
||||||
logger.debug(f"Connection details: {userConnection}")
|
logger.debug(f"Connection details: {userConnection}")
|
||||||
|
|
@ -1605,7 +1609,36 @@ class MethodOutlook(MethodBase):
|
||||||
|
|
||||||
# Call AI to compose the email
|
# Call AI to compose the email
|
||||||
try:
|
try:
|
||||||
composed_email = await self.service.interfaceAiCalls.callAiTextAdvanced(ai_prompt)
|
# Centralized AI call for email composition with document context
|
||||||
|
documents = []
|
||||||
|
try:
|
||||||
|
if composition_documents:
|
||||||
|
from modules.interfaces.interfaceChatModel import ChatDocument as ChatDoc
|
||||||
|
for d in composition_documents:
|
||||||
|
try:
|
||||||
|
data = self.service.getFileData(d.fileId) if hasattr(d, 'fileId') else None
|
||||||
|
if data:
|
||||||
|
documents.append(ChatDoc(fileData=data, fileName=d.fileName, mimeType=d.mimeType))
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
except Exception:
|
||||||
|
documents = None
|
||||||
|
|
||||||
|
composed_email = await self.services.ai.callAi(
|
||||||
|
prompt=ai_prompt,
|
||||||
|
documents=documents or None,
|
||||||
|
options={
|
||||||
|
"process_type": "text",
|
||||||
|
"operation_type": "email_composition",
|
||||||
|
"priority": "speed",
|
||||||
|
"compress_prompt": True,
|
||||||
|
"compress_documents": True,
|
||||||
|
"process_documents_individually": False,
|
||||||
|
"include_metadata": True,
|
||||||
|
"max_cost": 0.02,
|
||||||
|
"max_processing_time": 15
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
# Parse the AI response to ensure it's valid JSON
|
# Parse the AI response to ensure it's valid JSON
|
||||||
try:
|
try:
|
||||||
|
|
|
||||||
|
|
@ -48,8 +48,9 @@ class MethodSharepoint(MethodBase):
|
||||||
logger.warning(f"Connection {userConnection.id} status is not active/pending: {userConnection.status.value}")
|
logger.warning(f"Connection {userConnection.id} status is not active/pending: {userConnection.status.value}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# Get the token for this specific connection
|
# Get a fresh token for this specific connection
|
||||||
token = self.service.interfaceApp.getConnectionToken(userConnection.id)
|
from modules.security.tokenManager import TokenManager
|
||||||
|
token = TokenManager().getFreshToken(self.service.interfaceApp, userConnection.id)
|
||||||
if not token:
|
if not token:
|
||||||
logger.warning(f"No token found for connection {userConnection.id}")
|
logger.warning(f"No token found for connection {userConnection.id}")
|
||||||
return None
|
return None
|
||||||
|
|
|
||||||
|
|
@ -22,6 +22,9 @@ class MethodWeb(MethodBase):
|
||||||
super().__init__(serviceCenter)
|
super().__init__(serviceCenter)
|
||||||
self.name = "web"
|
self.name = "web"
|
||||||
self.description = "Web search, crawling, and scraping operations using Tavily"
|
self.description = "Web search, crawling, and scraping operations using Tavily"
|
||||||
|
# Centralized services interface (for AI)
|
||||||
|
from modules.services import getInterface as getServices
|
||||||
|
self.services = getServices(self.service.user, self.service.workflow)
|
||||||
|
|
||||||
@action
|
@action
|
||||||
async def search(self, parameters: Dict[str, Any]) -> ActionResult:
|
async def search(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||||
|
|
@ -274,7 +277,21 @@ class MethodWeb(MethodBase):
|
||||||
"Return only bullet points without any preface."
|
"Return only bullet points without any preface."
|
||||||
)
|
)
|
||||||
context = content[:4000]
|
context = content[:4000]
|
||||||
summary = await self.service.callAiTextBasic(prompt, context)
|
# Centralized AI summary (balanced analyse_content)
|
||||||
|
summary = await self.services.ai.callAi(
|
||||||
|
prompt=prompt,
|
||||||
|
documents=None,
|
||||||
|
options={
|
||||||
|
"process_type": "text",
|
||||||
|
"operation_type": "analyse_content",
|
||||||
|
"priority": "balanced",
|
||||||
|
"compress_prompt": True,
|
||||||
|
"compress_documents": False,
|
||||||
|
"processing_mode": "advanced",
|
||||||
|
"max_cost": 0.05,
|
||||||
|
"max_processing_time": 30
|
||||||
|
}
|
||||||
|
)
|
||||||
summary = summary.strip()
|
summary = summary.strip()
|
||||||
except Exception:
|
except Exception:
|
||||||
summary = ""
|
summary = ""
|
||||||
|
|
|
||||||
|
|
@ -12,8 +12,8 @@ from modules.interfaces.interfaceChatModel import (
|
||||||
)
|
)
|
||||||
from modules.interfaces.interfaceAppObjects import getInterface as getAppObjects
|
from modules.interfaces.interfaceAppObjects import getInterface as getAppObjects
|
||||||
from modules.shared.timezoneUtils import get_utc_timestamp
|
from modules.shared.timezoneUtils import get_utc_timestamp
|
||||||
from modules.workflows._transfer.executionState import TaskExecutionState
|
from modules.workflows.processing.executionState import TaskExecutionState
|
||||||
from modules.workflows._transfer.promptFactory import (
|
from modules.workflows.processing.promptFactory import (
|
||||||
createTaskPlanningPrompt,
|
createTaskPlanningPrompt,
|
||||||
createActionDefinitionPrompt,
|
createActionDefinitionPrompt,
|
||||||
createResultReviewPrompt,
|
createResultReviewPrompt,
|
||||||
|
|
@ -21,7 +21,8 @@ from modules.workflows._transfer.promptFactory import (
|
||||||
createActionParameterPrompt,
|
createActionParameterPrompt,
|
||||||
createRefinementPrompt
|
createRefinementPrompt
|
||||||
)
|
)
|
||||||
from modules.services.serviceDocument.documentGeneration import DocumentGenerator
|
from modules.services.serviceDocument.mainServiceDocumentGeneration import DocumentGenerationService
|
||||||
|
from modules.workflows.processing.promptFactory import methods
|
||||||
import uuid
|
import uuid
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
@ -31,13 +32,10 @@ class WorkflowStoppedException(Exception):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
class HandlingTasks:
|
class HandlingTasks:
|
||||||
def __init__(self, chatInterface, currentUser, workflow=None):
|
def __init__(self, services, workflow=None):
|
||||||
self.chatInterface = chatInterface
|
self.services = services
|
||||||
self.currentUser = currentUser
|
|
||||||
self.workflow = workflow
|
self.workflow = workflow
|
||||||
from modules.services.serviceCenter import ServiceCenter
|
self.documentGenerator = DocumentGenerationService(self.services.center)
|
||||||
self.service = ServiceCenter(currentUser, workflow)
|
|
||||||
self.documentGenerator = DocumentGenerator(self.service)
|
|
||||||
|
|
||||||
def _checkWorkflowStopped(self):
|
def _checkWorkflowStopped(self):
|
||||||
"""
|
"""
|
||||||
|
|
@ -46,7 +44,7 @@ class HandlingTasks:
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
# Get the current workflow status from the database to avoid stale data
|
# Get the current workflow status from the database to avoid stale data
|
||||||
current_workflow = self.chatInterface.getWorkflow(self.service.workflow.id)
|
current_workflow = services.chatInterface.getWorkflow(self.service.workflow.id)
|
||||||
if current_workflow and current_workflow.status == "stopped":
|
if current_workflow and current_workflow.status == "stopped":
|
||||||
logger.info("Workflow stopped by user, aborting execution")
|
logger.info("Workflow stopped by user, aborting execution")
|
||||||
raise WorkflowStoppedException("Workflow was stopped by user")
|
raise WorkflowStoppedException("Workflow was stopped by user")
|
||||||
|
|
@ -113,9 +111,23 @@ class HandlingTasks:
|
||||||
# Log task planning prompt sent to AI
|
# Log task planning prompt sent to AI
|
||||||
logger.info("=== TASK PLANNING PROMPT SENT TO AI ===")
|
logger.info("=== TASK PLANNING PROMPT SENT TO AI ===")
|
||||||
# Trace task planning prompt
|
# Trace task planning prompt
|
||||||
self.service.writeTraceLog("Task Plan Prompt", task_planning_prompt)
|
self.writeTraceLog("Task Plan Prompt", task_planning_prompt)
|
||||||
|
|
||||||
prompt = await self.service.callAiTextAdvanced(task_planning_prompt)
|
# Centralized AI call: Task planning (quality, detailed)
|
||||||
|
prompt = await self.services.ai.callAi(
|
||||||
|
prompt=task_planning_prompt,
|
||||||
|
documents=None,
|
||||||
|
options={
|
||||||
|
"process_type": "text",
|
||||||
|
"operation_type": "generate_plan",
|
||||||
|
"priority": "quality",
|
||||||
|
"compress_prompt": False,
|
||||||
|
"compress_documents": False,
|
||||||
|
"processing_mode": "detailed",
|
||||||
|
"max_cost": 0.10,
|
||||||
|
"max_processing_time": 30
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
# Check if AI response is valid
|
# Check if AI response is valid
|
||||||
if not prompt:
|
if not prompt:
|
||||||
|
|
@ -125,7 +137,7 @@ class HandlingTasks:
|
||||||
logger.info("=== TASK PLANNING AI RESPONSE RECEIVED ===")
|
logger.info("=== TASK PLANNING AI RESPONSE RECEIVED ===")
|
||||||
logger.info(f"Response length: {len(prompt) if prompt else 0}")
|
logger.info(f"Response length: {len(prompt) if prompt else 0}")
|
||||||
# Trace task planning response
|
# Trace task planning response
|
||||||
self.service.writeTraceLog("Task Plan Response", prompt)
|
self.writeTraceLog("Task Plan Response", prompt)
|
||||||
|
|
||||||
# Inline _parseTaskPlanResponse logic
|
# Inline _parseTaskPlanResponse logic
|
||||||
try:
|
try:
|
||||||
|
|
@ -251,7 +263,7 @@ class HandlingTasks:
|
||||||
"taskProgress": "pending"
|
"taskProgress": "pending"
|
||||||
}
|
}
|
||||||
|
|
||||||
message = self.chatInterface.createMessage(message_data)
|
message = services.chatInterface.createMessage(message_data)
|
||||||
if message:
|
if message:
|
||||||
workflow.messages.append(message)
|
workflow.messages.append(message)
|
||||||
|
|
||||||
|
|
@ -359,9 +371,23 @@ class HandlingTasks:
|
||||||
# Generate the action definition prompt
|
# Generate the action definition prompt
|
||||||
action_prompt = await createActionDefinitionPrompt(action_context, self.service)
|
action_prompt = await createActionDefinitionPrompt(action_context, self.service)
|
||||||
# Trace action planning prompt
|
# Trace action planning prompt
|
||||||
self.service.writeTraceLog("Action Plan Prompt", action_prompt)
|
self.writeTraceLog("Action Plan Prompt", action_prompt)
|
||||||
|
|
||||||
prompt = await self.service.callAiTextAdvanced(action_prompt)
|
# Centralized AI call: Action planning (quality, detailed)
|
||||||
|
prompt = await self.services.ai.callAi(
|
||||||
|
prompt=action_prompt,
|
||||||
|
documents=None,
|
||||||
|
options={
|
||||||
|
"process_type": "text",
|
||||||
|
"operation_type": "generate_plan",
|
||||||
|
"priority": "quality",
|
||||||
|
"compress_prompt": False,
|
||||||
|
"compress_documents": False,
|
||||||
|
"processing_mode": "detailed",
|
||||||
|
"max_cost": 0.10,
|
||||||
|
"max_processing_time": 30
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
# Check if AI response is valid
|
# Check if AI response is valid
|
||||||
if not prompt:
|
if not prompt:
|
||||||
|
|
@ -371,7 +397,7 @@ class HandlingTasks:
|
||||||
logger.info("=== ACTION PLAN AI RESPONSE RECEIVED ===")
|
logger.info("=== ACTION PLAN AI RESPONSE RECEIVED ===")
|
||||||
logger.info(f"Response length: {len(prompt) if prompt else 0}")
|
logger.info(f"Response length: {len(prompt) if prompt else 0}")
|
||||||
# Trace action planning response
|
# Trace action planning response
|
||||||
self.service.writeTraceLog("Action Plan Response", prompt)
|
self.writeTraceLog("Action Plan Response", prompt)
|
||||||
|
|
||||||
# Inline parseActionResponse logic here
|
# Inline parseActionResponse logic here
|
||||||
json_start = prompt.find('{')
|
json_start = prompt.find('{')
|
||||||
|
|
@ -438,9 +464,23 @@ class HandlingTasks:
|
||||||
async def plan_select(self, context: TaskContext) -> Dict[str, Any]:
|
async def plan_select(self, context: TaskContext) -> Dict[str, Any]:
|
||||||
"""Plan: select exactly one action. Returns {"action": {method, name}}"""
|
"""Plan: select exactly one action. Returns {"action": {method, name}}"""
|
||||||
prompt = createActionSelectionPrompt(context, self.service)
|
prompt = createActionSelectionPrompt(context, self.service)
|
||||||
self.service.writeTraceLog("React Plan Selection Prompt", prompt)
|
self.writeTraceLog("React Plan Selection Prompt", prompt)
|
||||||
response = await self.service.callAiTextAdvanced(prompt)
|
# Centralized AI call for plan selection (use plan generation quality)
|
||||||
self.service.writeTraceLog("React Plan Selection Response", response)
|
response = await self.services.ai.callAi(
|
||||||
|
prompt=prompt,
|
||||||
|
documents=None,
|
||||||
|
options={
|
||||||
|
"process_type": "text",
|
||||||
|
"operation_type": "generate_plan",
|
||||||
|
"priority": "quality",
|
||||||
|
"compress_prompt": False,
|
||||||
|
"compress_documents": False,
|
||||||
|
"processing_mode": "detailed",
|
||||||
|
"max_cost": 0.10,
|
||||||
|
"max_processing_time": 30
|
||||||
|
}
|
||||||
|
)
|
||||||
|
self.writeTraceLog("React Plan Selection Response", response)
|
||||||
json_start = response.find('{') if response else -1
|
json_start = response.find('{') if response else -1
|
||||||
json_end = response.rfind('}') + 1 if response else 0
|
json_end = response.rfind('}') + 1 if response else 0
|
||||||
if json_start == -1 or json_end == 0:
|
if json_start == -1 or json_end == 0:
|
||||||
|
|
@ -454,9 +494,23 @@ class HandlingTasks:
|
||||||
"""Act: request minimal parameters then execute selected action."""
|
"""Act: request minimal parameters then execute selected action."""
|
||||||
action = selection.get('action', {})
|
action = selection.get('action', {})
|
||||||
params_prompt = createActionParameterPrompt(context, action, self.service)
|
params_prompt = createActionParameterPrompt(context, action, self.service)
|
||||||
self.service.writeTraceLog("React Parameters Prompt", params_prompt)
|
self.writeTraceLog("React Parameters Prompt", params_prompt)
|
||||||
params_resp = await self.service.callAiTextAdvanced(params_prompt)
|
# Centralized AI call for parameter suggestion (balanced analysis)
|
||||||
self.service.writeTraceLog("React Parameters Response", params_resp)
|
params_resp = await self.services.ai.callAi(
|
||||||
|
prompt=params_prompt,
|
||||||
|
documents=None,
|
||||||
|
options={
|
||||||
|
"process_type": "text",
|
||||||
|
"operation_type": "analyse_content",
|
||||||
|
"priority": "balanced",
|
||||||
|
"compress_prompt": True,
|
||||||
|
"compress_documents": False,
|
||||||
|
"processing_mode": "advanced",
|
||||||
|
"max_cost": 0.05,
|
||||||
|
"max_processing_time": 30
|
||||||
|
}
|
||||||
|
)
|
||||||
|
self.writeTraceLog("React Parameters Response", params_resp)
|
||||||
js = params_resp[params_resp.find('{'):params_resp.rfind('}')+1] if params_resp else '{}'
|
js = params_resp[params_resp.find('{'):params_resp.rfind('}')+1] if params_resp else '{}'
|
||||||
try:
|
try:
|
||||||
param_obj = json.loads(js)
|
param_obj = json.loads(js)
|
||||||
|
|
@ -508,9 +562,23 @@ class HandlingTasks:
|
||||||
async def refine_decide(self, context: TaskContext, observation: Dict[str, Any]) -> Dict[str, Any]:
|
async def refine_decide(self, context: TaskContext, observation: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
"""Refine: decide continue or stop, with reason"""
|
"""Refine: decide continue or stop, with reason"""
|
||||||
prompt = createRefinementPrompt(context, observation)
|
prompt = createRefinementPrompt(context, observation)
|
||||||
self.service.writeTraceLog("React Refinement Prompt", prompt)
|
self.writeTraceLog("React Refinement Prompt", prompt)
|
||||||
resp = await self.service.callAiTextAdvanced(prompt)
|
# Centralized AI call for refinement decision (balanced analysis)
|
||||||
self.service.writeTraceLog("React Refinement Response", resp)
|
resp = await self.services.ai.callAi(
|
||||||
|
prompt=prompt,
|
||||||
|
documents=None,
|
||||||
|
options={
|
||||||
|
"process_type": "text",
|
||||||
|
"operation_type": "analyse_content",
|
||||||
|
"priority": "balanced",
|
||||||
|
"compress_prompt": True,
|
||||||
|
"compress_documents": False,
|
||||||
|
"processing_mode": "advanced",
|
||||||
|
"max_cost": 0.05,
|
||||||
|
"max_processing_time": 30
|
||||||
|
}
|
||||||
|
)
|
||||||
|
self.writeTraceLog("React Refinement Response", resp)
|
||||||
js = resp[resp.find('{'):resp.rfind('}')+1] if resp else '{}'
|
js = resp[resp.find('{'):resp.rfind('}')+1] if resp else '{}'
|
||||||
try:
|
try:
|
||||||
decision = json.loads(js)
|
decision = json.loads(js)
|
||||||
|
|
@ -560,7 +628,7 @@ class HandlingTasks:
|
||||||
if task_step.userMessage:
|
if task_step.userMessage:
|
||||||
task_start_message["message"] += f"\n\n💬 {task_step.userMessage}"
|
task_start_message["message"] += f"\n\n💬 {task_step.userMessage}"
|
||||||
|
|
||||||
message = self.chatInterface.createMessage(task_start_message)
|
message = services.chatInterface.createMessage(task_start_message)
|
||||||
if message:
|
if message:
|
||||||
workflow.messages.append(message)
|
workflow.messages.append(message)
|
||||||
logger.info(f"Task start message created for task {task_index}")
|
logger.info(f"Task start message created for task {task_index}")
|
||||||
|
|
@ -590,7 +658,7 @@ class HandlingTasks:
|
||||||
decision = await self.refine_decide(context, observation)
|
decision = await self.refine_decide(context, observation)
|
||||||
# Telemetry: simple duration per step
|
# Telemetry: simple duration per step
|
||||||
duration = time.time() - t0
|
duration = time.time() - t0
|
||||||
self.chatInterface.createLog({
|
services.chatInterface.createLog({
|
||||||
"workflowId": workflow.id,
|
"workflowId": workflow.id,
|
||||||
"message": f"react_step_duration_sec={duration:.3f}",
|
"message": f"react_step_duration_sec={duration:.3f}",
|
||||||
"type": "info"
|
"type": "info"
|
||||||
|
|
@ -611,12 +679,12 @@ class HandlingTasks:
|
||||||
"actionNumber": step,
|
"actionNumber": step,
|
||||||
"actionProgress": "success" if result.success else "fail"
|
"actionProgress": "success" if result.success else "fail"
|
||||||
}
|
}
|
||||||
self.chatInterface.createMessage(msg)
|
services.chatInterface.createMessage(msg)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"React step {step} error: {e}")
|
logger.error(f"React step {step} error: {e}")
|
||||||
break
|
break
|
||||||
|
|
||||||
from modules.workflows._transfer.executionState import should_continue
|
from modules.workflows.processing.executionState import should_continue
|
||||||
if not should_continue(observation, last_review_dict, step, state.max_steps):
|
if not should_continue(observation, last_review_dict, step, state.max_steps):
|
||||||
break
|
break
|
||||||
step += 1
|
step += 1
|
||||||
|
|
@ -709,7 +777,7 @@ class HandlingTasks:
|
||||||
"actionNumber": action_number
|
"actionNumber": action_number
|
||||||
})
|
})
|
||||||
|
|
||||||
message = self.chatInterface.createMessage(action_start_message)
|
message = services.chatInterface.createMessage(action_start_message)
|
||||||
if message:
|
if message:
|
||||||
workflow.messages.append(message)
|
workflow.messages.append(message)
|
||||||
logger.info(f"Action start message created for action {action_number}")
|
logger.info(f"Action start message created for action {action_number}")
|
||||||
|
|
@ -763,7 +831,7 @@ class HandlingTasks:
|
||||||
"taskProgress": "success"
|
"taskProgress": "success"
|
||||||
}
|
}
|
||||||
|
|
||||||
message = self.chatInterface.createMessage(task_completion_message)
|
message = services.chatInterface.createMessage(task_completion_message)
|
||||||
if message:
|
if message:
|
||||||
workflow.messages.append(message)
|
workflow.messages.append(message)
|
||||||
logger.info(f"Task completion message created for task {task_index}")
|
logger.info(f"Task completion message created for task {task_index}")
|
||||||
|
|
@ -855,7 +923,7 @@ class HandlingTasks:
|
||||||
"taskProgress": "retry"
|
"taskProgress": "retry"
|
||||||
}
|
}
|
||||||
|
|
||||||
message = self.chatInterface.createMessage(retry_message)
|
message = services.chatInterface.createMessage(retry_message)
|
||||||
if message:
|
if message:
|
||||||
workflow.messages.append(message)
|
workflow.messages.append(message)
|
||||||
|
|
||||||
|
|
@ -908,7 +976,7 @@ class HandlingTasks:
|
||||||
}
|
}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
message = self.chatInterface.createMessage(message_data)
|
message = services.chatInterface.createMessage(message_data)
|
||||||
if message:
|
if message:
|
||||||
workflow.messages.append(message)
|
workflow.messages.append(message)
|
||||||
logger.info(f"Created user-facing retry message for failed task: {task_step.objective}")
|
logger.info(f"Created user-facing retry message for failed task: {task_step.objective}")
|
||||||
|
|
@ -962,7 +1030,7 @@ class HandlingTasks:
|
||||||
}
|
}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
message = self.chatInterface.createMessage(message_data)
|
message = services.chatInterface.createMessage(message_data)
|
||||||
if message:
|
if message:
|
||||||
workflow.messages.append(message)
|
workflow.messages.append(message)
|
||||||
logger.info(f"Created user-facing error message for failed task: {task_step.objective}")
|
logger.info(f"Created user-facing error message for failed task: {task_step.objective}")
|
||||||
|
|
@ -1024,15 +1092,29 @@ class HandlingTasks:
|
||||||
logger.info(f"Action Results Count: {len(review_context.action_results) if review_context.action_results else 0}")
|
logger.info(f"Action Results Count: {len(review_context.action_results) if review_context.action_results else 0}")
|
||||||
logger.info(f"Task Actions Count: {len(review_context.task_actions) if review_context.task_actions else 0}")
|
logger.info(f"Task Actions Count: {len(review_context.task_actions) if review_context.task_actions else 0}")
|
||||||
# Trace result review prompt
|
# Trace result review prompt
|
||||||
self.service.writeTraceLog("Result Review Prompt", prompt)
|
self.writeTraceLog("Result Review Prompt", prompt)
|
||||||
|
|
||||||
response = await self.service.callAiTextAdvanced(prompt)
|
# Centralized AI call: Result validation (balanced analysis)
|
||||||
|
response = await self.services.ai.callAi(
|
||||||
|
prompt=prompt,
|
||||||
|
documents=None,
|
||||||
|
options={
|
||||||
|
"process_type": "text",
|
||||||
|
"operation_type": "analyse_content",
|
||||||
|
"priority": "balanced",
|
||||||
|
"compress_prompt": True,
|
||||||
|
"compress_documents": False,
|
||||||
|
"processing_mode": "advanced",
|
||||||
|
"max_cost": 0.05,
|
||||||
|
"max_processing_time": 30
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
# Log result review response received
|
# Log result review response received
|
||||||
logger.info("=== RESULT REVIEW AI RESPONSE RECEIVED ===")
|
logger.info("=== RESULT REVIEW AI RESPONSE RECEIVED ===")
|
||||||
logger.info(f"Response length: {len(response) if response else 0}")
|
logger.info(f"Response length: {len(response) if response else 0}")
|
||||||
# Trace result review response
|
# Trace result review response
|
||||||
self.service.writeTraceLog("Result Review Response", response)
|
self.writeTraceLog("Result Review Response", response)
|
||||||
|
|
||||||
# Inline parseReviewResponse logic here
|
# Inline parseReviewResponse logic here
|
||||||
json_start = response.find('{')
|
json_start = response.find('{')
|
||||||
|
|
@ -1169,10 +1251,10 @@ class HandlingTasks:
|
||||||
actionData["execParameters"] = {}
|
actionData["execParameters"] = {}
|
||||||
|
|
||||||
# Use generic field separation based on TaskAction model
|
# Use generic field separation based on TaskAction model
|
||||||
simple_fields, object_fields = self.chatInterface._separate_object_fields(TaskAction, actionData)
|
simple_fields, object_fields = services.chatInterface._separate_object_fields(TaskAction, actionData)
|
||||||
|
|
||||||
# Create action in database
|
# Create action in database
|
||||||
createdAction = self.chatInterface.db.recordCreate(TaskAction, simple_fields)
|
createdAction = services.chatInterface.db.recordCreate(TaskAction, simple_fields)
|
||||||
|
|
||||||
# Convert to TaskAction model
|
# Convert to TaskAction model
|
||||||
return TaskAction(
|
return TaskAction(
|
||||||
|
|
@ -1229,7 +1311,7 @@ class HandlingTasks:
|
||||||
# Check workflow status before executing the action
|
# Check workflow status before executing the action
|
||||||
self._checkWorkflowStopped()
|
self._checkWorkflowStopped()
|
||||||
|
|
||||||
result = await self.service.executeAction(
|
result = await self.executeAction(
|
||||||
methodName=action.execMethod,
|
methodName=action.execMethod,
|
||||||
actionName=action.execAction,
|
actionName=action.execAction,
|
||||||
parameters=enhanced_parameters
|
parameters=enhanced_parameters
|
||||||
|
|
@ -1245,7 +1327,7 @@ class HandlingTasks:
|
||||||
"resultLabel": result_label,
|
"resultLabel": result_label,
|
||||||
"documentsCount": len(result.documents) if result.documents else 0
|
"documentsCount": len(result.documents) if result.documents else 0
|
||||||
}
|
}
|
||||||
self.service.writeTraceLog("Action Result", action_result_trace)
|
self.writeTraceLog("Action Result", action_result_trace)
|
||||||
|
|
||||||
# Process documents from the action result
|
# Process documents from the action result
|
||||||
created_documents = []
|
created_documents = []
|
||||||
|
|
@ -1276,7 +1358,7 @@ class HandlingTasks:
|
||||||
if created_documents:
|
if created_documents:
|
||||||
message.documents = created_documents
|
message.documents = created_documents
|
||||||
# Update the message in the database
|
# Update the message in the database
|
||||||
self.chatInterface.updateMessage(message.id, {"documents": [doc.dict() for doc in created_documents]})
|
services.chatInterface.updateMessage(message.id, {"documents": [doc.dict() for doc in created_documents]})
|
||||||
|
|
||||||
# Log action results
|
# Log action results
|
||||||
logger.info(f"Action completed successfully")
|
logger.info(f"Action completed successfully")
|
||||||
|
|
@ -1302,7 +1384,7 @@ class HandlingTasks:
|
||||||
message = await self.createActionMessage(action, result, workflow, result_label, [], task_step, task_index)
|
message = await self.createActionMessage(action, result, workflow, result_label, [], task_step, task_index)
|
||||||
|
|
||||||
# Create database log entry for action failure
|
# Create database log entry for action failure
|
||||||
self.chatInterface.createLog({
|
services.chatInterface.createLog({
|
||||||
"workflowId": workflow.id,
|
"workflowId": workflow.id,
|
||||||
"message": f"❌ **Task {task_num}**\n\n❌ **Action {action_num}/{total_actions}** failed: {result.error}",
|
"message": f"❌ **Task {task_num}**\n\n❌ **Action {action_num}/{total_actions}** failed: {result.error}",
|
||||||
"type": "error"
|
"type": "error"
|
||||||
|
|
@ -1398,7 +1480,7 @@ class HandlingTasks:
|
||||||
logger.info(f"Creating ERROR message: {message_text}")
|
logger.info(f"Creating ERROR message: {message_text}")
|
||||||
logger.info(f"Message data: {message_data}")
|
logger.info(f"Message data: {message_data}")
|
||||||
|
|
||||||
message = self.chatInterface.createMessage(message_data)
|
message = services.chatInterface.createMessage(message_data)
|
||||||
if message:
|
if message:
|
||||||
workflow.messages.append(message)
|
workflow.messages.append(message)
|
||||||
logger.info(f"Message created: {action.execMethod}.{action.execAction}")
|
logger.info(f"Message created: {action.execMethod}.{action.execAction}")
|
||||||
|
|
@ -1558,7 +1640,7 @@ class HandlingTasks:
|
||||||
self.workflow.totalActions = 0
|
self.workflow.totalActions = 0
|
||||||
|
|
||||||
# Update in database
|
# Update in database
|
||||||
self.chatInterface.updateWorkflow(self.workflow.id, update_data)
|
services.chatInterface.updateWorkflow(self.workflow.id, update_data)
|
||||||
logger.info(f"Updated workflow {self.workflow.id} after task plan created: {update_data}")
|
logger.info(f"Updated workflow {self.workflow.id} after task plan created: {update_data}")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
@ -1582,7 +1664,7 @@ class HandlingTasks:
|
||||||
self.workflow.totalActions = 0
|
self.workflow.totalActions = 0
|
||||||
|
|
||||||
# Update in database
|
# Update in database
|
||||||
self.chatInterface.updateWorkflow(self.workflow.id, update_data)
|
services.chatInterface.updateWorkflow(self.workflow.id, update_data)
|
||||||
logger.info(f"Updated workflow {self.workflow.id} before executing task {task_number}: {update_data}")
|
logger.info(f"Updated workflow {self.workflow.id} before executing task {task_number}: {update_data}")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
@ -1602,7 +1684,7 @@ class HandlingTasks:
|
||||||
self.workflow.totalActions = total_actions
|
self.workflow.totalActions = total_actions
|
||||||
|
|
||||||
# Update in database
|
# Update in database
|
||||||
self.chatInterface.updateWorkflow(self.workflow.id, update_data)
|
services.chatInterface.updateWorkflow(self.workflow.id, update_data)
|
||||||
logger.info(f"Updated workflow {self.workflow.id} after action planning: {update_data}")
|
logger.info(f"Updated workflow {self.workflow.id} after action planning: {update_data}")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
@ -1622,7 +1704,7 @@ class HandlingTasks:
|
||||||
self.workflow.currentAction = action_number
|
self.workflow.currentAction = action_number
|
||||||
|
|
||||||
# Update in database
|
# Update in database
|
||||||
self.chatInterface.updateWorkflow(self.workflow.id, update_data)
|
services.chatInterface.updateWorkflow(self.workflow.id, update_data)
|
||||||
logger.info(f"Updated workflow {self.workflow.id} before executing action {action_number}: {update_data}")
|
logger.info(f"Updated workflow {self.workflow.id} before executing action {action_number}: {update_data}")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
@ -1643,7 +1725,7 @@ class HandlingTasks:
|
||||||
|
|
||||||
# Update workflow object in database if we have changes
|
# Update workflow object in database if we have changes
|
||||||
if update_data:
|
if update_data:
|
||||||
self.chatInterface.updateWorkflow(self.workflow.id, update_data)
|
services.chatInterface.updateWorkflow(self.workflow.id, update_data)
|
||||||
logger.info(f"Updated workflow {self.workflow.id} totals in database: {update_data}")
|
logger.info(f"Updated workflow {self.workflow.id} totals in database: {update_data}")
|
||||||
|
|
||||||
logger.debug(f"Updated workflow totals: Tasks {self.workflow.totalTasks if hasattr(self.workflow, 'totalTasks') else 'N/A'}, Actions {self.workflow.totalActions if hasattr(self.workflow, 'totalActions') else 'N/A'}")
|
logger.debug(f"Updated workflow totals: Tasks {self.workflow.totalTasks if hasattr(self.workflow, 'totalTasks') else 'N/A'}, Actions {self.workflow.totalActions if hasattr(self.workflow, 'totalActions') else 'N/A'}")
|
||||||
|
|
@ -1662,7 +1744,7 @@ class HandlingTasks:
|
||||||
self.workflow.status = 'ready'
|
self.workflow.status = 'ready'
|
||||||
|
|
||||||
# Update workflow object in database with reset values
|
# Update workflow object in database with reset values
|
||||||
self.chatInterface.updateWorkflow(self.workflow.id, {
|
services.chatInterface.updateWorkflow(self.workflow.id, {
|
||||||
"currentRound": 0,
|
"currentRound": 0,
|
||||||
"currentTask": 0,
|
"currentTask": 0,
|
||||||
"currentAction": 0,
|
"currentAction": 0,
|
||||||
|
|
@ -1674,3 +1756,103 @@ class HandlingTasks:
|
||||||
logger.info("Workflow reset for new session - all values set to initial state and updated in database")
|
logger.info("Workflow reset for new session - all values set to initial state and updated in database")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error resetting workflow for new session: {str(e)}")
|
logger.error(f"Error resetting workflow for new session: {str(e)}")
|
||||||
|
|
||||||
|
# ===== Functions moved from serviceCenter =====
|
||||||
|
|
||||||
|
async def executeAction(self, methodName: str, actionName: str, parameters: Dict[str, Any]) -> ActionResult:
|
||||||
|
"""Execute a method action"""
|
||||||
|
try:
|
||||||
|
if methodName not in methods:
|
||||||
|
raise ValueError(f"Unknown method: {methodName}")
|
||||||
|
|
||||||
|
method = methods[methodName]
|
||||||
|
if actionName not in method['actions']:
|
||||||
|
raise ValueError(f"Unknown action: {actionName} for method {methodName}")
|
||||||
|
|
||||||
|
action = method['actions'][actionName]
|
||||||
|
|
||||||
|
# Execute the action
|
||||||
|
return await action['method'](parameters)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error executing method {methodName}.{actionName}: {str(e)}")
|
||||||
|
raise
|
||||||
|
|
||||||
|
def writeTraceLog(self, contextText: str, data: Any) -> None:
|
||||||
|
"""Write trace data to configured trace file if in debug mode"""
|
||||||
|
try:
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
from datetime import datetime, UTC
|
||||||
|
from modules.shared.configuration import APP_CONFIG
|
||||||
|
|
||||||
|
# Only write if logger is in debug mode
|
||||||
|
if logger.level > logging.DEBUG:
|
||||||
|
return
|
||||||
|
|
||||||
|
# Get log directory from configuration
|
||||||
|
logDir = APP_CONFIG.get("APP_LOGGING_LOG_DIR", "./")
|
||||||
|
if not os.path.isabs(logDir):
|
||||||
|
# If relative path, make it relative to the gateway directory
|
||||||
|
gatewayDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
logDir = os.path.join(gatewayDir, logDir)
|
||||||
|
|
||||||
|
# Ensure log directory exists
|
||||||
|
os.makedirs(logDir, exist_ok=True)
|
||||||
|
|
||||||
|
# Create trace file path
|
||||||
|
trace_file = os.path.join(logDir, "log_trace.log")
|
||||||
|
|
||||||
|
# Format the trace entry
|
||||||
|
timestamp = datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
|
||||||
|
trace_entry = f"[{timestamp}] {contextText}\n"
|
||||||
|
|
||||||
|
# Add data if provided
|
||||||
|
if data is not None:
|
||||||
|
if isinstance(data, (dict, list)):
|
||||||
|
import json
|
||||||
|
trace_entry += f"Data: {json.dumps(data, indent=2, default=str)}\n"
|
||||||
|
else:
|
||||||
|
trace_entry += f"Data: {str(data)}\n"
|
||||||
|
|
||||||
|
trace_entry += "-" * 80 + "\n\n"
|
||||||
|
|
||||||
|
# Write to trace file
|
||||||
|
with open(trace_file, "a", encoding="utf-8") as f:
|
||||||
|
f.write(trace_entry)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
# Don't log trace errors to avoid recursion
|
||||||
|
pass
|
||||||
|
|
||||||
|
def clearTraceLog(self) -> None:
|
||||||
|
"""Clear the trace log file"""
|
||||||
|
try:
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
from modules.shared.configuration import APP_CONFIG
|
||||||
|
|
||||||
|
# Get log directory from configuration
|
||||||
|
logDir = APP_CONFIG.get("APP_LOGGING_LOG_DIR", "./")
|
||||||
|
if not os.path.isabs(logDir):
|
||||||
|
# If relative path, make it relative to the gateway directory
|
||||||
|
gatewayDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
logDir = os.path.join(gatewayDir, logDir)
|
||||||
|
|
||||||
|
# Create trace file path
|
||||||
|
trace_file = os.path.join(logDir, "log_trace.log")
|
||||||
|
|
||||||
|
# Only clear if logger is in debug mode
|
||||||
|
if logger.level > logging.DEBUG:
|
||||||
|
# Delete file if not in debug mode
|
||||||
|
if os.path.exists(trace_file):
|
||||||
|
os.remove(trace_file)
|
||||||
|
return
|
||||||
|
|
||||||
|
# Create empty file if in debug mode
|
||||||
|
with open(trace_file, "w", encoding="utf-8") as f:
|
||||||
|
f.write("")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
# Don't log trace errors to avoid recursion
|
||||||
|
pass
|
||||||
|
|
@ -3,13 +3,177 @@
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
|
import importlib
|
||||||
|
import pkgutil
|
||||||
|
import inspect
|
||||||
from typing import Any, Dict, List
|
from typing import Any, Dict, List
|
||||||
from modules.interfaces.interfaceChatModel import TaskContext, ReviewContext
|
from modules.interfaces.interfaceChatModel import TaskContext, ReviewContext, ChatDocument, DocumentExchange
|
||||||
from modules.services.serviceDocument.documentUtility import getFileExtension
|
from modules.services.serviceDocument.documentUtility import getFileExtension
|
||||||
|
from modules.workflows.methods.methodBase import MethodBase
|
||||||
|
|
||||||
# Set up logger
|
# Set up logger
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Global methods catalog - moved from serviceCenter
|
||||||
|
methods = {}
|
||||||
|
|
||||||
|
def _discoverMethods(service_center):
|
||||||
|
"""Dynamically discover all method classes and their actions in modules methods package"""
|
||||||
|
try:
|
||||||
|
# Import the methods package
|
||||||
|
methodsPackage = importlib.import_module('modules.workflows.methods')
|
||||||
|
|
||||||
|
# Discover all modules in the package
|
||||||
|
for _, name, isPkg in pkgutil.iter_modules(methodsPackage.__path__):
|
||||||
|
if not isPkg and name.startswith('method'):
|
||||||
|
try:
|
||||||
|
# Import the module
|
||||||
|
module = importlib.import_module(f'modules.workflows.methods.{name}')
|
||||||
|
|
||||||
|
# Find all classes in the module that inherit from MethodBase
|
||||||
|
for itemName, item in inspect.getmembers(module):
|
||||||
|
if (inspect.isclass(item) and
|
||||||
|
issubclass(item, MethodBase) and
|
||||||
|
item != MethodBase):
|
||||||
|
# Instantiate the method
|
||||||
|
methodInstance = item(service_center)
|
||||||
|
|
||||||
|
# Discover actions from public methods
|
||||||
|
actions = {}
|
||||||
|
for methodName, method in inspect.getmembers(type(methodInstance), predicate=inspect.iscoroutinefunction):
|
||||||
|
if not methodName.startswith('_'):
|
||||||
|
# Bind the method to the instance
|
||||||
|
bound_method = method.__get__(methodInstance, type(methodInstance))
|
||||||
|
sig = inspect.signature(method)
|
||||||
|
params = {}
|
||||||
|
for paramName, param in sig.parameters.items():
|
||||||
|
if paramName not in ['self']:
|
||||||
|
# Get parameter type
|
||||||
|
paramType = param.annotation if param.annotation != param.empty else Any
|
||||||
|
|
||||||
|
# Get parameter description from docstring or default
|
||||||
|
paramDesc = None
|
||||||
|
if param.default != param.empty and hasattr(param.default, '__doc__'):
|
||||||
|
paramDesc = param.default.__doc__
|
||||||
|
|
||||||
|
params[paramName] = {
|
||||||
|
'type': paramType,
|
||||||
|
'required': param.default == param.empty,
|
||||||
|
'description': paramDesc,
|
||||||
|
'default': param.default if param.default != param.empty else None
|
||||||
|
}
|
||||||
|
|
||||||
|
actions[methodName] = {
|
||||||
|
'description': method.__doc__ or '',
|
||||||
|
'parameters': params,
|
||||||
|
'method': bound_method
|
||||||
|
}
|
||||||
|
|
||||||
|
# Add method instance with discovered actions
|
||||||
|
methods[methodInstance.name] = {
|
||||||
|
'instance': methodInstance,
|
||||||
|
'description': methodInstance.description,
|
||||||
|
'actions': actions
|
||||||
|
}
|
||||||
|
logger.info(f"Discovered method: {methodInstance.name} with {len(actions)} actions")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error loading method module {name}: {str(e)}", exc_info=True)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error discovering methods: {str(e)}")
|
||||||
|
|
||||||
|
def getMethodsList(service_center) -> List[str]:
|
||||||
|
"""Get list of available methods with their signatures in the required format"""
|
||||||
|
# Initialize methods if not already done
|
||||||
|
if not methods:
|
||||||
|
_discoverMethods(service_center)
|
||||||
|
|
||||||
|
methodList = []
|
||||||
|
for methodName, method in methods.items():
|
||||||
|
methodInstance = method['instance']
|
||||||
|
for actionName, action in method['actions'].items():
|
||||||
|
# Use the new signature format from MethodBase
|
||||||
|
signature = methodInstance.getActionSignature(actionName)
|
||||||
|
if signature:
|
||||||
|
methodList.append(signature)
|
||||||
|
return methodList
|
||||||
|
|
||||||
|
def getEnhancedDocumentContext(service_center) -> str:
|
||||||
|
"""Get enhanced document context formatted for action planning prompts with proper docList and docItem references"""
|
||||||
|
try:
|
||||||
|
document_list = service_center.getDocumentReferenceList()
|
||||||
|
|
||||||
|
# Build technical context string for AI action planning
|
||||||
|
context = "AVAILABLE DOCUMENTS:\n\n"
|
||||||
|
|
||||||
|
# Process chat exchanges (current round)
|
||||||
|
if document_list["chat"]:
|
||||||
|
context += "CURRENT ROUND DOCUMENTS:\n"
|
||||||
|
for exchange in document_list["chat"]:
|
||||||
|
# Generate docList reference for the exchange (using message ID and label)
|
||||||
|
# Find the message that corresponds to this exchange
|
||||||
|
message_id = None
|
||||||
|
for message in service_center.workflow.messages:
|
||||||
|
if hasattr(message, 'documentsLabel') and message.documentsLabel == exchange.documentsLabel:
|
||||||
|
message_id = message.id
|
||||||
|
break
|
||||||
|
|
||||||
|
if message_id:
|
||||||
|
doc_list_ref = f"docList:{message_id}:{exchange.documentsLabel}"
|
||||||
|
else:
|
||||||
|
# Fallback to label-only format if message ID not found
|
||||||
|
doc_list_ref = f"docList:{exchange.documentsLabel}"
|
||||||
|
|
||||||
|
logger.debug(f"Using document label for action planning: {exchange.documentsLabel} (message_id: {message_id})")
|
||||||
|
context += f"- {doc_list_ref} contains:\n"
|
||||||
|
# Generate docItem references for each document in the list
|
||||||
|
for doc_ref in exchange.documents:
|
||||||
|
if doc_ref.startswith("docItem:"):
|
||||||
|
context += f" - {doc_ref}\n"
|
||||||
|
else:
|
||||||
|
# Convert to proper docItem format if needed
|
||||||
|
context += f" - docItem:{doc_ref}\n"
|
||||||
|
context += "\n"
|
||||||
|
|
||||||
|
# Process history exchanges (previous rounds)
|
||||||
|
if document_list["history"]:
|
||||||
|
context += "WORKFLOW HISTORY DOCUMENTS:\n"
|
||||||
|
for exchange in document_list["history"]:
|
||||||
|
# Generate docList reference for the exchange (using message ID and label)
|
||||||
|
# Find the message that corresponds to this exchange
|
||||||
|
message_id = None
|
||||||
|
for message in service_center.workflow.messages:
|
||||||
|
if hasattr(message, 'documentsLabel') and message.documentsLabel == exchange.documentsLabel:
|
||||||
|
message_id = message.id
|
||||||
|
break
|
||||||
|
|
||||||
|
if message_id:
|
||||||
|
doc_list_ref = f"docList:{message_id}:{exchange.documentsLabel}"
|
||||||
|
else:
|
||||||
|
# Fallback to label-only format if message ID not found
|
||||||
|
doc_list_ref = f"docList:{exchange.documentsLabel}"
|
||||||
|
|
||||||
|
logger.debug(f"Using history document label for action planning: {exchange.documentsLabel} (message_id: {message_id})")
|
||||||
|
context += f"- {doc_list_ref} contains:\n"
|
||||||
|
# Generate docItem references for each document in the list
|
||||||
|
for doc_ref in exchange.documents:
|
||||||
|
if doc_ref.startswith("docItem:"):
|
||||||
|
context += f" - {doc_ref}\n"
|
||||||
|
else:
|
||||||
|
# Convert to proper docItem format if needed
|
||||||
|
context += f" - docItem:{doc_ref}\n"
|
||||||
|
context += "\n"
|
||||||
|
|
||||||
|
if not document_list["chat"] and not document_list["history"]:
|
||||||
|
context += "NO DOCUMENTS AVAILABLE - This workflow has no documents to process.\n"
|
||||||
|
|
||||||
|
return context
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error generating enhanced document context: {str(e)}")
|
||||||
|
return "NO DOCUMENTS AVAILABLE - Error generating document context."
|
||||||
|
|
||||||
# Prompt creation helpers
|
# Prompt creation helpers
|
||||||
|
|
||||||
def _getAvailableDocuments(workflow) -> str:
|
def _getAvailableDocuments(workflow) -> str:
|
||||||
|
|
@ -275,7 +439,7 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
|
||||||
|
|
||||||
async def createActionDefinitionPrompt(context: TaskContext, service) -> str:
|
async def createActionDefinitionPrompt(context: TaskContext, service) -> str:
|
||||||
"""Create enhanced prompt for action generation with user-friendly messages and enhanced document context"""
|
"""Create enhanced prompt for action generation with user-friendly messages and enhanced document context"""
|
||||||
methodList = service.getMethodsList()
|
methodList = getMethodsList(service)
|
||||||
method_actions = {}
|
method_actions = {}
|
||||||
for sig in methodList:
|
for sig in methodList:
|
||||||
if '.' in sig:
|
if '.' in sig:
|
||||||
|
|
@ -283,10 +447,10 @@ async def createActionDefinitionPrompt(context: TaskContext, service) -> str:
|
||||||
action = rest.split('(')[0]
|
action = rest.split('(')[0]
|
||||||
method_actions.setdefault(method, []).append((action, sig))
|
method_actions.setdefault(method, []).append((action, sig))
|
||||||
|
|
||||||
messageSummary = await service.summarizeChat(context.workflow.messages) if context.workflow else ""
|
messageSummary = await service.methodService.summarizeChat(context.workflow.messages) if context.workflow else ""
|
||||||
|
|
||||||
# Get enhanced document context using the new method
|
# Get enhanced document context using the new method
|
||||||
available_documents_str = service.getEnhancedDocumentContext()
|
available_documents_str = getEnhancedDocumentContext(service)
|
||||||
|
|
||||||
# Get available documents and connections using generic functions
|
# Get available documents and connections using generic functions
|
||||||
available_docs_summary = _getAvailableDocuments(context.workflow)
|
available_docs_summary = _getAvailableDocuments(context.workflow)
|
||||||
|
|
@ -299,7 +463,7 @@ async def createActionDefinitionPrompt(context: TaskContext, service) -> str:
|
||||||
for method, actions in method_actions.items():
|
for method, actions in method_actions.items():
|
||||||
available_methods_json[method] = {}
|
available_methods_json[method] = {}
|
||||||
# Get the method instance for accessing docstrings
|
# Get the method instance for accessing docstrings
|
||||||
method_instance = service.methods.get(method, {}).get('instance') if hasattr(service, 'methods') else None
|
method_instance = methods.get(method, {}).get('instance') if methods else None
|
||||||
|
|
||||||
for action, sig in actions:
|
for action, sig in actions:
|
||||||
# Parse the signature to extract parameters
|
# Parse the signature to extract parameters
|
||||||
|
|
@ -405,7 +569,7 @@ Previous review feedback:
|
||||||
user_language = service.user.language if service and service.user else 'en'
|
user_language = service.user.language if service and service.user else 'en'
|
||||||
|
|
||||||
# Get current workflow context for dynamic examples
|
# Get current workflow context for dynamic examples
|
||||||
workflow_context = service.getWorkflowContext()
|
workflow_context = service.methodService.getWorkflowContext()
|
||||||
current_round = workflow_context.get('currentRound', 0)
|
current_round = workflow_context.get('currentRound', 0)
|
||||||
current_task = workflow_context.get('currentTask', 1)
|
current_task = workflow_context.get('currentTask', 1)
|
||||||
|
|
||||||
|
|
@ -730,7 +894,7 @@ def createResultReviewPrompt(context: ReviewContext, service) -> str:
|
||||||
document_validation_summary += f" - No documents produced\n"
|
document_validation_summary += f" - No documents produced\n"
|
||||||
|
|
||||||
# Get enhanced document context using the new method
|
# Get enhanced document context using the new method
|
||||||
document_context = service.getEnhancedDocumentContext()
|
document_context = getEnhancedDocumentContext(service)
|
||||||
|
|
||||||
# Get user language from service
|
# Get user language from service
|
||||||
user_language = service.user.language if service and service.user else 'en'
|
user_language = service.user.language if service and service.user else 'en'
|
||||||
|
|
@ -837,7 +1001,7 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
|
||||||
def _build_tiny_catalog(service) -> str:
|
def _build_tiny_catalog(service) -> str:
|
||||||
"""Return minimal tool catalog: method -> { action -> [paramNames] }"""
|
"""Return minimal tool catalog: method -> { action -> [paramNames] }"""
|
||||||
try:
|
try:
|
||||||
method_signatures = service.getMethodsList()
|
method_signatures = getMethodsList(service)
|
||||||
except Exception:
|
except Exception:
|
||||||
method_signatures = []
|
method_signatures = []
|
||||||
catalog: Dict[str, Dict[str, List[str]]] = {}
|
catalog: Dict[str, Dict[str, List[str]]] = {}
|
||||||
|
|
@ -890,8 +1054,8 @@ def createActionParameterPrompt(context: TaskContext, selected_action: Dict[str,
|
||||||
|
|
||||||
# Get action signature from service center
|
# Get action signature from service center
|
||||||
action_signature = ""
|
action_signature = ""
|
||||||
if service and hasattr(service, 'methods') and method in service.methods:
|
if service and method in methods:
|
||||||
method_instance = service.methods[method]['instance']
|
method_instance = methods[method]['instance']
|
||||||
action_signature = method_instance.getActionSignature(name)
|
action_signature = method_instance.getActionSignature(name)
|
||||||
|
|
||||||
return f"""Provide only the required parameters for this action.
|
return f"""Provide only the required parameters for this action.
|
||||||
|
|
@ -6,20 +6,20 @@ import asyncio
|
||||||
|
|
||||||
from modules.interfaces.interfaceAppObjects import User
|
from modules.interfaces.interfaceAppObjects import User
|
||||||
|
|
||||||
from modules.interfaces.interfaceChatModel import (UserInputRequest, ChatMessage, ChatWorkflow, TaskItem, TaskStatus)
|
from modules.interfaces.interfaceChatModel import (UserInputRequest, ChatMessage, ChatWorkflow, TaskItem, TaskStatus, ChatDocument)
|
||||||
from modules.interfaces.interfaceChatObjects import ChatObjects
|
from modules.interfaces.interfaceChatObjects import ChatObjects
|
||||||
from modules.workflows._transfer.handlingTasks import HandlingTasks, WorkflowStoppedException
|
from modules.workflows.processing.handlingTasks import HandlingTasks, WorkflowStoppedException
|
||||||
from modules.interfaces.interfaceChatModel import WorkflowResult
|
from modules.interfaces.interfaceChatModel import WorkflowResult
|
||||||
from modules.shared.timezoneUtils import get_utc_timestamp
|
from modules.shared.timezoneUtils import get_utc_timestamp
|
||||||
|
import uuid
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
class WorkflowManager:
|
class WorkflowManager:
|
||||||
"""Manager for workflow processing and coordination"""
|
"""Manager for workflow processing and coordination"""
|
||||||
|
|
||||||
def __init__(self, chatInterface: ChatObjects, currentUser: User):
|
def __init__(self, services):
|
||||||
self.chatInterface = chatInterface
|
self.services = services
|
||||||
self.currentUser = currentUser
|
|
||||||
self.handlingTasks = None
|
self.handlingTasks = None
|
||||||
|
|
||||||
# Exported functions
|
# Exported functions
|
||||||
|
|
@ -32,19 +32,22 @@ class WorkflowManager:
|
||||||
currentTime = get_utc_timestamp()
|
currentTime = get_utc_timestamp()
|
||||||
|
|
||||||
if workflowId:
|
if workflowId:
|
||||||
workflow = self.chatInterface.getWorkflow(workflowId)
|
workflow = self.services.getWorkflow(workflowId)
|
||||||
if not workflow:
|
if not workflow:
|
||||||
raise ValueError(f"Workflow {workflowId} not found")
|
raise ValueError(f"Workflow {workflowId} not found")
|
||||||
|
|
||||||
|
# Add workflow to services
|
||||||
|
self.services.workflow = workflow
|
||||||
|
|
||||||
if workflow.status == "running":
|
if workflow.status == "running":
|
||||||
logger.info(f"Stopping running workflow {workflowId} before processing new prompt")
|
logger.info(f"Stopping running workflow {workflowId} before processing new prompt")
|
||||||
workflow.status = "stopped"
|
workflow.status = "stopped"
|
||||||
workflow.lastActivity = currentTime
|
workflow.lastActivity = currentTime
|
||||||
self.chatInterface.updateWorkflow(workflowId, {
|
self.services.updateWorkflow(workflowId, {
|
||||||
"status": "stopped",
|
"status": "stopped",
|
||||||
"lastActivity": currentTime
|
"lastActivity": currentTime
|
||||||
})
|
})
|
||||||
self.chatInterface.createLog({
|
self.services.createLog({
|
||||||
"workflowId": workflowId,
|
"workflowId": workflowId,
|
||||||
"message": "Workflow stopped for new prompt",
|
"message": "Workflow stopped for new prompt",
|
||||||
"type": "info",
|
"type": "info",
|
||||||
|
|
@ -54,17 +57,17 @@ class WorkflowManager:
|
||||||
await asyncio.sleep(0.1)
|
await asyncio.sleep(0.1)
|
||||||
|
|
||||||
newRound = workflow.currentRound + 1
|
newRound = workflow.currentRound + 1
|
||||||
self.chatInterface.updateWorkflow(workflowId, {
|
self.services.updateWorkflow(workflowId, {
|
||||||
"status": "running",
|
"status": "running",
|
||||||
"lastActivity": currentTime,
|
"lastActivity": currentTime,
|
||||||
"currentRound": newRound
|
"currentRound": newRound
|
||||||
})
|
})
|
||||||
|
|
||||||
workflow = self.chatInterface.getWorkflow(workflowId)
|
workflow = self.services.getWorkflow(workflowId)
|
||||||
if not workflow:
|
if not workflow:
|
||||||
raise ValueError(f"Failed to reload workflow {workflowId} after update")
|
raise ValueError(f"Failed to reload workflow {workflowId} after update")
|
||||||
|
|
||||||
self.chatInterface.createLog({
|
self.services.createLog({
|
||||||
"workflowId": workflowId,
|
"workflowId": workflowId,
|
||||||
"message": f"Workflow resumed (round {workflow.currentRound})",
|
"message": f"Workflow resumed (round {workflow.currentRound})",
|
||||||
"type": "info",
|
"type": "info",
|
||||||
|
|
@ -82,7 +85,7 @@ class WorkflowManager:
|
||||||
"currentAction": 0,
|
"currentAction": 0,
|
||||||
"totalTasks": 0,
|
"totalTasks": 0,
|
||||||
"totalActions": 0,
|
"totalActions": 0,
|
||||||
"mandateId": self.chatInterface.mandateId,
|
"mandateId": self.services.mandateId,
|
||||||
"messageIds": [],
|
"messageIds": [],
|
||||||
"workflowMode": workflowMode,
|
"workflowMode": workflowMode,
|
||||||
"maxSteps": 5 if workflowMode == "React" else 1, # Set maxSteps for React mode
|
"maxSteps": 5 if workflowMode == "React" else 1, # Set maxSteps for React mode
|
||||||
|
|
@ -96,12 +99,15 @@ class WorkflowManager:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
workflow = self.chatInterface.createWorkflow(workflowData)
|
workflow = self.services.createWorkflow(workflowData)
|
||||||
logger.info(f"Created workflow with mode: {getattr(workflow, 'workflowMode', 'NOT_SET')}")
|
logger.info(f"Created workflow with mode: {getattr(workflow, 'workflowMode', 'NOT_SET')}")
|
||||||
logger.info(f"Workflow data passed: {workflowData.get('workflowMode', 'NOT_IN_DATA')}")
|
logger.info(f"Workflow data passed: {workflowData.get('workflowMode', 'NOT_IN_DATA')}")
|
||||||
workflow.currentRound = 1
|
workflow.currentRound = 1
|
||||||
self.chatInterface.updateWorkflow(workflow.id, {"currentRound": 1})
|
self.services.updateWorkflow(workflow.id, {"currentRound": 1})
|
||||||
self.chatInterface.updateWorkflowStats(workflow.id, bytesSent=0, bytesReceived=0)
|
self.services.updateWorkflowStats(workflow.id, bytesSent=0, bytesReceived=0)
|
||||||
|
|
||||||
|
# Add workflow to services
|
||||||
|
self.services.workflow = workflow
|
||||||
|
|
||||||
# Start workflow processing asynchronously
|
# Start workflow processing asynchronously
|
||||||
asyncio.create_task(self._workflowProcess(userInput, workflow))
|
asyncio.create_task(self._workflowProcess(userInput, workflow))
|
||||||
|
|
@ -114,17 +120,17 @@ class WorkflowManager:
|
||||||
async def workflowStop(self, workflowId: str) -> ChatWorkflow:
|
async def workflowStop(self, workflowId: str) -> ChatWorkflow:
|
||||||
"""Stops a running workflow."""
|
"""Stops a running workflow."""
|
||||||
try:
|
try:
|
||||||
workflow = self.chatInterface.getWorkflow(workflowId)
|
workflow = self.services.getWorkflow(workflowId)
|
||||||
if not workflow:
|
if not workflow:
|
||||||
raise ValueError(f"Workflow {workflowId} not found")
|
raise ValueError(f"Workflow {workflowId} not found")
|
||||||
|
|
||||||
workflow.status = "stopped"
|
workflow.status = "stopped"
|
||||||
workflow.lastActivity = get_utc_timestamp()
|
workflow.lastActivity = get_utc_timestamp()
|
||||||
self.chatInterface.updateWorkflow(workflowId, {
|
self.services.updateWorkflow(workflowId, {
|
||||||
"status": "stopped",
|
"status": "stopped",
|
||||||
"lastActivity": workflow.lastActivity
|
"lastActivity": workflow.lastActivity
|
||||||
})
|
})
|
||||||
self.chatInterface.createLog({
|
self.services.createLog({
|
||||||
"workflowId": workflowId,
|
"workflowId": workflowId,
|
||||||
"message": "Workflow stopped",
|
"message": "Workflow stopped",
|
||||||
"type": "warning",
|
"type": "warning",
|
||||||
|
|
@ -141,8 +147,7 @@ class WorkflowManager:
|
||||||
async def _workflowProcess(self, userInput: UserInputRequest, workflow: ChatWorkflow) -> None:
|
async def _workflowProcess(self, userInput: UserInputRequest, workflow: ChatWorkflow) -> None:
|
||||||
"""Process a workflow with user input"""
|
"""Process a workflow with user input"""
|
||||||
try:
|
try:
|
||||||
self.handlingTasks = HandlingTasks(self.chatInterface, self.currentUser, workflow)
|
self.handlingTasks = HandlingTasks(self.services, workflow)
|
||||||
self.handlingTasks.service.setUserLanguage(userInput.userLanguage)
|
|
||||||
message = await self._sendFirstMessage(userInput, workflow)
|
message = await self._sendFirstMessage(userInput, workflow)
|
||||||
task_plan = await self._planTasks(userInput, workflow)
|
task_plan = await self._planTasks(userInput, workflow)
|
||||||
workflow_result = await self._executeTasks(task_plan, workflow)
|
workflow_result = await self._executeTasks(task_plan, workflow)
|
||||||
|
|
@ -187,20 +192,20 @@ class WorkflowManager:
|
||||||
}
|
}
|
||||||
|
|
||||||
# Create message first to get messageId
|
# Create message first to get messageId
|
||||||
message = self.chatInterface.createMessage(messageData)
|
message = self.services.createMessage(messageData)
|
||||||
if message:
|
if message:
|
||||||
workflow.messages.append(message)
|
workflow.messages.append(message)
|
||||||
|
|
||||||
# Clear trace log for new workflow session
|
# Clear trace log for new workflow session
|
||||||
self.handlingTasks.service.clearTraceLog()
|
self.handlingTasks.clearTraceLog()
|
||||||
|
|
||||||
# Add documents if any, now with messageId
|
# Add documents if any, now with messageId
|
||||||
if userInput.listFileId:
|
if userInput.listFileId:
|
||||||
# Process file IDs and add to message data
|
# Process file IDs and add to message data
|
||||||
documents = await self.handlingTasks.service.processFileIds(userInput.listFileId, message.id)
|
documents = await self._processFileIds(userInput.listFileId, message.id)
|
||||||
message.documents = documents
|
message.documents = documents
|
||||||
# Update the message with documents in database
|
# Update the message with documents in database
|
||||||
self.chatInterface.updateMessage(message.id, {"documents": [doc.to_dict() for doc in documents]})
|
self.services.updateMessage(message.id, {"documents": [doc.to_dict() for doc in documents]})
|
||||||
|
|
||||||
return message
|
return message
|
||||||
else:
|
else:
|
||||||
|
|
@ -302,14 +307,14 @@ class WorkflowManager:
|
||||||
"taskProgress": "stopped",
|
"taskProgress": "stopped",
|
||||||
"actionProgress": "stopped"
|
"actionProgress": "stopped"
|
||||||
}
|
}
|
||||||
message = self.chatInterface.createMessage(stopped_message)
|
message = self.services.createMessage(stopped_message)
|
||||||
if message:
|
if message:
|
||||||
workflow.messages.append(message)
|
workflow.messages.append(message)
|
||||||
|
|
||||||
# Update workflow status to stopped
|
# Update workflow status to stopped
|
||||||
workflow.status = "stopped"
|
workflow.status = "stopped"
|
||||||
workflow.lastActivity = get_utc_timestamp()
|
workflow.lastActivity = get_utc_timestamp()
|
||||||
self.chatInterface.updateWorkflow(workflow.id, {
|
self.services.updateWorkflow(workflow.id, {
|
||||||
"status": "stopped",
|
"status": "stopped",
|
||||||
"lastActivity": workflow.lastActivity
|
"lastActivity": workflow.lastActivity
|
||||||
})
|
})
|
||||||
|
|
@ -334,14 +339,14 @@ class WorkflowManager:
|
||||||
"taskProgress": "stopped",
|
"taskProgress": "stopped",
|
||||||
"actionProgress": "stopped"
|
"actionProgress": "stopped"
|
||||||
}
|
}
|
||||||
message = self.chatInterface.createMessage(stopped_message)
|
message = self.services.createMessage(stopped_message)
|
||||||
if message:
|
if message:
|
||||||
workflow.messages.append(message)
|
workflow.messages.append(message)
|
||||||
|
|
||||||
# Update workflow status to stopped
|
# Update workflow status to stopped
|
||||||
workflow.status = "stopped"
|
workflow.status = "stopped"
|
||||||
workflow.lastActivity = get_utc_timestamp()
|
workflow.lastActivity = get_utc_timestamp()
|
||||||
self.chatInterface.updateWorkflow(workflow.id, {
|
self.services.updateWorkflow(workflow.id, {
|
||||||
"status": "stopped",
|
"status": "stopped",
|
||||||
"lastActivity": workflow.lastActivity,
|
"lastActivity": workflow.lastActivity,
|
||||||
"totalTasks": workflow.totalTasks,
|
"totalTasks": workflow.totalTasks,
|
||||||
|
|
@ -349,7 +354,7 @@ class WorkflowManager:
|
||||||
})
|
})
|
||||||
|
|
||||||
# Add stopped log entry
|
# Add stopped log entry
|
||||||
self.chatInterface.createLog({
|
self.services.createLog({
|
||||||
"workflowId": workflow.id,
|
"workflowId": workflow.id,
|
||||||
"message": "Workflow stopped by user",
|
"message": "Workflow stopped by user",
|
||||||
"type": "warning",
|
"type": "warning",
|
||||||
|
|
@ -376,14 +381,14 @@ class WorkflowManager:
|
||||||
"taskProgress": "fail",
|
"taskProgress": "fail",
|
||||||
"actionProgress": "fail"
|
"actionProgress": "fail"
|
||||||
}
|
}
|
||||||
message = self.chatInterface.createMessage(error_message)
|
message = self.services.createMessage(error_message)
|
||||||
if message:
|
if message:
|
||||||
workflow.messages.append(message)
|
workflow.messages.append(message)
|
||||||
|
|
||||||
# Update workflow status to failed
|
# Update workflow status to failed
|
||||||
workflow.status = "failed"
|
workflow.status = "failed"
|
||||||
workflow.lastActivity = get_utc_timestamp()
|
workflow.lastActivity = get_utc_timestamp()
|
||||||
self.chatInterface.updateWorkflow(workflow.id, {
|
self.services.updateWorkflow(workflow.id, {
|
||||||
"status": "failed",
|
"status": "failed",
|
||||||
"lastActivity": workflow.lastActivity,
|
"lastActivity": workflow.lastActivity,
|
||||||
"totalTasks": workflow.totalTasks,
|
"totalTasks": workflow.totalTasks,
|
||||||
|
|
@ -391,7 +396,7 @@ class WorkflowManager:
|
||||||
})
|
})
|
||||||
|
|
||||||
# Add failed log entry
|
# Add failed log entry
|
||||||
self.chatInterface.createLog({
|
self.services.createLog({
|
||||||
"workflowId": workflow.id,
|
"workflowId": workflow.id,
|
||||||
"message": f"Workflow failed: {workflow_result.error or 'Unknown error'}",
|
"message": f"Workflow failed: {workflow_result.error or 'Unknown error'}",
|
||||||
"type": "error",
|
"type": "error",
|
||||||
|
|
@ -423,14 +428,14 @@ class WorkflowManager:
|
||||||
"taskProgress": "fail",
|
"taskProgress": "fail",
|
||||||
"actionProgress": "fail"
|
"actionProgress": "fail"
|
||||||
}
|
}
|
||||||
message = self.chatInterface.createMessage(error_message)
|
message = self.services.createMessage(error_message)
|
||||||
if message:
|
if message:
|
||||||
workflow.messages.append(message)
|
workflow.messages.append(message)
|
||||||
|
|
||||||
# Update workflow status to failed
|
# Update workflow status to failed
|
||||||
workflow.status = "failed"
|
workflow.status = "failed"
|
||||||
workflow.lastActivity = get_utc_timestamp()
|
workflow.lastActivity = get_utc_timestamp()
|
||||||
self.chatInterface.updateWorkflow(workflow.id, {
|
self.services.updateWorkflow(workflow.id, {
|
||||||
"status": "failed",
|
"status": "failed",
|
||||||
"lastActivity": workflow.lastActivity,
|
"lastActivity": workflow.lastActivity,
|
||||||
"totalTasks": workflow.totalTasks,
|
"totalTasks": workflow.totalTasks,
|
||||||
|
|
@ -468,7 +473,7 @@ class WorkflowManager:
|
||||||
}
|
}
|
||||||
|
|
||||||
# Create message using interface
|
# Create message using interface
|
||||||
message = self.chatInterface.createMessage(messageData)
|
message = self.services.createMessage(messageData)
|
||||||
if message:
|
if message:
|
||||||
workflow.messages.append(message)
|
workflow.messages.append(message)
|
||||||
|
|
||||||
|
|
@ -477,13 +482,13 @@ class WorkflowManager:
|
||||||
workflow.lastActivity = get_utc_timestamp()
|
workflow.lastActivity = get_utc_timestamp()
|
||||||
|
|
||||||
# Update workflow in database
|
# Update workflow in database
|
||||||
self.chatInterface.updateWorkflow(workflow.id, {
|
self.services.updateWorkflow(workflow.id, {
|
||||||
"status": "completed",
|
"status": "completed",
|
||||||
"lastActivity": workflow.lastActivity
|
"lastActivity": workflow.lastActivity
|
||||||
})
|
})
|
||||||
|
|
||||||
# Add completion log entry
|
# Add completion log entry
|
||||||
self.chatInterface.createLog({
|
self.services.createLog({
|
||||||
"workflowId": workflow.id,
|
"workflowId": workflow.id,
|
||||||
"message": "Workflow completed",
|
"message": "Workflow completed",
|
||||||
"type": "success",
|
"type": "success",
|
||||||
|
|
@ -529,7 +534,7 @@ class WorkflowManager:
|
||||||
# Update workflow status to stopped
|
# Update workflow status to stopped
|
||||||
workflow.status = "stopped"
|
workflow.status = "stopped"
|
||||||
workflow.lastActivity = get_utc_timestamp()
|
workflow.lastActivity = get_utc_timestamp()
|
||||||
self.chatInterface.updateWorkflow(workflow.id, {
|
self.services.updateWorkflow(workflow.id, {
|
||||||
"status": "stopped",
|
"status": "stopped",
|
||||||
"lastActivity": workflow.lastActivity,
|
"lastActivity": workflow.lastActivity,
|
||||||
"totalTasks": workflow.totalTasks,
|
"totalTasks": workflow.totalTasks,
|
||||||
|
|
@ -554,12 +559,12 @@ class WorkflowManager:
|
||||||
"taskProgress": "pending",
|
"taskProgress": "pending",
|
||||||
"actionProgress": "pending"
|
"actionProgress": "pending"
|
||||||
}
|
}
|
||||||
message = self.chatInterface.createMessage(stopped_message)
|
message = self.services.createMessage(stopped_message)
|
||||||
if message:
|
if message:
|
||||||
workflow.messages.append(message)
|
workflow.messages.append(message)
|
||||||
|
|
||||||
# Add log entry
|
# Add log entry
|
||||||
self.chatInterface.createLog({
|
self.services.createLog({
|
||||||
"workflowId": workflow.id,
|
"workflowId": workflow.id,
|
||||||
"message": "Workflow stopped by user",
|
"message": "Workflow stopped by user",
|
||||||
"type": "warning",
|
"type": "warning",
|
||||||
|
|
@ -574,7 +579,7 @@ class WorkflowManager:
|
||||||
# Update workflow status to failed
|
# Update workflow status to failed
|
||||||
workflow.status = "failed"
|
workflow.status = "failed"
|
||||||
workflow.lastActivity = get_utc_timestamp()
|
workflow.lastActivity = get_utc_timestamp()
|
||||||
self.chatInterface.updateWorkflow(workflow.id, {
|
self.services.updateWorkflow(workflow.id, {
|
||||||
"status": "failed",
|
"status": "failed",
|
||||||
"lastActivity": workflow.lastActivity,
|
"lastActivity": workflow.lastActivity,
|
||||||
"totalTasks": workflow.totalTasks,
|
"totalTasks": workflow.totalTasks,
|
||||||
|
|
@ -599,12 +604,12 @@ class WorkflowManager:
|
||||||
"taskProgress": "fail",
|
"taskProgress": "fail",
|
||||||
"actionProgress": "fail"
|
"actionProgress": "fail"
|
||||||
}
|
}
|
||||||
message = self.chatInterface.createMessage(error_message)
|
message = self.services.createMessage(error_message)
|
||||||
if message:
|
if message:
|
||||||
workflow.messages.append(message)
|
workflow.messages.append(message)
|
||||||
|
|
||||||
# Add error log entry
|
# Add error log entry
|
||||||
self.chatInterface.createLog({
|
self.services.createLog({
|
||||||
"workflowId": workflow.id,
|
"workflowId": workflow.id,
|
||||||
"message": f"Workflow failed: {str(error)}",
|
"message": f"Workflow failed: {str(error)}",
|
||||||
"type": "error",
|
"type": "error",
|
||||||
|
|
@ -613,3 +618,32 @@ class WorkflowManager:
|
||||||
})
|
})
|
||||||
|
|
||||||
raise
|
raise
|
||||||
|
|
||||||
|
async def _processFileIds(self, fileIds: List[str], messageId: str = None) -> List[ChatDocument]:
|
||||||
|
"""Process file IDs from existing files and return ChatDocument objects"""
|
||||||
|
documents = []
|
||||||
|
for fileId in fileIds:
|
||||||
|
try:
|
||||||
|
# Get file info from service
|
||||||
|
fileInfo = self.handlingTasks.service.methodService.getFileInfo(fileId)
|
||||||
|
if fileInfo:
|
||||||
|
# Create document directly with all file attributes
|
||||||
|
document = ChatDocument(
|
||||||
|
id=str(uuid.uuid4()),
|
||||||
|
messageId=messageId or "", # Use provided messageId or empty string as fallback
|
||||||
|
fileId=fileId,
|
||||||
|
fileName=fileInfo.get("fileName", "unknown"),
|
||||||
|
fileSize=fileInfo.get("size", 0),
|
||||||
|
mimeType=fileInfo.get("mimeType", "application/octet-stream")
|
||||||
|
)
|
||||||
|
documents.append(document)
|
||||||
|
logger.info(f"Processed file ID {fileId} -> {document.fileName}")
|
||||||
|
else:
|
||||||
|
logger.warning(f"No file info found for file ID {fileId}")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error processing file ID {fileId}: {str(e)}")
|
||||||
|
return documents
|
||||||
|
|
||||||
|
def _setUserLanguage(self, language: str) -> None:
|
||||||
|
"""Set user language for the service center"""
|
||||||
|
self.handlingTasks.service.user.language = language
|
||||||
|
|
|
||||||
|
|
@ -13,7 +13,7 @@ def parse_line(line: str) -> Tuple[Optional[str], Optional[str], Optional[dateti
|
||||||
Extract (logger, function, timestamp) from a log line.
|
Extract (logger, function, timestamp) from a log line.
|
||||||
|
|
||||||
Expected format examples (single line):
|
Expected format examples (single line):
|
||||||
2025-09-18 16:35:04 - INFO - modules.workflows._transfer.handlingTasks - Task 1 - Starting action 3/4 - D:\\Athi\\...\\handlingTasks.py:572 - executeTask
|
2025-09-18 16:35:04 - INFO - modules.workflows.processing.handlingTasks - Task 1 - Starting action 3/4 - D:\\Athi\\...\\handlingTasks.py:572 - executeTask
|
||||||
|
|
||||||
Returns (logger, function, timestamp_dt) or (None, None, None) if not matched.
|
Returns (logger, function, timestamp_dt) or (None, None, None) if not matched.
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue