Refactor full workflow engine 3.0

ValueOn AG 2025-09-23 22:47:54 +02:00
parent 1019cb7a65
commit 472353fea0
40 changed files with 2605 additions and 3235 deletions

app.py (49 changed lines)
View file

@@ -4,7 +4,7 @@ os.environ["NUMEXPR_MAX_THREADS"] = "12"
from fastapi import FastAPI, HTTPException, Depends, Body, status, Response
from fastapi.middleware.cors import CORSMiddleware
from contextlib import asynccontextmanager
from zoneinfo import ZoneInfo
import logging
from logging.handlers import RotatingFileHandler
@@ -12,8 +12,7 @@ from datetime import timedelta, datetime
import pathlib
from modules.shared.configuration import APP_CONFIG
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.triggers.cron import CronTrigger
from modules.shared.eventManagement import eventManager
class DailyRotatingFileHandler(RotatingFileHandler):
@@ -202,46 +201,15 @@ instanceLabel = APP_CONFIG.get("APP_ENV_LABEL")
# Define lifespan context manager for application startup/shutdown events
@asynccontextmanager
async def lifespan(app: FastAPI):
# Startup logic
logger.info("Application is starting up")
# Setup APScheduler for JIRA sync
scheduler = AsyncIOScheduler(timezone=ZoneInfo("Europe/Zurich"))
try:
from modules.features.syncDelta.mainSyncDelta import perform_sync_jira_delta_group
# Schedule sync every 20 minutes (at minutes 00, 20, 40)
scheduler.add_job(
perform_sync_jira_delta_group,
CronTrigger(minute="0,20,40"),
id="jira_delta_group_sync",
replace_existing=True,
coalesce=True,
max_instances=1,
misfire_grace_time=1800,
)
scheduler.start()
logger.info("APScheduler started (jira_delta_group_sync every 20 minutes at 00, 20, 40)")
# Run initial sync on startup (non-blocking failure)
try:
logger.info("Running initial JIRA sync on app startup...")
await perform_sync_jira_delta_group()
logger.info("Initial JIRA sync completed successfully")
except Exception as e:
logger.error(f"Initial JIRA sync failed: {str(e)}")
except Exception as e:
logger.error(f"Failed to initialize scheduler or JIRA sync: {str(e)}")
eventManager.start()
yield
# Shutdown logic
eventManager.stop()
logger.info("Application has been shut down")
try:
if 'scheduler' in locals() and scheduler.running:
scheduler.shutdown(wait=False)
logger.info("APScheduler stopped")
except Exception as e:
logger.error(f"Error shutting down scheduler: {str(e)}")
# START APP
app = FastAPI(
@@ -250,7 +218,6 @@ app = FastAPI(
lifespan=lifespan
)
# Parse CORS origins from environment variable
def get_allowed_origins():
origins_str = APP_CONFIG.get("APP_ALLOWED_ORIGINS", "http://localhost:8080")
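
Note: the net effect of the app.py changes is that the lifespan hook no longer owns the scheduler; it only starts and stops the shared event manager. A minimal sketch of the resulting pattern, assuming the eventManager interface implied by this diff (start/stop around a scheduler it owns):

from contextlib import asynccontextmanager
from fastapi import FastAPI
from modules.shared.eventManagement import eventManager  # module path taken from the diff

@asynccontextmanager
async def lifespan(app: FastAPI):
    # Startup: feature modules have already registered their cron jobs on import
    eventManager.start()
    yield
    # Shutdown
    eventManager.stop()

app = FastAPI(lifespan=lifespan)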

View file

@@ -24,8 +24,11 @@ async def chatStart(interfaceChat, currentUser: User, userInput: UserInputReques
"""
try:
from modules.workflows.workflowManager import WorkflowManager
workflowManager = WorkflowManager(interfaceChat, currentUser)
return await workflowManager.workflowStart(userInput, workflowId, workflowMode)
from modules.services import getInterface as getServices
services = getServices(currentUser, None)
workflowManager = WorkflowManager(services)
workflow = await workflowManager.workflowStart(userInput, workflowId, workflowMode)
return workflow
except Exception as e:
logger.error(f"Error starting chat: {str(e)}")
raise
@@ -34,7 +37,9 @@ async def chatStop(interfaceChat, currentUser: User, workflowId: str) -> ChatWor
"""Stops a running chat."""
try:
from modules.workflows.workflowManager import WorkflowManager
workflowManager = WorkflowManager(interfaceChat, currentUser)
from modules.services import getInterface as getServices
services = getServices(currentUser, None)
workflowManager = WorkflowManager(services)
return await workflowManager.workflowStop(workflowId)
except Exception as e:
logger.error(f"Error stopping chat: {str(e)}")
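
Note: both chat entry points now construct the WorkflowManager through the centralized services facade instead of a per-call chat interface. A condensed sketch of the shared pattern; the getServices call comes from this diff, and the returned object is assumed to bundle the per-user service interfaces:

from modules.services import getInterface as getServices
from modules.workflows.workflowManager import WorkflowManager

def buildWorkflowManager(currentUser):
    # Hypothetical helper; both call sites above pass None as the second argument
    services = getServices(currentUser, None)
    return WorkflowManager(services)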

View file

@@ -1,587 +1,285 @@
"""
Data Neutralization Service
Handles file processing for data neutralization including SharePoint integration
"""
import logging
import os
import uuid
from typing import Dict, List, Any, Optional, Tuple
from datetime import datetime
from pathlib import Path
import mimetypes
from typing import Any, Dict, List, Optional
from modules.interfaces.interfaceAppObjects import getInterface
from modules.interfaces.interfaceAppModel import User, DataNeutraliserConfig, DataNeutralizerAttributes
from modules.services.serviceNeutralization.neutralizer import DataAnonymizer
from modules.shared.timezoneUtils import get_utc_timestamp
from modules.interfaces.interfaceAppModel import User
from modules.services.serviceNeutralization.mainNeutralization import NeutralizationService
logger = logging.getLogger(__name__)
class NeutralizationService:
"""Service for handling data neutralization operations"""
def __init__(self, current_user: User):
"""Initialize the service with user context"""
self.current_user = current_user
self.app_interface = getInterface(current_user)
def get_config(self) -> Optional[DataNeutraliserConfig]:
"""Get the neutralization configuration for the current user's mandate"""
return self.app_interface.getNeutralizationConfig()
def save_config(self, config_data: Dict[str, Any]) -> DataNeutraliserConfig:
"""Save or update the neutralization configuration"""
return self.app_interface.createOrUpdateNeutralizationConfig(config_data)
def neutralize_text(self, text: str, file_id: Optional[str] = None) -> Dict[str, Any]:
"""Neutralize text content and return results with attribute mappings"""
return self.app_interface.neutralizeText(text, file_id)
def get_attributes(self, file_id: Optional[str] = None) -> List[DataNeutralizerAttributes]:
"""Get neutralization attributes, optionally filtered by file ID"""
return self.app_interface.getNeutralizationAttributes(file_id)
def resolve_text(self, text: str) -> str:
"""Resolve UIDs in neutralized text back to original text"""
return self.app_interface.resolveNeutralizedText(text)
async def process_sharepoint_files(self, source_path: str, target_path: str) -> Dict[str, Any]:
"""
Process files from SharePoint source path, neutralize them, and store in target path
Args:
source_path: SharePoint path to read files from
target_path: SharePoint path to store neutralized files
Returns:
Dictionary with processing results
"""
class NeutralizationPlayground:
"""Feature/UI wrapper around NeutralizationService for playground & routes."""
def __init__(self, currentUser: User):
self.currentUser = currentUser
self.service = NeutralizationService(currentUser)
def processText(self, text: str) -> Dict[str, Any]:
return self.service.processText(text)
def processFiles(self, fileIds: List[str]) -> Dict[str, Any]:
results: List[Dict[str, Any]] = []
errors: List[str] = []
for fileId in fileIds:
try:
res = self.service.processFile(fileId)
results.append({
'file_id': fileId,
'neutralized_file_name': res.get('neutralized_file_name'),
'attributes_count': len(res.get('attributes', []))
})
except Exception as e:
logger.error(f"Error processing file {fileId}: {str(e)}")
errors.append(f"{fileId}: {str(e)}")
return {
'success': len(errors) == 0,
'total_files': len(fileIds),
'successful_files': len(results),
'failed_files': len(errors),
'results': results,
'errors': errors,
}
async def processSharepointFiles(self, sourcePath: str, targetPath: str) -> Dict[str, Any]:
from modules.features.neutralizePlayground.sharepoint import SharepointProcessor
processor = SharepointProcessor(self.currentUser, self.service)
return await processor.processSharepointFiles(sourcePath, targetPath)
# Cleanup attributes
def cleanAttributes(self, fileId: str) -> bool:
if not self.service.app_interface:
return False
return self.service.app_interface.deleteNeutralizationAttributes(fileId)
# Stats
def getStats(self) -> Dict[str, Any]:
try:
logger.info(f"Processing SharePoint files from {source_path} to {target_path}")
# Get user's SharePoint connection that matches the source path
sharepoint_connection = await self._get_sharepoint_connection(source_path)
if not sharepoint_connection:
allAttributes = self.service._getAttributes()
patternCounts: Dict[str, int] = {}
for attr in allAttributes:
patternType = attr.patternType
patternCounts[patternType] = patternCounts.get(patternType, 0) + 1
uniqueFiles = set(attr.fileId for attr in allAttributes if attr.fileId)
return {
'total_attributes': len(allAttributes),
'unique_files': len(uniqueFiles),
'pattern_counts': patternCounts,
'mandate_id': self.currentUser.mandateId if self.currentUser else None,
}
except Exception as e:
logger.error(f"Error getting stats: {str(e)}")
return {
'total_attributes': 0,
'unique_files': 0,
'pattern_counts': {},
'error': str(e),
}
# Internal SharePoint helper module separated to keep feature logic tidy
class SharepointProcessor:
def __init__(self, currentUser: User, service: NeutralizationService):
self.currentUser = currentUser
self.service = service
async def processSharepointFiles(self, sourcePath: str, targetPath: str) -> Dict[str, Any]:
try:
logger.info(f"Processing SharePoint files from {sourcePath} to {targetPath}")
connection = await self._getSharepointConnection(sourcePath)
if not connection:
return {
"success": False,
"message": "No SharePoint connection found for user",
"processed_files": 0,
"errors": ["No SharePoint connection found"]
'success': False,
'message': 'No SharePoint connection found for user',
'processed_files': 0,
'errors': ['No SharePoint connection found'],
}
logger.info(f"Using SharePoint connection: {sharepoint_connection.get('id')} for path: {source_path}")
# Get SharePoint access token
sharepoint_token = self.app_interface.getConnectionToken(sharepoint_connection["id"])
if not sharepoint_token:
from modules.security.tokenManager import TokenManager
token = TokenManager().getFreshToken(self.service.app_interface, connection['id'])
if not token:
return {
"success": False,
"message": "No SharePoint access token found",
"processed_files": 0,
"errors": ["No SharePoint access token found"]
'success': False,
'message': 'No SharePoint access token found',
'processed_files': 0,
'errors': ['No SharePoint access token found'],
}
# Process files asynchronously
return await self._process_sharepoint_files_async(
source_path, target_path, sharepoint_token.tokenAccess
)
return await self._processSharepointFilesAsync(sourcePath, targetPath, token.tokenAccess)
except Exception as e:
logger.error(f"Error processing SharePoint files: {str(e)}")
return {
"success": False,
"message": f"Error processing SharePoint files: {str(e)}",
"processed_files": 0,
"errors": [str(e)]
'success': False,
'message': f'Error processing SharePoint files: {str(e)}',
'processed_files': 0,
'errors': [str(e)],
}
async def _get_sharepoint_connection(self, sharepoint_path: str = None):
"""Get user's SharePoint connection that matches the given path"""
async def _getSharepointConnection(self, sharepointPath: str = None):
try:
# Get all user connections
from modules.interfaces.interfaceAppModel import UserConnection
connections = self.app_interface.db.getRecordset(
connections = self.service.app_interface.db.getRecordset(
UserConnection,
recordFilter={"userId": self.app_interface.userId}
recordFilter={"userId": self.service.app_interface.userId}
)
# Find all Microsoft connections
msft_connections = [conn for conn in connections if conn.get("authority") == "msft"]
if not msft_connections:
logger.warning("No Microsoft connections found for user")
msftConnections = [c for c in connections if c.get('authority') == 'msft']
if not msftConnections:
logger.warning('No Microsoft connections found for user')
return None
if len(msft_connections) == 1:
logger.info(f"Found single Microsoft connection: {msft_connections[0].get('id')}")
return msft_connections[0]
# If multiple connections and we have a path, try to match
if sharepoint_path:
return await self._match_connection_to_path(msft_connections, sharepoint_path)
# If no path provided, return the first one
logger.info(f"Multiple Microsoft connections found, using first one: {msft_connections[0].get('id')}")
return msft_connections[0]
except Exception as e:
logger.error(f"Error getting SharePoint connection: {str(e)}")
if len(msftConnections) == 1:
logger.info(f"Found single Microsoft connection: {msftConnections[0].get('id')}")
return msftConnections[0]
if sharepointPath:
return await self._matchConnectionToPath(msftConnections, sharepointPath)
logger.info(f"Multiple Microsoft connections found, using first one: {msftConnections[0].get('id')}")
return msftConnections[0]
except Exception:
logger.error('Error getting SharePoint connection')
return None
async def _match_connection_to_path(self, connections: list, sharepoint_path: str):
"""Match a connection to the SharePoint path by testing access"""
async def _matchConnectionToPath(self, connections: list, sharepointPath: str):
try:
# Extract domain from the path
from urllib.parse import urlparse
parsed_url = urlparse(sharepoint_path)
target_domain = parsed_url.netloc.lower()
logger.info(f"Looking for connection matching domain: {target_domain}")
# Try each connection to see which one can access the site
targetDomain = urlparse(sharepointPath).netloc.lower()
logger.info(f"Looking for connection matching domain: {targetDomain}")
from modules.security.tokenManager import TokenManager
for connection in connections:
try:
# Get token for this connection
token = self.app_interface.getConnectionToken(connection["id"])
token = TokenManager().getFreshToken(self.service.app_interface, connection['id'])
if not token:
continue
# Test if this connection can access the SharePoint site
if await self._test_sharepoint_access(token.tokenAccess, sharepoint_path):
logger.info(f"Found matching connection for domain {target_domain}: {connection.get('id')}")
if await self._testSharepointAccess(token.tokenAccess, sharepointPath):
logger.info(f"Found matching connection for domain {targetDomain}: {connection.get('id')}")
return connection
except Exception as e:
except Exception:
continue
# If no specific match found, return the first connection
logger.warning(f"No specific connection match found for {target_domain}, using first available")
logger.warning(f"No specific connection match found for {targetDomain}, using first available")
return connections[0]
except Exception as e:
logger.error(f"Error matching connection to path: {str(e)}")
except Exception:
logger.error('Error matching connection to path')
return connections[0] if connections else None
async def _test_sharepoint_access(self, access_token: str, sharepoint_path: str) -> bool:
"""Test if the access token can access the given SharePoint path"""
async def _testSharepointAccess(self, accessToken: str, sharepointPath: str) -> bool:
try:
return await self._test_sharepoint_access_async(access_token, sharepoint_path)
except Exception as e:
return await self._testSharepointAccessAsync(accessToken, sharepointPath)
except Exception:
return False
async def _test_sharepoint_access_async(self, access_token: str, sharepoint_path: str) -> bool:
"""Async test for SharePoint access"""
async def _testSharepointAccessAsync(self, accessToken: str, sharepointPath: str) -> bool:
try:
from modules.connectors.connectorSharepoint import ConnectorSharepoint
connector = ConnectorSharepoint(access_token=access_token)
# Parse the path to get site URL
site_url, _ = self._parse_sharepoint_path(sharepoint_path)
if not site_url:
from modules.services.serviceSharepoint.mainSharepoint import SharepointService
connector = SharepointService(access_token=accessToken)
siteUrl, _ = self._parseSharepointPath(sharepointPath)
if not siteUrl:
return False
# Try to find the site
site_info = await connector.find_site_by_web_url(site_url)
return site_info is not None
except Exception as e:
siteInfo = await connector.find_site_by_web_url(siteUrl)
return siteInfo is not None
except Exception:
return False
async def _process_sharepoint_files_async(self, source_path: str, target_path: str, access_token: str) -> Dict[str, Any]:
"""Process SharePoint files asynchronously"""
async def _processSharepointFilesAsync(self, sourcePath: str, targetPath: str, accessToken: str) -> Dict[str, Any]:
try:
import asyncio
from modules.connectors.connectorSharepoint import ConnectorSharepoint
# Initialize SharePoint connector
connector = ConnectorSharepoint(access_token=access_token)
# Parse source and target paths to extract site and folder info
source_site, source_folder = self._parse_sharepoint_path(source_path)
target_site, target_folder = self._parse_sharepoint_path(target_path)
if not source_site or not target_site:
return {
"success": False,
"message": "Invalid SharePoint path format",
"processed_files": 0,
"errors": ["Invalid SharePoint path format"]
}
# Find source site
source_site_info = await connector.find_site_by_web_url(source_site)
if not source_site_info:
return {
"success": False,
"message": f"Source site not found: {source_site}",
"processed_files": 0,
"errors": [f"Source site not found: {source_site}"]
}
# Find target site
target_site_info = await connector.find_site_by_web_url(target_site)
if not target_site_info:
return {
"success": False,
"message": f"Target site not found: {target_site}",
"processed_files": 0,
"errors": [f"Target site not found: {target_site}"]
}
# List files in source folder
logger.info(f"Listing files in folder: {source_folder} for site: {source_site_info['id']}")
files = await connector.list_folder_contents(source_site_info["id"], source_folder)
# If no files found, try listing the root folder to see what's available
from modules.services.serviceSharepoint.mainSharepoint import SharepointService
connector = SharepointService(access_token=accessToken)
sourceSite, sourceFolder = self._parseSharepointPath(sourcePath)
targetSite, targetFolder = self._parseSharepointPath(targetPath)
if not sourceSite or not targetSite:
return {'success': False, 'message': 'Invalid SharePoint path format', 'processed_files': 0, 'errors': ['Invalid SharePoint path format']}
sourceSiteInfo = await connector.find_site_by_web_url(sourceSite)
if not sourceSiteInfo:
return {'success': False, 'message': f'Source site not found: {sourceSite}', 'processed_files': 0, 'errors': [f'Source site not found: {sourceSite}']}
targetSiteInfo = await connector.find_site_by_web_url(targetSite)
if not targetSiteInfo:
return {'success': False, 'message': f'Target site not found: {targetSite}', 'processed_files': 0, 'errors': [f'Target site not found: {targetSite}']}
logger.info(f"Listing files in folder: {sourceFolder} for site: {sourceSiteInfo['id']}")
files = await connector.list_folder_contents(sourceSiteInfo['id'], sourceFolder)
if not files:
logger.warning(f"No files found in folder '{source_folder}', trying root folder")
files = await connector.list_folder_contents(source_site_info["id"], "")
logger.warning(f"No files found in folder '{sourceFolder}', trying root folder")
files = await connector.list_folder_contents(sourceSiteInfo['id'], '')
if files:
# List available folders for debugging
folders = [f for f in files if f.get("type") == "folder"]
folder_names = [f.get('name') for f in folders]
logger.info(f"Available folders in root: {folder_names}")
# Format folder list for better UI display
folder_list = ", ".join(folder_names) if folder_names else "None"
folders = [f for f in files if f.get('type') == 'folder']
folderNames = [f.get('name') for f in folders]
logger.info(f"Available folders in root: {folderNames}")
folderList = ", ".join(folderNames) if folderNames else "None"
return {
"success": False,
"message": f"Folder '{source_folder}' not found. Available folders in root: {folder_list}",
"processed_files": 0,
"errors": [f"Folder '{source_folder}' not found. Available folders: {folder_list}"],
"available_folders": folder_names
'success': False,
'message': f"Folder '{sourceFolder}' not found. Available folders in root: {folderList}",
'processed_files': 0,
'errors': [f"Folder '{sourceFolder}' not found. Available folders: {folderList}"],
'available_folders': folderNames,
}
else:
return {
"success": False,
"message": f"No files found in source folder: {source_folder}",
"processed_files": 0,
"errors": [f"No files found in source folder: {source_folder}"]
}
# Filter for text files only
text_files = [f for f in files if f.get("type") == "file" and self._is_text_file(f.get("name", ""))]
if not text_files:
return {
"success": False,
"message": "No text files found in source folder",
"processed_files": 0,
"errors": ["No text files found in source folder"]
}
# Process files in parallel for better performance
processed_files = []
errors = []
# Create tasks for parallel processing
async def process_single_file(file_info):
"""Process a single file - download, neutralize, upload"""
return {'success': False, 'message': f'No files found in source folder: {sourceFolder}', 'processed_files': 0, 'errors': [f'No files found in source folder: {sourceFolder}']}
textFiles = [f for f in files if f.get('type') == 'file']
processed: List[Dict[str, Any]] = []
errors: List[str] = []
async def _processSingle(fileInfo: Dict[str, Any]):
try:
# Download file
file_content = await connector.download_file(source_site_info["id"], file_info["id"])
if not file_content:
return {"error": f"Failed to download file: {file_info['name']}"}
# Convert to text
fileContent = await connector.download_file(sourceSiteInfo['id'], fileInfo['id'])
if not fileContent:
return {'error': f"Failed to download file: {fileInfo['name']}"}
try:
text_content = file_content.decode('utf-8')
textContent = fileContent.decode('utf-8')
except UnicodeDecodeError:
text_content = file_content.decode('latin-1')
# Neutralize the text
neutralization_result = self.app_interface.neutralizeText(text_content, file_info["id"])
# Create neutralized filename
neutralized_filename = f"neutralized_{file_info['name']}"
# Upload neutralized file
neutralized_content = neutralization_result["neutralized_text"].encode('utf-8')
upload_result = await connector.upload_file(
target_site_info["id"],
target_folder,
neutralized_filename,
neutralized_content
)
if "error" in upload_result:
return {"error": f"Failed to upload neutralized file: {neutralized_filename} - {upload_result['error']}"}
else:
return {
"success": True,
"original_name": file_info["name"],
"neutralized_name": neutralized_filename,
"attributes_count": len(neutralization_result.get("attributes", []))
}
textContent = fileContent.decode('latin-1')
result = self.service._neutralizeText(textContent, 'text')
neutralizedFilename = f"neutralized_{fileInfo['name']}"
uploadResult = await connector.upload_file(targetSiteInfo['id'], targetFolder, neutralizedFilename, result['neutralized_text'].encode('utf-8'))
if 'error' in uploadResult:
return {'error': f"Failed to upload neutralized file: {neutralizedFilename} - {uploadResult['error']}"}
return {
'success': True,
'original_name': fileInfo['name'],
'neutralized_name': neutralizedFilename,
'attributes_count': len(result.get('attributes', [])),
}
except Exception as e:
error_msg = f"Error processing file {file_info['name']}: {str(e)}"
logger.error(error_msg)
return {"error": error_msg}
# Process all files in parallel
logger.info(f"Processing {len(text_files)} files in parallel...")
tasks = [process_single_file(file_info) for file_info in text_files]
return {'error': f"Error processing file {fileInfo['name']}: {str(e)}"}
tasks = [_processSingle(f) for f in textFiles]
results = await asyncio.gather(*tasks, return_exceptions=True)
# Process results
for i, result in enumerate(results):
if isinstance(result, Exception):
error_msg = f"Exception processing file {text_files[i]['name']}: {str(result)}"
errors.append(error_msg)
logger.error(error_msg)
elif isinstance(result, dict) and "error" in result:
errors.append(result["error"])
elif isinstance(result, dict) and result.get("success"):
processed_files.append({
"original_name": result["original_name"],
"neutralized_name": result["neutralized_name"],
"attributes_count": result["attributes_count"]
for i, r in enumerate(results):
if isinstance(r, Exception):
errors.append(f"Exception processing file {textFiles[i]['name']}: {str(r)}")
elif isinstance(r, dict) and 'error' in r:
errors.append(r['error'])
elif isinstance(r, dict) and r.get('success'):
processed.append({
'original_name': r['original_name'],
'neutralized_name': r['neutralized_name'],
'attributes_count': r['attributes_count'],
})
logger.info(f"Successfully processed file: {result['original_name']} -> {result['neutralized_name']}")
else:
error_msg = f"Unknown result processing file {text_files[i]['name']}: {result}"
errors.append(error_msg)
logger.error(error_msg)
errors.append(f"Unknown result processing file {textFiles[i]['name']}: {r}")
return {
"success": len(processed_files) > 0,
"message": f"Processed {len(processed_files)} files successfully",
"processed_files": len(processed_files),
"files": processed_files,
"errors": errors
'success': len(processed) > 0,
'message': f"Processed {len(processed)} files successfully",
'processed_files': len(processed),
'files': processed,
'errors': errors,
}
except Exception as e:
logger.error(f"Error in async SharePoint processing: {str(e)}")
return {
"success": False,
"message": f"Error in async SharePoint processing: {str(e)}",
"processed_files": 0,
"errors": [str(e)]
}
def _parse_sharepoint_path(self, path: str) -> tuple[str, str]:
"""Parse SharePoint path to extract site URL and folder path"""
return {'success': False, 'message': f'Error in async SharePoint processing: {str(e)}', 'processed_files': 0, 'errors': [str(e)]}
def _parseSharepointPath(self, path: str) -> tuple[str, str]:
try:
# Expected format: https://domain.sharepoint.com/sites/sitename/folder/path
if not path.startswith("https://"):
if not path.startswith('https://'):
return None, None
# Remove query parameters
if "?" in path:
path = path.split("?")[0]
# Split by /sites/
if "/sites/" not in path:
if '?' in path:
path = path.split('?')[0]
if '/sites/' not in path:
return None, None
parts = path.split("/sites/", 1)
parts = path.split('/sites/', 1)
if len(parts) != 2:
return None, None
# Extract domain and site name
domain = parts[0].replace("https://", "")
site_name = parts[1].split("/")[0]
# Create proper site URL for Graph API
site_url = f"https://{domain}/sites/{site_name}"
# Extract folder path (everything after the site name)
folder_parts = parts[1].split("/")[1:]
folder_path = "/".join(folder_parts) if folder_parts else ""
# URL decode the folder path
domain = parts[0].replace('https://', '')
siteName = parts[1].split('/')[0]
siteUrl = f"https://{domain}/sites/{siteName}"
folderParts = parts[1].split('/')[1:]
from urllib.parse import unquote
folder_path = unquote(folder_path)
return site_url, folder_path
except Exception as e:
logger.error(f"Error parsing SharePoint path '{path}': {str(e)}")
folderPath = unquote('/'.join(folderParts) if folderParts else '')
return siteUrl, folderPath
except Exception:
logger.error(f"Error parsing SharePoint path '{path}'")
return None, None
def _is_text_file(self, filename: str) -> bool:
"""Check if file is a text file based on extension"""
text_extensions = [
'.txt', '.csv', '.json', '.xml', '.md', '.log',
'.doc', '.docx', '.rtf', '.odt', # Document formats
'.html', '.htm', '.css', '.js', '.ts', '.py', '.java', '.cpp', '.c', '.h', # Code files
'.ini', '.cfg', '.conf', '.properties', # Config files
'.sql', '.yaml', '.yml', '.toml', # Data/config files
'.ps1', '.bat', '.sh', '.bash' # Script files
]
return any(filename.lower().endswith(ext) for ext in text_extensions)
def process_file_content(self, file_content: bytes, file_name: str, mime_type: str) -> Dict[str, Any]:
"""
Process file content for neutralization
Args:
file_content: Binary file content
file_name: Name of the file
mime_type: MIME type of the file
Returns:
Dictionary with neutralization results
"""
try:
# Determine content type based on MIME type
content_type = self._get_content_type_from_mime(mime_type)
# Decode content to text
try:
text_content = file_content.decode('utf-8')
except UnicodeDecodeError:
# Try with different encodings
for encoding in ['latin-1', 'cp1252', 'iso-8859-1']:
try:
text_content = file_content.decode(encoding)
break
except UnicodeDecodeError:
continue
else:
raise ValueError("Unable to decode file content")
# Generate a temporary file ID for tracking
temp_file_id = str(uuid.uuid4())
# Neutralize the content
neutralization_result = self.neutralize_text(text_content, temp_file_id)
# Encode the neutralized content back to bytes
neutralized_content = neutralization_result["neutralized_text"].encode('utf-8')
# Generate neutralized file name
neutralized_file_name = f"neutralized_{file_name}"
return {
"success": True,
"original_content": text_content,
"neutralized_content": neutralization_result["neutralized_text"],
"neutralized_file_name": neutralized_file_name,
"attributes": neutralization_result["attributes"],
"mapping": neutralization_result["mapping"],
"file_id": temp_file_id
}
except Exception as e:
logger.error(f"Error processing file content: {str(e)}")
return {
"success": False,
"error": str(e),
"original_content": None,
"neutralized_content": None
}
def _get_content_type_from_mime(self, mime_type: str) -> str:
"""Determine content type from MIME type for neutralization processing"""
if mime_type.startswith('text/'):
return 'text'
elif mime_type in ['application/json', 'application/xml', 'text/xml']:
return 'json' if 'json' in mime_type else 'xml'
elif mime_type in ['text/csv', 'application/csv']:
return 'csv'
else:
return 'text' # Default to text processing
def batch_neutralize_files(self, files_data: List[Dict[str, Any]]) -> Dict[str, Any]:
"""
Process multiple files for neutralization
Args:
files_data: List of dictionaries containing file information
Each dict should have: content, name, mime_type
Returns:
Dictionary with batch processing results
"""
try:
results = []
total_files = len(files_data)
successful_files = 0
errors = []
for file_data in files_data:
try:
result = self.process_file_content(
file_data['content'],
file_data['name'],
file_data['mime_type']
)
if result['success']:
successful_files += 1
results.append({
'file_name': file_data['name'],
'neutralized_file_name': result['neutralized_file_name'],
'file_id': result['file_id'],
'attributes_count': len(result['attributes'])
})
else:
errors.append(f"Failed to process {file_data['name']}: {result['error']}")
except Exception as e:
error_msg = f"Error processing {file_data['name']}: {str(e)}"
errors.append(error_msg)
logger.error(error_msg)
return {
"success": len(errors) == 0,
"total_files": total_files,
"successful_files": successful_files,
"failed_files": len(errors),
"results": results,
"errors": errors
}
except Exception as e:
logger.error(f"Error in batch neutralization: {str(e)}")
return {
"success": False,
"total_files": len(files_data),
"successful_files": 0,
"failed_files": len(files_data),
"results": [],
"errors": [str(e)]
}
def cleanup_file_attributes(self, file_id: str) -> bool:
"""Clean up neutralization attributes for a specific file"""
return self.app_interface.deleteNeutralizationAttributes(file_id)
def get_processing_stats(self) -> Dict[str, Any]:
"""Get statistics about neutralization processing"""
try:
# Get all attributes for the current mandate
all_attributes = self.get_attributes()
# Group by pattern type
pattern_counts = {}
for attr in all_attributes:
pattern_type = attr.patternType
pattern_counts[pattern_type] = pattern_counts.get(pattern_type, 0) + 1
# Get unique files
unique_files = set(attr.fileId for attr in all_attributes if attr.fileId)
return {
"total_attributes": len(all_attributes),
"unique_files": len(unique_files),
"pattern_counts": pattern_counts,
"mandate_id": self.current_user.mandateId
}
except Exception as e:
logger.error(f"Error getting processing stats: {str(e)}")
return {
"total_attributes": 0,
"unique_files": 0,
"pattern_counts": {},
"error": str(e)
}
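
Note: a standalone restatement of the _parseSharepointPath helper above, with a worked example on a hypothetical URL:

from urllib.parse import unquote

def parseSharepointPath(path: str):
    # Mirrors SharepointProcessor._parseSharepointPath
    if not path.startswith('https://') or '/sites/' not in path:
        return None, None
    path = path.split('?')[0]  # drop query parameters
    domain, rest = path.split('/sites/', 1)
    domain = domain.replace('https://', '')
    siteName = rest.split('/')[0]
    folderParts = rest.split('/')[1:]
    return f"https://{domain}/sites/{siteName}", unquote('/'.join(folderParts))

# Example (hypothetical URL):
#   parseSharepointPath("https://contoso.sharepoint.com/sites/Finance/Shared%20Documents/Reports?web=1")
#   -> ("https://contoso.sharepoint.com/sites/Finance", "Shared Documents/Reports")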

View file

@@ -11,7 +11,7 @@ import csv
import io
from datetime import datetime, UTC
from typing import Dict, Any, List, Optional
from modules.connectors.connectorSharepoint import ConnectorSharepoint
from modules.services.serviceSharepoint.mainSharepoint import SharepointService
from modules.connectors.connectorTicketJira import ConnectorTicketJira
from modules.interfaces.interfaceAppObjects import getRootInterface
from modules.interfaces.interfaceAppModel import UserInDB
@@ -232,6 +232,10 @@ class ManagerSyncDelta:
self.jira_connector = None
self.sharepoint_connector = None
self.target_site = None
# Initialize centralized services with root user
from modules.services import getInterface as getServices
root_user = self.root_interface.getUserByUsername("admin")
self.services = getServices(root_user, None)
def get_sync_file_name(self) -> str:
"""Get the appropriate sync file name based on the sync mode."""
@@ -294,8 +298,9 @@ class ManagerSyncDelta:
logger.info(f"Found SharePoint connection: {sharepoint_connection.id}")
# Get SharePoint token for this connection
sharepoint_token = self.root_interface.getConnectionToken(sharepoint_connection.id)
# Get fresh SharePoint token for this connection
from modules.security.tokenManager import TokenManager
sharepoint_token = TokenManager().getFreshToken(self.root_interface, sharepoint_connection.id)
if not sharepoint_token:
logger.error("No SharePoint token found for Delta Group user connection")
return False
@@ -303,7 +308,7 @@
logger.info(f"Found SharePoint token: {sharepoint_token.id}")
# Initialize SharePoint connector with Graph API
self.sharepoint_connector = ConnectorSharepoint(access_token=sharepoint_token.tokenAccess)
self.sharepoint_connector = SharepointService(access_token=sharepoint_token.tokenAccess)
# Resolve the site by hostname + site path to get the real site ID
logger.info(
@@ -552,3 +557,21 @@ async def perform_sync_jira_delta_group() -> bool:
except Exception as e:
logger.error(f"Error in perform_sync_jira_delta_group: {str(e)}")
return False
# Register scheduled job on import using the shared event manager
try:
from modules.shared.eventManagement import eventManager
# Schedule sync every 20 minutes (at minutes 00, 20, 40)
eventManager.register_cron(
job_id="jira_delta_group_sync",
func=perform_sync_jira_delta_group,
cron_kwargs={"minute": "0,20,40"},
replace_existing=True,
coalesce=True,
max_instances=1,
misfire_grace_time=1800,
)
logger.info("Registered jira_delta_group_sync via EventManagement (every 20 minutes)")
except Exception as e:
logger.error(f"Failed to register jira_delta_group_sync: {str(e)}")
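
Note: the shared eventManager that register_cron, start, and stop are called on is not shown in this commit's hunks. A minimal sketch of the interface those call sites imply, assuming APScheduler underneath (which app.py used directly before this refactor); everything beyond the three methods is hypothetical:

from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.triggers.cron import CronTrigger
from zoneinfo import ZoneInfo

class EventManagement:
    """Hypothetical sketch: central registry for scheduled jobs, started once from app.py."""
    def __init__(self):
        self._scheduler = AsyncIOScheduler(timezone=ZoneInfo("Europe/Zurich"))

    def register_cron(self, job_id, func, cron_kwargs, **job_kwargs):
        # job_kwargs carries replace_existing, coalesce, max_instances, misfire_grace_time
        self._scheduler.add_job(func, CronTrigger(**cron_kwargs), id=job_id, **job_kwargs)

    def start(self):
        if not self._scheduler.running:
            self._scheduler.start()

    def stop(self):
        if self._scheduler.running:
            self._scheduler.shutdown(wait=False)

eventManager = EventManagement()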

View file

@@ -1,527 +0,0 @@
import logging
from typing import Dict, Any, List, Union, Optional
from modules.connectors.connectorAiOpenai import AiOpenai, ContextLengthExceededException
from modules.connectors.connectorAiAnthropic import AiAnthropic
from modules.services.serviceDocument.documentExtraction import DocumentExtraction
from modules.interfaces.interfaceChatModel import ChatDocument
logger = logging.getLogger(__name__)
# AI Model Registry with Performance Data
AI_MODELS = {
"openai_gpt4o": {
"connector": "openai",
"max_tokens": 128000,
"cost_per_1k_tokens": 0.03, # Input
"cost_per_1k_tokens_output": 0.06, # Output
"speed_rating": 8, # 1-10
"quality_rating": 9, # 1-10
"supports_images": True,
"supports_documents": True,
"context_length": 128000,
"model_name": "gpt-4o"
},
"openai_gpt35": {
"connector": "openai",
"max_tokens": 16000,
"cost_per_1k_tokens": 0.0015,
"cost_per_1k_tokens_output": 0.002,
"speed_rating": 9,
"quality_rating": 7,
"supports_images": False,
"supports_documents": True,
"context_length": 16000,
"model_name": "gpt-3.5-turbo"
},
"anthropic_claude": {
"connector": "anthropic",
"max_tokens": 200000,
"cost_per_1k_tokens": 0.015,
"cost_per_1k_tokens_output": 0.075,
"speed_rating": 7,
"quality_rating": 10,
"supports_images": True,
"supports_documents": True,
"context_length": 200000,
"model_name": "claude-3-sonnet-20240229"
}
}
class AiCalls:
"""Interface for AI service interactions with centralized call method"""
def __init__(self):
self.openaiService = AiOpenai()
self.anthropicService = AiAnthropic()
self.document_extractor = DocumentExtraction()
async def callAi(
self,
prompt: str,
documents: List[ChatDocument] = None,
operation_type: str = "general",
priority: str = "balanced", # "speed", "quality", "cost", "balanced"
compress_prompt: bool = True,
compress_documents: bool = True,
process_documents_individually: bool = False,
max_cost: float = None,
max_processing_time: int = None
) -> str:
"""
Central AI call method with intelligent model selection and content processing.
Args:
prompt: The main prompt for the AI
documents: List of documents to process
operation_type: Type of operation ("general", "document_analysis", "image_analysis", etc.)
priority: Priority for model selection ("speed", "quality", "cost", "balanced")
compress_prompt: Whether the prompt should be compressed
compress_documents: Whether documents should be compressed
process_documents_individually: Whether documents should be processed individually
max_cost: Maximum cost for the call
max_processing_time: Maximum processing time in seconds
Returns:
The AI response as a string
"""
try:
# 1. Process documents if present
document_content = ""
if documents:
document_content = await self._process_documents_for_ai(
documents,
operation_type,
compress_documents,
process_documents_individually
)
# 2. Select the best model based on priority and content
selected_model = self._select_optimal_model(
prompt,
document_content,
priority,
operation_type,
max_cost,
max_processing_time
)
# 3. Optimize content for the selected model
optimized_prompt, optimized_content = await self._optimize_content_for_model(
prompt,
document_content,
selected_model,
compress_prompt,
compress_documents
)
# 4. Execute the AI call with failover
return await self._execute_ai_call_with_failover(
selected_model,
optimized_prompt,
optimized_content
)
except Exception as e:
logger.error(f"Error in centralized AI call: {str(e)}")
return f"Error: {str(e)}"
def _select_optimal_model(
self,
prompt: str,
document_content: str,
priority: str,
operation_type: str,
max_cost: float = None,
max_processing_time: int = None
) -> str:
"""Selects the optimal model based on priority and content."""
# Compute the content size
total_content_size = len(prompt.encode('utf-8')) + len(document_content.encode('utf-8'))
# Filter the available models
available_models = {}
for model_name, model_info in AI_MODELS.items():
# Check whether the model can handle the content size
if total_content_size > model_info["context_length"] * 0.8: # 80% for content
continue
# Check the cost limit
if max_cost:
estimated_cost = self._estimate_cost(model_info, total_content_size)
if estimated_cost > max_cost:
continue
# Check operation-type compatibility
if operation_type == "image_analysis" and not model_info["supports_images"]:
continue
available_models[model_name] = model_info
if not available_models:
# Fall back to the smallest model
return "openai_gpt35"
# Select the model based on priority
if priority == "speed":
return max(available_models.keys(), key=lambda x: available_models[x]["speed_rating"])
elif priority == "quality":
return max(available_models.keys(), key=lambda x: available_models[x]["quality_rating"])
elif priority == "cost":
return min(available_models.keys(), key=lambda x: available_models[x]["cost_per_1k_tokens"])
else: # balanced
# Weighted score: 40% quality, 30% speed, 30% cost
def balanced_score(model_name):
model_info = available_models[model_name]
quality_score = model_info["quality_rating"] * 0.4
speed_score = model_info["speed_rating"] * 0.3
cost_score = (10 - (model_info["cost_per_1k_tokens"] * 1000)) * 0.3 # Lower cost = higher score
return quality_score + speed_score + cost_score
return max(available_models.keys(), key=balanced_score)
def _estimate_cost(self, model_info: Dict, content_size: int) -> float:
"""Estimates the cost of an AI call."""
# Rough estimate: 1 token ≈ 4 characters
estimated_tokens = content_size / 4
input_cost = (estimated_tokens / 1000) * model_info["cost_per_1k_tokens"]
output_cost = (estimated_tokens / 1000) * model_info["cost_per_1k_tokens_output"] * 0.1 # 10% for output
return input_cost + output_cost
async def _process_documents_for_ai(
self,
documents: List[ChatDocument],
operation_type: str,
compress_documents: bool,
process_individually: bool
) -> str:
"""Processes documents for an AI call using documentExtraction.py."""
if not documents:
return ""
processed_contents = []
for doc in documents:
try:
# Extract content using documentExtraction.py
extracted = await self.document_extractor.processFileData(
doc.fileData,
doc.fileName,
doc.mimeType,
prompt=f"Extract relevant content for {operation_type}",
documentId=doc.id,
enableAI=True
)
# Combine all content items
doc_content = []
for content_item in extracted.contents:
if content_item.data and content_item.data.strip():
doc_content.append(content_item.data)
if doc_content:
combined_doc_content = "\n\n".join(doc_content)
# Compress if requested
if compress_documents and len(combined_doc_content.encode('utf-8')) > 10000: # 10 KB limit
combined_doc_content = await self._compress_content(
combined_doc_content,
10000,
"document"
)
processed_contents.append(f"Document: {doc.fileName}\n{combined_doc_content}")
except Exception as e:
logger.warning(f"Error processing document {doc.fileName}: {str(e)}")
processed_contents.append(f"Document: {doc.fileName}\n[Error processing document: {str(e)}]")
return "\n\n---\n\n".join(processed_contents)
async def _optimize_content_for_model(
self,
prompt: str,
document_content: str,
model_name: str,
compress_prompt: bool,
compress_documents: bool
) -> tuple[str, str]:
"""Optimizes the content for the selected model."""
model_info = AI_MODELS[model_name]
max_content_size = model_info["context_length"] * 0.7 # 70% for content
optimized_prompt = prompt
optimized_content = document_content
# Compress the prompt if requested
if compress_prompt and len(prompt.encode('utf-8')) > 2000: # 2 KB limit for the prompt
optimized_prompt = await self._compress_content(prompt, 2000, "prompt")
# Compress the document content if requested
if compress_documents and document_content:
content_size = len(document_content.encode('utf-8'))
if content_size > max_content_size:
optimized_content = await self._compress_content(
document_content,
int(max_content_size),
"document"
)
return optimized_prompt, optimized_content
async def _compress_content(self, content: str, target_size: int, content_type: str) -> str:
"""Intelligently compresses content based on its type."""
if len(content.encode('utf-8')) <= target_size:
return content
try:
# Use AI for intelligent compression
compression_prompt = f"""
Compress the following {content_type} to at most {target_size} characters,
but keep all important information:
{content}
Return only the compressed content, without additional explanations.
"""
# Use the fastest available model for compression
compression_model = "openai_gpt35"
model_info = AI_MODELS[compression_model]
connector = getattr(self, f"{model_info['connector']}Service")
messages = [{"role": "user", "content": compression_prompt}]
if model_info["connector"] == "openai":
compressed = await connector.callAiBasic(messages)
else:
response = await connector.callAiBasic(messages)
compressed = response["choices"][0]["message"]["content"]
return compressed
except Exception as e:
logger.warning(f"AI compression failed, using truncation: {str(e)}")
# Fallback: simple truncation
return content[:target_size] + "... [truncated]"
async def _execute_ai_call_with_failover(
self,
model_name: str,
prompt: str,
document_content: str
) -> str:
"""Executes the AI call with automatic failover."""
try:
model_info = AI_MODELS[model_name]
connector = getattr(self, f"{model_info['connector']}Service")
# Prepare the messages
messages = []
if document_content:
messages.append({
"role": "system",
"content": f"Context from documents:\n{document_content}"
})
messages.append({
"role": "user",
"content": prompt
})
# Execute the AI call
if model_info["connector"] == "openai":
return await connector.callAiBasic(messages)
else: # anthropic
response = await connector.callAiBasic(messages)
return response["choices"][0]["message"]["content"]
except ContextLengthExceededException:
logger.warning(f"Context length exceeded for {model_name}, trying fallback")
# Fall back to a model with a larger context
fallback_model = self._find_fallback_model(model_name)
if fallback_model:
return await self._execute_ai_call_with_failover(fallback_model, prompt, document_content)
else:
# Last resort: compress the content further
compressed_prompt = await self._compress_content(prompt, 1000, "prompt")
compressed_content = await self._compress_content(document_content, 5000, "document")
return await self._execute_ai_call_with_failover("openai_gpt35", compressed_prompt, compressed_content)
except Exception as e:
logger.warning(f"AI call failed with {model_name}: {e}")
# Generic fallback
return await self._execute_ai_call_with_failover("openai_gpt35", prompt, document_content)
def _find_fallback_model(self, current_model: str) -> Optional[str]:
"""Finds a fallback model with a larger context."""
current_context = AI_MODELS[current_model]["context_length"]
# Look for a model with a larger context
for model_name, model_info in AI_MODELS.items():
if model_info["context_length"] > current_context:
return model_name
return None
# Legacy methods
async def callAiTextBasic(self, prompt: str, context: Optional[str] = None) -> str:
"""
Basic text processing - now uses centralized AI call method.
Args:
prompt: The user prompt to process
context: Optional system context/prompt
Returns:
The AI response as text
"""
# Combine context with prompt if provided
full_prompt = prompt
if context:
full_prompt = f"Context: {context}\n\nUser Request: {prompt}"
# Use centralized AI call with speed priority for basic calls
return await self.callAi(
prompt=full_prompt,
priority="speed",
compress_prompt=True,
compress_documents=False
)
async def callAiTextAdvanced(self, prompt: str, context: Optional[str] = None, _is_fallback: bool = False) -> str:
"""
Advanced text processing - now uses centralized AI call method.
Args:
prompt: The user prompt to process
context: Optional system context/prompt
_is_fallback: Internal flag (kept for compatibility)
Returns:
The AI response as text
"""
# Combine context with prompt if provided
full_prompt = prompt
if context:
full_prompt = f"Context: {context}\n\nUser Request: {prompt}"
# Use centralized AI call with quality priority for advanced calls
return await self.callAi(
prompt=full_prompt,
priority="quality",
compress_prompt=False,
compress_documents=False
)
async def callAiImageBasic(self, prompt: str, imageData: Union[str, bytes], mimeType: str = None) -> str:
"""
Basic image processing - now uses centralized AI call method.
Args:
prompt: The prompt for image analysis
imageData: The image data (file path or bytes)
mimeType: Optional MIME type of the image
Returns:
The AI response as text
"""
try:
# For image processing, use the original connector directly
# as the centralized method doesn't handle images yet
return await self.openaiService.callAiImage(prompt, imageData, mimeType)
except Exception as e:
logger.error(f"Error in OpenAI image call: {str(e)}")
return f"Error: {str(e)}"
async def callAiImageAdvanced(self, prompt: str, imageData: Union[str, bytes], mimeType: str = None) -> str:
"""
Advanced image processing - now uses centralized AI call method.
Args:
prompt: The prompt for image analysis
imageData: The image data (file path or bytes)
mimeType: Optional MIME type of the image
Returns:
The AI response as text
"""
try:
# For image processing, use the original connector directly
# as the centralized method doesn't handle images yet
return await self.anthropicService.callAiImage(prompt, imageData, mimeType)
except Exception as e:
logger.error(f"Error in Anthropic image call: {str(e)}")
return f"Error: {str(e)}"
# Convenience methods for common use cases
async def callAiForDocumentAnalysis(
self,
prompt: str,
documents: List[ChatDocument],
priority: str = "balanced"
) -> str:
"""Convenience method for document analysis"""
return await self.callAi(
prompt=prompt,
documents=documents,
operation_type="document_analysis",
priority=priority,
compress_documents=True,
process_documents_individually=False
)
async def callAiForReportGeneration(
self,
prompt: str,
documents: List[ChatDocument],
priority: str = "quality"
) -> str:
"""Convenience method for report generation"""
return await self.callAi(
prompt=prompt,
documents=documents,
operation_type="report_generation",
priority=priority,
compress_documents=True,
process_documents_individually=True
)
async def callAiForEmailComposition(
self,
prompt: str,
documents: List[ChatDocument] = None,
priority: str = "speed"
) -> str:
"""Convenience method for email composition"""
return await self.callAi(
prompt=prompt,
documents=documents,
operation_type="email_composition",
priority=priority,
compress_prompt=True,
compress_documents=True
)
async def callAiForTaskPlanning(
self,
prompt: str,
documents: List[ChatDocument] = None,
priority: str = "balanced"
) -> str:
"""Convenience method for task planning"""
return await self.callAi(
prompt=prompt,
documents=documents,
operation_type="task_planning",
priority=priority,
compress_prompt=False,
compress_documents=True
)

View file

@@ -0,0 +1,30 @@
from typing import Optional
from pydantic import BaseModel, Field
class AiCallOptions(BaseModel):
"""Options for centralized AI processing (no document extraction here)."""
operationType: str = Field(default="general", description="Type of operation")
priority: str = Field(default="balanced", description="speed|quality|cost|balanced")
compressPrompt: bool = Field(default=True, description="Whether to compress the prompt")
compressContext: bool = Field(default=True, description="Whether to compress optional context")
maxCost: Optional[float] = Field(default=None, description="Max cost budget")
maxProcessingTime: Optional[int] = Field(default=None, description="Max processing time in seconds")
class AiCallRequest(BaseModel):
"""Centralized AI call request payload for interface use."""
prompt: str = Field(description="The user prompt")
context: Optional[str] = Field(default=None, description="Optional external context (e.g., extracted docs)")
options: AiCallOptions = Field(default_factory=AiCallOptions)
class AiCallResponse(BaseModel):
"""Standardized AI call response."""
content: str = Field(description="AI response content")
modelName: str = Field(description="Selected model name")
usedTokens: Optional[int] = Field(default=None, description="Estimated used tokens")
costEstimate: Optional[float] = Field(default=None, description="Estimated cost of the call")
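
Note: example construction of the new request model (values are illustrative):

from modules.interfaces.interfaceAiModel import AiCallOptions, AiCallRequest

request = AiCallRequest(
    prompt="Summarize the key risks in the attached contract.",
    context="<extracted document text>",
    options=AiCallOptions(priority="cost", maxCost=0.05),
)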

View file

@@ -0,0 +1,117 @@
import logging
from typing import Dict, Any, List
from modules.connectors.connectorAiOpenai import AiOpenai
from modules.connectors.connectorAiAnthropic import AiAnthropic
from modules.interfaces.interfaceAiModel import AiCallOptions, AiCallRequest, AiCallResponse
logger = logging.getLogger(__name__)
# Local model registry (connector specifications); this belongs in the interface layer, not the service layer
aiModels: Dict[str, Dict[str, Any]] = {
"openai_gpt4o": {
"connector": "openai",
"contextLength": 128000,
"costPer1kTokens": 0.03,
"costPer1kTokensOutput": 0.06,
"speedRating": 8,
"qualityRating": 9,
},
"openai_gpt35": {
"connector": "openai",
"contextLength": 16000,
"costPer1kTokens": 0.0015,
"costPer1kTokensOutput": 0.002,
"speedRating": 9,
"qualityRating": 7,
},
"anthropic_claude": {
"connector": "anthropic",
"contextLength": 200000,
"costPer1kTokens": 0.015,
"costPer1kTokensOutput": 0.075,
"speedRating": 7,
"qualityRating": 10,
},
}
class AiObjects:
"""Centralized AI interface: selects model and calls connector. No document handling."""
def __init__(self):
self.openaiService = AiOpenai()
self.anthropicService = AiAnthropic()
def _estimateCost(self, modelInfo: Dict[str, Any], contentSize: int) -> float:
estimatedTokens = contentSize / 4
inputCost = (estimatedTokens / 1000) * modelInfo["costPer1kTokens"]
outputCost = (estimatedTokens / 1000) * modelInfo["costPer1kTokensOutput"] * 0.1
return inputCost + outputCost
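# Worked example (illustrative): 8,000 characters of prompt+context is roughly
# 8000 / 4 = 2,000 tokens. For "openai_gpt4o" that yields
#   input  ≈ (2000 / 1000) * 0.03       = 0.060
#   output ≈ (2000 / 1000) * 0.06 * 0.1 = 0.012  (output assumed at 10% of input)
# so _estimateCost returns about 0.072.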
def _selectModel(self, prompt: str, context: str, options: AiCallOptions) -> str:
totalSize = len(prompt.encode("utf-8")) + len(context.encode("utf-8"))
candidates: Dict[str, Dict[str, Any]] = {}
for name, info in aiModels.items():
if totalSize > info["contextLength"] * 0.8:
continue
if options.maxCost is not None:
if self._estimateCost(info, totalSize) > options.maxCost:
continue
candidates[name] = info
if not candidates:
return "openai_gpt35"
if options.priority == "speed":
return max(candidates, key=lambda k: candidates[k]["speedRating"])
if options.priority == "quality":
return max(candidates, key=lambda k: candidates[k]["qualityRating"])
if options.priority == "cost":
return min(candidates, key=lambda k: candidates[k]["costPer1kTokens"])
def balancedScore(name: str) -> float:
info = candidates[name]
return info["qualityRating"] * 0.4 + info["speedRating"] * 0.3 + (10 - info["costPer1kTokens"] * 1000) * 0.3
return max(candidates, key=balancedScore)
def _connectorFor(self, modelName: str):
return self.openaiService if aiModels[modelName]["connector"] == "openai" else self.anthropicService
async def call(self, request: AiCallRequest) -> AiCallResponse:
prompt = request.prompt
context = request.context or ""
options = request.options
# Compress optionally (prompt/context) - simple truncation fallback kept here
def maybeTruncate(text: str, limit: int) -> str:
data = text.encode("utf-8")
if len(data) <= limit:
return text
return data[:limit].decode("utf-8", errors="ignore") + "... [truncated]"
if options.compressPrompt and len(prompt.encode("utf-8")) > 2000:
prompt = maybeTruncate(prompt, 2000)
if options.compressContext and len(context.encode("utf-8")) > 70000:
context = maybeTruncate(context, 70000)
modelName = self._selectModel(prompt, context, options)
messages: List[Dict[str, Any]] = []
if context:
messages.append({"role": "system", "content": f"Context from documents:\n{context}"})
messages.append({"role": "user", "content": prompt})
connector = self._connectorFor(modelName)
if aiModels[modelName]["connector"] == "openai":
content = await connector.callAiBasic(messages)
else:
response = await connector.callAiBasic(messages)
content = response["choices"][0]["message"]["content"]
# Estimate cost/tokens
totalSize = len((prompt + context).encode("utf-8"))
cost = self._estimateCost(aiModels[modelName], totalSize)
usedTokens = int(totalSize / 4)
return AiCallResponse(content=content, modelName=modelName, usedTokens=usedTokens, costEstimate=cost)
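
Note: a usage sketch for the new interface. The module path for AiObjects is assumed, since the diff does not name the file; the connectors are assumed to resolve API credentials from configuration:

import asyncio
from modules.interfaces.interfaceAiObjects import AiObjects  # assumed path
from modules.interfaces.interfaceAiModel import AiCallOptions, AiCallRequest

async def main():
    ai = AiObjects()
    response = await ai.call(AiCallRequest(
        prompt="Draft a two-sentence status update for the workflow refactor.",
        options=AiCallOptions(priority="speed"),
    ))
    print(response.modelName, response.costEstimate)
    print(response.content)

asyncio.run(main())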

View file

@@ -201,7 +201,6 @@ class AppObjects:
"""
return self.access.canModify(model_class, recordId)
def getInitialId(self, model_class: type) -> Optional[str]:
"""Returns the initial ID for a table."""
return self.db.getInitialId(model_class)
@@ -268,105 +267,6 @@
logger.error(f"Error getting user by ID: {str(e)}")
return None
def getUserConnections(self, userId: str) -> List[UserConnection]:
"""Returns all connections for a user."""
try:
# Get connections for this user
connections = self.db.getRecordset(UserConnection, recordFilter={"userId": userId})
# Convert to UserConnection objects
result = []
for conn_dict in connections:
try:
# Create UserConnection object
connection = UserConnection(
id=conn_dict["id"],
userId=conn_dict["userId"],
authority=conn_dict.get("authority"),
externalId=conn_dict.get("externalId", ""),
externalUsername=conn_dict.get("externalUsername", ""),
externalEmail=conn_dict.get("externalEmail"),
status=conn_dict.get("status", "pending"),
connectedAt=conn_dict.get("connectedAt"),
lastChecked=conn_dict.get("lastChecked"),
expiresAt=conn_dict.get("expiresAt")
)
result.append(connection)
except Exception as e:
logger.error(f"Error converting connection dict to object: {str(e)}")
continue
return result
except Exception as e:
logger.error(f"Error getting user connections: {str(e)}")
return []
def addUserConnection(self, userId: str, authority: AuthAuthority, externalId: str,
externalUsername: str, externalEmail: Optional[str] = None,
status: ConnectionStatus = ConnectionStatus.PENDING) -> UserConnection:
"""
Adds a new connection for a user.
Args:
userId: The ID of the user
authority: The authentication authority (e.g., MSFT, GOOGLE)
externalId: The external ID from the authority
externalUsername: The username from the authority
externalEmail: Optional email from the authority
status: The connection status (defaults to PENDING)
Returns:
The created UserConnection object
"""
try:
# Get the user
user = self.getUser(userId)
if not user:
raise ValueError(f"User not found: {userId}")
# Create new connection with all required fields
connection = UserConnection(
id=str(uuid.uuid4()),
userId=userId,
authority=authority,
externalId=externalId,
externalUsername=externalUsername,
externalEmail=externalEmail,
status=status,
connectedAt=get_utc_timestamp(),
lastChecked=get_utc_timestamp(),
expiresAt=None # Optional field, set to None by default
)
# Save to connections table
self.db.recordCreate(UserConnection, connection)
return connection
except Exception as e:
logger.error(f"Error adding user connection: {str(e)}")
raise ValueError(f"Failed to add user connection: {str(e)}")
def removeUserConnection(self, connectionId: str) -> None:
"""Remove a connection to an external service"""
try:
# Get connection
connections = self.db.getRecordset(UserConnection, recordFilter={
"id": connectionId
})
if not connections:
raise ValueError(f"Connection {connectionId} not found")
# Delete connection
self.db.recordDelete(UserConnection, connectionId)
except Exception as e:
logger.error(f"Error removing user connection: {str(e)}")
raise ValueError(f"Failed to remove user connection: {str(e)}")
def authenticateLocalUser(self, username: str, password: str) -> Optional[User]:
"""Authenticates a user by username and password using local authentication."""
# Clear the users table from cache and reload it
@ -551,6 +451,154 @@ class AppObjects:
logger.error(f"Error deleting user: {str(e)}")
raise ValueError(f"Failed to delete user: {str(e)}")
def _getInitialUser(self) -> Optional[Dict[str, Any]]:
"""Get the initial user record directly from database without access control."""
try:
initialUserId = self.getInitialId(UserInDB)
if not initialUserId:
return None
users = self.db.getRecordset(UserInDB, recordFilter={"id": initialUserId})
return users[0] if users else None
except Exception as e:
logger.error(f"Error getting initial user: {str(e)}")
return None
def checkUsernameAvailability(self, checkData: Dict[str, Any]) -> Dict[str, Any]:
"""Checks if a username is available for registration."""
try:
username = checkData.get("username")
authenticationAuthority = checkData.get("authenticationAuthority", "local")
if not username:
return {
"available": False,
"message": "Username is required"
}
# Get user by username
user = self.getUserByUsername(username)
# Check if user exists (User model instance)
if user is not None:
return {
"available": False,
"message": "Username is already taken"
}
return {
"available": True,
"message": "Username is available"
}
except Exception as e:
logger.error(f"Error checking username availability: {str(e)}")
return {
"available": False,
"message": f"Error checking username availability: {str(e)}"
}
# Connection methods
def getUserConnections(self, userId: str) -> List[UserConnection]:
"""Returns all connections for a user."""
try:
# Get connections for this user
connections = self.db.getRecordset(UserConnection, recordFilter={"userId": userId})
# Convert to UserConnection objects
result = []
for conn_dict in connections:
try:
# Create UserConnection object
connection = UserConnection(
id=conn_dict["id"],
userId=conn_dict["userId"],
authority=conn_dict.get("authority"),
externalId=conn_dict.get("externalId", ""),
externalUsername=conn_dict.get("externalUsername", ""),
externalEmail=conn_dict.get("externalEmail"),
status=conn_dict.get("status", "pending"),
connectedAt=conn_dict.get("connectedAt"),
lastChecked=conn_dict.get("lastChecked"),
expiresAt=conn_dict.get("expiresAt")
)
result.append(connection)
except Exception as e:
logger.error(f"Error converting connection dict to object: {str(e)}")
continue
return result
except Exception as e:
logger.error(f"Error getting user connections: {str(e)}")
return []
def addUserConnection(self, userId: str, authority: AuthAuthority, externalId: str,
externalUsername: str, externalEmail: Optional[str] = None,
status: ConnectionStatus = ConnectionStatus.PENDING) -> UserConnection:
"""
Adds a new connection for a user.
Args:
userId: The ID of the user
authority: The authentication authority (e.g., MSFT, GOOGLE)
externalId: The external ID from the authority
externalUsername: The username from the authority
externalEmail: Optional email from the authority
status: The connection status (defaults to PENDING)
Returns:
The created UserConnection object
"""
try:
# Get the user
user = self.getUser(userId)
if not user:
raise ValueError(f"User not found: {userId}")
# Create new connection with all required fields
connection = UserConnection(
id=str(uuid.uuid4()),
userId=userId,
authority=authority,
externalId=externalId,
externalUsername=externalUsername,
externalEmail=externalEmail,
status=status,
connectedAt=get_utc_timestamp(),
lastChecked=get_utc_timestamp(),
expiresAt=None # Optional field, set to None by default
)
# Save to connections table
self.db.recordCreate(UserConnection, connection)
return connection
except Exception as e:
logger.error(f"Error adding user connection: {str(e)}")
raise ValueError(f"Failed to add user connection: {str(e)}")
def removeUserConnection(self, connectionId: str) -> None:
"""Remove a connection to an external service"""
try:
# Get connection
connections = self.db.getRecordset(UserConnection, recordFilter={
"id": connectionId
})
if not connections:
raise ValueError(f"Connection {connectionId} not found")
# Delete connection
self.db.recordDelete(UserConnection, connectionId)
except Exception as e:
logger.error(f"Error removing user connection: {str(e)}")
raise ValueError(f"Failed to remove user connection: {str(e)}")
# Mandate methods
def getAllMandates(self) -> List[Mandate]:
@ -650,52 +698,7 @@ class AppObjects:
logger.error(f"Error deleting mandate: {str(e)}")
raise ValueError(f"Failed to delete mandate: {str(e)}")
def _getInitialUser(self) -> Optional[Dict[str, Any]]:
"""Get the initial user record directly from database without access control."""
try:
initialUserId = self.getInitialId(UserInDB)
if not initialUserId:
return None
users = self.db.getRecordset(UserInDB, recordFilter={"id": initialUserId})
return users[0] if users else None
except Exception as e:
logger.error(f"Error getting initial user: {str(e)}")
return None
def checkUsernameAvailability(self, checkData: Dict[str, Any]) -> Dict[str, Any]:
"""Checks if a username is available for registration."""
try:
username = checkData.get("username")
authenticationAuthority = checkData.get("authenticationAuthority", "local")
if not username:
return {
"available": False,
"message": "Username is required"
}
# Get user by username
user = self.getUserByUsername(username)
# Check if user exists (User model instance)
if user is not None:
return {
"available": False,
"message": "Username is already taken"
}
return {
"available": True,
"message": "Username is available"
}
except Exception as e:
logger.error(f"Error checking username availability: {str(e)}")
return {
"available": False,
"message": f"Error checking username availability: {str(e)}"
}
# Token methods
def saveAccessToken(self, token: Token, replace_existing: bool = True) -> None:
"""Save an access token for the current user (must NOT have connectionId)"""
@ -803,56 +806,8 @@ class AppObjects:
logger.error(f"Error saving connection token: {str(e)}")
raise
def getAccessToken(self, authority: str, auto_refresh: bool = True) -> Optional[Token]:
"""Get the latest valid access token for the current user and authority, optionally auto-refresh if expired"""
try:
# Validate that we're not looking for connection tokens
if not self.currentUser or not self.currentUser.id:
raise ValueError("No valid user context available for token retrieval")
# Get access tokens for this user and authority (must NOT have connectionId)
tokens = self.db.getRecordset(Token, recordFilter={
"userId": self.currentUser.id,
"authority": authority,
"connectionId": None # Ensure we only get access tokens
})
if not tokens:
return None
# Sort by creation date and get the latest
tokens.sort(key=lambda x: x.get("createdAt", ""), reverse=True)
latest_token = Token(**tokens[0])
# Check if token is expired
if latest_token.expiresAt and latest_token.expiresAt < get_utc_timestamp():
if auto_refresh:
# Import TokenManager here to avoid circular imports
from modules.security.tokenManager import TokenManager
token_manager = TokenManager()
# Try to refresh the token
refreshed_token = token_manager.refresh_token(latest_token)
if refreshed_token:
# Save the new token (which will automatically replace old ones)
self.saveAccessToken(refreshed_token)
return refreshed_token
else:
logger.warning(f"Failed to refresh expired access token for {authority}")
return None
else:
logger.warning(f"Access token for {authority} is expired (expiresAt: {latest_token.expiresAt})")
return None
return latest_token
except Exception as e:
logger.error(f"Error getting access token: {str(e)}")
return None
def getConnectionToken(self, connectionId: str, auto_refresh: bool = True) -> Optional[Token]:
"""Get the connection token for a specific connectionId, optionally auto-refresh if expired"""
def getConnectionToken(self, connectionId: str) -> Optional[Token]:
"""Get the latest stored token for a specific connectionId (no refresh)."""
try:
# Validate connectionId
if not connectionId:
@ -873,31 +828,7 @@ class AppObjects:
tokens.sort(key=lambda x: x.get("expiresAt", 0), reverse=True)
latest_token = Token(**tokens[0])
# Check if token is expired or expires within 30 minutes
current_time = get_utc_timestamp()
thirty_minutes = 30 * 60 # 30 minutes in seconds
if latest_token.expiresAt and latest_token.expiresAt < (current_time + thirty_minutes):
if auto_refresh:
# Import TokenManager here to avoid circular imports
from modules.security.tokenManager import TokenManager
token_manager = TokenManager()
# Try to refresh the token
refreshed_token = token_manager.refresh_token(latest_token)
if refreshed_token:
# Save the new token (which will automatically replace old ones)
self.saveConnectionToken(refreshed_token)
logger.info(f"Proactively refreshed connection token for connectionId {connectionId} (expired in {latest_token.expiresAt - current_time} seconds)")
return refreshed_token
else:
logger.warning(f"Token refresh failed for connectionId {connectionId}")
return None
else:
logger.warning(f"Connection token for connectionId {connectionId} expires soon (expiresAt: {latest_token.expiresAt})")
return None
# No auto-refresh here. Callers should use a higher-level service to refresh when needed.
return latest_token
@ -905,53 +836,6 @@ class AppObjects:
logger.error(f"Error getting connection token for connectionId {connectionId}: {str(e)}")
return None
def deleteAccessToken(self, authority: str) -> None:
"""Delete all access tokens for the current user and authority"""
try:
# Validate user context
if not self.currentUser or not self.currentUser.id:
raise ValueError("No valid user context available for token deletion")
# Get access tokens to delete (must NOT have connectionId)
tokens = self.db.getRecordset(Token, recordFilter={
"userId": self.currentUser.id,
"authority": authority,
"connectionId": None # Ensure we only delete access tokens
})
# Delete each token
for token in tokens:
self.db.recordDelete(Token, token["id"])
except Exception as e:
logger.error(f"Error deleting access token: {str(e)}")
raise
def deleteConnectionTokenByConnectionId(self, connectionId: str) -> None:
"""Delete all connection tokens for a specific connectionId"""
try:
# Validate connectionId
if not connectionId:
raise ValueError("connectionId is required for deleteConnectionTokenByConnectionId")
# Get connection tokens to delete
tokens = self.db.getRecordset(Token, recordFilter={
"connectionId": connectionId
})
# Delete each token
for token in tokens:
self.db.recordDelete(Token, token["id"])
except Exception as e:
logger.error(f"Error deleting connection token for connectionId {connectionId}: {str(e)}")
raise
# =====================
# Token revocation (LOCAL gateway JWTs)
# =====================
def findActiveTokenById(self, tokenId: str, userId: str, authority: AuthAuthority, sessionId: str = None, mandateId: str = None) -> Optional[Token]:
"""Find an active access token by its id (jti) with optional session/tenant scoping."""
try:
@ -1088,7 +972,7 @@ class AppObjects:
logger.error(f"Error during logout: {str(e)}")
raise
# Data Neutralization methods
# Neutralization methods
def getNeutralizationConfig(self) -> Optional[DataNeutraliserConfig]:
"""Get the data neutralization configuration for the current user's mandate"""
@ -1138,98 +1022,6 @@ class AppObjects:
logger.error(f"Error creating/updating neutralization config: {str(e)}")
raise ValueError(f"Failed to create/update neutralization config: {str(e)}")
def neutralizeText(self, text: str, file_id: Optional[str] = None) -> Dict[str, Any]:
"""Neutralize text content and store attribute mappings"""
try:
from modules.services.serviceNeutralization.neutralizer import DataAnonymizer
# Get neutralization configuration to extract namesToParse
config = self.getNeutralizationConfig()
names_to_parse = []
if config and hasattr(config, 'namesToParse') and config.namesToParse:
# Split by newlines and filter out empty strings
names_to_parse = [name.strip() for name in config.namesToParse.split('\n') if name.strip()]
# Initialize anonymizer with custom names
anonymizer = DataAnonymizer(names_to_parse=names_to_parse)
# Process the text
result = anonymizer.process_content(text, 'text')
# Store attribute mappings in database
stored_attributes = []
for original_text, neutralized_text in result.mapping.items():
# Extract pattern type and UUID from the neutralized text format [type.uuid]
pattern_type = "unknown"
placeholder_uuid = None
if neutralized_text.startswith("[") and "." in neutralized_text and neutralized_text.endswith("]"):
# Extract type and UUID from [type.uuid] format
inner = neutralized_text[1:-1] # Remove [ and ]
if "." in inner:
pattern_type, placeholder_uuid = inner.split(".", 1)
# Check if this exact original text already has a placeholder in the database
existing_attribute = self.getExistingPlaceholder(original_text)
if existing_attribute:
# Reuse existing placeholder
existing_uuid = existing_attribute.id
existing_pattern_type = existing_attribute.patternType
# Update the neutralized text to use the existing UUID
result.data = result.data.replace(neutralized_text, f"[{existing_pattern_type}.{existing_uuid}]")
result.mapping[original_text] = f"[{existing_pattern_type}.{existing_uuid}]"
stored_attributes.append(existing_attribute)
else:
# Create new attribute record with the UUID that the neutralizer generated
attribute_data = {
"id": placeholder_uuid, # Use the UUID from the neutralizer
"mandateId": self.mandateId,
"userId": self.userId,
"originalText": original_text,
"fileId": file_id,
"patternType": pattern_type
}
attribute = DataNeutralizerAttributes.from_dict(attribute_data)
created_attribute = self.db.recordCreate(DataNeutralizerAttributes, attribute)
stored_attributes.append(created_attribute)
# The neutralized text is already in the correct [type.uuid] format
# No need to replace it, as it's already properly formatted
return {
"neutralized_text": result.data,
"attributes": stored_attributes,
"mapping": result.mapping,
"replaced_fields": result.replaced_fields,
"processed_info": result.processed_info
}
except Exception as e:
logger.error(f"Error neutralizing text: {str(e)}")
raise ValueError(f"Failed to neutralize text: {str(e)}")
def getExistingPlaceholder(self, original_text: str) -> Optional[DataNeutralizerAttributes]:
"""Get existing placeholder for original text if it exists"""
try:
existing_attributes = self.db.getRecordset(DataNeutralizerAttributes, recordFilter={
"mandateId": self.mandateId,
"userId": self.userId,
"originalText": original_text
})
if existing_attributes:
return DataNeutralizerAttributes.from_dict(existing_attributes[0])
return None
except Exception as e:
logger.error(f"Error getting existing placeholder: {str(e)}")
return None
def getNeutralizationAttributes(self, file_id: Optional[str] = None) -> List[DataNeutralizerAttributes]:
"""Get neutralization attributes, optionally filtered by file ID"""
try:
@ -1246,35 +1038,6 @@ class AppObjects:
logger.error(f"Error getting neutralization attributes: {str(e)}")
return []
def resolveNeutralizedText(self, text: str) -> str:
"""Resolve UIDs in neutralized text back to original text"""
try:
# Find all placeholders in the new format [type.uuid]
placeholder_pattern = r'\[([a-z]+)\.([a-f0-9-]{36})\]'
matches = re.findall(placeholder_pattern, text)
resolved_text = text
for placeholder_type, uid in matches:
# Find the attribute with this UID (which is the record ID)
attributes = self.db.getRecordset(DataNeutralizerAttributes, recordFilter={
"mandateId": self.mandateId,
"id": uid
})
if attributes:
attribute = attributes[0]
# Replace placeholder with original text
placeholder = f"[{placeholder_type}.{uid}]"
resolved_text = resolved_text.replace(placeholder, attribute["originalText"])
else:
logger.warning(f"No attribute found for UID {uid}")
return resolved_text
except Exception as e:
logger.error(f"Error resolving neutralized text: {str(e)}")
return text
def deleteNeutralizationAttributes(self, file_id: str) -> bool:
"""Delete all neutralization attributes for a specific file"""
try:

View file

@ -6,7 +6,7 @@ import pandas as pd
import openpyxl
from modules.shared.timezoneUtils import get_utc_now
from modules.connectors.connectorSharepoint import ConnectorSharepoint
from modules.services.serviceSharepoint.mainSharepoint import SharepointService
from modules.interfaces.interfaceTicketModel import TicketBase, Task
@ -14,7 +14,7 @@ from modules.interfaces.interfaceTicketModel import TicketBase, Task
@dataclass(slots=True)
class TicketSharepointSyncInterface:
connector_ticket: TicketBase
connector_sharepoint: ConnectorSharepoint
connector_sharepoint: SharepointService
task_sync_definition: dict
sync_folder: str
sync_file: str
@ -26,7 +26,7 @@ class TicketSharepointSyncInterface:
async def create(
cls,
connector_ticket: TicketBase,
connector_sharepoint: ConnectorSharepoint,
connector_sharepoint: SharepointService,
task_sync_definition: dict,
sync_folder: str,
sync_file: str,
@ -700,7 +700,7 @@ class TicketSharepointSyncInterface:
def _transform_tasks(
self, tasks: list[Task], include_put: bool = False
) -> list[Task]:
) -> list[Task]:
"""Transforms tasks according to the task_sync_definition."""
transformed_tasks = []

View file

@ -7,7 +7,7 @@ from modules.security.auth import limiter, getCurrentUser
# Import interfaces
from modules.interfaces.interfaceAppModel import User, DataNeutraliserConfig, DataNeutralizerAttributes
from modules.features.neutralizePlayground.mainNeutralizePlayground import NeutralizationService
from modules.features.neutralization.mainNeutralizationPlayground import NeutralizationService
# Configure logger
logger = logging.getLogger(__name__)

View file

@ -339,7 +339,7 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse
)
# Create JWT token data (like Microsoft does)
from modules.security.auth import createAccessToken
from modules.security.jwtService import createAccessToken
jwt_token_data = {
"sub": user.username,
"mandateId": str(user.mandateId),
@ -637,29 +637,19 @@ async def verify_token(
detail="No Google connection found for current user"
)
# Get the current token
current_token = appInterface.getConnectionToken(google_connection.id, auto_refresh=False)
# Get a fresh token via TokenManager convenience method
from modules.security.tokenManager import TokenManager
current_token = TokenManager().getFreshToken(appInterface, google_connection.id)
if not current_token:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail="No Google token found for this connection"
)
# Verify the token
# Verify the (fresh) token
token_verification = await verify_google_token(current_token.tokenAccess)
if not token_verification.get("valid"):
# Try to refresh the token if verification failed
from modules.security.tokenManager import TokenManager
token_manager = TokenManager()
refreshed_token = token_manager.refresh_token(current_token)
if refreshed_token:
appInterface.saveConnectionToken(refreshed_token)
# Verify the refreshed token
token_verification = await verify_google_token(refreshed_token.tokenAccess)
return {
"valid": token_verification.get("valid", False),
"scopes": token_verification.get("scopes", []),
@ -721,8 +711,9 @@ async def refresh_token(
logger.debug(f"Found Google connection: {google_connection.id}, status={google_connection.status}")
# Get the token for this specific connection using the new method
current_token = appInterface.getConnectionToken(google_connection.id, auto_refresh=False)
# Get the token for this specific connection (fresh if expiring soon)
from modules.security.tokenManager import TokenManager
current_token = TokenManager().getFreshToken(appInterface, google_connection.id)
if not current_token:
raise HTTPException(
@ -731,38 +722,25 @@ async def refresh_token(
)
# If we could not obtain a fresh token, report error
if not current_token:
raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="Failed to refresh token")
# Always attempt refresh (as per your requirement)
from modules.security.tokenManager import TokenManager
token_manager = TokenManager()
# Update the connection status and timing
google_connection.expiresAt = float(current_token.expiresAt) if current_token.expiresAt else google_connection.expiresAt
google_connection.lastChecked = get_utc_timestamp()
google_connection.status = ConnectionStatus.ACTIVE
appInterface.db.recordModify(UserConnection, google_connection.id, google_connection.to_dict())
refreshed_token = token_manager.refresh_token(current_token)
if refreshed_token:
# Save the new connection token (which will automatically replace old ones)
appInterface.saveConnectionToken(refreshed_token)
# Update the connection's expiration time
google_connection.expiresAt = float(refreshed_token.expiresAt)
google_connection.lastChecked = get_utc_timestamp()
google_connection.status = ConnectionStatus.ACTIVE
# Save updated connection
appInterface.db.recordModify(UserConnection, google_connection.id, google_connection.to_dict())
# Calculate time until expiration
current_time = get_utc_timestamp()
expires_in = int(refreshed_token.expiresAt - current_time)
return {
"message": "Token refreshed successfully",
"expires_at": refreshed_token.expiresAt,
"expires_in_seconds": expires_in
}
else:
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail="Failed to refresh token"
)
# Calculate time until expiration
current_time = get_utc_timestamp()
expires_in = int(current_token.expiresAt - current_time) if current_token.expiresAt else 0
return {
"message": "Token refreshed successfully",
"expires_at": current_token.expiresAt,
"expires_in_seconds": expires_in
}
except HTTPException:
raise

View file

@ -13,7 +13,8 @@ from jose import jwt
from pydantic import BaseModel
# Import auth modules
from modules.security.auth import createAccessToken, createAccessTokenWithCookie, setRefreshTokenCookie, getCurrentUser, limiter, SECRET_KEY, ALGORITHM
from modules.security.auth import getCurrentUser, limiter, SECRET_KEY, ALGORITHM
from modules.security.jwtService import createAccessToken, createRefreshToken, setAccessTokenCookie, setRefreshTokenCookie
from modules.interfaces.interfaceAppObjects import getInterface, getRootInterface
from modules.interfaces.interfaceAppModel import User, UserInDB, AuthAuthority, UserPrivilege, Token
from modules.shared.attributeUtils import ModelMixin
@ -91,11 +92,13 @@ async def login(
session_id = str(uuid.uuid4())
token_data["sid"] = session_id
# Create access token with httpOnly cookie
access_token = createAccessTokenWithCookie(token_data, response)
# Create access token + set cookie
access_token, _access_expires = createAccessToken(token_data)
setAccessTokenCookie(response, access_token)
# Create refresh token with httpOnly cookie
refresh_token = setRefreshTokenCookie(token_data, response)
# Create refresh token + set cookie
refresh_token, _refresh_expires = createRefreshToken(token_data)
setRefreshTokenCookie(response, refresh_token)
# Get expiration time for response
try:
@ -287,8 +290,9 @@ async def refresh_token(
"authenticationAuthority": currentUser.authenticationAuthority
}
# Create new access token with cookie
access_token = createAccessTokenWithCookie(token_data, response)
# Create new access token + set cookie
access_token, _expires = createAccessToken(token_data)
setAccessTokenCookie(response, access_token)
# Get expiration time
try:

View file

@ -14,7 +14,8 @@ import httpx
from modules.shared.configuration import APP_CONFIG
from modules.interfaces.interfaceAppObjects import getInterface, getRootInterface
from modules.interfaces.interfaceAppModel import AuthAuthority, User, Token, ConnectionStatus, UserConnection
from modules.security.auth import getCurrentUser, limiter, createAccessToken
from modules.security.auth import getCurrentUser, limiter
from modules.security.jwtService import createAccessToken
from modules.shared.attributeUtils import ModelMixin
from modules.shared.timezoneUtils import get_utc_now, create_expiration_timestamp, get_utc_timestamp
@ -559,9 +560,9 @@ async def refresh_token(
logger.debug(f"Found Microsoft connection: {msft_connection.id}, status={msft_connection.status}")
# Get the token for this specific connection using the new method
# Enable auto-refresh to handle expired tokens gracefully
current_token = appInterface.getConnectionToken(msft_connection.id, auto_refresh=True)
# Get a fresh token via TokenManager convenience method
from modules.security.tokenManager import TokenManager
current_token = TokenManager().getFreshToken(appInterface, msft_connection.id)
if not current_token:
raise HTTPException(

View file

@ -54,106 +54,7 @@ limiter = Limiter(key_func=get_remote_address)
# Logger
logger = logging.getLogger(__name__)
def createAccessToken(data: dict, expiresDelta: Optional[timedelta] = None) -> Tuple[str, datetime]:
"""
Creates a JWT Access Token.
Args:
data: Data to encode (usually user ID or username)
expiresDelta: Validity duration of the token (optional)
Returns:
Tuple of (JWT Token as string, expiration datetime)
"""
toEncode = data.copy()
# Ensure a token id (jti) exists for revocation tracking (only required for local, harmless otherwise)
if "jti" not in toEncode or not toEncode.get("jti"):
toEncode["jti"] = str(uuid.uuid4())
if expiresDelta:
expire = get_utc_now() + expiresDelta
else:
expire = get_utc_now() + timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
toEncode.update({"exp": expire})
encodedJwt = jwt.encode(toEncode, SECRET_KEY, algorithm=ALGORITHM)
return encodedJwt, expire
def createAccessTokenWithCookie(data: dict, response: Response, expiresDelta: Optional[timedelta] = None) -> str:
"""
Creates a JWT Access Token and sets it as an httpOnly cookie.
Args:
data: Data to encode (usually user ID or username)
response: FastAPI Response object to set cookie
expiresDelta: Validity duration of the token (optional)
Returns:
JWT Token as string
"""
access_token, expires_at = createAccessToken(data, expiresDelta)
# Set httpOnly cookie
response.set_cookie(
key="auth_token",
value=access_token,
httponly=True,
secure=True, # HTTPS only in production
samesite="strict",
max_age=int(expiresDelta.total_seconds()) if expiresDelta else ACCESS_TOKEN_EXPIRE_MINUTES * 60
)
return access_token
def createRefreshToken(data: dict) -> Tuple[str, datetime]:
"""
Creates a JWT Refresh Token with longer expiration.
Args:
data: Data to encode (usually user ID or username)
Returns:
Tuple of (JWT Refresh Token as string, expiration datetime)
"""
toEncode = data.copy()
# Ensure a token id (jti) exists for revocation tracking
if "jti" not in toEncode or not toEncode.get("jti"):
toEncode["jti"] = str(uuid.uuid4())
# Add refresh token type
toEncode["type"] = "refresh"
expire = get_utc_now() + timedelta(days=REFRESH_TOKEN_EXPIRE_DAYS)
toEncode.update({"exp": expire})
encodedJwt = jwt.encode(toEncode, SECRET_KEY, algorithm=ALGORITHM)
return encodedJwt, expire
def setRefreshTokenCookie(data: dict, response: Response) -> str:
"""
Creates a JWT Refresh Token and sets it as an httpOnly cookie.
Args:
data: Data to encode (usually user ID or username)
response: FastAPI Response object to set cookie
Returns:
JWT Refresh Token as string
"""
refresh_token, expires_at = createRefreshToken(data)
# Set httpOnly cookie for refresh token
response.set_cookie(
key="refresh_token",
value=refresh_token,
httponly=True,
secure=True, # HTTPS only in production
samesite="strict",
max_age=REFRESH_TOKEN_EXPIRE_DAYS * 24 * 60 * 60 # Days to seconds
)
return refresh_token
# Note: JWT creation and cookie helpers moved to modules.security.jwtService
def _getUserBase(token: str = Depends(cookieAuth)) -> User:
"""

View file

@ -0,0 +1,72 @@
"""
JWT Service
Centralizes local JWT creation and cookie helpers.
"""
import uuid
from datetime import datetime, timedelta
from typing import Optional, Tuple
from fastapi import Response
from jose import jwt
from modules.shared.configuration import APP_CONFIG
from modules.shared.timezoneUtils import get_utc_now
# Config
SECRET_KEY = APP_CONFIG.get("APP_JWT_KEY_SECRET")
ALGORITHM = APP_CONFIG.get("Auth_ALGORITHM")
ACCESS_TOKEN_EXPIRE_MINUTES = int(APP_CONFIG.get("APP_TOKEN_EXPIRY"))
REFRESH_TOKEN_EXPIRE_DAYS = int(APP_CONFIG.get("APP_REFRESH_TOKEN_EXPIRY", "7"))
def createAccessToken(data: dict, expiresDelta: Optional[timedelta] = None) -> Tuple[str, datetime]:
    """Create a JWT access token and return (token, expiresAt)."""
    toEncode = data.copy()
    # Ensure a token id (jti) exists for revocation tracking
    if "jti" not in toEncode or not toEncode.get("jti"):
        toEncode["jti"] = str(uuid.uuid4())
expire = get_utc_now() + (expiresDelta if expiresDelta else timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES))
toEncode.update({"exp": expire})
encodedJwt = jwt.encode(toEncode, SECRET_KEY, algorithm=ALGORITHM)
return encodedJwt, expire
def createRefreshToken(data: dict) -> Tuple[str, datetime]:
    """Create a JWT refresh token and return (token, expiresAt)."""
    toEncode = data.copy()
    if "jti" not in toEncode or not toEncode.get("jti"):
        toEncode["jti"] = str(uuid.uuid4())
toEncode["type"] = "refresh"
expire = get_utc_now() + timedelta(days=REFRESH_TOKEN_EXPIRE_DAYS)
toEncode.update({"exp": expire})
encodedJwt = jwt.encode(toEncode, SECRET_KEY, algorithm=ALGORITHM)
return encodedJwt, expire
def setAccessTokenCookie(response: Response, token: str, expiresDelta: Optional[timedelta] = None) -> None:
"""Set access token as httpOnly cookie."""
maxAge = int(expiresDelta.total_seconds()) if expiresDelta else ACCESS_TOKEN_EXPIRE_MINUTES * 60
response.set_cookie(
key="auth_token",
value=token,
httponly=True,
secure=True,
samesite="strict",
max_age=maxAge
)
def setRefreshTokenCookie(response: Response, token: str) -> None:
"""Set refresh token as httpOnly cookie."""
response.set_cookie(
key="refresh_token",
value=token,
httponly=True,
secure=True,
samesite="strict",
max_age=REFRESH_TOKEN_EXPIRE_DAYS * 24 * 60 * 60
)
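# --- Usage sketch (illustrative, not part of this commit) ---
# How a login endpoint combines the helpers above; the route path and token
# payload are invented, the helper signatures match the definitions in this file.
from fastapi import FastAPI

demoApp = FastAPI()

@demoApp.post("/demo/login")
async def demoLogin(response: Response) -> dict:
    tokenData = {"sub": "alice", "mandateId": "m-1"}  # illustrative payload
    accessToken, accessExpires = createAccessToken(tokenData)
    setAccessTokenCookie(response, accessToken)
    refreshToken, _refreshExpires = createRefreshToken(tokenData)
    setRefreshTokenCookie(response, refreshToken)
    return {"expires_at": accessExpires.isoformat()}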

View file

@ -6,7 +6,7 @@ Handles all token operations including automatic refresh for backend services.
import logging
import httpx
from datetime import datetime
from typing import Optional, Dict, Any
from typing import Optional, Dict, Any, Callable
from modules.interfaces.interfaceAppModel import Token, AuthAuthority
from modules.shared.configuration import APP_CONFIG
@ -198,4 +198,66 @@ class TokenManager:
except Exception as e:
logger.error(f"Error refreshing token: {str(e)}")
return None
def ensure_fresh_token(self, token: Token, *, seconds_before_expiry: int = 30 * 60, save_callback: Optional[Callable[[Token], None]] = None) -> Optional[Token]:
"""Ensure a token is fresh; refresh if expiring within threshold.
Args:
token: Existing token to validate/refresh.
seconds_before_expiry: Threshold window to proactively refresh.
save_callback: Optional function to persist a refreshed token.
Returns:
A fresh token (refreshed or original) or None if refresh failed.
"""
try:
if token is None:
return None
now_ts = get_utc_timestamp()
expires_at = token.expiresAt or 0
# If token expires within the threshold, try to refresh
if expires_at and expires_at < (now_ts + seconds_before_expiry):
logger.info(
f"ensure_fresh_token: Token for connection {token.connectionId} expiring soon "
f"(in {max(0, expires_at - now_ts)}s). Attempting proactive refresh."
)
refreshed = self.refresh_token(token)
if refreshed:
if save_callback is not None:
try:
save_callback(refreshed)
except Exception as e:
logger.warning(f"ensure_fresh_token: Failed to persist refreshed token: {e}")
return refreshed
else:
logger.warning("ensure_fresh_token: Token refresh failed")
return None
# Token is sufficiently fresh
return token
except Exception as e:
logger.error(f"ensure_fresh_token: Error ensuring fresh token: {e}")
return None
# Convenience wrapper to fetch and ensure fresh token for a connection via interface layer
def getFreshToken(self, interfaceApp, connectionId: str, secondsBeforeExpiry: int = 30 * 60) -> Optional[Token]:
"""Return a fresh token for a connection, refreshing when expiring soon.
Reads the latest stored token via interfaceApp.getConnectionToken, then
uses ensure_fresh_token to refresh if needed and persists the refreshed
token via interfaceApp.saveConnectionToken.
"""
try:
token = interfaceApp.getConnectionToken(connectionId)
if not token:
return None
return self.ensure_fresh_token(
token,
seconds_before_expiry=secondsBeforeExpiry,
save_callback=lambda t: interfaceApp.saveConnectionToken(t)
)
except Exception as e:
logger.error(f"getFreshToken: Error fetching or refreshing token for connection {connectionId}: {e}")
return None
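# --- Usage sketch (illustrative, not part of this commit) ---
# Typical caller pattern after this refactor: the interface layer only reads
# and stores tokens, while TokenManager guarantees freshness. The header shape
# is an assumption; token.tokenAccess is used elsewhere in this diff.
def buildAuthHeader(interfaceApp, connectionId: str) -> Optional[Dict[str, str]]:
    token = TokenManager().getFreshToken(interfaceApp, connectionId)
    if token is None:
        return None  # surface an auth error; do not reuse a stale token
    return {"Authorization": f"Bearer {token.tokenAccess}"}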

View file

@ -51,8 +51,8 @@ class TokenRefreshService:
try:
logger.debug(f"Refreshing Google token for connection {connection.id}")
# Get current token
current_token = interface.getConnectionToken(connection.id, auto_refresh=False)
# Get current token (no refresh in interface layer)
current_token = interface.getConnectionToken(connection.id)
if not current_token:
logger.warning(f"No Google token found for connection {connection.id}")
return False
@ -100,8 +100,8 @@ class TokenRefreshService:
try:
logger.debug(f"Refreshing Microsoft token for connection {connection.id}")
# Get current token
current_token = interface.getConnectionToken(connection.id, auto_refresh=False)
# Get current token (no refresh in interface layer)
current_token = interface.getConnectionToken(connection.id)
if not current_token:
logger.warning(f"No Microsoft token found for connection {connection.id}")
return False

View file

@ -0,0 +1,100 @@
from typing import Any
from modules.interfaces.interfaceAppModel import User
from modules.interfaces.interfaceChatModel import ChatWorkflow
from modules.services.serviceWorkflows.mainServiceWorkflows import WorkflowService
class PublicService:
"""Lightweight proxy exposing only public callable attributes of a target.
- Hides names starting with '_'
- Optionally restricts to callables only
- Optional name_filter predicate for allow-list patterns
"""
def __init__(self, target: Any, functions_only: bool = True, name_filter=None):
self._target = target
self._functions_only = functions_only
self._name_filter = name_filter
def __getattr__(self, name: str):
if name.startswith('_'):
raise AttributeError(f"'{type(self._target).__name__}' attribute '{name}' is private")
if self._name_filter and not self._name_filter(name):
raise AttributeError(f"'{name}' not exposed by policy")
attr = getattr(self._target, name)
if self._functions_only and not callable(attr):
raise AttributeError(f"'{name}' is not a function")
return attr
def __dir__(self):
names = [
n for n in dir(self._target)
if not n.startswith('_')
and (not self._functions_only or callable(getattr(self._target, n, None)))
and (self._name_filter(n) if self._name_filter else True)
]
return sorted(names)
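# --- Usage sketch (illustrative, not part of this commit) ---
# What the proxy above exposes and hides; _DemoService is invented:
class _DemoService:
    version = "1.0"  # non-callable attribute

    def ping(self) -> str:
        return "pong"

    def _secret(self) -> str:
        return "hidden"

_demo = PublicService(_DemoService())
assert _demo.ping() == "pong"  # public callable passes through
# _demo._secret -> AttributeError (private name)
# _demo.version -> AttributeError (not a function while functions_only=True)
assert dir(PublicService(_DemoService(), name_filter=lambda n: n == "ping")) == ["ping"]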
class Services:
def __init__(self, user: User, workflow: ChatWorkflow):
self.user: User = user
self.workflow: ChatWorkflow = workflow
# Directly expose existing service modules under distinct attributes,
# so neither document proxy shadows the other
from .serviceDocument.mainServiceDocumentExtraction import DocumentExtractionService
self.documentExtraction = PublicService(DocumentExtractionService(self))
from .serviceDocument.mainServiceDocumentGeneration import DocumentGenerationService
self.documentGeneration = PublicService(DocumentGenerationService(self))
from .serviceNeutralization.mainNeutralization import NeutralizationService
self.neutralization = PublicService(NeutralizationService())
from .serviceSharepoint.mainSharepoint import SharePointService
self.sharepoint = PublicService(SharePointService(self))
from .serviceAi.mainServiceAi import AiService
self.ai = PublicService(AiService(self))
from .serviceWorkflows.mainServiceWorkflows import WorkflowService
self.workflows = PublicService(WorkflowService(self))  # plural, so self.workflow (the ChatWorkflow) stays intact
# Initialize chat interface for workflow operations
from modules.interfaces.interfaceChatObjects import getInterface as getChatInterface
self.chatInterface = getChatInterface(user)
# Chat interface wrapper methods
def getWorkflow(self, workflowId: str):
return self.chatInterface.getWorkflow(workflowId)
def createWorkflow(self, workflowData: dict):
return self.chatInterface.createWorkflow(workflowData)
def updateWorkflow(self, workflowId: str, workflowData: dict):
return self.chatInterface.updateWorkflow(workflowId, workflowData)
def createMessage(self, messageData: dict):
return self.chatInterface.createMessage(messageData)
def updateMessage(self, messageId: str, messageData: dict):
return self.chatInterface.updateMessage(messageId, messageData)
def createLog(self, logData: dict):
return self.chatInterface.createLog(logData)
def updateWorkflowStats(self, workflowId: str, bytesSent: int = 0, bytesReceived: int = 0, tokenCount: int = 0):
return self.chatInterface.updateWorkflowStats(workflowId, bytesSent, bytesReceived, tokenCount)
@property
def mandateId(self):
return self.chatInterface.mandateId
def getInterface(user: User, workflow: ChatWorkflow) -> Services:
return Services(user, workflow)
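# --- Usage sketch (illustrative, not part of this commit) ---
# How a workflow step consumes the facade; currentUser/workflow come from the
# authenticated request and the active workflow, the prompt is invented, and
# workflow.id assumes ChatWorkflow exposes an id attribute:
#
#   from modules.services import getInterface
#
#   services = getInterface(currentUser, workflow)
#   answer = await services.ai.callAi(prompt="Summarize the ticket history.")
#   services.createLog({"workflowId": workflow.id, "message": "summary created"})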

View file

@ -0,0 +1,137 @@
import logging
from typing import Dict, Any, List, Optional, Tuple
from modules.interfaces.interfaceChatModel import ChatDocument
from modules.services.serviceDocument.documentExtraction import DocumentExtractionService
from modules.interfaces.interfaceAiModel import AiCallRequest, AiCallOptions
from modules.interfaces.interfaceAiObjects import AiObjects
logger = logging.getLogger(__name__)
# Model registry is now provided by interfaces via AiModels
class AiService:
"""Centralized AI service orchestrating documents, model selection and failover.
The concrete connector instances (OpenAI/Anthropic) are injected by the interface layer.
"""
def __init__(self, aiObjects: AiObjects | None = None) -> None:
# Only depend on interfaces
self.aiObjects = aiObjects or AiObjects()
self.documentExtractor = DocumentExtractionService()
async def callAi(
self,
prompt: str,
documents: Optional[List[ChatDocument]] = None,
processDocumentsIndividually: bool = False,
options: Optional[AiCallOptions] = None,
) -> str:
try:
documentContent = ""
if documents:
documentContent = await self._processDocumentsForAi(
documents,
options.operationType if options else "general",
options.compressContext if options else True,
processDocumentsIndividually,
)
effectiveOptions = options or AiCallOptions()
request = AiCallRequest(
prompt=prompt,
context=documentContent or None,
options=effectiveOptions,
)
response = await self.aiObjects.call(request)
return response.content
except Exception as e:
logger.error(f"Error in centralized AI call: {str(e)}")
return f"Error: {str(e)}"
# Model selection now handled by interface AiObjects
# Cost estimation handled by interface for model selection
async def _processDocumentsForAi(
self,
documents: List[ChatDocument],
operationType: str,
compressDocuments: bool,
processIndividually: bool,
) -> str:
if not documents:
return ""
processedContents: List[str] = []
for doc in documents:
try:
extracted = await self.documentExtractor.processFileData(
doc.fileData,
doc.fileName,
doc.mimeType,
prompt=f"Extract relevant content for {operationType}",
documentId=doc.id,
enableAI=True,
)
docContent: List[str] = []
for contentItem in extracted.contents:
if contentItem.data and contentItem.data.strip():
docContent.append(contentItem.data)
if docContent:
combinedDocContent = "\n\n".join(docContent)
if (
compressDocuments
and len(combinedDocContent.encode("utf-8")) > 10000
):
combinedDocContent = await self._compressContent(
combinedDocContent, 10000, "document"
)
processedContents.append(
f"Document: {doc.fileName}\n{combinedDocContent}"
)
except Exception as e:
logger.warning(
f"Error processing document {doc.fileName}: {str(e)}"
)
processedContents.append(
f"Document: {doc.fileName}\n[Error processing document: {str(e)}]"
)
return "\n\n---\n\n".join(processedContents)
# Prompt/context optimization (compression) handled by interface
async def _compressContent(self, content: str, targetSize: int, contentType: str) -> str:
    if len(content.encode("utf-8")) <= targetSize:
        return content
    try:
        # Services must not call connectors directly, so there is no AI-based
        # compression here; truncate on a UTF-8 byte boundary instead
        data = content.encode("utf-8")
        return data[:targetSize].decode("utf-8", errors="ignore") + "... [truncated]"
    except Exception as e:
        logger.warning(f"Compression failed, using plain truncation: {str(e)}")
        return content[:targetSize] + "... [truncated]"
# Failover logic now centralized in interface via model selection; service delegates a single call
# Fallback selection moved to interface; service doesn't select models directly

File diff suppressed because it is too large

View file

@ -22,7 +22,7 @@ from modules.interfaces.interfaceChatModel import (
ContentItem,
ContentMetadata
)
from modules.services.serviceNeutralization.neutralizer import DataAnonymizer
from modules.services.serviceNeutralization.mainNeutralization import NeutralizationService
from modules.shared.configuration import APP_CONFIG
logger = logging.getLogger(__name__)
@ -36,13 +36,16 @@ class FileProcessingError(Exception):
"""Custom exception for file processing errors."""
pass
class DocumentExtraction:
class DocumentExtractionService:
"""Processor for handling document operations and content extraction."""
def __init__(self, serviceCenter=None):
"""Initialize the document processor."""
self._neutralizer = DataAnonymizer() if APP_CONFIG.get("ENABLE_CONTENT_NEUTRALIZATION", False) else None
self._neutralizer = NeutralizationService() if APP_CONFIG.get("ENABLE_CONTENT_NEUTRALIZATION", False) else None
self._serviceCenter = serviceCenter
# Centralized services interface (for AI). Reuse the Services facade when we
# were constructed by it (avoids re-entrant construction), otherwise build one
# from the supplied service center, or skip AI enrichment entirely.
from modules.services import getInterface as getServices, Services
if isinstance(serviceCenter, Services):
    self.services = serviceCenter
elif serviceCenter is not None:
    self.services = getServices(serviceCenter.user, serviceCenter.workflow)
else:
    self.services = None
self.supportedTypes: Dict[str, Callable[[bytes, str, str], Awaitable[List[ContentItem]]]] = {
# Text and data files
@ -218,10 +221,6 @@ class DocumentExtraction:
# This should never be reached, but just in case
raise FileProcessingError(f"Failed to decode {fileName} with any encoding")
def initialize(self) -> None:
"""Initialize the document processor."""
pass
def _loadPdfExtractor(self):
"""Loads PDF extraction libraries when needed"""
global pdfExtractorLoaded
@ -1132,18 +1131,18 @@ class DocumentExtraction:
# Create a basic content item explaining the limitation
info_content = f"""Legacy Word Document (.doc) - {fileName}
Note: This is a legacy .doc format file. For better content extraction,
consider converting to .docx format.
Note: This is a legacy .doc format file. For better content extraction,
consider converting to .docx format.
File size: {len(fileData)} bytes
Format: Microsoft Word 97-2003 Document
File size: {len(fileData)} bytes
Format: Microsoft Word 97-2003 Document
Content extraction from .doc files requires specialized tools like:
- antiword (Linux/Unix)
- catdoc (Linux/Unix)
- Microsoft Word (for conversion)
Content extraction from .doc files requires specialized tools like:
- antiword (Linux/Unix)
- catdoc (Linux/Unix)
- Microsoft Word (for conversion)
The raw binary content is available but not human-readable."""
The raw binary content is available but not human-readable."""
contentItems.append(ContentItem(
label="info",
@ -1183,18 +1182,18 @@ The raw binary content is available but not human-readable."""
# Create a basic content item explaining the limitation
info_content = f"""Legacy Excel Document (.xls) - {fileName}
Note: This is a legacy .xls format file. For better content extraction,
consider converting to .xlsx format.
Note: This is a legacy .xls format file. For better content extraction,
consider converting to .xlsx format.
File size: {len(fileData)} bytes
Format: Microsoft Excel 97-2003 Workbook
File size: {len(fileData)} bytes
Format: Microsoft Excel 97-2003 Workbook
Content extraction from .xls files requires specialized tools like:
- xlrd (Python library)
- Microsoft Excel (for conversion)
- LibreOffice (for conversion)
Content extraction from .xls files requires specialized tools like:
- xlrd (Python library)
- Microsoft Excel (for conversion)
- LibreOffice (for conversion)
The raw binary content is available but not human-readable."""
The raw binary content is available but not human-readable."""
contentItems.append(ContentItem(
label="info",
@ -1234,18 +1233,18 @@ The raw binary content is available but not human-readable."""
# Create a basic content item explaining the limitation
info_content = f"""Legacy PowerPoint Document (.ppt) - {fileName}
Note: This is a legacy .ppt format file. For better content extraction,
consider converting to .pptx format.
Note: This is a legacy .ppt format file. For better content extraction,
consider converting to .pptx format.
File size: {len(fileData)} bytes
Format: Microsoft PowerPoint 97-2003 Presentation
File size: {len(fileData)} bytes
Format: Microsoft PowerPoint 97-2003 Presentation
Content extraction from .ppt files requires specialized tools like:
- python-pptx (limited support for .ppt)
- Microsoft PowerPoint (for conversion)
- LibreOffice (for conversion)
Content extraction from .ppt files requires specialized tools like:
- python-pptx (limited support for .ppt)
- Microsoft PowerPoint (for conversion)
- LibreOffice (for conversion)
The raw binary content is available but not human-readable."""
The raw binary content is available but not human-readable."""
contentItems.append(ContentItem(
label="info",
@ -1417,11 +1416,7 @@ The raw binary content is available but not human-readable."""
# Process with AI based on content type
try:
if mimeType.startswith('image/') and mimeType != "image/svg+xml":
# For images (excluding SVG), extract meaningful content as text
# Use AI to analyze the image and extract relevant information
# Create a specific prompt for image content extraction
# For images (excluding SVG), analyze via centralized AI service
imagePrompt = f"""
Analyze this image and extract the actual content and information from it.
Focus on extracting text, data, charts, diagrams, or any meaningful content.
@ -1430,8 +1425,19 @@ The raw binary content is available but not human-readable."""
Original prompt: {prompt}
"""
processedContent = await self._serviceCenter.callAiImageBasic(imagePrompt, chunk, mimeType)
from modules.interfaces.interfaceChatModel import ChatDocument
from modules.interfaces.interfaceAiModel import AiCallOptions
image_doc = ChatDocument(fileData=chunk, fileName="image", mimeType=mimeType)
# callAi reads options via attribute access, so pass an AiCallOptions
# instance rather than a plain dict; only fields confirmed by the model
# are set here (the original also hinted at processType/priority/maxCost)
processedContent = await self.services.ai.callAi(
    prompt=imagePrompt,
    documents=[image_doc],
    options=AiCallOptions(
        operationType="analyse_content",
        compressContext=True,
    ),
)
else:
# For text content (including SVG), use text AI service
# Neutralize content if neutralizer is enabled (only for text)
@ -1456,7 +1462,36 @@ The raw binary content is available but not human-readable."""
# For code files, preserve the complete content without AI processing
processedContent = contentToProcess
else:
processedContent = await self._serviceCenter.callAiTextBasic(aiPrompt, contentToProcess)
if self.services and hasattr(self.services, 'ai'):
    from modules.interfaces.interfaceAiModel import AiCallOptions
    processedContent = await self.services.ai.callAi(
        prompt=aiPrompt,
        documents=None,
        options=AiCallOptions(
            operationType="analyse_content",
            compressPrompt=True,
            compressContext=False,
        ),
    )
else:
    # No centralized AI service available (e.g. no service center
    # context): keep the extracted content as-is rather than
    # dereferencing the missing service
    processedContent = contentToProcess
chunkResults.append(processedContent)
except Exception as aiError:

View file

@ -13,7 +13,7 @@ from modules.services.serviceDocument.documentUtility import (
logger = logging.getLogger(__name__)
class DocumentGenerator:
class DocumentGenerationService:
def __init__(self, service):
self.service = service

View file

@ -0,0 +1,206 @@
"""
Data Neutralization Service
Handles file processing for data neutralization including SharePoint integration
GDPR-compliant data neutralizer for AI agent systems
Supports TXT, JSON, CSV, Excel, and Word files
Multilingual: DE, EN, FR, IT
"""
import logging
import re
import os
import uuid
import json
from typing import Dict, List, Any, Optional, Tuple
from datetime import datetime
from pathlib import Path
import mimetypes
from modules.interfaces.interfaceAppObjects import getInterface
from modules.interfaces.interfaceAppModel import User, DataNeutraliserConfig, DataNeutralizerAttributes
from modules.shared.timezoneUtils import get_utc_timestamp
# Import all necessary classes and functions for neutralization
from modules.services.serviceNeutralization.subProcessCommon import ProcessResult, CommonUtils, NeutralizationResult, NeutralizationAttribute
from modules.services.serviceNeutralization.subProcessText import TextProcessor, PlainText
from modules.services.serviceNeutralization.subProcessList import ListProcessor, TableData
from modules.services.serviceNeutralization.subProcessBinary import BinaryProcessor, BinaryData
from modules.services.serviceNeutralization.subParseString import StringParser
from modules.services.serviceNeutralization.subPatterns import Pattern, HeaderPatterns, DataPatterns, TextTablePatterns
logger = logging.getLogger(__name__)
class NeutralizationService:
"""Service for handling data neutralization operations"""
def __init__(self, current_user: User = None, names_to_parse: List[str] = None):
"""Initialize the service with user context and anonymization processors
Args:
current_user: User object for context (optional for basic neutralization)
names_to_parse: List of names to parse and replace (case-insensitive)
"""
self.current_user = current_user
self.app_interface = getInterface(current_user) if current_user else None
# Initialize anonymization processors
self.names_to_parse = names_to_parse or []
self.textProcessor = TextProcessor(names_to_parse)
self.listProcessor = ListProcessor(names_to_parse)
self.binaryProcessor = BinaryProcessor()
self.commonUtils = CommonUtils()
def getConfig(self) -> Optional[DataNeutraliserConfig]:
"""Get the neutralization configuration for the current user's mandate"""
if not self.app_interface:
return None
return self.app_interface.getNeutralizationConfig()
def saveConfig(self, config_data: Dict[str, Any]) -> DataNeutraliserConfig:
"""Save or update the neutralization configuration"""
if not self.app_interface:
raise ValueError("User context required for saving configuration")
return self.app_interface.createOrUpdateNeutralizationConfig(config_data)
# Public API: process text or file
def processText(self, text: str) -> Dict[str, Any]:
"""Neutralize a raw text string and return a standard result dict."""
return self._neutralizeText(text, 'text')
def processFile(self, fileId: str) -> Dict[str, Any]:
"""Neutralize a file referenced by its fileId using app interface."""
if not self.app_interface:
raise ValueError("User context is required to process a file by fileId")
# Fetch file data and metadata
fileInfo = None
try:
# getFile returns an object; fall back to None if the lookup fails
fileInfo = self.app_interface.getFile(fileId)
except Exception:
fileInfo = None
fileName = getattr(fileInfo, 'fileName', None) if fileInfo else None
mimeType = getattr(fileInfo, 'mimeType', None) if fileInfo else None
fileData = self.app_interface.getFileData(fileId)
if not fileData:
raise ValueError(f"No file data found for fileId: {fileId}")
# Determine textType from mime
textType = self._getContentTypeFromMime(mimeType or '')
# Decode to text
try:
textContent = fileData.decode('utf-8')
except UnicodeDecodeError:
decoded = None
for enc in ['latin-1', 'cp1252', 'iso-8859-1']:
try:
decoded = fileData.decode(enc)
break
except UnicodeDecodeError:
continue
if decoded is None:
raise ValueError("Unable to decode file content")
textContent = decoded
result = self._neutralizeText(textContent, textType)
# Add a reasonable output filename if original known
if fileName:
result['neutralized_file_name'] = f"neutralized_{fileName}"
result['file_id'] = fileId
return result
def resolveText(self, text: str) -> str:
if not self.app_interface:
return text
try:
placeholder_pattern = r'\[([a-z]+)\.([a-f0-9-]{36})\]'
matches = re.findall(placeholder_pattern, text)
resolved_text = text
for placeholder_type, uid in matches:
attributes = self.app_interface.db.getRecordset(
DataNeutralizerAttributes,
recordFilter={
"mandateId": self.app_interface.mandateId,
"id": uid
}
)
if attributes:
attribute = attributes[0]
placeholder = f"[{placeholder_type}.{uid}]"
resolved_text = resolved_text.replace(placeholder, attribute["originalText"])
return resolved_text
except Exception:
return text
# Helper functions
def _neutralizeText(self, text: str, textType: str = None) -> Dict[str, Any]:
"""Process text and return unified dict for API consumption."""
try:
# Auto-detect content type if not provided
if textType is None:
textType = self.commonUtils.detect_content_type(text)
# Check if content is binary data
if self.binaryProcessor.is_binary_content(text):
data, mapping, replaced_fields, processed_info = self.binaryProcessor.process_binary_content(text)
neutralized_text = data if isinstance(data, str) else str(data)
attributes = [NeutralizationAttribute(original=k, placeholder=v) for k, v in mapping.items()]
return NeutralizationResult(
neutralized_text=neutralized_text,
mapping=mapping,
attributes=attributes,
processed_info=processed_info
).model_dump()
# Inline former _processData routing
if textType in ['csv', 'json', 'xml']:
if textType == 'csv':
data, mapping, replaced_fields, processed_info = self.listProcessor.process_csv_content(text)
elif textType == 'json':
data, mapping, replaced_fields, processed_info = self.listProcessor.process_json_content(text)
else: # xml
data, mapping, replaced_fields, processed_info = self.listProcessor.process_xml_content(text)
else:
data, mapping, replaced_fields, processed_info = self.textProcessor.process_text_content(text)
# Stringify data consistently
if textType == 'csv':
try:
neutralized_text = data.to_csv(index=False)
except Exception:
neutralized_text = str(data)
elif textType == 'json':
neutralized_text = json.dumps(data, ensure_ascii=False)
elif textType == 'xml':
neutralized_text = str(data)
else:
neutralized_text = str(data)
attributes = [NeutralizationAttribute(original=k, placeholder=v) for k, v in mapping.items()]
return NeutralizationResult(
neutralized_text=neutralized_text,
mapping=mapping,
attributes=attributes,
processed_info=processed_info
).model_dump()
except Exception as e:
logger.error(f"Error processing content: {str(e)}")
return NeutralizationResult(
neutralized_text='',
mapping={},
attributes=[],
processed_info={'type': 'error', 'error': str(e)}
).model_dump()
def _getContentTypeFromMime(self, mime_type: str) -> str:
"""Determine content type from MIME type for neutralization processing"""
if mime_type.startswith('text/'):
return 'text'
elif mime_type in ['application/json', 'application/xml', 'text/xml']:
return 'json' if 'json' in mime_type else 'xml'
elif mime_type in ['text/csv', 'application/csv']:
return 'csv'
else:
return 'text' # Default to text processing
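# --- Usage sketch (illustrative, not part of this commit) ---
# Round trip through the service above; the exact placeholder text produced by
# the processors is an assumption, the result keys match NeutralizationResult.
service = NeutralizationService(names_to_parse=["Hans Muster"])
result = service.processText("Contact: Hans Muster, hans@example.com")
print(result["neutralized_text"])  # e.g. "Contact: [name.<uuid>], [email.<uuid>]"
print(result["mapping"])           # original value -> placeholder
# resolveText inverts the placeholders again, but needs a user context
# (current_user) so attributes can be looked up per mandate.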

View file

@ -1,112 +0,0 @@
"""
GDPR-compliant data neutralizer for AI agent systems
Supports TXT, JSON, CSV, Excel, and Word files
Multilingual: DE, EN, FR, IT
"""
import logging
from typing import Dict, List, Any
# Import all necessary classes and functions
from modules.services.serviceNeutralization.subProcessCommon import ProcessResult, CommonUtils
from modules.services.serviceNeutralization.subProcessText import TextProcessor, PlainText
from modules.services.serviceNeutralization.subProcessList import ListProcessor, TableData
from modules.services.serviceNeutralization.subProcessBinary import BinaryProcessor, BinaryData
from modules.services.serviceNeutralization.subParseString import StringParser
from modules.services.serviceNeutralization.subPatterns import Pattern, HeaderPatterns, DataPatterns, TextTablePatterns
# Configure logging
logger = logging.getLogger(__name__)
# Export all classes and functions for external use
__all__ = [
'DataAnonymizer',
'ProcessResult',
'CommonUtils',
'TextProcessor',
'PlainText',
'ListProcessor',
'TableData',
'BinaryProcessor',
'BinaryData',
'StringParser',
'Pattern',
'HeaderPatterns',
'DataPatterns',
'TextTablePatterns'
]
class DataAnonymizer:
"""Hauptklasse für die Datenanonymisierung"""
def __init__(self, names_to_parse: List[str] = None):
"""Initialize the anonymizer with specialized processors
Args:
names_to_parse: List of names to parse and replace (case-insensitive)
"""
self.names_to_parse = names_to_parse or []
# Initialize specialized processors
self.text_processor = TextProcessor(names_to_parse)
self.list_processor = ListProcessor(names_to_parse)
self.binary_processor = BinaryProcessor()
# Common utilities
self.common_utils = CommonUtils()
def process_content(self, content: str, content_type: str = None) -> ProcessResult:
"""
Process content and return anonymized data
Args:
content: Content to process
content_type: Type of content ('csv', 'json', 'xml', 'text', 'binary')
If None, will auto-detect
Returns:
ProcessResult: Contains anonymized data, mapping, replaced fields and processing info
"""
try:
# Auto-detect content type if not provided
if content_type is None:
content_type = self.common_utils.detect_content_type(content)
# Check if content is binary data
if self.binary_processor.is_binary_content(content):
return self.binary_processor.process_binary_content(content)
# Route to appropriate processor based on content type
if content_type in ['csv', 'json', 'xml']:
if content_type == 'csv':
result, mapping, replaced_fields, processed_info = self.list_processor.process_csv_content(content)
elif content_type == 'json':
result, mapping, replaced_fields, processed_info = self.list_processor.process_json_content(content)
else: # xml
result, mapping, replaced_fields, processed_info = self.list_processor.process_xml_content(content)
return ProcessResult(result, mapping, replaced_fields, processed_info)
else:
# Handle as text
result, mapping, replaced_fields, processed_info = self.text_processor.process_text_content(content)
return ProcessResult(result, mapping, replaced_fields, processed_info)
except Exception as e:
logger.error(f"Error processing content: {str(e)}")
return ProcessResult(None, {}, [], {'type': 'error', 'error': str(e)})
def get_mapping(self) -> Dict[str, str]:
"""
Get the combined mapping from all processors
Returns:
Dict[str, str]: Combined mapping dictionary
"""
text_mapping = self.text_processor.get_mapping()
list_mapping = self.list_processor.get_mapping()
return self.common_utils.merge_mappings(text_mapping, list_mapping)
def clear_mapping(self):
"""Clear the mapping in all processors"""
self.text_processor.clear_mapping()
self.list_processor.clear_mapping()

View file

@ -1,91 +0,0 @@
# Neutralizer Module Structure
This module provides GDPR-compliant (DSGVO) data anonymization for AI agent systems. The code has been refactored into specialized sub-modules for better maintainability and code reuse.
## Module Overview
### Core Module
- **`neutralizer.py`** - Main DataAnonymizer class that orchestrates all processing
### Specialized Processors
- **`subProcessText.py`** - Handles plain text processing without header information
- **`subProcessList.py`** - Handles structured data with headers (CSV, JSON, XML)
- **`subProcessBinary.py`** - Handles binary data types (images, audio, video, etc.)
### Utility Modules
- **`subParseString.py`** - String parsing and replacement utilities for emails, phones, addresses, IDs and names
- **`subProcessCommon.py`** - Common utilities and data structures shared across modules
- **`subPatterns.py`** - Pattern definitions for data anonymization
## Key Features
### 1. Modular Architecture
- **Separation of Concerns**: Each module handles a specific type of data processing
- **Code Reuse**: Common functionality is centralized in utility modules
- **Maintainability**: Easier to modify and extend individual components
### 2. Processing Order
1. **Pattern-based matches** (emails, phones, addresses, etc.) are processed FIRST
2. **Custom names** from the user list are processed SECOND
3. **Already anonymized content** (placeholders) is skipped (see the sketch below)
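A minimal sketch of this ordering, using invented patterns rather than the module's real regexes (those live in `subPatterns.py`):
```python
import re
import uuid

text = "Contact John Doe at john@example.com"
mapping = {}

# 1) Pattern-based matches are replaced first (illustrative email pattern only)
for match in re.findall(r"[\w.+-]+@[\w.-]+\.\w+", text):
    placeholder = f"[email.{uuid.uuid4()}]"
    mapping[match] = placeholder
    text = text.replace(match, placeholder)

# 2) Custom names from the user list are replaced second (case-insensitive)
for name in ["John Doe"]:
    placeholder = f"[name.{uuid.uuid4()}]"
    mapping[name] = placeholder
    text = re.sub(re.escape(name), placeholder, text, flags=re.IGNORECASE)

# 3) Existing placeholders survive untouched: neither pass matches `[tag.uuid]`
print(text)
print(mapping)
```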
### 3. Supported Data Types
- **Text**: Plain text documents, emails, etc.
- **Structured Data**: CSV, JSON, XML with headers
- **Binary Data**: Images, audio, video (framework ready, implementation pending)
### 4. Placeholder Protection
- Prevents re-anonymization of already processed content
- Uses format `[tag.uuid]` for placeholders
- Validates placeholder format before processing (see the sketch below)
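A possible validation helper for this format, shown here as a sketch; the module's actual check may differ:
```python
import re

# `[tag.uuid]` as described above: a short tag, a dot, then a UUID-style id
PLACEHOLDER_RE = re.compile(
    r"\[[a-z]+\."
    r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\]",
    re.IGNORECASE,
)

def is_placeholder(value: str) -> bool:
    """True if the value is already an anonymization placeholder."""
    return PLACEHOLDER_RE.fullmatch(value.strip()) is not None
```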
## Usage Example
```python
from modules.neutralizer import DataAnonymizer
# Initialize with custom names
anonymizer = DataAnonymizer(names_to_parse=['John Doe', 'Jane Smith'])
# Process content (auto-detects type)
result = anonymizer.process_content(content, content_type='text')
# Or specify content type explicitly
result = anonymizer.process_content(content, content_type='csv')
# Get mapping of original values to placeholders
mapping = anonymizer.get_mapping()
```
## Module Dependencies
```
neutralizer.py
├── subProcessCommon.py (ProcessResult, CommonUtils)
├── subProcessText.py (TextProcessor)
├── subProcessList.py (ListProcessor)
├── subProcessBinary.py (BinaryProcessor)
└── subPatterns.py (Pattern definitions)
subProcessText.py
└── subParseString.py (StringParser)
subProcessList.py
├── subParseString.py (StringParser)
└── subPatterns.py (HeaderPatterns)
subProcessBinary.py
└── (standalone)
subParseString.py
└── subPatterns.py (DataPatterns)
```
## Benefits of New Structure
1. **Single Responsibility**: Each module has one clear purpose
2. **DRY Principle**: No code duplication across modules
3. **Testability**: Individual modules can be tested in isolation
4. **Extensibility**: Easy to add new data types or processing methods
5. **Maintainability**: Changes to one module don't affect others
6. **Performance**: Specialized processors are optimized for their data types

View file

@ -5,6 +5,7 @@ Shared functions and data structures
import re
from typing import Dict, List, Any, Union, Optional
from pydantic import BaseModel
from dataclasses import dataclass
@dataclass
@ -15,6 +16,19 @@ class ProcessResult:
replaced_fields: List[str]
processed_info: Dict[str, Any] # Additional processing information
class NeutralizationAttribute(BaseModel):
"""Single attribute describing a replacement mapping."""
original: str
placeholder: str
patternType: Optional[str] = None
class NeutralizationResult(BaseModel):
"""Unified result for all content types, suitable for API responses."""
neutralized_text: str
mapping: Dict[str, str]
attributes: List[NeutralizationAttribute]
processed_info: Dict[str, Any]
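# Minimal usage sketch (values invented for illustration):
#   attr = NeutralizationAttribute(
#       original="john@example.com",
#       placeholder="[email.123e4567-e89b-12d3-a456-426614174000]",
#       patternType="email")
#   result = NeutralizationResult(
#       neutralized_text="Contact [email.123e4567-e89b-12d3-a456-426614174000]",
#       mapping={attr.original: attr.placeholder},
#       attributes=[attr],
#       processed_info={"type": "text"})
#   payload = result.model_dump()  # JSON-safe dict for API responses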
class CommonUtils:
"""Common utility functions for data processing"""

View file

@ -82,7 +82,8 @@ class TextProcessor:
# Get processing information
processed_info = {
'type': 'text',
'tables': [{'headers': t.headers, 'row_count': len(t.rows)} for t in tables] if hasattr(tables[0], 'headers') else []
'tables': ([{'headers': t.headers, 'row_count': len(t.rows)} for t in tables]
if tables else [])
}
return result, self.string_parser.get_mapping(), [], processed_info

View file

@ -10,7 +10,7 @@ from datetime import datetime, UTC
logger = logging.getLogger(__name__)
class ConnectorSharepoint:
class SharepointService:
"""SharePoint connector using Microsoft Graph API for reliable authentication."""
def __init__(self, access_token: str):

View file

@ -0,0 +1,546 @@
import logging
import uuid
from typing import Dict, Any, List, Optional
from modules.interfaces.interfaceAppModel import User, UserConnection
from modules.interfaces.interfaceChatModel import ChatDocument, ChatMessage, ExtractedContent
from modules.services.serviceDocument.documentExtraction import DocumentExtractionService
from modules.services.serviceDocument.documentUtility import getFileExtension, getMimeTypeFromExtension, detectContentTypeFromData
from modules.shared.timezoneUtils import get_utc_timestamp
logger = logging.getLogger(__name__)
class WorkflowService:
"""Service class containing methods for document processing, chat operations, and workflow management"""
def __init__(self, service_center):
self.service_center = service_center
self.user = service_center.user
self.workflow = service_center.workflow
self.interfaceChat = service_center.interfaceChat
self.interfaceComponent = service_center.interfaceComponent
self.interfaceApp = service_center.interfaceApp
self.documentProcessor = service_center.documentProcessor
# Centralized services interface (for AI)
from modules.services import getInterface as getServices
self.services = getServices(self.user, self.workflow)
async def summarizeChat(self, messages: List[ChatMessage]) -> str:
"""
Summarize chat messages from last to first message with status="first"
Args:
messages: List of chat messages to summarize
Returns:
str: Summary of the chat in user's language
"""
try:
# Get messages from last to first, stopping at first message with status="first"
relevantMessages = []
for msg in reversed(messages):
relevantMessages.append(msg)
if msg.status == "first":
break
# Create prompt for AI
prompt = f"""You are an AI assistant providing a summary of a chat conversation.
Please respond in '{self.user.language}' language.
Chat History:
{chr(10).join(f"- {msg.message}" for msg in reversed(relevantMessages))}
Instructions:
1. Summarize the conversation's key points and outcomes
2. Be concise but informative
3. Use a professional but friendly tone
4. Focus on important decisions and next steps if any
Please provide a comprehensive summary of this conversation."""
# Get summary using centralized AI (speed priority)
return await self.services.ai.callAi(
prompt=prompt,
documents=None,
options={
"process_type": "text",
"operation_type": "generate_content",
"priority": "speed",
"compress_prompt": True,
"compress_documents": False,
"max_cost": 0.01
}
)
except Exception as e:
logger.error(f"Error summarizing chat: {str(e)}")
return f"Error summarizing chat: {str(e)}"
def getChatDocumentsFromDocumentList(self, documentList: List[str]) -> List[ChatDocument]:
"""Get ChatDocuments from a list of document references using all three formats."""
try:
all_documents = []
for doc_ref in documentList:
if doc_ref.startswith("docItem:"):
# docItem:<id>:<filename> - extract ID and find document
parts = doc_ref.split(':')
if len(parts) >= 2:
doc_id = parts[1]
# Find the document by ID
for message in self.workflow.messages:
if message.documents:
for doc in message.documents:
if doc.id == doc_id:
doc_name = getattr(doc, 'fileName', 'unknown')
logger.debug(f"Found docItem reference {doc_ref}: {doc_name}")
all_documents.append(doc)
break
elif doc_ref.startswith("docList:"):
# docList:<messageId>:<label> or docList:<label> - extract message ID and find document list
parts = doc_ref.split(':')
if len(parts) >= 3:
# Format: docList:<messageId>:<label>
message_id = parts[1]
label = parts[2]
# Find the message by ID and get all its documents
for message in self.workflow.messages:
if str(message.id) == message_id:
if message.documents:
doc_names = [doc.fileName for doc in message.documents if hasattr(doc, 'fileName')]
logger.debug(f"Found docList reference {doc_ref}: {len(message.documents)} documents - {doc_names}")
all_documents.extend(message.documents)
else:
logger.debug(f"Found docList reference {doc_ref} but message has no documents")
break
elif len(parts) >= 2:
# Format: docList:<label> - find message by documentsLabel
label = parts[1]
logger.debug(f"Looking for message with documentsLabel: {label}")
# Find messages with matching documentsLabel
matching_messages = []
for message in self.workflow.messages:
# Check both attribute and raw data for documentsLabel
msg_label = getattr(message, 'documentsLabel', None)
if msg_label == label:
matching_messages.append(message)
logger.debug(f"Found message {message.id} with matching documentsLabel: {msg_label}")
else:
# Debug: show what labels we're comparing
logger.debug(f"Message {message.id} has documentsLabel: '{msg_label}' (looking for: '{label}')")
if matching_messages:
# Use the newest message (highest publishedAt)
matching_messages.sort(key=lambda msg: getattr(msg, 'publishedAt', 0), reverse=True)
newest_message = matching_messages[0]
if newest_message.documents:
doc_names = [doc.fileName for doc in newest_message.documents if hasattr(doc, 'fileName')]
logger.debug(f"Found docList reference {doc_ref}: {len(newest_message.documents)} documents - {doc_names}")
all_documents.extend(newest_message.documents)
else:
logger.debug(f"Found docList reference {doc_ref} but message has no documents")
else:
logger.debug(f"No messages found with documentsLabel: {label}")
else:
# Direct label reference (round1_task2_action3_contextinfo)
# Search for messages with matching documentsLabel to find the actual documents
if doc_ref.startswith("round"):
# Parse round/task/action to find the corresponding document list
label_parts = doc_ref.split('_', 3)
if len(label_parts) >= 4:
round_num = int(label_parts[0].replace('round', ''))
task_num = int(label_parts[1].replace('task', ''))
action_num = int(label_parts[2].replace('action', ''))
context_info = label_parts[3]
logger.debug(f"Resolving round reference: round{round_num}_task{task_num}_action{action_num}_{context_info}")
logger.debug(f"Looking for messages with documentsLabel matching: {doc_ref}")
# Find messages with matching documentsLabel (this is the correct way!)
# In case of retries, we want the NEWEST message (most recent publishedAt)
matching_messages = []
for message in self.workflow.messages:
msg_documents_label = getattr(message, 'documentsLabel', '')
# Check if this message's documentsLabel matches our reference
if msg_documents_label == doc_ref:
# Found a matching message, collect it for comparison
matching_messages.append(message)
logger.debug(f"Found message {message.id} with matching documentsLabel: {msg_documents_label}")
# If we found matching messages, take the newest one (highest publishedAt)
if matching_messages:
# Sort by publishedAt descending (newest first)
matching_messages.sort(key=lambda msg: getattr(msg, 'publishedAt', 0), reverse=True)
newest_message = matching_messages[0]
logger.debug(f"Found {len(matching_messages)} matching messages, using newest: {newest_message.id} (publishedAt: {getattr(newest_message, 'publishedAt', 'unknown')})")
logger.debug(f"Newest message has {len(newest_message.documents) if newest_message.documents else 0} documents")
if newest_message.documents:
doc_names = [doc.fileName for doc in newest_message.documents if hasattr(doc, 'fileName')]
logger.debug(f"Added {len(newest_message.documents)} documents from newest message {newest_message.id}: {doc_names}")
all_documents.extend(newest_message.documents)
else:
logger.debug(f"No documents found in newest message {newest_message.id}")
else:
logger.debug(f"No messages found with documentsLabel: {doc_ref}")
# Fallback: also check if any message has this documentsLabel as a prefix
logger.debug(f"Trying fallback search for messages with documentsLabel containing: {doc_ref}")
fallback_messages = []
for message in self.workflow.messages:
msg_documents_label = getattr(message, 'documentsLabel', '')
if msg_documents_label and msg_documents_label.startswith(doc_ref):
fallback_messages.append(message)
logger.debug(f"Found fallback message {message.id} with documentsLabel: {msg_documents_label}")
if fallback_messages:
# Sort by publishedAt descending (newest first)
fallback_messages.sort(key=lambda msg: getattr(msg, 'publishedAt', 0), reverse=True)
newest_fallback = fallback_messages[0]
logger.debug(f"Using fallback message {newest_fallback.id} with documentsLabel: {getattr(newest_fallback, 'documentsLabel', 'unknown')}")
if newest_fallback.documents:
doc_names = [doc.fileName for doc in newest_fallback.documents if hasattr(doc, 'fileName')]
logger.debug(f"Added {len(newest_fallback.documents)} documents from fallback message {newest_fallback.id}: {doc_names}")
all_documents.extend(newest_fallback.documents)
else:
logger.debug(f"No documents found in fallback message {newest_fallback.id}")
else:
logger.debug(f"No fallback messages found either")
logger.debug(f"Resolved {len(all_documents)} documents from document list: {documentList}")
return all_documents
except Exception as e:
logger.error(f"Error getting documents from document list: {str(e)}")
return []
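# Reference formats handled above (example values invented):
#   "docItem:42:report.pdf"               -> single document by id
#   "docList:17:round1_task2_results"     -> all documents of message 17
#   "docList:round1_task2_results"        -> newest message with that documentsLabel
#   "round1_task2_action3_contextinfo"    -> direct documentsLabel reference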
def getConnectionReferenceFromUserConnection(self, connection: UserConnection) -> str:
"""Get connection reference from UserConnection with enhanced state information"""
# Get token information to check if it's expired
token = None
token_status = "unknown"
try:
# Get a fresh token via TokenManager convenience method
logger.debug(f"Getting fresh token for connection {connection.id}")
from modules.security.tokenManager import TokenManager
token = TokenManager().getFreshToken(self.interfaceApp, connection.id)
if token:
if hasattr(token, 'expiresAt') and token.expiresAt:
current_time = get_utc_timestamp()
logger.debug(f"getConnectionReferenceFromUserConnection: Current time: {current_time}")
logger.debug(f"getConnectionReferenceFromUserConnection: Token expires at: {token.expiresAt}")
if current_time > token.expiresAt:
token_status = "expired"
else:
# Check if this token was recently refreshed (within last 5 minutes)
time_since_creation = current_time - token.createdAt if hasattr(token, 'createdAt') else 0
if time_since_creation < 300: # 5 minutes
token_status = "valid (refreshed)"
else:
token_status = "valid"
else:
token_status = "no_expiration"
else:
token_status = "no_token"
except Exception as e:
token_status = f"error: {str(e)}"
# Build enhanced reference with state information
base_ref = f"connection:{connection.authority.value}:{connection.externalUsername}:{connection.id}"
state_info = f" [status:{connection.status.value}, token:{token_status}]"
logger.debug(f"getConnectionReferenceFromUserConnection: Built reference: {base_ref + state_info}")
return base_ref + state_info
def getUserConnectionFromConnectionReference(self, connectionReference: str) -> Optional[UserConnection]:
"""Get UserConnection from reference string (handles both old and enhanced formats)"""
try:
# Parse reference format: connection:{authority}:{username}:{id} [status:..., token:...]
# Remove state information if present
base_reference = connectionReference.split(' [')[0]
parts = base_reference.split(':')
if len(parts) != 4 or parts[0] != "connection":
return None
authority = parts[1]
username = parts[2]
conn_id = parts[3]
# Get user connections through AppObjects interface
user_connections = self.interfaceApp.getUserConnections(self.user.id)
# Find matching connection
for conn in user_connections:
if str(conn.id) == conn_id and conn.authority.value == authority and conn.externalUsername == username:
return conn
return None
except Exception as e:
logger.error(f"Error parsing connection reference: {str(e)}")
return None
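# Round-trip sketch of the reference format (values invented):
#   ref  = "connection:microsoft:alice@contoso.com:42 [status:active, token:valid]"
#   base = ref.split(' [')[0]   # "connection:microsoft:alice@contoso.com:42"
#   _, authority, username, conn_id = base.split(':')  # -> "microsoft", "alice@contoso.com", "42"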
def getFileInfo(self, fileId: str) -> Dict[str, Any]:
"""Get file information"""
file_item = self.interfaceComponent.getFile(fileId)
if file_item:
return {
"id": file_item.id,
"fileName": file_item.fileName,
"size": file_item.fileSize,
"mimeType": file_item.mimeType,
"fileHash": file_item.fileHash,
"creationDate": file_item.creationDate
}
return None
def getFileData(self, fileId: str) -> bytes:
"""Get file data by ID"""
return self.interfaceComponent.getFileData(fileId)
async def extractContentFromDocument(self, prompt: str, document: ChatDocument) -> ExtractedContent:
"""Extract content from ChatDocument using prompt"""
try:
# ChatDocument is just a reference, so we need to get file data using fileId
if not hasattr(document, 'fileId') or not document.fileId:
logger.error(f"Document {document.id} has no fileId")
raise ValueError("Document has no fileId")
# Get file data from service center using document's fileId
fileData = self.getFileData(document.fileId)
if not fileData:
logger.error(f"No file data found for fileId: {document.fileId}")
raise ValueError("No file data found for document")
# Get fileName and mime type from document properties
try:
fileName = document.fileName
mimeType = document.mimeType
except Exception as e:
# Try to diagnose and recover the issue
diagnosis = self._diagnoseDocumentAccess(document)
logger.error(f"Critical error: Cannot access document properties for document {document.id}. Diagnosis: {diagnosis}")
# Attempt recovery
if self._recoverDocumentAccess(document):
try:
fileName = document.fileName
mimeType = document.mimeType
logger.info(f"Document access recovered for {document.id} - proceeding with AI extraction")
except Exception as recovery_error:
logger.error(f"Recovery failed for document {document.id}: {str(recovery_error)}")
raise RuntimeError(f"Document {document.id} properties are permanently inaccessible after recovery attempt - cannot proceed with AI extraction: {str(recovery_error)}")
else:
# Recovery failed - don't continue with invalid data
raise RuntimeError(f"Document {document.id} properties are inaccessible and recovery failed. Diagnosis: {diagnosis}")
# Process with document processor directly
extractedContent = await self.documentProcessor.processFileData(
fileData=fileData,
fileName=fileName,
mimeType=mimeType,
base64Encoded=False,
prompt=prompt,
documentId=document.id
)
# Note: ExtractedContent model only has 'id' and 'contents' fields
# No need to set objectId or objectType as they don't exist in the model
return extractedContent
except Exception as e:
logger.error(f"Error extracting from document: {str(e)}")
raise
def _diagnoseDocumentAccess(self, document: ChatDocument) -> Dict[str, Any]:
"""
Diagnose document access issues and provide recovery information.
This method helps identify why document properties are inaccessible.
"""
try:
diagnosis = {
'document_id': document.id,
'file_id': document.fileId,
'has_component_interface': document._componentInterface is not None,
'component_interface_type': type(document._componentInterface).__name__ if document._componentInterface else None,
'file_exists': False,
'file_info': None,
'error_details': None
}
# Check if component interface is set
if not document._componentInterface:
diagnosis['error_details'] = "Component interface not set - document cannot access file system"
return diagnosis
# Try to access the file directly
try:
file_info = self.interfaceComponent.getFile(document.fileId)
if file_info:
diagnosis['file_exists'] = True
diagnosis['file_info'] = {
'fileName': file_info.fileName if hasattr(file_info, 'fileName') else 'N/A',
'fileSize': file_info.fileSize if hasattr(file_info, 'fileSize') else 'N/A',
'mimeType': file_info.mimeType if hasattr(file_info, 'mimeType') else 'N/A'
}
else:
diagnosis['error_details'] = f"File with ID {document.fileId} not found in component interface"
except Exception as e:
diagnosis['error_details'] = f"Error accessing file {document.fileId}: {str(e)}"
return diagnosis
except Exception as e:
return {
'document_id': document.id if hasattr(document, 'id') else 'unknown',
'file_id': document.fileId if hasattr(document, 'fileId') else 'unknown',
'error_details': f"Error during diagnosis: {str(e)}"
}
def _recoverDocumentAccess(self, document: ChatDocument) -> bool:
"""
Attempt to recover document access by re-setting the component interface.
Returns True if recovery was successful.
"""
try:
logger.info(f"Attempting to recover document access for document {document.id}")
# Re-set the component interface
document.setComponentInterface(self.interfaceComponent)
# Test if we can now access the fileName
try:
test_fileName = document.fileName
logger.info(f"Document access recovered for {document.id} -> {test_fileName}")
return True
except Exception as e:
logger.error(f"Document access recovery failed for {document.id}: {str(e)}")
return False
except Exception as e:
logger.error(f"Error during document access recovery for {document.id}: {str(e)}")
return False
def createDocument(self, fileName: str, mimeType: str, content: str, base64encoded: bool = True, messageId: str = None) -> ChatDocument:
"""Create document with file in one step - handles file creation internally"""
# Convert content to bytes based on base64 flag
if base64encoded:
import base64
content_bytes = base64.b64decode(content)
else:
content_bytes = content.encode('utf-8')
# Create the file (hash and size are computed inside interfaceComponent)
file_item = self.interfaceComponent.createFile(
name=fileName,
mimeType=mimeType,
content=content_bytes
)
# Then store the file data
self.interfaceComponent.createFileData(file_item.id, content_bytes)
# Get file info to copy attributes
file_info = self.getFileInfo(file_item.id)
if not file_info:
logger.error(f"Could not get file info for fileId: {file_item.id}")
raise ValueError(f"File info not found for fileId: {file_item.id}")
# Create document with all file attributes copied
document = ChatDocument(
id=str(uuid.uuid4()),
messageId=messageId or "", # Use provided messageId or empty string as fallback
fileId=file_item.id,
fileName=file_info.get("fileName", fileName),
fileSize=file_info.get("size", 0),
mimeType=file_info.get("mimeType", mimeType)
)
return document
def calculateObjectSize(self, obj: Any) -> int:
"""
Calculate the size of an object in bytes.
Args:
obj: Object to calculate size for
Returns:
int: Size in bytes
"""
try:
import json
import sys
if obj is None:
return 0
# Convert object to JSON string and calculate size
json_str = json.dumps(obj, ensure_ascii=False, default=str)
return len(json_str.encode('utf-8'))
except Exception as e:
logger.error(f"Error calculating object size: {str(e)}")
return 0
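# For illustration: calculateObjectSize({"a": 1}) == 8, the UTF-8 length of '{"a": 1}'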
def getWorkflowContext(self) -> Dict[str, int]:
"""Get current workflow context for document generation"""
try:
return {
'currentRound': self.workflow.currentRound if hasattr(self.workflow, 'currentRound') else 0,
'currentTask': self.workflow.currentTask if hasattr(self.workflow, 'currentTask') else 0,
'currentAction': self.workflow.currentAction if hasattr(self.workflow, 'currentAction') else 0
}
except Exception as e:
logger.error(f"Error getting workflow context: {str(e)}")
return {'currentRound': 0, 'currentTask': 0, 'currentAction': 0}
def setWorkflowContext(self, round_number: int = None, task_number: int = None, action_number: int = None):
"""Set current workflow context for document generation and routing"""
try:
# Prepare update data
update_data = {}
if round_number is not None:
self.workflow.currentRound = round_number
update_data["currentRound"] = round_number
if task_number is not None:
self.workflow.currentTask = task_number
update_data["currentTask"] = task_number
if action_number is not None:
self.workflow.currentAction = action_number
update_data["currentAction"] = action_number
# Persist changes to database if any updates were made
if update_data:
self.interfaceChat.updateWorkflow(self.workflow.id, update_data)
logger.debug(f"Updated workflow context: Round {self.workflow.currentRound if hasattr(self.workflow, 'currentRound') else 'N/A'}, Task {self.workflow.currentTask if hasattr(self.workflow, 'currentTask') else 'N/A'}, Action {self.workflow.currentAction if hasattr(self.workflow, 'currentAction') else 'N/A'}")
except Exception as e:
logger.error(f"Error setting workflow context: {str(e)}")
def getWorkflowStats(self) -> Dict[str, Any]:
"""Get comprehensive workflow statistics including current context"""
try:
workflow_context = self.getWorkflowContext()
return {
'currentRound': workflow_context['currentRound'],
'currentTask': workflow_context['currentTask'],
'currentAction': workflow_context['currentAction'],
'totalTasks': self.workflow.totalTasks if hasattr(self.workflow, 'totalTasks') else 0,
'totalActions': self.workflow.totalActions if hasattr(self.workflow, 'totalActions') else 0,
'workflowStatus': self.workflow.status if hasattr(self.workflow, 'status') else 'unknown',
'workflowId': self.workflow.id if hasattr(self.workflow, 'id') else 'unknown'
}
except Exception as e:
logger.error(f"Error getting workflow stats: {str(e)}")
return {
'currentRound': 0,
'currentTask': 0,
'currentAction': 0,
'totalTasks': 0,
'totalActions': 0,
'workflowStatus': 'unknown',
'workflowId': 'unknown'
}

View file

@ -0,0 +1,120 @@
import logging
from typing import Callable, Optional, Dict, Any
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.triggers.cron import CronTrigger
from apscheduler.triggers.interval import IntervalTrigger
from zoneinfo import ZoneInfo
logger = logging.getLogger(__name__)
class EventManagement:
"""
Generic event scheduler wrapper around APScheduler's AsyncIOScheduler.
Features:
- start/stop lifecycle
- register timed events with either cron or interval style
- remove events by id
"""
def __init__(self, timezone: str = "Europe/Zurich"):
self._timezone = ZoneInfo(timezone)
self._scheduler: Optional[AsyncIOScheduler] = None
@property
def scheduler(self) -> AsyncIOScheduler:
if self._scheduler is None:
self._scheduler = AsyncIOScheduler(timezone=self._timezone)
return self._scheduler
def start(self) -> None:
if not self.scheduler.running:
self.scheduler.start()
logger.info("EventManagement scheduler started")
def stop(self) -> None:
if self._scheduler and self._scheduler.running:
try:
self._scheduler.shutdown(wait=False)
logger.info("EventManagement scheduler stopped")
except Exception as exc:
logger.error(f"Error stopping scheduler: {exc}")
def register_cron(
self,
job_id: str,
func: Callable,
*,
cron_kwargs: Optional[Dict[str, Any]] = None,
replace_existing: bool = True,
coalesce: bool = True,
max_instances: int = 1,
misfire_grace_time: int = 1800,
**kwargs: Any,
) -> None:
"""
Register a job using CronTrigger. Provide cron fields as keyword args, e.g.:
cron_kwargs={"minute": "0,20,40"}
"""
trigger = CronTrigger(timezone=self._timezone, **(cron_kwargs or {}))
self.scheduler.add_job(
func,
trigger,
id=job_id,
replace_existing=replace_existing,
coalesce=coalesce,
max_instances=max_instances,
misfire_grace_time=misfire_grace_time,
**kwargs,
)
logger.info(f"Registered cron job '{job_id}' with args {cron_kwargs}")
def register_interval(
self,
job_id: str,
func: Callable,
*,
seconds: Optional[int] = None,
minutes: Optional[int] = None,
hours: Optional[int] = None,
replace_existing: bool = True,
coalesce: bool = True,
max_instances: int = 1,
misfire_grace_time: int = 1800,
**kwargs: Any,
) -> None:
"""
Register a job using IntervalTrigger.
"""
trigger = IntervalTrigger(
seconds=seconds, minutes=minutes, hours=hours, timezone=self._timezone
)
self.scheduler.add_job(
func,
trigger,
id=job_id,
replace_existing=replace_existing,
coalesce=coalesce,
max_instances=max_instances,
misfire_grace_time=misfire_grace_time,
**kwargs,
)
logger.info(
f"Registered interval job '{job_id}' (h={hours}, m={minutes}, s={seconds})"
)
def remove(self, job_id: str) -> None:
try:
self.scheduler.remove_job(job_id)
logger.info(f"Removed job '{job_id}'")
except Exception as exc:
logger.warning(f"Could not remove job '{job_id}': {exc}")
# Singleton instance for easy import and reuse
eventManager = EventManagement()
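# Minimal usage sketch (job body invented); the scheduler needs a running
# asyncio event loop, e.g. inside a FastAPI lifespan handler:
#   async def nightly_cleanup() -> None:
#       ...
#   eventManager.start()
#   eventManager.register_cron("nightly_cleanup", nightly_cleanup,
#                              cron_kwargs={"hour": "2", "minute": "0"})
#   eventManager.register_interval("heartbeat", nightly_cleanup, minutes=5)
#   eventManager.stop()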

View file

@ -20,6 +20,9 @@ class MethodAi(MethodBase):
super().__init__(service)
self.name = "ai"
self.description = "AI processing methods"
# Centralized services interface (for AI)
from modules.services import getInterface as getServices
self.services = getServices(self.service.user, self.service.workflow)
def _format_timestamp_for_filename(self) -> str:
"""Format current timestamp as YYYYMMDD-hhmmss for filenames."""
@ -177,10 +180,43 @@ class MethodAi(MethodBase):
min_tokens_hint = "\n\nPlease ensure the response is substantial and complete."
call_prompt = enhanced_prompt + min_tokens_hint
if processingMode in ["advanced", "detailed"]:
result = await self.service.callAiTextAdvanced(call_prompt, context)
else:
result = await self.service.callAiTextBasic(call_prompt, context)
# Centralized AI call with optional document context
documents = []
try:
if documentList:
for d in (chatDocuments or []):
try:
file_data = self.service.getFileData(d.fileId)
documents.append(
ChatDocument(
fileData=file_data,
fileName=d.fileName,
mimeType=d.mimeType
)
)
except Exception:
continue
except Exception:
documents = None
output_format = output_extension.replace('.', '') or 'txt'
result = await self.services.ai.callAi(
prompt=call_prompt,
documents=documents or None,
options={
"process_type": "text",
"operation_type": "generate_content",
"priority": "quality" if processingMode in ["advanced", "detailed"] else "speed",
"compress_prompt": processingMode != "detailed",
"compress_documents": True,
"process_documents_individually": True,
"processing_mode": processingMode,
"result_format_requested": output_format,
"include_metadata": includeMetadata,
"max_cost": 0.05 if processingMode in ["advanced", "detailed"] else 0.02,
"max_processing_time": 45 if processingMode in ["advanced", "detailed"] else 20
}
)
# If expected JSON and too short/not JSON, retry with stricter JSON guardrails
if output_extension == ".json":
@ -207,7 +243,23 @@ class MethodAi(MethodBase):
"Include all requested fields with detailed content."
)
try:
result = await self.service.callAiTextAdvanced(guardrail_prompt, context)
result = await self.services.ai.callAi(
prompt=guardrail_prompt,
documents=context or None,
options={
"process_type": "text",
"operation_type": "generate_content",
"priority": "quality",
"compress_prompt": False,
"compress_documents": True,
"process_documents_individually": True,
"processing_mode": "detailed",
"result_format_requested": "json",
"include_metadata": False,
"max_cost": 0.03,
"max_processing_time": 30
}
)
except Exception:
result = cleaned # fallback to first attempt

View file

@ -22,6 +22,9 @@ class MethodDocument(MethodBase):
super().__init__(serviceCenter)
self.name = "document"
self.description = "Handle document operations like extraction and analysis"
# Centralized services interface (for AI)
from modules.services import getInterface as getServices
self.services = getServices(self.service.user, self.service.workflow)
def _format_timestamp_for_filename(self) -> str:
"""Format current timestamp as YYYYMMDD-hhmmss for filenames."""
@ -530,7 +533,18 @@ class MethodDocument(MethodBase):
# Call AI to generate the formatted content
logger.info(f"Calling AI for {extension} format conversion")
formatted_content = await self.service.callAiTextBasic(ai_prompt, content)
formatted_content = await self.services.ai.callAi(
prompt=ai_prompt,
documents=None,
options={
"process_type": "text",
"operation_type": "generate_content",
"priority": "speed",
"compress_prompt": True,
"compress_documents": False,
"max_cost": 0.02
}
)
if not formatted_content or formatted_content.strip() == "":
logger.warning("AI format conversion failed, using fallback")
@ -751,7 +765,36 @@ SOURCE DOCUMENT CONTENT:
# Call AI to generate the report
logger.info(f"Generating AI report for {len(validDocuments)} documents")
aiReport = await self.service.callAiTextAdvanced(aiPrompt, combinedContent)
# Build ChatDocument list from chatDocuments
documents = []
try:
from modules.interfaces.interfaceChatModel import ChatDocument as ChatDoc
for d in validDocuments:
try:
data = self.service.getFileData(d.fileId) if hasattr(d, 'fileId') else None
if data:
documents.append(ChatDoc(fileData=data, fileName=d.fileName, mimeType=d.mimeType))
except Exception:
continue
except Exception:
documents = None
aiReport = await self.services.ai.callAi(
prompt=aiPrompt,
documents=documents or None,
options={
"process_type": "text",
"operation_type": "report_generation",
"priority": "quality",
"compress_prompt": False,
"compress_documents": True,
"process_documents_individually": True,
"result_format_requested": "html",
"include_metadata": includeMetadata,
"processing_mode": "detailed",
"max_cost": 0.08,
"max_processing_time": 90
}
)
# If AI call fails, return error - AI is crucial for report generation
if not aiReport or aiReport.strip() == "":

View file

@ -96,6 +96,9 @@ class MethodOutlook(MethodBase):
super().__init__(serviceCenter)
self.name = "outlook"
self.description = "Handle Microsoft Outlook email operations"
# Centralized services interface (for AI)
from modules.services import getInterface as getServices
self.services = getServices(self.service.user, self.service.workflow)
def _format_timestamp_for_filename(self) -> str:
"""Format current timestamp as YYYYMMDD-hhmmss for filenames."""
@ -116,8 +119,9 @@ class MethodOutlook(MethodBase):
logger.debug(f"Found connection: {userConnection.id}, status: {userConnection.status.value}, authority: {userConnection.authority.value}")
# Get the token for this specific connection
token = self.service.interfaceApp.getConnectionToken(userConnection.id)
# Get a fresh token for this specific connection
from modules.security.tokenManager import TokenManager
token = TokenManager().getFreshToken(self.service.interfaceApp, userConnection.id)
if not token:
logger.error(f"Token not found for connection: {userConnection.id}")
logger.debug(f"Connection details: {userConnection}")
@ -1605,7 +1609,36 @@ class MethodOutlook(MethodBase):
# Call AI to compose the email
try:
composed_email = await self.service.interfaceAiCalls.callAiTextAdvanced(ai_prompt)
# Centralized AI call for email composition with document context
documents = []
try:
if composition_documents:
from modules.interfaces.interfaceChatModel import ChatDocument as ChatDoc
for d in composition_documents:
try:
data = self.service.getFileData(d.fileId) if hasattr(d, 'fileId') else None
if data:
documents.append(ChatDoc(fileData=data, fileName=d.fileName, mimeType=d.mimeType))
except Exception:
continue
except Exception:
documents = None
composed_email = await self.services.ai.callAi(
prompt=ai_prompt,
documents=documents or None,
options={
"process_type": "text",
"operation_type": "email_composition",
"priority": "speed",
"compress_prompt": True,
"compress_documents": True,
"process_documents_individually": False,
"include_metadata": True,
"max_cost": 0.02,
"max_processing_time": 15
}
)
# Parse the AI response to ensure it's valid JSON
try:

View file

@ -48,8 +48,9 @@ class MethodSharepoint(MethodBase):
logger.warning(f"Connection {userConnection.id} status is not active/pending: {userConnection.status.value}")
return None
# Get the token for this specific connection
token = self.service.interfaceApp.getConnectionToken(userConnection.id)
# Get a fresh token for this specific connection
from modules.security.tokenManager import TokenManager
token = TokenManager().getFreshToken(self.service.interfaceApp, userConnection.id)
if not token:
logger.warning(f"No token found for connection {userConnection.id}")
return None

View file

@ -22,6 +22,9 @@ class MethodWeb(MethodBase):
super().__init__(serviceCenter)
self.name = "web"
self.description = "Web search, crawling, and scraping operations using Tavily"
# Centralized services interface (for AI)
from modules.services import getInterface as getServices
self.services = getServices(self.service.user, self.service.workflow)
@action
async def search(self, parameters: Dict[str, Any]) -> ActionResult:
@ -274,7 +277,21 @@ class MethodWeb(MethodBase):
"Return only bullet points without any preface."
)
context = content[:4000]
summary = await self.service.callAiTextBasic(prompt, context)
# Centralized AI summary (balanced analyse_content)
summary = await self.services.ai.callAi(
prompt=f"{prompt}\n\n{context}",  # include the page content, which the old callAiTextBasic received as context
documents=None,
options={
"process_type": "text",
"operation_type": "analyse_content",
"priority": "balanced",
"compress_prompt": True,
"compress_documents": False,
"processing_mode": "advanced",
"max_cost": 0.05,
"max_processing_time": 30
}
)
summary = summary.strip()
except Exception:
summary = ""

View file

@ -12,8 +12,8 @@ from modules.interfaces.interfaceChatModel import (
)
from modules.interfaces.interfaceAppObjects import getInterface as getAppObjects
from modules.shared.timezoneUtils import get_utc_timestamp
from modules.workflows._transfer.executionState import TaskExecutionState
from modules.workflows._transfer.promptFactory import (
from modules.workflows.processing.executionState import TaskExecutionState
from modules.workflows.processing.promptFactory import (
createTaskPlanningPrompt,
createActionDefinitionPrompt,
createResultReviewPrompt,
@ -21,7 +21,8 @@ from modules.workflows._transfer.promptFactory import (
createActionParameterPrompt,
createRefinementPrompt
)
from modules.services.serviceDocument.documentGeneration import DocumentGenerator
from modules.services.serviceDocument.mainServiceDocumentGeneration import DocumentGenerationService
from modules.workflows.processing.promptFactory import methods
import uuid
logger = logging.getLogger(__name__)
@ -31,13 +32,10 @@ class WorkflowStoppedException(Exception):
pass
class HandlingTasks:
def __init__(self, chatInterface, currentUser, workflow=None):
self.chatInterface = chatInterface
self.currentUser = currentUser
def __init__(self, services, workflow=None):
self.services = services
self.workflow = workflow
from modules.services.serviceCenter import ServiceCenter
self.service = ServiceCenter(currentUser, workflow)
self.documentGenerator = DocumentGenerator(self.service)
self.documentGenerator = DocumentGenerationService(self.services.center)
def _checkWorkflowStopped(self):
"""
@ -46,7 +44,7 @@ class HandlingTasks:
"""
try:
# Get the current workflow status from the database to avoid stale data
current_workflow = self.chatInterface.getWorkflow(self.service.workflow.id)
current_workflow = self.services.chatInterface.getWorkflow(self.workflow.id)
if current_workflow and current_workflow.status == "stopped":
logger.info("Workflow stopped by user, aborting execution")
raise WorkflowStoppedException("Workflow was stopped by user")
@ -113,9 +111,23 @@ class HandlingTasks:
# Log task planning prompt sent to AI
logger.info("=== TASK PLANNING PROMPT SENT TO AI ===")
# Trace task planning prompt
self.service.writeTraceLog("Task Plan Prompt", task_planning_prompt)
self.writeTraceLog("Task Plan Prompt", task_planning_prompt)
prompt = await self.service.callAiTextAdvanced(task_planning_prompt)
# Centralized AI call: Task planning (quality, detailed)
prompt = await self.services.ai.callAi(
prompt=task_planning_prompt,
documents=None,
options={
"process_type": "text",
"operation_type": "generate_plan",
"priority": "quality",
"compress_prompt": False,
"compress_documents": False,
"processing_mode": "detailed",
"max_cost": 0.10,
"max_processing_time": 30
}
)
# Check if AI response is valid
if not prompt:
@ -125,7 +137,7 @@ class HandlingTasks:
logger.info("=== TASK PLANNING AI RESPONSE RECEIVED ===")
logger.info(f"Response length: {len(prompt) if prompt else 0}")
# Trace task planning response
self.service.writeTraceLog("Task Plan Response", prompt)
self.writeTraceLog("Task Plan Response", prompt)
# Inline _parseTaskPlanResponse logic
try:
@ -251,7 +263,7 @@ class HandlingTasks:
"taskProgress": "pending"
}
message = self.chatInterface.createMessage(message_data)
message = self.services.chatInterface.createMessage(message_data)
if message:
workflow.messages.append(message)
@ -359,9 +371,23 @@ class HandlingTasks:
# Generate the action definition prompt
action_prompt = await createActionDefinitionPrompt(action_context, self.service)
# Trace action planning prompt
self.service.writeTraceLog("Action Plan Prompt", action_prompt)
self.writeTraceLog("Action Plan Prompt", action_prompt)
prompt = await self.service.callAiTextAdvanced(action_prompt)
# Centralized AI call: Action planning (quality, detailed)
prompt = await self.services.ai.callAi(
prompt=action_prompt,
documents=None,
options={
"process_type": "text",
"operation_type": "generate_plan",
"priority": "quality",
"compress_prompt": False,
"compress_documents": False,
"processing_mode": "detailed",
"max_cost": 0.10,
"max_processing_time": 30
}
)
# Check if AI response is valid
if not prompt:
@ -371,7 +397,7 @@ class HandlingTasks:
logger.info("=== ACTION PLAN AI RESPONSE RECEIVED ===")
logger.info(f"Response length: {len(prompt) if prompt else 0}")
# Trace action planning response
self.service.writeTraceLog("Action Plan Response", prompt)
self.writeTraceLog("Action Plan Response", prompt)
# Inline parseActionResponse logic here
json_start = prompt.find('{')
@ -438,9 +464,23 @@ class HandlingTasks:
async def plan_select(self, context: TaskContext) -> Dict[str, Any]:
"""Plan: select exactly one action. Returns {"action": {method, name}}"""
prompt = createActionSelectionPrompt(context, self.service)
self.service.writeTraceLog("React Plan Selection Prompt", prompt)
response = await self.service.callAiTextAdvanced(prompt)
self.service.writeTraceLog("React Plan Selection Response", response)
self.writeTraceLog("React Plan Selection Prompt", prompt)
# Centralized AI call for plan selection (use plan generation quality)
response = await self.services.ai.callAi(
prompt=prompt,
documents=None,
options={
"process_type": "text",
"operation_type": "generate_plan",
"priority": "quality",
"compress_prompt": False,
"compress_documents": False,
"processing_mode": "detailed",
"max_cost": 0.10,
"max_processing_time": 30
}
)
self.writeTraceLog("React Plan Selection Response", response)
json_start = response.find('{') if response else -1
json_end = response.rfind('}') + 1 if response else 0
if json_start == -1 or json_end == 0:
@ -454,9 +494,23 @@ class HandlingTasks:
"""Act: request minimal parameters then execute selected action."""
action = selection.get('action', {})
params_prompt = createActionParameterPrompt(context, action, self.service)
self.service.writeTraceLog("React Parameters Prompt", params_prompt)
params_resp = await self.service.callAiTextAdvanced(params_prompt)
self.service.writeTraceLog("React Parameters Response", params_resp)
self.writeTraceLog("React Parameters Prompt", params_prompt)
# Centralized AI call for parameter suggestion (balanced analysis)
params_resp = await self.services.ai.callAi(
prompt=params_prompt,
documents=None,
options={
"process_type": "text",
"operation_type": "analyse_content",
"priority": "balanced",
"compress_prompt": True,
"compress_documents": False,
"processing_mode": "advanced",
"max_cost": 0.05,
"max_processing_time": 30
}
)
self.writeTraceLog("React Parameters Response", params_resp)
js = params_resp[params_resp.find('{'):params_resp.rfind('}')+1] if params_resp else '{}'
try:
param_obj = json.loads(js)
@ -508,9 +562,23 @@ class HandlingTasks:
async def refine_decide(self, context: TaskContext, observation: Dict[str, Any]) -> Dict[str, Any]:
"""Refine: decide continue or stop, with reason"""
prompt = createRefinementPrompt(context, observation)
self.service.writeTraceLog("React Refinement Prompt", prompt)
resp = await self.service.callAiTextAdvanced(prompt)
self.service.writeTraceLog("React Refinement Response", resp)
self.writeTraceLog("React Refinement Prompt", prompt)
# Centralized AI call for refinement decision (balanced analysis)
resp = await self.services.ai.callAi(
prompt=prompt,
documents=None,
options={
"process_type": "text",
"operation_type": "analyse_content",
"priority": "balanced",
"compress_prompt": True,
"compress_documents": False,
"processing_mode": "advanced",
"max_cost": 0.05,
"max_processing_time": 30
}
)
self.writeTraceLog("React Refinement Response", resp)
js = resp[resp.find('{'):resp.rfind('}')+1] if resp else '{}'
try:
decision = json.loads(js)
@ -560,7 +628,7 @@ class HandlingTasks:
if task_step.userMessage:
task_start_message["message"] += f"\n\n💬 {task_step.userMessage}"
message = self.chatInterface.createMessage(task_start_message)
message = self.services.chatInterface.createMessage(task_start_message)
if message:
workflow.messages.append(message)
logger.info(f"Task start message created for task {task_index}")
@ -590,7 +658,7 @@ class HandlingTasks:
decision = await self.refine_decide(context, observation)
# Telemetry: simple duration per step
duration = time.time() - t0
self.chatInterface.createLog({
self.services.chatInterface.createLog({
"workflowId": workflow.id,
"message": f"react_step_duration_sec={duration:.3f}",
"type": "info"
@ -611,12 +679,12 @@ class HandlingTasks:
"actionNumber": step,
"actionProgress": "success" if result.success else "fail"
}
self.chatInterface.createMessage(msg)
self.services.chatInterface.createMessage(msg)
except Exception as e:
logger.error(f"React step {step} error: {e}")
break
from modules.workflows._transfer.executionState import should_continue
from modules.workflows.processing.executionState import should_continue
if not should_continue(observation, last_review_dict, step, state.max_steps):
break
step += 1
@ -709,7 +777,7 @@ class HandlingTasks:
"actionNumber": action_number
})
message = self.chatInterface.createMessage(action_start_message)
message = self.services.chatInterface.createMessage(action_start_message)
if message:
workflow.messages.append(message)
logger.info(f"Action start message created for action {action_number}")
@ -763,7 +831,7 @@ class HandlingTasks:
"taskProgress": "success"
}
message = self.chatInterface.createMessage(task_completion_message)
message = self.services.chatInterface.createMessage(task_completion_message)
if message:
workflow.messages.append(message)
logger.info(f"Task completion message created for task {task_index}")
@ -855,7 +923,7 @@ class HandlingTasks:
"taskProgress": "retry"
}
message = self.chatInterface.createMessage(retry_message)
message = self.services.chatInterface.createMessage(retry_message)
if message:
workflow.messages.append(message)
@ -908,7 +976,7 @@ class HandlingTasks:
}
try:
message = self.chatInterface.createMessage(message_data)
message = self.services.chatInterface.createMessage(message_data)
if message:
workflow.messages.append(message)
logger.info(f"Created user-facing retry message for failed task: {task_step.objective}")
@ -962,7 +1030,7 @@ class HandlingTasks:
}
try:
message = self.chatInterface.createMessage(message_data)
message = self.services.chatInterface.createMessage(message_data)
if message:
workflow.messages.append(message)
logger.info(f"Created user-facing error message for failed task: {task_step.objective}")
@ -1024,15 +1092,29 @@ class HandlingTasks:
logger.info(f"Action Results Count: {len(review_context.action_results) if review_context.action_results else 0}")
logger.info(f"Task Actions Count: {len(review_context.task_actions) if review_context.task_actions else 0}")
# Trace result review prompt
self.service.writeTraceLog("Result Review Prompt", prompt)
self.writeTraceLog("Result Review Prompt", prompt)
response = await self.service.callAiTextAdvanced(prompt)
# Centralized AI call: Result validation (balanced analysis)
response = await self.services.ai.callAi(
prompt=prompt,
documents=None,
options={
"process_type": "text",
"operation_type": "analyse_content",
"priority": "balanced",
"compress_prompt": True,
"compress_documents": False,
"processing_mode": "advanced",
"max_cost": 0.05,
"max_processing_time": 30
}
)
# Log result review response received
logger.info("=== RESULT REVIEW AI RESPONSE RECEIVED ===")
logger.info(f"Response length: {len(response) if response else 0}")
# Trace result review response
self.service.writeTraceLog("Result Review Response", response)
self.writeTraceLog("Result Review Response", response)
# Inline parseReviewResponse logic here
json_start = response.find('{')
@ -1169,10 +1251,10 @@ class HandlingTasks:
actionData["execParameters"] = {}
# Use generic field separation based on TaskAction model
simple_fields, object_fields = self.chatInterface._separate_object_fields(TaskAction, actionData)
simple_fields, object_fields = self.services.chatInterface._separate_object_fields(TaskAction, actionData)
# Create action in database
createdAction = self.chatInterface.db.recordCreate(TaskAction, simple_fields)
createdAction = self.services.chatInterface.db.recordCreate(TaskAction, simple_fields)
# Convert to TaskAction model
return TaskAction(
@ -1229,7 +1311,7 @@ class HandlingTasks:
# Check workflow status before executing the action
self._checkWorkflowStopped()
result = await self.service.executeAction(
result = await self.executeAction(
methodName=action.execMethod,
actionName=action.execAction,
parameters=enhanced_parameters
@ -1245,7 +1327,7 @@ class HandlingTasks:
"resultLabel": result_label,
"documentsCount": len(result.documents) if result.documents else 0
}
self.service.writeTraceLog("Action Result", action_result_trace)
self.writeTraceLog("Action Result", action_result_trace)
# Process documents from the action result
created_documents = []
@ -1276,7 +1358,7 @@ class HandlingTasks:
if created_documents:
message.documents = created_documents
# Update the message in the database
self.chatInterface.updateMessage(message.id, {"documents": [doc.dict() for doc in created_documents]})
self.services.chatInterface.updateMessage(message.id, {"documents": [doc.dict() for doc in created_documents]})
# Log action results
logger.info(f"Action completed successfully")
@ -1302,7 +1384,7 @@ class HandlingTasks:
message = await self.createActionMessage(action, result, workflow, result_label, [], task_step, task_index)
# Create database log entry for action failure
self.chatInterface.createLog({
self.services.chatInterface.createLog({
"workflowId": workflow.id,
"message": f"❌ **Task {task_num}**\n\n❌ **Action {action_num}/{total_actions}** failed: {result.error}",
"type": "error"
@ -1398,7 +1480,7 @@ class HandlingTasks:
logger.info(f"Creating ERROR message: {message_text}")
logger.info(f"Message data: {message_data}")
message = self.chatInterface.createMessage(message_data)
message = self.services.chatInterface.createMessage(message_data)
if message:
workflow.messages.append(message)
logger.info(f"Message created: {action.execMethod}.{action.execAction}")
@ -1558,7 +1640,7 @@ class HandlingTasks:
self.workflow.totalActions = 0
# Update in database
self.chatInterface.updateWorkflow(self.workflow.id, update_data)
self.services.chatInterface.updateWorkflow(self.workflow.id, update_data)
logger.info(f"Updated workflow {self.workflow.id} after task plan created: {update_data}")
except Exception as e:
@ -1582,7 +1664,7 @@ class HandlingTasks:
self.workflow.totalActions = 0
# Update in database
self.chatInterface.updateWorkflow(self.workflow.id, update_data)
self.services.chatInterface.updateWorkflow(self.workflow.id, update_data)
logger.info(f"Updated workflow {self.workflow.id} before executing task {task_number}: {update_data}")
except Exception as e:
@ -1602,7 +1684,7 @@ class HandlingTasks:
self.workflow.totalActions = total_actions
# Update in database
self.chatInterface.updateWorkflow(self.workflow.id, update_data)
self.services.chatInterface.updateWorkflow(self.workflow.id, update_data)
logger.info(f"Updated workflow {self.workflow.id} after action planning: {update_data}")
except Exception as e:
@ -1622,7 +1704,7 @@ class HandlingTasks:
self.workflow.currentAction = action_number
# Update in database
self.chatInterface.updateWorkflow(self.workflow.id, update_data)
self.services.chatInterface.updateWorkflow(self.workflow.id, update_data)
logger.info(f"Updated workflow {self.workflow.id} before executing action {action_number}: {update_data}")
except Exception as e:
@ -1643,7 +1725,7 @@ class HandlingTasks:
# Update workflow object in database if we have changes
if update_data:
self.chatInterface.updateWorkflow(self.workflow.id, update_data)
self.services.chatInterface.updateWorkflow(self.workflow.id, update_data)
logger.info(f"Updated workflow {self.workflow.id} totals in database: {update_data}")
logger.debug(f"Updated workflow totals: Tasks {self.workflow.totalTasks if hasattr(self.workflow, 'totalTasks') else 'N/A'}, Actions {self.workflow.totalActions if hasattr(self.workflow, 'totalActions') else 'N/A'}")
@ -1662,7 +1744,7 @@ class HandlingTasks:
self.workflow.status = 'ready'
# Update workflow object in database with reset values
self.chatInterface.updateWorkflow(self.workflow.id, {
self.services.chatInterface.updateWorkflow(self.workflow.id, {
"currentRound": 0,
"currentTask": 0,
"currentAction": 0,
@ -1673,4 +1755,104 @@ class HandlingTasks:
logger.info("Workflow reset for new session - all values set to initial state and updated in database")
except Exception as e:
logger.error(f"Error resetting workflow for new session: {str(e)}")
logger.error(f"Error resetting workflow for new session: {str(e)}")
# ===== Functions moved from serviceCenter =====
async def executeAction(self, methodName: str, actionName: str, parameters: Dict[str, Any]) -> ActionResult:
"""Execute a method action"""
try:
if methodName not in methods:
raise ValueError(f"Unknown method: {methodName}")
method = methods[methodName]
if actionName not in method['actions']:
raise ValueError(f"Unknown action: {actionName} for method {methodName}")
action = method['actions'][actionName]
# Execute the action
return await action['method'](parameters)
except Exception as e:
logger.error(f"Error executing method {methodName}.{actionName}: {str(e)}")
raise
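# Dispatch sketch (illustrative, not part of this commit): a planned action is
# executed by looking up method and action in the module-level `methods`
# catalog and awaiting the bound coroutine with a plain parameter dict, e.g.:
#
#   result = await self.executeAction(
#       "documents",                    # hypothetical method name
#       "convertDocument",              # hypothetical action name
#       {"docItem": "docItem:abc123"},  # parameters passed through as one dict
#   )
#
# Real method/action names are whatever _discoverMethods() registers at runtime.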
def writeTraceLog(self, contextText: str, data: Any) -> None:
"""Write trace data to configured trace file if in debug mode"""
try:
import logging
import os
from datetime import datetime, UTC
from modules.shared.configuration import APP_CONFIG
# Only write if the logger is effectively in debug mode
# (isEnabledFor also covers loggers left at NOTSET that inherit a level)
if not logger.isEnabledFor(logging.DEBUG):
return
# Get log directory from configuration
logDir = APP_CONFIG.get("APP_LOGGING_LOG_DIR", "./")
if not os.path.isabs(logDir):
# If relative path, make it relative to the gateway directory
gatewayDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
logDir = os.path.join(gatewayDir, logDir)
# Ensure log directory exists
os.makedirs(logDir, exist_ok=True)
# Create trace file path
trace_file = os.path.join(logDir, "log_trace.log")
# Format the trace entry
timestamp = datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
trace_entry = f"[{timestamp}] {contextText}\n"
# Add data if provided
if data is not None:
if isinstance(data, (dict, list)):
import json
trace_entry += f"Data: {json.dumps(data, indent=2, default=str)}\n"
else:
trace_entry += f"Data: {str(data)}\n"
trace_entry += "-" * 80 + "\n\n"
# Write to trace file
with open(trace_file, "a", encoding="utf-8") as f:
f.write(trace_entry)
except Exception:
# Don't log trace errors to avoid recursion
pass
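# Trace lifecycle sketch (assumption drawn from these helpers and their call
# sites in this commit): clearTraceLog() below resets log_trace.log when a new
# workflow session starts, after which writeTraceLog() appends timestamped
# entries for as long as DEBUG logging is active:
#
#   self.clearTraceLog()
#   self.writeTraceLog("Planned actions", {"task": 1, "actions": action_list})
#
# `action_list` is a hypothetical variable; dicts and lists are JSON-serialized,
# anything else is stringified.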
def clearTraceLog(self) -> None:
"""Clear the trace log file"""
try:
import logging
import os
from modules.shared.configuration import APP_CONFIG
# Get log directory from configuration
logDir = APP_CONFIG.get("APP_LOGGING_LOG_DIR", "./")
if not os.path.isabs(logDir):
# If relative path, make it relative to the gateway directory
gatewayDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
logDir = os.path.join(gatewayDir, logDir)
# Create trace file path
trace_file = os.path.join(logDir, "log_trace.log")
# Only keep a trace file if the logger is effectively in debug mode
if not logger.isEnabledFor(logging.DEBUG):
# Delete file if not in debug mode
if os.path.exists(trace_file):
os.remove(trace_file)
return
# Create empty file if in debug mode
with open(trace_file, "w", encoding="utf-8") as f:
f.write("")
except Exception:
# Don't log trace errors to avoid recursion
pass

View file

@ -3,13 +3,177 @@
import json
import logging
import importlib
import pkgutil
import inspect
from typing import Any, Dict, List
from modules.interfaces.interfaceChatModel import TaskContext, ReviewContext
from modules.interfaces.interfaceChatModel import TaskContext, ReviewContext, ChatDocument, DocumentExchange
from modules.services.serviceDocument.documentUtility import getFileExtension
from modules.workflows.methods.methodBase import MethodBase
# Set up logger
logger = logging.getLogger(__name__)
# Global methods catalog - moved from serviceCenter
methods = {}
def _discoverMethods(service_center):
"""Dynamically discover all method classes and their actions in modules methods package"""
try:
# Import the methods package
methodsPackage = importlib.import_module('modules.workflows.methods')
# Discover all modules in the package
for _, name, isPkg in pkgutil.iter_modules(methodsPackage.__path__):
if not isPkg and name.startswith('method'):
try:
# Import the module
module = importlib.import_module(f'modules.workflows.methods.{name}')
# Find all classes in the module that inherit from MethodBase
for itemName, item in inspect.getmembers(module):
if (inspect.isclass(item) and
issubclass(item, MethodBase) and
item != MethodBase):
# Instantiate the method
methodInstance = item(service_center)
# Discover actions from public methods
actions = {}
for methodName, method in inspect.getmembers(type(methodInstance), predicate=inspect.iscoroutinefunction):
if not methodName.startswith('_'):
# Bind the method to the instance
bound_method = method.__get__(methodInstance, type(methodInstance))
sig = inspect.signature(method)
params = {}
for paramName, param in sig.parameters.items():
if paramName not in ['self']:
# Get parameter type
paramType = param.annotation if param.annotation != param.empty else Any
# Get parameter description from docstring or default
paramDesc = None
if param.default != param.empty and hasattr(param.default, '__doc__'):
paramDesc = param.default.__doc__
params[paramName] = {
'type': paramType,
'required': param.default == param.empty,
'description': paramDesc,
'default': param.default if param.default != param.empty else None
}
actions[methodName] = {
'description': method.__doc__ or '',
'parameters': params,
'method': bound_method
}
# Add method instance with discovered actions
methods[methodInstance.name] = {
'instance': methodInstance,
'description': methodInstance.description,
'actions': actions
}
logger.info(f"Discovered method: {methodInstance.name} with {len(actions)} actions")
except Exception as e:
logger.error(f"Error loading method module {name}: {str(e)}", exc_info=True)
except Exception as e:
logger.error(f"Error discovering methods: {str(e)}")
def getMethodsList(service_center) -> List[str]:
"""Get list of available methods with their signatures in the required format"""
# Initialize methods if not already done
if not methods:
_discoverMethods(service_center)
methodList = []
for methodName, method in methods.items():
methodInstance = method['instance']
for actionName, action in method['actions'].items():
# Use the new signature format from MethodBase
signature = methodInstance.getActionSignature(actionName)
if signature:
methodList.append(signature)
return methodList
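# Signature format sketch (assumed; the exact rendering comes from
# MethodBase.getActionSignature, which is not shown in this diff): based on the
# '.' and '(' parsing in createActionDefinitionPrompt below, each entry is one
# "method.action(params)" string, e.g. something like:
#
#   ["echo.say(parameters: dict)", "documents.convertDocument(docItem: str)"]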
def getEnhancedDocumentContext(service_center) -> str:
"""Get enhanced document context formatted for action planning prompts with proper docList and docItem references"""
try:
document_list = service_center.getDocumentReferenceList()
# Build technical context string for AI action planning
context = "AVAILABLE DOCUMENTS:\n\n"
# Process chat exchanges (current round)
if document_list["chat"]:
context += "CURRENT ROUND DOCUMENTS:\n"
for exchange in document_list["chat"]:
# Generate docList reference for the exchange (using message ID and label)
# Find the message that corresponds to this exchange
message_id = None
for message in service_center.workflow.messages:
if hasattr(message, 'documentsLabel') and message.documentsLabel == exchange.documentsLabel:
message_id = message.id
break
if message_id:
doc_list_ref = f"docList:{message_id}:{exchange.documentsLabel}"
else:
# Fallback to label-only format if message ID not found
doc_list_ref = f"docList:{exchange.documentsLabel}"
logger.debug(f"Using document label for action planning: {exchange.documentsLabel} (message_id: {message_id})")
context += f"- {doc_list_ref} contains:\n"
# Generate docItem references for each document in the list
for doc_ref in exchange.documents:
if doc_ref.startswith("docItem:"):
context += f" - {doc_ref}\n"
else:
# Convert to proper docItem format if needed
context += f" - docItem:{doc_ref}\n"
context += "\n"
# Process history exchanges (previous rounds)
if document_list["history"]:
context += "WORKFLOW HISTORY DOCUMENTS:\n"
for exchange in document_list["history"]:
# Generate docList reference for the exchange (using message ID and label)
# Find the message that corresponds to this exchange
message_id = None
for message in service_center.workflow.messages:
if hasattr(message, 'documentsLabel') and message.documentsLabel == exchange.documentsLabel:
message_id = message.id
break
if message_id:
doc_list_ref = f"docList:{message_id}:{exchange.documentsLabel}"
else:
# Fallback to label-only format if message ID not found
doc_list_ref = f"docList:{exchange.documentsLabel}"
logger.debug(f"Using history document label for action planning: {exchange.documentsLabel} (message_id: {message_id})")
context += f"- {doc_list_ref} contains:\n"
# Generate docItem references for each document in the list
for doc_ref in exchange.documents:
if doc_ref.startswith("docItem:"):
context += f" - {doc_ref}\n"
else:
# Convert to proper docItem format if needed
context += f" - docItem:{doc_ref}\n"
context += "\n"
if not document_list["chat"] and not document_list["history"]:
context += "NO DOCUMENTS AVAILABLE - This workflow has no documents to process.\n"
return context
except Exception as e:
logger.error(f"Error generating enhanced document context: {str(e)}")
return "NO DOCUMENTS AVAILABLE - Error generating document context."
# Prompt creation helpers
def _getAvailableDocuments(workflow) -> str:
@ -275,7 +439,7 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
async def createActionDefinitionPrompt(context: TaskContext, service) -> str:
"""Create enhanced prompt for action generation with user-friendly messages and enhanced document context"""
methodList = service.getMethodsList()
methodList = getMethodsList(service)
method_actions = {}
for sig in methodList:
if '.' in sig:
@ -283,10 +447,10 @@ async def createActionDefinitionPrompt(context: TaskContext, service) -> str:
action = rest.split('(')[0]
method_actions.setdefault(method, []).append((action, sig))
messageSummary = await service.summarizeChat(context.workflow.messages) if context.workflow else ""
messageSummary = await service.methodService.summarizeChat(context.workflow.messages) if context.workflow else ""
# Get enhanced document context using the new method
available_documents_str = service.getEnhancedDocumentContext()
available_documents_str = getEnhancedDocumentContext(service)
# Get available documents and connections using generic functions
available_docs_summary = _getAvailableDocuments(context.workflow)
@ -299,7 +463,7 @@ async def createActionDefinitionPrompt(context: TaskContext, service) -> str:
for method, actions in method_actions.items():
available_methods_json[method] = {}
# Get the method instance for accessing docstrings
method_instance = service.methods.get(method, {}).get('instance') if hasattr(service, 'methods') else None
method_instance = methods.get(method, {}).get('instance') if methods else None
for action, sig in actions:
# Parse the signature to extract parameters
@ -405,7 +569,7 @@ Previous review feedback:
user_language = service.user.language if service and service.user else 'en'
# Get current workflow context for dynamic examples
workflow_context = service.getWorkflowContext()
workflow_context = service.methodService.getWorkflowContext()
current_round = workflow_context.get('currentRound', 0)
current_task = workflow_context.get('currentTask', 1)
@ -730,7 +894,7 @@ def createResultReviewPrompt(context: ReviewContext, service) -> str:
document_validation_summary += f" - No documents produced\n"
# Get enhanced document context using the new method
document_context = service.getEnhancedDocumentContext()
document_context = getEnhancedDocumentContext(service)
# Get user language from service
user_language = service.user.language if service and service.user else 'en'
@ -837,7 +1001,7 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
def _build_tiny_catalog(service) -> str:
"""Return minimal tool catalog: method -> { action -> [paramNames] }"""
try:
method_signatures = service.getMethodsList()
method_signatures = getMethodsList(service)
except Exception:
method_signatures = []
catalog: Dict[str, Dict[str, List[str]]] = {}
@ -890,8 +1054,8 @@ def createActionParameterPrompt(context: TaskContext, selected_action: Dict[str,
# Get action signature from service center
action_signature = ""
if service and hasattr(service, 'methods') and method in service.methods:
method_instance = service.methods[method]['instance']
if service and method in methods:
method_instance = methods[method]['instance']
action_signature = method_instance.getActionSignature(name)
return f"""Provide only the required parameters for this action.

View file

@ -6,20 +6,20 @@ import asyncio
from modules.interfaces.interfaceAppObjects import User
from modules.interfaces.interfaceChatModel import (UserInputRequest, ChatMessage, ChatWorkflow, TaskItem, TaskStatus)
from modules.interfaces.interfaceChatModel import (UserInputRequest, ChatMessage, ChatWorkflow, TaskItem, TaskStatus, ChatDocument)
from modules.interfaces.interfaceChatObjects import ChatObjects
from modules.workflows._transfer.handlingTasks import HandlingTasks, WorkflowStoppedException
from modules.workflows.processing.handlingTasks import HandlingTasks, WorkflowStoppedException
from modules.interfaces.interfaceChatModel import WorkflowResult
from modules.shared.timezoneUtils import get_utc_timestamp
import uuid
logger = logging.getLogger(__name__)
class WorkflowManager:
"""Manager for workflow processing and coordination"""
def __init__(self, chatInterface: ChatObjects, currentUser: User):
self.chatInterface = chatInterface
self.currentUser = currentUser
def __init__(self, services):
self.services = services
self.handlingTasks = None
# Exported functions
@ -32,19 +32,22 @@ class WorkflowManager:
currentTime = get_utc_timestamp()
if workflowId:
workflow = self.chatInterface.getWorkflow(workflowId)
workflow = self.services.getWorkflow(workflowId)
if not workflow:
raise ValueError(f"Workflow {workflowId} not found")
# Add workflow to services
self.services.workflow = workflow
if workflow.status == "running":
logger.info(f"Stopping running workflow {workflowId} before processing new prompt")
workflow.status = "stopped"
workflow.lastActivity = currentTime
self.chatInterface.updateWorkflow(workflowId, {
self.services.updateWorkflow(workflowId, {
"status": "stopped",
"lastActivity": currentTime
})
self.chatInterface.createLog({
self.services.createLog({
"workflowId": workflowId,
"message": "Workflow stopped for new prompt",
"type": "info",
@ -54,17 +57,17 @@ class WorkflowManager:
await asyncio.sleep(0.1)
newRound = workflow.currentRound + 1
self.chatInterface.updateWorkflow(workflowId, {
self.services.updateWorkflow(workflowId, {
"status": "running",
"lastActivity": currentTime,
"currentRound": newRound
})
workflow = self.chatInterface.getWorkflow(workflowId)
workflow = self.services.getWorkflow(workflowId)
if not workflow:
raise ValueError(f"Failed to reload workflow {workflowId} after update")
self.chatInterface.createLog({
self.services.createLog({
"workflowId": workflowId,
"message": f"Workflow resumed (round {workflow.currentRound})",
"type": "info",
@ -82,7 +85,7 @@ class WorkflowManager:
"currentAction": 0,
"totalTasks": 0,
"totalActions": 0,
"mandateId": self.chatInterface.mandateId,
"mandateId": self.services.mandateId,
"messageIds": [],
"workflowMode": workflowMode,
"maxSteps": 5 if workflowMode == "React" else 1, # Set maxSteps for React mode
@ -96,12 +99,15 @@ class WorkflowManager:
}
}
workflow = self.chatInterface.createWorkflow(workflowData)
workflow = self.services.createWorkflow(workflowData)
logger.info(f"Created workflow with mode: {getattr(workflow, 'workflowMode', 'NOT_SET')}")
logger.info(f"Workflow data passed: {workflowData.get('workflowMode', 'NOT_IN_DATA')}")
workflow.currentRound = 1
self.chatInterface.updateWorkflow(workflow.id, {"currentRound": 1})
self.chatInterface.updateWorkflowStats(workflow.id, bytesSent=0, bytesReceived=0)
self.services.updateWorkflow(workflow.id, {"currentRound": 1})
self.services.updateWorkflowStats(workflow.id, bytesSent=0, bytesReceived=0)
# Add workflow to services
self.services.workflow = workflow
# Start workflow processing asynchronously
asyncio.create_task(self._workflowProcess(userInput, workflow))
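# Execution-model note (observation, not new behavior): the workflow object is
# returned to the API caller right away while _workflowProcess runs as a
# detached asyncio Task; progress is presumably tracked by re-reading the
# workflow record (status, currentTask, currentAction) that updateWorkflow
# keeps current, rather than by awaiting the Task itself.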
@ -114,17 +120,17 @@ class WorkflowManager:
async def workflowStop(self, workflowId: str) -> ChatWorkflow:
"""Stops a running workflow."""
try:
workflow = self.chatInterface.getWorkflow(workflowId)
workflow = self.services.getWorkflow(workflowId)
if not workflow:
raise ValueError(f"Workflow {workflowId} not found")
workflow.status = "stopped"
workflow.lastActivity = get_utc_timestamp()
self.chatInterface.updateWorkflow(workflowId, {
self.services.updateWorkflow(workflowId, {
"status": "stopped",
"lastActivity": workflow.lastActivity
})
self.chatInterface.createLog({
self.services.createLog({
"workflowId": workflowId,
"message": "Workflow stopped",
"type": "warning",
@ -141,8 +147,7 @@ class WorkflowManager:
async def _workflowProcess(self, userInput: UserInputRequest, workflow: ChatWorkflow) -> None:
"""Process a workflow with user input"""
try:
self.handlingTasks = HandlingTasks(self.chatInterface, self.currentUser, workflow)
self.handlingTasks.service.setUserLanguage(userInput.userLanguage)
self.handlingTasks = HandlingTasks(self.services, workflow)
message = await self._sendFirstMessage(userInput, workflow)
task_plan = await self._planTasks(userInput, workflow)
workflow_result = await self._executeTasks(task_plan, workflow)
@ -187,20 +192,20 @@ class WorkflowManager:
}
# Create message first to get messageId
message = self.chatInterface.createMessage(messageData)
message = self.services.createMessage(messageData)
if message:
workflow.messages.append(message)
# Clear trace log for new workflow session
self.handlingTasks.service.clearTraceLog()
self.handlingTasks.clearTraceLog()
# Add documents if any, now with messageId
if userInput.listFileId:
# Process file IDs and add to message data
documents = await self.handlingTasks.service.processFileIds(userInput.listFileId, message.id)
documents = await self._processFileIds(userInput.listFileId, message.id)
message.documents = documents
# Update the message with documents in database
self.chatInterface.updateMessage(message.id, {"documents": [doc.to_dict() for doc in documents]})
self.services.updateMessage(message.id, {"documents": [doc.to_dict() for doc in documents]})
return message
else:
@ -302,14 +307,14 @@ class WorkflowManager:
"taskProgress": "stopped",
"actionProgress": "stopped"
}
message = self.chatInterface.createMessage(stopped_message)
message = self.services.createMessage(stopped_message)
if message:
workflow.messages.append(message)
# Update workflow status to stopped
workflow.status = "stopped"
workflow.lastActivity = get_utc_timestamp()
self.chatInterface.updateWorkflow(workflow.id, {
self.services.updateWorkflow(workflow.id, {
"status": "stopped",
"lastActivity": workflow.lastActivity
})
@ -334,14 +339,14 @@ class WorkflowManager:
"taskProgress": "stopped",
"actionProgress": "stopped"
}
message = self.chatInterface.createMessage(stopped_message)
message = self.services.createMessage(stopped_message)
if message:
workflow.messages.append(message)
# Update workflow status to stopped
workflow.status = "stopped"
workflow.lastActivity = get_utc_timestamp()
self.chatInterface.updateWorkflow(workflow.id, {
self.services.updateWorkflow(workflow.id, {
"status": "stopped",
"lastActivity": workflow.lastActivity,
"totalTasks": workflow.totalTasks,
@ -349,7 +354,7 @@ class WorkflowManager:
})
# Add stopped log entry
self.chatInterface.createLog({
self.services.createLog({
"workflowId": workflow.id,
"message": "Workflow stopped by user",
"type": "warning",
@ -376,14 +381,14 @@ class WorkflowManager:
"taskProgress": "fail",
"actionProgress": "fail"
}
message = self.chatInterface.createMessage(error_message)
message = self.services.createMessage(error_message)
if message:
workflow.messages.append(message)
# Update workflow status to failed
workflow.status = "failed"
workflow.lastActivity = get_utc_timestamp()
self.chatInterface.updateWorkflow(workflow.id, {
self.services.updateWorkflow(workflow.id, {
"status": "failed",
"lastActivity": workflow.lastActivity,
"totalTasks": workflow.totalTasks,
@ -391,7 +396,7 @@ class WorkflowManager:
})
# Add failed log entry
self.chatInterface.createLog({
self.services.createLog({
"workflowId": workflow.id,
"message": f"Workflow failed: {workflow_result.error or 'Unknown error'}",
"type": "error",
@ -423,14 +428,14 @@ class WorkflowManager:
"taskProgress": "fail",
"actionProgress": "fail"
}
message = self.chatInterface.createMessage(error_message)
message = self.services.createMessage(error_message)
if message:
workflow.messages.append(message)
# Update workflow status to failed
workflow.status = "failed"
workflow.lastActivity = get_utc_timestamp()
self.chatInterface.updateWorkflow(workflow.id, {
self.services.updateWorkflow(workflow.id, {
"status": "failed",
"lastActivity": workflow.lastActivity,
"totalTasks": workflow.totalTasks,
@ -468,7 +473,7 @@ class WorkflowManager:
}
# Create message using interface
message = self.chatInterface.createMessage(messageData)
message = self.services.createMessage(messageData)
if message:
workflow.messages.append(message)
@ -477,13 +482,13 @@ class WorkflowManager:
workflow.lastActivity = get_utc_timestamp()
# Update workflow in database
self.chatInterface.updateWorkflow(workflow.id, {
self.services.updateWorkflow(workflow.id, {
"status": "completed",
"lastActivity": workflow.lastActivity
})
# Add completion log entry
self.chatInterface.createLog({
self.services.createLog({
"workflowId": workflow.id,
"message": "Workflow completed",
"type": "success",
@ -529,7 +534,7 @@ class WorkflowManager:
# Update workflow status to stopped
workflow.status = "stopped"
workflow.lastActivity = get_utc_timestamp()
self.chatInterface.updateWorkflow(workflow.id, {
self.services.updateWorkflow(workflow.id, {
"status": "stopped",
"lastActivity": workflow.lastActivity,
"totalTasks": workflow.totalTasks,
@ -554,12 +559,12 @@ class WorkflowManager:
"taskProgress": "pending",
"actionProgress": "pending"
}
message = self.chatInterface.createMessage(stopped_message)
message = self.services.createMessage(stopped_message)
if message:
workflow.messages.append(message)
# Add log entry
self.chatInterface.createLog({
self.services.createLog({
"workflowId": workflow.id,
"message": "Workflow stopped by user",
"type": "warning",
@ -574,7 +579,7 @@ class WorkflowManager:
# Update workflow status to failed
workflow.status = "failed"
workflow.lastActivity = get_utc_timestamp()
self.chatInterface.updateWorkflow(workflow.id, {
self.services.updateWorkflow(workflow.id, {
"status": "failed",
"lastActivity": workflow.lastActivity,
"totalTasks": workflow.totalTasks,
@ -599,12 +604,12 @@ class WorkflowManager:
"taskProgress": "fail",
"actionProgress": "fail"
}
message = self.chatInterface.createMessage(error_message)
message = self.services.createMessage(error_message)
if message:
workflow.messages.append(message)
# Add error log entry
self.chatInterface.createLog({
self.services.createLog({
"workflowId": workflow.id,
"message": f"Workflow failed: {str(error)}",
"type": "error",
@ -613,3 +618,32 @@ class WorkflowManager:
})
raise
async def _processFileIds(self, fileIds: List[str], messageId: str = None) -> List[ChatDocument]:
"""Process file IDs from existing files and return ChatDocument objects"""
documents = []
for fileId in fileIds:
try:
# Get file info from service
fileInfo = self.handlingTasks.service.methodService.getFileInfo(fileId)
if fileInfo:
# Create document directly with all file attributes
document = ChatDocument(
id=str(uuid.uuid4()),
messageId=messageId or "", # Use provided messageId or empty string as fallback
fileId=fileId,
fileName=fileInfo.get("fileName", "unknown"),
fileSize=fileInfo.get("size", 0),
mimeType=fileInfo.get("mimeType", "application/octet-stream")
)
documents.append(document)
logger.info(f"Processed file ID {fileId} -> {document.fileName}")
else:
logger.warning(f"No file info found for file ID {fileId}")
except Exception as e:
logger.error(f"Error processing file ID {fileId}: {str(e)}")
return documents
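# Shape sketch (assumption derived from the .get() keys above): getFileInfo()
# is expected to return a dict like
#
#   {"fileName": "report.pdf", "size": 12345, "mimeType": "application/pdf"}
#
# so each resolvable fileId becomes one ChatDocument linked to the message,
# while unknown IDs are skipped with a warning instead of failing the batch.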
def _setUserLanguage(self, language: str) -> None:
"""Set user language for the service center"""
self.handlingTasks.service.user.language = language

View file

@ -13,7 +13,7 @@ def parse_line(line: str) -> Tuple[Optional[str], Optional[str], Optional[dateti
Extract (logger, function, timestamp) from a log line.
Expected format examples (single line):
2025-09-18 16:35:04 - INFO - modules.workflows._transfer.handlingTasks - Task 1 - Starting action 3/4 - D:\\Athi\\...\\handlingTasks.py:572 - executeTask
2025-09-18 16:35:04 - INFO - modules.workflows.processing.handlingTasks - Task 1 - Starting action 3/4 - D:\\Athi\\...\\handlingTasks.py:572 - executeTask
Returns (logger, function, timestamp_dt) or (None, None, None) if not matched.
"""