Cleaned up key handling and security configuration

ValueOn AG 2025-09-22 00:39:15 +02:00
parent 875b188238
commit 168d66d167
51 changed files with 2468 additions and 2119 deletions

app.py (96 lines changed)
View file

@@ -8,19 +8,79 @@ from zoneinfo import ZoneInfo
import logging
from logging.handlers import RotatingFileHandler
from datetime import timedelta
from datetime import timedelta, datetime
import pathlib
from modules.shared.configuration import APP_CONFIG
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.triggers.cron import CronTrigger
class DailyRotatingFileHandler(RotatingFileHandler):
"""
A rotating file handler that automatically switches to a new file when the date changes.
The log file name includes the current date; the handler rolls over to a new file at midnight.
"""
def __init__(self, log_dir, filename_prefix, max_bytes=10485760, backup_count=5, **kwargs):
self.log_dir = log_dir
self.filename_prefix = filename_prefix
self.current_date = None
self.current_file = None
# Initialize with today's file
self._update_file_if_needed()
# Call parent constructor with current file
super().__init__(self.current_file, maxBytes=max_bytes, backupCount=backup_count, **kwargs)
def _update_file_if_needed(self):
"""Update the log file if the date has changed"""
today = datetime.now().strftime("%Y%m%d")
if self.current_date != today:
self.current_date = today
new_file = os.path.join(self.log_dir, f"{self.filename_prefix}_{today}.log")
if self.current_file != new_file:
self.current_file = new_file
return True
return False
def emit(self, record):
"""Emit a log record, switching files if date has changed"""
# Check if we need to switch to a new file
if self._update_file_if_needed():
# Close current file and open new one
if self.stream:
self.stream.close()
self.stream = None
# Update the baseFilename for the parent class
self.baseFilename = self.current_file
# Reopen the stream
if not self.delay:
self.stream = self._open()
# Call parent emit method
super().emit(record)
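# Editorial sketch (hedged, not part of this commit): minimal standalone wiring for
# DailyRotatingFileHandler. The "./logs" directory and the "demo" prefix are
# illustrative assumptions, not values used by the application.
def _demoDailyHandler():
    os.makedirs("./logs", exist_ok=True)  # the handler itself does not create the directory
    demoLogger = logging.getLogger("daily_demo")
    handler = DailyRotatingFileHandler(log_dir="./logs", filename_prefix="demo")
    handler.setFormatter(logging.Formatter("%(asctime)s - %(levelname)s - %(message)s"))
    demoLogger.addHandler(handler)
    demoLogger.warning("written to ./logs/demo_<YYYYMMDD>.log; rolls over after midnight")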
def initLogging():
"""Initialize logging with configuration from APP_CONFIG"""
# Get log level from config (default to WARNING if not set)
logLevelName = APP_CONFIG.get("APP_LOGGING_LOG_LEVEL", "WARNING")
logLevel = getattr(logging, logLevelName)
# Get log directory from config
logDir = APP_CONFIG.get("APP_LOGGING_LOG_DIR", "./")
if not os.path.isabs(logDir):
# If relative path, make it relative to the gateway directory
gatewayDir = os.path.dirname(os.path.abspath(__file__))
logDir = os.path.join(gatewayDir, logDir)
# Ensure log directory exists
os.makedirs(logDir, exist_ok=True)
# Create formatters using a single-line format
consoleFormatter = logging.Formatter(
fmt="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
@@ -89,25 +149,15 @@ def initLogging():
# Add file handler if enabled
if APP_CONFIG.get("APP_LOGGING_FILE_ENABLED", True):
# Get log file path and ensure it's absolute
logFile = APP_CONFIG.get("APP_LOGGING_LOG_FILE", "app.log")
if not os.path.isabs(logFile):
# If relative path, make it relative to the gateway directory
gatewayDir = os.path.dirname(os.path.abspath(__file__))
logFile = os.path.join(gatewayDir, logFile)
# Ensure log directory exists
logDir = os.path.dirname(logFile)
if logDir:
os.makedirs(logDir, exist_ok=True)
# Create daily application log file with automatic date switching
rotationSize = int(APP_CONFIG.get("APP_LOGGING_ROTATION_SIZE", 10485760)) # Default: 10MB
backupCount = int(APP_CONFIG.get("APP_LOGGING_BACKUP_COUNT", 5))
fileHandler = RotatingFileHandler(
logFile,
maxBytes=rotationSize,
backupCount=backupCount
fileHandler = DailyRotatingFileHandler(
log_dir=logDir,
filename_prefix="log_app",
max_bytes=rotationSize,
backup_count=backupCount
)
fileHandler.setFormatter(fileFormatter)
fileHandler.addFilter(ChromeDevToolsFilter())
@@ -133,7 +183,15 @@ def initLogging():
# Log the current logging configuration
logger = logging.getLogger(__name__)
logger.info(f"Logging initialized with level {logLevelName}")
logger.info(f"Log file: {logFile if APP_CONFIG.get('APP_LOGGING_FILE_ENABLED', True) else 'disabled'}")
logger.info(f"Log directory: {logDir}")
if APP_CONFIG.get('APP_LOGGING_FILE_ENABLED', True):
today = datetime.now().strftime("%Y%m%d")
appLogFile = os.path.join(logDir, f"log_app_{today}.log")
logger.info(f"Application log file: {appLogFile} (auto-switches daily)")
else:
logger.info("Application log file: disabled")
logger.info(f"Console logging: {'enabled' if APP_CONFIG.get('APP_LOGGING_CONSOLE_ENABLED', True) else 'disabled'}")
# Initialize logging
@@ -154,7 +212,7 @@ async def lifespan(app: FastAPI):
# Setup APScheduler for JIRA sync
scheduler = AsyncIOScheduler(timezone=ZoneInfo("Europe/Zurich"))
try:
from modules.services.serviceDeltaSync import perform_sync_jira_delta_group
from modules.features.featureSyncDelta import perform_sync_jira_delta_group
# Schedule sync every 20 minutes (at minutes 00, 20, 40)
scheduler.add_job(
perform_sync_jira_delta_group,

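The add_job call above is cut off by the hunk boundary. A hedged completion consistent with the "minutes 00, 20, 40" comment could look like this; the job id and the replace_existing flag are illustrative assumptions, not taken from the commit:

    scheduler.add_job(
        perform_sync_jira_delta_group,
        CronTrigger(minute="0,20,40", timezone=ZoneInfo("Europe/Zurich")),
        id="jira_delta_sync",   # hypothetical id
        replace_existing=True,  # assumption
    )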
View file

@@ -5,21 +5,6 @@
Auth_ALGORITHM = HS256
Auth_TOKEN_TYPE = bearer
# OpenAI configuration
Connector_AiOpenai_API_URL = https://api.openai.com/v1/chat/completions
Connector_AiOpenai_API_SECRET = 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
Connector_AiOpenai_MODEL_NAME = gpt-4o
Connector_AiOpenai_TEMPERATURE = 0.2
Connector_AiOpenai_MAX_TOKENS = 2000
# Anthropic configuration
Connector_AiAnthropic_API_URL = https://api.anthropic.com/v1/messages
Connector_AiAnthropic_API_SECRET_OLD = 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
Connector_AiAnthropic_API_SECRET = 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
Connector_AiAnthropic_MODEL_NAME = claude-3-5-sonnet-20241022
Connector_AiAnthropic_TEMPERATURE = 0.2
Connector_AiAnthropic_MAX_TOKENS = 2000
# File management configuration
File_Management_MAX_UPLOAD_SIZE_MB = 50
File_Management_CLEANUP_INTERVAL = 240
@@ -36,33 +21,6 @@ Security_LOCK_DURATION_MINUTES = 30
# Content Neutralization configuration
Content_Neutralization_ENABLED = False
# Agent Mail configuration
Service_MSFT_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
Service_MSFT_CLIENT_SECRET = Kxf8Q~2lJIteZ~JaI32kMf1lfaWKATqxXiNiFbzV
Service_MSFT_TENANT_ID = common
# Google Service configuration
Service_GOOGLE_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
Service_GOOGLE_CLIENT_SECRET = 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
# Tavily Web Search configuration
Connector_WebTavily_API_KEY = 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
# Google Cloud Speech Services configuration
Connector_GoogleSpeech_API_KEY = {
"type": "service_account",
"project_id": "poweronid",
"private_key_id": "88db66e4248326e9baeac4231bc196fd46a9a441",
"private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQDTnJuxA+xBL3LA\nPgFILYCsGuppkkdO6d153Q36f2jTj6zpH3OhKMVsaaTBknG2o2+D0Whlk6Yh5rOw\nkWzpMC3y81leRLm5kucERMkBUgd2GL4v16k6m+QGuC3BFlt/XeyuckJNW0V6v/Dy\n3+bSYM7/5o1ftPNWJeAIEWoE/V4wKCYde8RE4Vp1LO5YwhgcM4rRuPmF2OhekpA+\npteYwkY/8/gTTRpZIc8OTsBYRbaMwsjoDj5riuL3boVtkwZwKRb+ZLvupXeU7Ds7\n1305odTcZUwnImHiHfuq83ZJViQiLRNhUAFnQIXPrYLwEpCmzRBGzYHaRlb69ga/\nzqUbKnclAgMBAAECggEAH6W9qHehubioPMAJM7Y6bC2KU/JLNS4csBZd+idb52gG\nwBwIEFjR+H4ZjymhAA4+pe7c4h7MKyh0RI/l7eoFX98Cb+rEq/r1udm1BhGH3s2h\n2UiI8qRQh1YRjF2/nrN5VjhDBOFa6W9opaopZy/l8AzsT8f21zIgPen8z8o6GpFg\n64fJFcbqCGk2ykN2+x2pIOT04tmCszrfbXZP8LEs4xrUB/XwlHL1vT/M3EWIKbnj\njDaIMjw7q/KRgNUvmKS6SU9b3fnOLcQCz9f5cKdiWACKIU/UvuiWhWJ9ou6BWLWU\nva1A6Fi4XJjhW7s3po58/ioQfl0A9p/L92lGg4ST8QKBgQDx8LIM1g0dh9Ql6LmH\nBUGCOewNNXTs+y3ZznUfvVMoyyZK5w/pzeUvkmOwzbRGnZJ9WyCghq8aezyEpo2D\nPL7Odf988IeHmvhyZIM4PLJYgDvSwGXyf/gh6gJkf/4wpx+tx/yQYNBm3Rht7sA0\npSaLehK0E0kW1uyBzHGKgyQOhwKBgQDf6LiZ7hSQqh54vIU1XMDRth0UOo/s/HGi\nDoij29KjmHjLkm8vOlCo83e79X0WhcnyB5kM7nWFegwcM1PJ0Dl8gidUuTlOVDtM\n5u2AaxDoyXAUL457U5dGFAIW+R653ZDkzMfCglacP8HixXEyIpL1cTLqiCAgzszS\nLcSWwoAr8wKBgQC4CGm3X97sFpTmHSd6sCHLaDnJNl9xoAKZifUHpqCqCBVhpm8x\nXp+11vmj1GULzfJPDlE8Khbp4tH+6R39tOhC7fjgVaoSGWxgv1odHfZfYXOf9R/X\nHUZmrbUSM1XsNkPfkZ7pR+teQ1HA1Xo40WMHd1zgw0a2a9fNR/EZ9nUn4wKBgGaK\nUEgGNRrPHadTRnnaoV8o1IZYD2OLdIqvtzm7SOqsv90SkaKCRUAqR5InaYKwAHy7\nqAa5Cc73xqX/h4arujff7x0ouiq5/nJIa0ndPmAtKAvGf6zQ6j0ompBkxAKAioON\nmInmYL2roSI2I5G/LagDkDrB3lzH+Brk5NvZ9RKrAoGAGox462GGGb/NbGdDkahN\ndifzYYvq4FPiWFFo0ynKAulxCBWLXO/N45XNuAyen433d8eREcAYz1Dzax44+MdQ\nHo9dU7YcZvFyt6iZsYeQF8dluHui3vzMpUe0KbqpZC5KMOSw53ZdNIwzo8NTAK59\n+uv3dHGj7sS8fhDo3yCifzc=\n-----END PRIVATE KEY-----\n",
"client_email": "poweron-voice-services@poweronid.iam.gserviceaccount.com",
"client_id": "116641749406798186404",
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
"token_uri": "https://oauth2.googleapis.com/token",
"auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
"client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/poweron-voice-services%40poweronid.iam.gserviceaccount.com",
"universe_domain": "googleapis.com"
}
# Web Search configuration
Web_Search_MAX_QUERY_LENGTH = 400
Web_Search_MAX_RESULTS = 20

View file

@@ -4,51 +4,31 @@
APP_ENV_TYPE = dev
APP_ENV_LABEL = Development Instance Patrick
APP_API_URL = http://localhost:8000
# Database Configuration for Application
# JSON File Storage (current)
# DB_APP_HOST=D:/Temp/_powerondb
# DB_APP_DATABASE=app
# DB_APP_USER=dev_user
# DB_APP_PASSWORD_SECRET=dev_password
APP_KEY_SYSVAR = D:/Athi/Local/Web/poweron/local/key.txt
# PostgreSQL Storage (new)
DB_APP_HOST=localhost
DB_APP_DATABASE=poweron_app_dev
DB_APP_DATABASE=poweron_app
DB_APP_USER=poweron_dev
DB_APP_PASSWORD_SECRET=dev_password
DB_APP_PASSWORD_SECRET = DEV_ENC:Z0FBQUFBQm8wSFJNU2ZKVk41bU5HbmJOREJ6ZmZ1cTcwZ3ZXQlcxY0dTcjVTUEgxemlRVmtUYWlmWXdicW1JcDFUQkRHamFZVUJSUlg4ZTlHaWZIUGhzVUUtTEFiYkxZeXN6NEtrSjZubjFzN0g2OG5SdjdnQm89
DB_APP_PORT=5432
# Database Configuration Chat
# JSON File Storage (current)
# DB_CHAT_HOST=D:/Temp/_powerondb
# DB_CHAT_DATABASE=chat
# DB_CHAT_USER=dev_user
# DB_CHAT_PASSWORD_SECRET=dev_password
# PostgreSQL Storage (new)
DB_CHAT_HOST=localhost
DB_CHAT_DATABASE=poweron_chat_dev
DB_CHAT_DATABASE=poweron_chat
DB_CHAT_USER=poweron_dev
DB_CHAT_PASSWORD_SECRET=dev_password
DB_CHAT_PASSWORD_SECRET = DEV_ENC:Z0FBQUFBQm8wSFJNZk1fUE1Pa01QM1c0WDdaTnQ0ZWRhTExmZm5iR2R0SEZlMDI2VmJvQ2Nrc0RDY1Z3NG9CSVJucUxkX1B4Qk45bkxvN05XYmZXY1NGa2gtWWxuaFg5bmFnR3d0ZmdYS1A5V2xSeFFYTm5ialE9
DB_CHAT_PORT=5432
# Database Configuration Management
# JSON File Storage (current)
# DB_MANAGEMENT_HOST=D:/Temp/_powerondb
# DB_MANAGEMENT_DATABASE=management
# DB_MANAGEMENT_USER=dev_user
# DB_MANAGEMENT_PASSWORD_SECRET=dev_password
# PostgreSQL Storage (new)
DB_MANAGEMENT_HOST=localhost
DB_MANAGEMENT_DATABASE=poweron_management_dev
DB_MANAGEMENT_DATABASE=poweron_management
DB_MANAGEMENT_USER=poweron_dev
DB_MANAGEMENT_PASSWORD_SECRET=dev_password
DB_MANAGEMENT_PASSWORD_SECRET = DEV_ENC:Z0FBQUFBQm8wSFJNVFd0WkdsZGlLYjcxOUpaM2szUGZyWkZseHBCM1JaYm5fMnJNQ1hVLUIwVVlMaTAtZlBkZ0hsTVM5eXVjZkoxamdmWU00dUU5TEs5Zzlhd0RXYVJGR2twV2hLbjFoN2RsUkVjSGd5NExqV1U9
DB_MANAGEMENT_PORT=5432
# Security Configuration
APP_JWT_SECRET_SECRET=rotated_jwt_secret_2025_09_17_f8a3b6c2-7d4e-45b6-9a1f-3c0b9a1d2e7f
APP_JWT_KEY_SECRET = DEV_ENC:Z0FBQUFBQm8wSFJNMnRzSGtvR1Uyd1RmVm01MTJUTkFlQVRYVHJNVmVhSEpaY2k4YTdIUUtvalhLXzJaeDJVQkhlRHZ2MnExR2k4b09ScnF5U2xubnZtWmRUNmx1b2c4bmItbmdMWmc2eVU2X1pFVmE0UzR0d0xzOG52SkVlSi1uZGZoYVdqMGN3Y0tIVUR1bGtyLW9hNEdRemwtSlJJc1RGbWxJdlpxdHhtMldJTjRDWTE4MFhjPQ==
APP_TOKEN_EXPIRY=300
# CORS Configuration
@@ -56,7 +36,7 @@ APP_ALLOWED_ORIGINS=http://localhost:8080,https://playground.poweron-center.net
# Logging configuration
APP_LOGGING_LOG_LEVEL = DEBUG
APP_LOGGING_LOG_FILE = poweron.log
APP_LOGGING_LOG_DIR = D:/Athi/Local/Web/poweron/local/logs
APP_LOGGING_FORMAT = %(asctime)s - %(levelname)s - %(name)s - %(message)s
APP_LOGGING_DATE_FORMAT = %Y-%m-%d %H:%M:%S
APP_LOGGING_CONSOLE_ENABLED = True
@@ -67,3 +47,32 @@ APP_LOGGING_BACKUP_COUNT = 5
# Service Redirects
Service_MSFT_REDIRECT_URI = http://localhost:8000/api/msft/auth/callback
Service_GOOGLE_REDIRECT_URI = http://localhost:8000/api/google/auth/callback
# OpenAI configuration
Connector_AiOpenai_API_URL = https://api.openai.com/v1/chat/completions
Connector_AiOpenai_API_SECRET = DEV_ENC:Z0FBQUFBQm8wSFJNdndUSzRSc0l6UGdRYzNkVlJWZjF1ZG1Id3RwTFhPRnBkX3BhN0NlMHk0a2NkQmk2bmhnemNwY1FtanFEemZUd21zcVFYUTRGWUhpeTlOSEgyUWdZVVBneTYxT2RZQTEyZk1XQ3Y5MDhDd3JnMXRwbVVfaVpDOWF2TDU3Mjl2YURvR0daLW92dDdmUktkQ2VOei0tdHdBPT0=
Connector_AiOpenai_MODEL_NAME = gpt-4o
Connector_AiOpenai_TEMPERATURE = 0.2
Connector_AiOpenai_MAX_TOKENS = 2000
# Anthropic configuration
Connector_AiAnthropic_API_URL = https://api.anthropic.com/v1/messages
Connector_AiAnthropic_API_SECRET = DEV_ENC:Z0FBQUFBQm8wSFJNRW9tR094QlB6bU9na1lGc0RIYzZOX3g5ZGh4dC1NaXZnUExFWDhnWURQdmNRTi1vc2F6RExGZTFZRU5BUjVjV1NTb3hURS1UY1NYdVhBUVRPemptZXZIclRhOG8wLVkxTGc4R01RTG95THFET2ZJRGlSeWMzcVdwejdVcjIyR0VoUzRaVUsyLVVsQ0sxckxoc2MwWmFfSjBvOTNMaGtCajFpRGpqYm5Sc0Zud08xb2dWdXhOYzQ3ZXZySUNrRVZmYnpyQ0tQdjNjbVExelA2UXNzOENzQT09
Connector_AiAnthropic_MODEL_NAME = claude-3-5-sonnet-20241022
Connector_AiAnthropic_TEMPERATURE = 0.2
Connector_AiAnthropic_MAX_TOKENS = 2000
# Agent Mail configuration
Service_MSFT_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
Service_MSFT_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQm8wSFJNay04TEUzbmRHR29jNWp0Nm1MZEtjUkNKRVVTU1p0QUNHMC1vWHpFcTR5eHNDMDBYbnVzRWpEdWVQeE1FRkJDMGlWRWNXZHZfc3M5aG1UdmRYd1J0cElWZGY0aVZ1OWNUMndZTWNXNm9fQ0hCemNwMWdUQW9ya0owOEVUMG1kLUk=
Service_MSFT_TENANT_ID = common
# Google Service configuration
Service_GOOGLE_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
Service_GOOGLE_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQm8wSFJNOVNWNjV1SXllM1ZnWVRmQTdXZmY1YnBjXzl6Q2lKR3R6SzA4SHFvWU96QWVyWG4wc2tLaGQ2SkVOM0tNMUpXaHNNTjEyOWRGeWVtSjdycHBOSjFlRU5XWVFKV0o5Z2l3THU5SHJLaHJXZC1ST1FGdVhwdXBaMFFmQ0lzUmplQmo=
# Tavily Web Search configuration
Connector_WebTavily_API_KEY_SECRET = DEV_ENC:Z0FBQUFBQm8wSFJNSEJ2YmVieFRaWk5yR1kwVzJ2ajlvTVVZN3dzV2pBT25nTmRsa0NXZEM2eHhqRXhBZ19VMTlFWkQ4ZzlnTUY1M0h0SUpWenZLR3JtZDBVOXZuT1JFV3UxMkJCdjZ2YjB1cE1jYlBOVzZsSHVXa19kcTNiVzZIRUZFdVZCeXJ5YUQ=
# Google Cloud Speech Services configuration
Connector_GoogleSpeech_API_KEY_SECRET = DEV_ENC:Z0FBQUFBQm8wSFJNSFFITGlUVzF3NE5Ldk10d3o5MS10Q2o4aEJGM250WF9CeWxFQVNaNHBhMk1hS3E5YXRrakh5dmx0VDJuZ3BsWGVMTC0tbU9wWFRWZWM1N25ibWpkeF84enJ1Y2ViMVd1V0plUWdxN3VId1VRUzBhN3MzLVBkSXEwM1BHT2Z2c3JBalh6eXVKMUNFX2pfbGdGYUg1ZUFfcXhSRnJyT0tzdWVVdG1HSHBZOUgwLUVPMVQ4YkZUc3dMcFlLWjRxQUM1X05OWm5ndmJGcjFETV9UM1FoLWt2RVVEem92UGhvZlRFXzNxOVRzQkhyV0hqeTRWQXdMdDVDbEMwOWFkTnV3UXpsYWZwRENaRzd4QjlwTjJUWHhHLVZPTzd1eXNhSWh5ajNwelgxSDRlNUx0N05yTlI1N1RjSzdIZGhFLXBOMjEwMkxsT0daSVhiWVpQZUtfNVdwdGVrazVMM2NkUGZPOHBuNjM3YXdFcGFPdlVtY01ReGhsVENwNnRvNGhJejNHd3hFOFA0bWgxalFFNDVoQ2xYTG5VN1dDZGhndEdWRlFjYzBRMUgwbzRfS2N3VVgyaXJpYmJfZzNadmx5cTFxS2Vja1I5Qm1UT0hDM1FuNk5JRmYtT2p3RWp2SWxTWGZuU1psOUN4NEJTOHkweWIzY2NjbTJRZG5oRjVxNGh4LTUwZE1zZi1zLU43Ulk4UGtmR0N6dU5RcVVvRF9DQlE5Sk1FR1YtOE84WnVuTDlOUHhQR1JLT2g0VkNIT2ctWTBuMXIwNHhSSjcxNnNWRFhQc18zSm1UR1M0Mm54TGxsRG5uX2tDSWhBNDRGaHFObkhuVmtnVVlQU1FhVWhTdnpGUDRfcDQ1OWpERklHMmN5Y0RVWC1JYlItTUozaWY1dmxZUW12NXAtUEtsQWpqUFk4NzFwWVNfSUNqeDNkc25wMnJHN3c5NTB1dmxmUFZfU0NWS1hQMTc1NmdOTmEyREZRVXB0cmlyaldkT3B0Q3FQMFdpdWQ3WU1RZDZKYlFneDdnQ2NWWHFHSXl1c2xRN21LbDdyUGFUcWFxeVVTOWoxSkVJaFZiUHI2VFBHWEdvM2Q1cXdIVGYyc3Y2cVdRd00ydHdrME8tcDVqSmNLV193R291VElTNWFNa2pMQi1zX21VdnZ1R0tTbEJndndvbWRrVE52eW1aTFFzRURtdGItc3FJeXJDenVTWTlIZ0E1eG1yX2N1SHJSUWIxdm8wakdzaDIyaDQ0cE9UdDlhclp2MzVVamQ2em0zbmdLUzBJa1ZaRFpQaTBnZGpTWnRhRGZxUVNZWDg5VDFndWFmZlZnVG5SUEhlWkpfQnREWS0xbEZfNXd5OUpEUkZHa1NZNWtPbnBadFFialgzazlyM0dTb3ctR2x5LUozT3VDc3F1Tk5TbGN2MnRRS1hTb1gzWUNVSlJuUl85azhxaGxCMzVNQUQzVGg1cDZHalRaOUFrM1JPSGJKaGlKRTAwbnV4TmxIZnhkMF9FODVKUk1GZGlWZk1ScnhmQnJXWmRxMTk3SWhIdnBjSVJJOElkalRUWXFRTFNvQXZpdFpFOUdDWkhHOTRLVmN2cEh0X2JpYjNvRjhvUHFVQVNQdXY4OWxQSWNvcUNfZW5HYy10dEFicldhRHZLS1ktY2RGczQta2lGWXkxb2RhNUZMNExabWx0dXdhR3BSWGpSYVUxRXJZVTNBYmdNVFd5NW1vY2s0T0RlV3hqZjNSMHhJakY1TDBackV5bmM2V1o2SEJlT3RSbnpPR0VXbmhQTUtPMzYyU1RjbFRmQUlWTUZjVGRheXBuekZJN3NNZVFFZ3JHenNnOFdQVWxsbFBoYTVvQUd1NGx2SDdYcGhrdUpSWlRIRWVVUkpxdjJSZV9zb0J3N3o4QnRpYXpTRHdkZ1pqSWswSjdJMjVEZDZUNzZuWDVXWkNxUDRtQ1p1dnk2ZEx0S0NKT2ZUc3B5eEdRdEpnTlZQMkt5OHFjQ3FfcHpzUFZEY3Z5WDdEQkt4cEN2MFg2eXF4bDZFeHZFWk5tMFpUR0xDZi1JVjN4eUtRaXlNXzBJUFV2N19MVTRhMWtxWnd6d0Y2bVNFQUJSdEU5Z01FTjEtZDJmWkpEYUlsTVJnTEJYdU1iVFoySEttd3libURrSUNJelVic2Mzb0t5ZzNDX0hjZUtfOFQ1QkxRWmx2dmhnbDhNZllla1dNa0Y5akVpNDRKdHRSUU9fTE9sYVUzdzZtTkJEYTBWdkxkRURSa01TOGxWcVZkUmxkWTA1QjJjS1pOUjJEQTZxeDdSVXhNWldXbnE1V1J2STVCNkt2VHRuNEdtaHUweWdEbUZyMlhWd09FWWI0UUFyQVpUeDE3QXdfQkMtcjdpUU5GUTQzUEczNWg1Wm5rVEgwRW11RFowVnFxYnpGNUYwYks1Y3JPbTdUc2ZXS1ZfYzdhcno3U1ZXZUVkblRoOVl5XzZpTUgwRXFZeFd6NXdqTGlvNm1QeXgxS2ZFTVJSV1JVejliWFBVRGU1MWVudEZzRDFwSW94YlU1Y3JmallsVldXcHdvTmFQdnU5UE0tNHNHMXhPWE1JQUxCNC1WVVRJNmNJcTM3a1dUWWwzSVptTFg3OXlWLWxITkdiR0MyTmRzRWFOeHBMZEVzbms3RC1MTFo1TVhKeURhUW9peHk1bHhJbHphVzR4RmxiUkJwcmkzcWZ3S3dWV0Jkb2VaZ3pMTXdUNUJmZjZfVEVXeDFNMnBvemM0TUJNeUQ2SE1aeWczc0V6M0NUMHFGdURMbTRka3AzZ1d1TUh2V1c5RzBKQVVlTEstWEthOTdaWUZHTlRHaVNmbEFJRFU3M0l2TWlBNF9kaFpJUXlxMHJYa2lxOGFRbDNqMTA1RDFFclFTcGxmb0g2WVI3Z0NrLWN4cUNzNWVuR2VMaE41dWRqMnR5eWNuM0gwUmIwcTFEQ09qbmJCUFIwbjM4MGF6TlhxQWpKOFZXWGNKdnl2Wi1zU1BsZU5NYWpsbzVKMGxTLUJKckd6enJnZWhXemstenN3NGNqUk9HeGlGaFNhSl83TlUzLTVZWW9zYVZZTTZzSjNfd3JkVDNaZVp4dk1GQVMxblJBRW1BWUZLU1VKUFkyQ1dPbndUNjYwdll2U0JxN1FQNk5OaGVYR3U5TXdGNGFVZGVXcS1tS2dwbVc1V3hEeXhVNkJ2cjdGX2FpY1NvOTJhcWFyOUVGOFpOdmd0R29Rb2RIaU01R05LeWRxUE00WlhOQVlMbkZxZDNyUFRXdUFGZ0lOUmp2RzIyaDlzMGxNQk40VzFzYjAwMEhjRVlrNWJ5cFhpVWYxQkxYQ25rUDJ3RTY1VlVFLThiNG1nY1hkdnZTMGoyVlN6dkJleFhndDNCODhlOVl1ZHBkci1hd3l0NGNXeWZ6aUp4S3pHS1c4aDM3WElBTjBwYlNSbmJoMk5SNF81VVNqd0dXY1JUejVsZnpGS1Z5dHFPNUVVM1I5eGhjblZjMV9idFJ
kc3NZaUdHRlIzQWJQdHhzT01qVW8xUUwxNHZmY3Q1aHBnNHhXTGRjb1BmTmM2X0NmdkpxNS1JMHNQNVg1N0xsd0pmdE8wNktkUGpuX0F3LURyaGhyajg3eWNDdkozUFZIYmpJTTZ3WWVCVFZUd1AtRklFUUxTNXkzalpfdlc4VE1tOHU1Q0MtUWdLbEdYRzdVU1RkM3gyeEY3eXBWLUhXVVo4VkZoUHVkakJPNk0tNTJKTU1JZjVISlR3SmJBQkVhRW51UHg3UjBOMVRPRnF2dzIwRkgxczBBUWZpemFFMzFTeDJfWHZhSkhsTzBhcFIzVmZRODEzRUl1b1ZDUGFqYUxjN2JsbkhYdHVPT00yYlUwbmpVbkU0RkJXbWx5UVFJdHNvNUdxQzMyQnQycDJpMjlnd2xwb3huRUJiZUg5dkhaMjhMV2R5T0NsU0N4WjdBX2ZfODhOdTZOZ0x6WlRIUGI3MzR1ZkJicHN6NzUzRzlsUmVkNlR6MjZjTTA3c290Qzh4ejRiWERHbmFtV1BQV2ZKb2pGU0F1OGsySG9hNHdtSkkxTWpwV2gyaVpWcFpsRWs5a0hSY3UzMk4wQ0dkZWtMbG4xOFZ6TXdEOXBob3I0NjNkT28tZk5IcW5FUkg4YnBtUVFLY1Q5M1lzYzhrRGZOaDF6SnpnejRuM1Y3SW1xMUJmLXpJdEM0UjNHU0t5OEhoamxxLXRmWmtyOS1ud09XeGFzc3VFXzNPWWNGcXFwdHN2cVFEZ0dWdUNKbF9Lc3d6dVhPb3NLMlNEaW1xd3JPLUViYV9GTnNRPT0=
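The DEV_ENC: payloads base64-decode to strings beginning with "gAAAA", the Fernet token prefix, which suggests (an assumption; the decryption code is not part of this diff) that configuration loading resolves them roughly as below, with the key named by APP_KEY_SYSVAR (a key-file path in dev, an environment-variable name such as CONFIG_KEY in int/prod):

    import base64
    import os
    import pathlib
    from cryptography.fernet import Fernet  # assumed dependency

    def loadConfigKey(sysvar: str) -> bytes:
        """Resolve APP_KEY_SYSVAR: a key-file path (dev) or an env-var name (int/prod)."""
        path = pathlib.Path(sysvar)
        if path.is_file():
            return path.read_bytes().strip()
        return os.environ[sysvar].encode()

    def resolveSecret(value: str, key: bytes) -> str:
        """Decrypt DEV_ENC:-prefixed values; return plaintext values unchanged."""
        if not value.startswith("DEV_ENC:"):
            return value
        token = base64.b64decode(value[len("DEV_ENC:"):])  # unwrap the outer base64 layer
        return Fernet(key).decrypt(token).decode("utf-8")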

View file

@@ -4,6 +4,7 @@
APP_ENV_TYPE = int
APP_ENV_LABEL = Integration Instance
APP_API_URL = https://gateway-int.poweron-center.net
APP_KEY_SYSVAR = CONFIG_KEY
# PostgreSQL Storage (new)
DB_APP_HOST=gateway-int-server.postgres.database.azure.com
@@ -27,7 +28,7 @@ DB_MANAGEMENT_PASSWORD_SECRET=VkAjgECESbEVQ$Tu
DB_MANAGEMENT_PORT=5432
# Security Configuration
APP_JWT_SECRET_SECRET=rotated_jwt_secret_2025_09_17_2c5f8e7a-1b3d-49c7-ae5d-9f0a2c3d4b5e
APP_JWT_KEY_SECRET=rotated_jwt_secret_2025_09_17_2c5f8e7a-1b3d-49c7-ae5d-9f0a2c3d4b5e
APP_TOKEN_EXPIRY=300
# CORS Configuration
@@ -35,7 +36,7 @@ APP_ALLOWED_ORIGINS=http://localhost:8080,https://playground.poweron-center.net,
# Logging configuration
APP_LOGGING_LOG_LEVEL = DEBUG
APP_LOGGING_LOG_FILE = /home/site/wwwroot/poweron.log
APP_LOGGING_LOG_DIR = /home/site/wwwroot/
APP_LOGGING_FORMAT = %(asctime)s - %(levelname)s - %(name)s - %(message)s
APP_LOGGING_DATE_FORMAT = %Y-%m-%d %H:%M:%S
APP_LOGGING_CONSOLE_ENABLED = True
@@ -46,3 +47,45 @@ APP_LOGGING_BACKUP_COUNT = 5
# Service Redirects
Service_MSFT_REDIRECT_URI = https://gateway-int.poweron-center.net/api/msft/auth/callback
Service_GOOGLE_REDIRECT_URI = https://gateway-int.poweron-center.net/api/google/auth/callback
# OpenAI configuration
Connector_AiOpenai_API_URL = https://api.openai.com/v1/chat/completions
Connector_AiOpenai_API_SECRET = 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
Connector_AiOpenai_MODEL_NAME = gpt-4o
Connector_AiOpenai_TEMPERATURE = 0.2
Connector_AiOpenai_MAX_TOKENS = 2000
# Anthropic configuration
Connector_AiAnthropic_API_URL = https://api.anthropic.com/v1/messages
Connector_AiAnthropic_API_SECRET = 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
Connector_AiAnthropic_MODEL_NAME = claude-3-5-sonnet-20241022
Connector_AiAnthropic_TEMPERATURE = 0.2
Connector_AiAnthropic_MAX_TOKENS = 2000
# Agent Mail configuration
Service_MSFT_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
Service_MSFT_CLIENT_SECRET = Kxf8Q~2lJIteZ~JaI32kMf1lfaWKATqxXiNiFbzV
Service_MSFT_TENANT_ID = common
# Google Service configuration
Service_GOOGLE_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
Service_GOOGLE_CLIENT_SECRET = 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
# Tavily Web Search configuration
Connector_WebTavily_API_KEY_SECRET = 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
# Google Cloud Speech Services configuration
Connector_GoogleSpeech_API_KEY_SECRET = {
"type": "service_account",
"project_id": "poweronid",
"private_key_id": "88db66e4248326e9baeac4231bc196fd46a9a441",
"private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQDTnJuxA+xBL3LA\nPgFILYCsGuppkkdO6d153Q36f2jTj6zpH3OhKMVsaaTBknG2o2+D0Whlk6Yh5rOw\nkWzpMC3y81leRLm5kucERMkBUgd2GL4v16k6m+QGuC3BFlt/XeyuckJNW0V6v/Dy\n3+bSYM7/5o1ftPNWJeAIEWoE/V4wKCYde8RE4Vp1LO5YwhgcM4rRuPmF2OhekpA+\npteYwkY/8/gTTRpZIc8OTsBYRbaMwsjoDj5riuL3boVtkwZwKRb+ZLvupXeU7Ds7\n1305odTcZUwnImHiHfuq83ZJViQiLRNhUAFnQIXPrYLwEpCmzRBGzYHaRlb69ga/\nzqUbKnclAgMBAAECggEAH6W9qHehubioPMAJM7Y6bC2KU/JLNS4csBZd+idb52gG\nwBwIEFjR+H4ZjymhAA4+pe7c4h7MKyh0RI/l7eoFX98Cb+rEq/r1udm1BhGH3s2h\n2UiI8qRQh1YRjF2/nrN5VjhDBOFa6W9opaopZy/l8AzsT8f21zIgPen8z8o6GpFg\n64fJFcbqCGk2ykN2+x2pIOT04tmCszrfbXZP8LEs4xrUB/XwlHL1vT/M3EWIKbnj\njDaIMjw7q/KRgNUvmKS6SU9b3fnOLcQCz9f5cKdiWACKIU/UvuiWhWJ9ou6BWLWU\nva1A6Fi4XJjhW7s3po58/ioQfl0A9p/L92lGg4ST8QKBgQDx8LIM1g0dh9Ql6LmH\nBUGCOewNNXTs+y3ZznUfvVMoyyZK5w/pzeUvkmOwzbRGnZJ9WyCghq8aezyEpo2D\nPL7Odf988IeHmvhyZIM4PLJYgDvSwGXyf/gh6gJkf/4wpx+tx/yQYNBm3Rht7sA0\npSaLehK0E0kW1uyBzHGKgyQOhwKBgQDf6LiZ7hSQqh54vIU1XMDRth0UOo/s/HGi\nDoij29KjmHjLkm8vOlCo83e79X0WhcnyB5kM7nWFegwcM1PJ0Dl8gidUuTlOVDtM\n5u2AaxDoyXAUL457U5dGFAIW+R653ZDkzMfCglacP8HixXEyIpL1cTLqiCAgzszS\nLcSWwoAr8wKBgQC4CGm3X97sFpTmHSd6sCHLaDnJNl9xoAKZifUHpqCqCBVhpm8x\nXp+11vmj1GULzfJPDlE8Khbp4tH+6R39tOhC7fjgVaoSGWxgv1odHfZfYXOf9R/X\nHUZmrbUSM1XsNkPfkZ7pR+teQ1HA1Xo40WMHd1zgw0a2a9fNR/EZ9nUn4wKBgGaK\nUEgGNRrPHadTRnnaoV8o1IZYD2OLdIqvtzm7SOqsv90SkaKCRUAqR5InaYKwAHy7\nqAa5Cc73xqX/h4arujff7x0ouiq5/nJIa0ndPmAtKAvGf6zQ6j0ompBkxAKAioON\nmInmYL2roSI2I5G/LagDkDrB3lzH+Brk5NvZ9RKrAoGAGox462GGGb/NbGdDkahN\ndifzYYvq4FPiWFFo0ynKAulxCBWLXO/N45XNuAyen433d8eREcAYz1Dzax44+MdQ\nHo9dU7YcZvFyt6iZsYeQF8dluHui3vzMpUe0KbqpZC5KMOSw53ZdNIwzo8NTAK59\n+uv3dHGj7sS8fhDo3yCifzc=\n-----END PRIVATE KEY-----\n",
"client_email": "poweron-voice-services@poweronid.iam.gserviceaccount.com",
"client_id": "116641749406798186404",
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
"token_uri": "https://oauth2.googleapis.com/token",
"auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
"client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/poweron-voice-services%40poweronid.iam.gserviceaccount.com",
"universe_domain": "googleapis.com"
}

View file

@@ -4,6 +4,7 @@
APP_ENV_TYPE = prod
APP_ENV_LABEL = Production Instance
APP_API_URL = https://gateway.poweron-center.net
APP_KEY_SYSVAR = CONFIG_KEY
# PostgreSQL Storage (new)
DB_APP_HOST=gateway-prod-server.postgres.database.azure.com
@@ -27,7 +28,7 @@ DB_MANAGEMENT_PASSWORD_SECRET=prod_password_very_secure.2025
DB_MANAGEMENT_PORT=5432
# Security Configuration
APP_JWT_SECRET_SECRET=rotated_jwt_secret_2025_09_17_prod_e1a9c4d7-6b8f-4f2e-9c1a-7e3d2a1b9c5f
APP_JWT_KEY_SECRET=rotated_jwt_secret_2025_09_17_prod_e1a9c4d7-6b8f-4f2e-9c1a-7e3d2a1b9c5f
APP_TOKEN_EXPIRY=300
# CORS Configuration
@@ -35,7 +36,7 @@ APP_ALLOWED_ORIGINS=http://localhost:8080,https://playground.poweron-center.net,
# Logging configuration
APP_LOGGING_LOG_LEVEL = DEBUG
APP_LOGGING_LOG_FILE = /home/site/wwwroot/poweron.log
APP_LOGGING_LOG_DIR = /home/site/wwwroot/
APP_LOGGING_FORMAT = %(asctime)s - %(levelname)s - %(name)s - %(message)s
APP_LOGGING_DATE_FORMAT = %Y-%m-%d %H:%M:%S
APP_LOGGING_CONSOLE_ENABLED = True
@@ -46,3 +47,44 @@ APP_LOGGING_BACKUP_COUNT = 5
# Service Redirects
Service_MSFT_REDIRECT_URI = https://gateway-prod.poweron-center.net/api/msft/auth/callback
Service_GOOGLE_REDIRECT_URI = https://gateway-prod.poweron-center.net/api/google/auth/callback
# OpenAI configuration
Connector_AiOpenai_API_URL = https://api.openai.com/v1/chat/completions
Connector_AiOpenai_API_SECRET = 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
Connector_AiOpenai_MODEL_NAME = gpt-4o
Connector_AiOpenai_TEMPERATURE = 0.2
Connector_AiOpenai_MAX_TOKENS = 2000
# Anthropic configuration
Connector_AiAnthropic_API_URL = https://api.anthropic.com/v1/messages
Connector_AiAnthropic_API_SECRET = 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
Connector_AiAnthropic_MODEL_NAME = claude-3-5-sonnet-20241022
Connector_AiAnthropic_TEMPERATURE = 0.2
Connector_AiAnthropic_MAX_TOKENS = 2000
# Agent Mail configuration
Service_MSFT_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
Service_MSFT_CLIENT_SECRET = Kxf8Q~2lJIteZ~JaI32kMf1lfaWKATqxXiNiFbzV
Service_MSFT_TENANT_ID = common
# Google Service configuration
Service_GOOGLE_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
Service_GOOGLE_CLIENT_SECRET = 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
# Tavily Web Search configuration
Connector_WebTavily_API_KEY_SECRET = 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
# Google Cloud Speech Services configuration
Connector_GoogleSpeech_API_KEY_SECRET = {
"type": "service_account",
"project_id": "poweronid",
"private_key_id": "88db66e4248326e9baeac4231bc196fd46a9a441",
"private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQDTnJuxA+xBL3LA\nPgFILYCsGuppkkdO6d153Q36f2jTj6zpH3OhKMVsaaTBknG2o2+D0Whlk6Yh5rOw\nkWzpMC3y81leRLm5kucERMkBUgd2GL4v16k6m+QGuC3BFlt/XeyuckJNW0V6v/Dy\n3+bSYM7/5o1ftPNWJeAIEWoE/V4wKCYde8RE4Vp1LO5YwhgcM4rRuPmF2OhekpA+\npteYwkY/8/gTTRpZIc8OTsBYRbaMwsjoDj5riuL3boVtkwZwKRb+ZLvupXeU7Ds7\n1305odTcZUwnImHiHfuq83ZJViQiLRNhUAFnQIXPrYLwEpCmzRBGzYHaRlb69ga/\nzqUbKnclAgMBAAECggEAH6W9qHehubioPMAJM7Y6bC2KU/JLNS4csBZd+idb52gG\nwBwIEFjR+H4ZjymhAA4+pe7c4h7MKyh0RI/l7eoFX98Cb+rEq/r1udm1BhGH3s2h\n2UiI8qRQh1YRjF2/nrN5VjhDBOFa6W9opaopZy/l8AzsT8f21zIgPen8z8o6GpFg\n64fJFcbqCGk2ykN2+x2pIOT04tmCszrfbXZP8LEs4xrUB/XwlHL1vT/M3EWIKbnj\njDaIMjw7q/KRgNUvmKS6SU9b3fnOLcQCz9f5cKdiWACKIU/UvuiWhWJ9ou6BWLWU\nva1A6Fi4XJjhW7s3po58/ioQfl0A9p/L92lGg4ST8QKBgQDx8LIM1g0dh9Ql6LmH\nBUGCOewNNXTs+y3ZznUfvVMoyyZK5w/pzeUvkmOwzbRGnZJ9WyCghq8aezyEpo2D\nPL7Odf988IeHmvhyZIM4PLJYgDvSwGXyf/gh6gJkf/4wpx+tx/yQYNBm3Rht7sA0\npSaLehK0E0kW1uyBzHGKgyQOhwKBgQDf6LiZ7hSQqh54vIU1XMDRth0UOo/s/HGi\nDoij29KjmHjLkm8vOlCo83e79X0WhcnyB5kM7nWFegwcM1PJ0Dl8gidUuTlOVDtM\n5u2AaxDoyXAUL457U5dGFAIW+R653ZDkzMfCglacP8HixXEyIpL1cTLqiCAgzszS\nLcSWwoAr8wKBgQC4CGm3X97sFpTmHSd6sCHLaDnJNl9xoAKZifUHpqCqCBVhpm8x\nXp+11vmj1GULzfJPDlE8Khbp4tH+6R39tOhC7fjgVaoSGWxgv1odHfZfYXOf9R/X\nHUZmrbUSM1XsNkPfkZ7pR+teQ1HA1Xo40WMHd1zgw0a2a9fNR/EZ9nUn4wKBgGaK\nUEgGNRrPHadTRnnaoV8o1IZYD2OLdIqvtzm7SOqsv90SkaKCRUAqR5InaYKwAHy7\nqAa5Cc73xqX/h4arujff7x0ouiq5/nJIa0ndPmAtKAvGf6zQ6j0ompBkxAKAioON\nmInmYL2roSI2I5G/LagDkDrB3lzH+Brk5NvZ9RKrAoGAGox462GGGb/NbGdDkahN\ndifzYYvq4FPiWFFo0ynKAulxCBWLXO/N45XNuAyen433d8eREcAYz1Dzax44+MdQ\nHo9dU7YcZvFyt6iZsYeQF8dluHui3vzMpUe0KbqpZC5KMOSw53ZdNIwzo8NTAK59\n+uv3dHGj7sS8fhDo3yCifzc=\n-----END PRIVATE KEY-----\n",
"client_email": "poweron-voice-services@poweronid.iam.gserviceaccount.com",
"client_id": "116641749406798186404",
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
"token_uri": "https://oauth2.googleapis.com/token",
"auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
"client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/poweron-voice-services%40poweronid.iam.gserviceaccount.com",
"universe_domain": "googleapis.com"
}

View file

@@ -9,7 +9,7 @@ from pathlib import Path
import xml.etree.ElementTree as ET
from bs4 import BeautifulSoup
import uuid
from .documentUtility import (
from modules.chat.documents.documentUtility import (
getFileExtension,
getMimeTypeFromExtension,
detectMimeTypeFromContent,

View file

@@ -3,7 +3,7 @@ from typing import Any, Dict, List, Optional
from datetime import datetime, UTC
import re
from modules.shared.timezoneUtils import get_utc_timestamp
from .documentUtility import (
from modules.chat.documents.documentUtility import (
getFileExtension,
getMimeTypeFromExtension,
detectMimeTypeFromContent,

View file

@@ -10,9 +10,10 @@ from datetime import datetime, UTC
from modules.interfaces.interfaceChatModel import (
TaskStatus, TaskStep, TaskContext, TaskAction, ReviewResult, TaskPlan, WorkflowResult, TaskResult, ReviewContext, ActionResult
)
from modules.interfaces.interfaceAppObjects import getInterface as getAppObjects
from modules.shared.timezoneUtils import get_utc_timestamp
from .executionState import TaskExecutionState
from .promptFactory import (
from modules.chat.handling.executionState import TaskExecutionState
from modules.chat.handling.promptFactory import (
createTaskPlanningPrompt,
createActionDefinitionPrompt,
createResultReviewPrompt
@@ -27,11 +28,13 @@ class WorkflowStoppedException(Exception):
pass
class HandlingTasks:
def __init__(self, chatInterface, service, workflow=None):
def __init__(self, chatInterface, currentUser, workflow=None):
self.chatInterface = chatInterface
self.service = service
self.currentUser = currentUser
self.workflow = workflow
self.documentGenerator = DocumentGenerator(service)
from modules.chat.serviceCenter import ServiceCenter
self.service = ServiceCenter(currentUser, workflow)
self.documentGenerator = DocumentGenerator(self.service)
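# Editorial note (hedged sketch): with this change callers pass the user rather than a
# prebuilt ServiceCenter; HandlingTasks constructs its own service internally, e.g.:
#
#     tasks = HandlingTasks(chatInterface, currentUser, workflow)
#     # tasks.service is the ServiceCenter built from currentUser/workflow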
def _checkWorkflowStopped(self):
"""
@@ -63,7 +66,6 @@ class HandlingTasks:
logger.info(f"=== STARTING TASK PLAN GENERATION ===")
logger.info(f"Workflow ID: {workflow.id}")
logger.info(f"User Input: {userInput}")
available_docs = self.service.getAvailableDocuments(workflow)
# Check workflow status before calling AI service
self._checkWorkflowStopped()
@@ -83,8 +85,8 @@ class HandlingTasks:
task_step=planning_task_step,
workflow=workflow,
workflow_id=workflow.id,
available_documents=available_docs,
available_connections=[],
available_documents=None,
available_connections=None,
previous_results=[],
previous_handover=None,
improvements=[],
@@ -105,10 +107,10 @@ class HandlingTasks:
# Generate the task planning prompt
task_planning_prompt = createTaskPlanningPrompt(task_planning_context, self.service)
# Log the full task planning prompt being sent to AI for debugging
# Log task planning prompt sent to AI
logger.info("=== TASK PLANNING PROMPT SENT TO AI ===")
logger.info(f"User Input: {userInput}")
logger.info(f"Available Documents: {available_docs}")
# Trace task planning prompt
self.service.writeTraceLog("Task Plan Prompt", task_planning_prompt)
prompt = await self.service.callAiTextAdvanced(task_planning_prompt)
@@ -116,12 +118,11 @@ class HandlingTasks:
if not prompt:
raise ValueError("AI service returned no response for task planning")
# Log the full AI response for task planning
# Log task planning response received
logger.info("=== TASK PLANNING AI RESPONSE RECEIVED ===")
logger.info(f"Response length: {len(prompt) if prompt else 0}")
logger.debug("=== FULL TASK PLANNING AI RESPONSE ===")
logger.debug(prompt)
logger.debug("=== END TASK PLANNING AI RESPONSE ===")
# Trace task planning response
self.service.writeTraceLog("Task Plan Response", prompt)
# Inline _parseTaskPlanResponse logic
try:
@@ -297,27 +298,15 @@ class HandlingTasks:
if enhanced_context and enhanced_context.retry_count > 0:
logger.info("=== RETRY CONTEXT FOR ACTION GENERATION ===")
logger.info(f"Retry Count: {enhanced_context.retry_count}")
logger.info(f"Previous Improvements: {enhanced_context.improvements}")
logger.info(f"Previous Review Result: {enhanced_context.previous_review_result}")
logger.info(f"Failure Patterns: {enhanced_context.failure_patterns}")
logger.info(f"Failed Actions: {enhanced_context.failed_actions}")
logger.info(f"Successful Actions: {enhanced_context.successful_actions}")
logger.debug(f"Previous Improvements: {enhanced_context.improvements}")
logger.debug(f"Previous Review Result: {enhanced_context.previous_review_result}")
logger.debug(f"Failure Patterns: {enhanced_context.failure_patterns}")
logger.debug(f"Failed Actions: {enhanced_context.failed_actions}")
logger.debug(f"Successful Actions: {enhanced_context.successful_actions}")
logger.info("=== END RETRY CONTEXT ===")
available_docs = self.service.getAvailableDocuments(workflow)
available_connections = self.service.getConnectionReferenceList()
# Log available resources for debugging
logger.info("=== AVAILABLE RESOURCES FOR ACTION GENERATION ===")
logger.info(f"Available Documents: {available_docs}")
# Note: available_docs is now a string description, not a list
logger.info(f"Available Connections: {len(available_connections) if available_connections else 0}")
if available_connections:
for i, conn in enumerate(available_connections[:5]): # Show first 5
logger.info(f" Conn {i+1}: {conn}")
if len(available_connections) > 5:
logger.info(f" ... and {len(available_connections) - 5} more connections")
logger.info("=== END AVAILABLE RESOURCES ===")
# Log that we're starting action generation
logger.info("=== STARTING ACTION GENERATION ===")
# Create proper context object for action definition
if enhanced_context and isinstance(enhanced_context, TaskContext):
@@ -326,8 +315,8 @@ class HandlingTasks:
task_step=enhanced_context.task_step,
workflow=enhanced_context.workflow,
workflow_id=enhanced_context.workflow_id,
available_documents=enhanced_context.available_documents or available_docs,
available_connections=enhanced_context.available_connections or available_connections,
available_documents=enhanced_context.available_documents,
available_connections=enhanced_context.available_connections,
previous_results=enhanced_context.previous_results or previous_results or [],
previous_handover=enhanced_context.previous_handover,
improvements=enhanced_context.improvements or [],
@@ -346,8 +335,8 @@ class HandlingTasks:
task_step=task_step,
workflow=workflow,
workflow_id=workflow.id,
available_documents=available_docs,
available_connections=available_connections,
available_documents=None,
available_connections=None,
previous_results=previous_results or [],
previous_handover=None,
improvements=[],
@@ -364,30 +353,22 @@ class HandlingTasks:
# Check workflow status before calling AI service
self._checkWorkflowStopped()
# Log the final action context being sent to AI
logger.info("=== FINAL ACTION CONTEXT FOR AI ===")
logger.info(f"Task Step ID: {action_context.task_step.id if action_context.task_step else 'None'}")
logger.info(f"Task Step Objective: {action_context.task_step.objective if action_context.task_step else 'None'}")
logger.info(f"Workflow ID: {action_context.workflow_id}")
logger.info(f"Available Documents: {action_context.available_documents or 'No documents available'}")
logger.info(f"Available Connections Count: {len(action_context.available_connections) if action_context.available_connections else 0}")
logger.info(f"Previous Results Count: {len(action_context.previous_results) if action_context.previous_results else 0}")
logger.info(f"Retry Count: {action_context.retry_count}")
logger.info(f"Is Regeneration: {action_context.is_regeneration}")
logger.info("=== END ACTION CONTEXT ===")
# Generate the action definition prompt
action_prompt = await createActionDefinitionPrompt(action_context, self.service)
# Trace action planning prompt
self.service.writeTraceLog("Action Plan Prompt", action_prompt)
prompt = await self.service.callAiTextAdvanced(action_prompt)
# Check if AI response is valid
if not prompt:
raise ValueError("AI service returned no response")
# Log the full AI response for debugging
logger.debug("=== FULL AI RESPONSE ===")
logger.debug(prompt)
logger.debug("=== END AI RESPONSE ===")
# Log action response received
logger.info("=== ACTION PLAN AI RESPONSE RECEIVED ===")
logger.info(f"Response length: {len(prompt) if prompt else 0}")
# Trace action planning response
self.service.writeTraceLog("Action Plan Response", prompt)
# Inline parseActionResponse logic here
json_start = prompt.find('{')
@@ -875,23 +856,21 @@ class HandlingTasks:
# Use promptFactory for review prompt
prompt = createResultReviewPrompt(review_context, self.service)
# Log the full result review prompt being sent to AI for debugging
# Log result review prompt sent to AI
logger.info("=== RESULT REVIEW PROMPT SENT TO AI ===")
logger.info(f"Task: {task_step.objective}")
logger.info(f"Action Results Count: {len(review_context.action_results) if review_context.action_results else 0}")
logger.info(f"Task Actions Count: {len(review_context.task_actions) if review_context.task_actions else 0}")
logger.info("=== FULL RESULT REVIEW PROMPT ===")
logger.info(prompt)
logger.info("=== END RESULT REVIEW PROMPT ===")
# Trace result review prompt
self.service.writeTraceLog("Result Review Prompt", prompt)
response = await self.service.callAiTextAdvanced(prompt)
# Log the full AI response for result review
# Log result review response received
logger.info("=== RESULT REVIEW AI RESPONSE RECEIVED ===")
logger.info(f"Response length: {len(response) if response else 0}")
logger.debug("=== FULL RESULT REVIEW AI RESPONSE ===")
logger.debug(response)
logger.debug("=== END RESULT REVIEW AI RESPONSE ===")
# Trace result review response
self.service.writeTraceLog("Result Review Response", response)
# Inline parseReviewResponse logic here
json_start = response.find('{')
@@ -1095,6 +1074,17 @@ class HandlingTasks:
)
result_label = action.execResultLabel
# Trace action result (without document data)
action_result_trace = {
"method": action.execMethod,
"action": action.execAction,
"success": result.success,
"error": result.error,
"resultLabel": result_label,
"documentsCount": len(result.documents) if result.documents else 0
}
self.service.writeTraceLog("Action Result", action_result_trace)
# Process documents from the action result
created_documents = []
if result.success:

View file

@@ -3,14 +3,68 @@
import json
import logging
from typing import Any, Dict
from typing import Any, Dict, List
from modules.interfaces.interfaceChatModel import TaskContext, ReviewContext
from modules.chat.documents.documentUtility import getFileExtension
# Set up logger
logger = logging.getLogger(__name__)
# Prompt creation helpers extracted from managerChat.py
def _getAvailableDocuments(workflow) -> str:
"""
Get simple description of available documents for task planning.
Args:
workflow: ChatWorkflow object
Returns:
str: Simple description of document availability
"""
total_documents = 0
document_types = set()
for message in workflow.messages:
if message.documents:
total_documents += len(message.documents)
for doc in message.documents:
try:
file_extension = getFileExtension(doc.fileName)
if file_extension:
document_types.add(file_extension.upper())
except:
pass
if total_documents == 0:
return "No documents available"
elif len(document_types) == 0:
return f"{total_documents} document(s) available"
else:
types_str = ", ".join(sorted(document_types))
return f"{total_documents} document(s) available ({types_str} files)"
def _getConnectionReferenceList(service) -> List[str]:
"""Get list of all UserConnection objects as references with enhanced state information"""
connections = []
# Get user connections through AppObjects interface
user_connections = service.interfaceApp.getUserConnections(service.user.id)
refreshed_count = 0
for conn in user_connections:
# Get enhanced connection reference with state information
enhanced_ref = service.getConnectionReferenceFromUserConnection(conn)
connections.append(enhanced_ref)
# Count refreshed tokens
if "refreshed" in enhanced_ref:
refreshed_count += 1
# Sort by connection reference
if refreshed_count > 0:
logger.info(f"Refreshed {refreshed_count} connection tokens while building action planning prompt")
return sorted(connections)
def _getPreviousRoundContext(service, workflow) -> str:
"""Get context from previous workflow rounds to help understand follow-up prompts"""
try:
@@ -98,8 +152,8 @@ def createTaskPlanningPrompt(context: TaskContext, service) -> str:
# Extract user request from context - use Pydantic model directly
user_request = context.task_step.objective if context.task_step else 'No request specified'
# Extract available documents from context - use Pydantic model directly
available_documents = context.available_documents or "No documents available"
# Get available documents using generic function
available_documents = _getAvailableDocuments(context.workflow) if context.workflow else "No documents available"
# Get previous workflow round context for better understanding of follow-up prompts
previous_round_context = _getPreviousRoundContext(service, context.workflow)
@@ -226,7 +280,9 @@ async def createActionDefinitionPrompt(context: TaskContext, service) -> str:
# Get enhanced document context using the new method
available_documents_str = service.getEnhancedDocumentContext()
connRefs = service.getConnectionReferenceList()
# Get available documents and connections using generic functions
available_docs_summary = _getAvailableDocuments(context.workflow)
connRefs = _getConnectionReferenceList(service)
# Create a structured JSON format for better AI parsing
# This replaces the old hard-to-read format with a clean JSON structure
@@ -603,7 +659,8 @@ IMPORTANT NOTES:
- Always include a user-friendly userMessage for each action in the user's language ({user_language}).
- The examples above show German user messages as reference - adapt the language to match the USER LANGUAGE specified above."""
logging.debug(f"[ACTION PLAN PROMPT] Enhanced Document Context:\n{available_documents_str}\nUser Connections Section:\n{available_connections_str}\nAvailable Methods (detailed):\n{available_methods_str}")
# Removed sensitive data from debug logging
logging.debug(f"[ACTION PLAN PROMPT] Document context and methods prepared")
return prompt

View file

@@ -2,9 +2,8 @@ import logging
from typing import Dict, Any, List
from modules.interfaces.interfaceAppModel import User
from modules.interfaces.interfaceChatModel import ChatWorkflow, UserInputRequest, TaskStep, TaskAction, ActionResult, ReviewResult, TaskPlan, WorkflowResult, TaskContext
from modules.chat.serviceCenter import ServiceCenter
from modules.interfaces.interfaceChatObjects import ChatObjects
from .handling.handlingTasks import HandlingTasks, WorkflowStoppedException
from modules.chat.handling.handlingTasks import HandlingTasks, WorkflowStoppedException
logger = logging.getLogger(__name__)
@@ -16,21 +15,19 @@ class ChatManager:
def __init__(self, currentUser: User, chatInterface: ChatObjects):
self.currentUser = currentUser
self.chatInterface = chatInterface
self.service: ServiceCenter = None
self.workflow: ChatWorkflow = None
self.handlingTasks: HandlingTasks = None
async def initialize(self, workflow: ChatWorkflow) -> None:
"""Initialize chat manager with workflow"""
self.workflow = workflow
self.service = ServiceCenter(self.currentUser, self.workflow)
self.handlingTasks = HandlingTasks(self.chatInterface, self.service, self.workflow)
self.handlingTasks = HandlingTasks(self.chatInterface, self.currentUser, self.workflow)
async def executeUnifiedWorkflow(self, userInput: UserInputRequest, workflow: ChatWorkflow) -> WorkflowResult:
"""Unified Workflow Execution"""
try:
logger.info(f"Starting unified workflow execution for workflow {workflow.id}")
logger.debug(f"User request: {userInput.prompt}")
# Phase 1: High-Level Task Planning
logger.info("Phase 1: Generating task plan")
@@ -54,8 +51,8 @@ class ChatManager:
task_step=task_step,
workflow=workflow,
workflow_id=workflow.id,
available_documents=self.service.getAvailableDocuments(workflow),
available_connections=self.service.getConnectionReferenceList(),
available_documents=None,
available_connections=None,
previous_results=previous_results,
previous_handover=None,
improvements=[],

View file

@@ -15,7 +15,7 @@ from modules.interfaces.interfaceComponentObjects import getInterface as getComp
from modules.interfaces.interfaceAppObjects import getInterface as getAppObjects
from modules.chat.documents.documentExtraction import DocumentExtraction
from modules.chat.documents.documentUtility import getFileExtension, getMimeTypeFromExtension, detectContentTypeFromData
from modules.chat.methodBase import MethodBase
from modules.methods.methodBase import MethodBase
from modules.shared.timezoneUtils import get_utc_timestamp
import uuid
@@ -586,37 +586,6 @@ Please provide a comprehensive summary of this conversation."""
# ===== Functions for Prompts + Actions: Connection References generation and resolution =====
def getConnectionReferenceList(self) -> List[str]:
"""Get list of all UserConnection objects as references with enhanced state information"""
connections = []
# Get user connections through AppObjects interface
logger.debug(f"getConnectionReferenceList: Service center user ID: {self.user.id}")
logger.debug(f"getConnectionReferenceList: Service center user type: {type(self.user)}")
logger.debug(f"getConnectionReferenceList: Service center user object: {self.user}")
user_connections = self.interfaceApp.getUserConnections(self.user.id)
logger.debug(f"getConnectionReferenceList: User ID: {self.user.id}")
logger.debug(f"getConnectionReferenceList: Raw user connections: {user_connections}")
logger.debug(f"getConnectionReferenceList: User connections type: {type(user_connections)}")
logger.debug(f"getConnectionReferenceList: User connections length: {len(user_connections) if user_connections else 0}")
refreshed_count = 0
for conn in user_connections:
# Get enhanced connection reference with state information
enhanced_ref = self.getConnectionReferenceFromUserConnection(conn)
logger.debug(f"getConnectionReferenceList: Enhanced ref for connection {conn.id}: {enhanced_ref}")
connections.append(enhanced_ref)
# Count refreshed tokens
if "refreshed" in enhanced_ref:
refreshed_count += 1
# Sort by connection reference
logger.debug(f"getConnectionReferenceList: Final connections list: {connections}")
if refreshed_count > 0:
logger.info(f"Refreshed {refreshed_count} connection tokens while building action planning prompt")
return sorted(connections)
def getConnectionReferenceFromUserConnection(self, connection: UserConnection) -> str:
"""Get connection reference from UserConnection with enhanced state information"""
# Get token information to check if it's expired
@@ -692,12 +661,12 @@ Please provide a comprehensive summary of this conversation."""
# Try advanced AI first, with retries
for attempt in range(max_retries):
try:
prompt_size = self.calculateObjectSize(prompt)
prompt_size = self._calculateObjectSize(prompt)
if context:
prompt_size += self.calculateObjectSize(context)
prompt_size += self._calculateObjectSize(context)
response = await self.interfaceAiCalls.callAiTextAdvanced(prompt, context)
response_size = self.calculateObjectSize(response)
self.updateWorkflowStats(eventLabel="aicall.anthropic.text", bytesSent=prompt_size, bytesReceived=response_size)
response_size = self._calculateObjectSize(response)
self._updateWorkflowStats(eventLabel="aicall.anthropic.text", bytesSent=prompt_size, bytesReceived=response_size)
return response
except Exception as e:
last_error = e
@@ -726,12 +695,12 @@ Please provide a comprehensive summary of this conversation."""
last_error = None
for attempt in range(max_retries):
try:
prompt_size = self.calculateObjectSize(prompt)
prompt_size = self._calculateObjectSize(prompt)
if context:
prompt_size += self.calculateObjectSize(context)
prompt_size += self._calculateObjectSize(context)
response = await self.interfaceAiCalls.callAiTextBasic(prompt, context)
response_size = self.calculateObjectSize(response)
self.updateWorkflowStats(eventLabel="aicall.openai.text", bytesSent=prompt_size, bytesReceived=response_size)
response_size = self._calculateObjectSize(response)
self._updateWorkflowStats(eventLabel="aicall.openai.text", bytesSent=prompt_size, bytesReceived=response_size)
return response
except Exception as e:
last_error = e
@@ -745,34 +714,34 @@ Please provide a comprehensive summary of this conversation."""
async def callAiImageBasic(self, prompt: str, imageData: str, mimeType: str) -> str:
"""Basic image processing using OpenAI"""
# Calculate prompt size for stats
prompt_size = self.calculateObjectSize(prompt)
prompt_size += self.calculateObjectSize(imageData)
prompt_size = self._calculateObjectSize(prompt)
prompt_size += self._calculateObjectSize(imageData)
# Call AI
response = await self.interfaceAiCalls.callAiImageBasic(prompt, imageData, mimeType)
# Calculate response size for stats
response_size = self.calculateObjectSize(response)
response_size = self._calculateObjectSize(response)
# Update stats
self.updateWorkflowStats(eventLabel="aicall.openai.image", bytesSent=prompt_size, bytesReceived=response_size)
self._updateWorkflowStats(eventLabel="aicall.openai.image", bytesSent=prompt_size, bytesReceived=response_size)
return response
async def callAiImageAdvanced(self, prompt: str, imageData: str, mimeType: str) -> str:
"""Advanced image processing using Anthropic"""
# Calculate prompt size for stats
prompt_size = self.calculateObjectSize(prompt)
prompt_size += self.calculateObjectSize(imageData)
prompt_size = self._calculateObjectSize(prompt)
prompt_size += self._calculateObjectSize(imageData)
# Call AI
response = await self.interfaceAiCalls.callAiImageAdvanced(prompt, imageData, mimeType)
# Calculate response size for stats
response_size = self.calculateObjectSize(response)
response_size = self._calculateObjectSize(response)
# Update stats
self.updateWorkflowStats(eventLabel="aicall.anthropic.image", bytesSent=prompt_size, bytesReceived=response_size)
self._updateWorkflowStats(eventLabel="aicall.anthropic.image", bytesSent=prompt_size, bytesReceived=response_size)
return response
@@ -957,9 +926,9 @@ Please provide a comprehensive summary of this conversation."""
return document
# ===== Internal public helper functions =====
# ===== Internal helper functions =====
def updateWorkflowStats(self, eventLabel: str = None, bytesSent: int = 0, bytesReceived: int = 0, tokenCount: int = 0) -> None:
def _updateWorkflowStats(self, eventLabel: str = None, bytesSent: int = 0, bytesReceived: int = 0, tokenCount: int = 0) -> None:
"""
Centralized function to update workflow statistics in database and running workflow.
@@ -983,7 +952,7 @@ Please provide a comprehensive summary of this conversation."""
except Exception as e:
logger.error(f"Error updating workflow stats: {str(e)}")
def calculateObjectSize(self, obj: Any) -> int:
def _calculateObjectSize(self, obj: Any) -> int:
"""
Calculate the size of an object in bytes.
@@ -1008,38 +977,6 @@ Please provide a comprehensive summary of this conversation."""
logger.error(f"Error calculating object size: {str(e)}")
return 0
def getAvailableDocuments(self, workflow) -> str:
"""
Get simple description of available documents for task planning.
Args:
workflow: ChatWorkflow object
Returns:
str: Simple description of document availability
"""
total_documents = 0
document_types = set()
for message in workflow.messages:
if message.documents:
total_documents += len(message.documents)
for doc in message.documents:
try:
file_extension = getFileExtension(doc.fileName)
if file_extension:
document_types.add(file_extension.upper())
except:
pass
if total_documents == 0:
return "No documents available"
elif len(document_types) == 0:
return f"{total_documents} document(s) available"
else:
types_str = ", ".join(sorted(document_types))
return f"{total_documents} document(s) available ({types_str} files)"
# ===== Functions for Manager: Execution Tools =====
async def executeAction(self, methodName: str, actionName: str, parameters: Dict[str, Any]) -> ActionResult:
@@ -1090,6 +1027,85 @@ Please provide a comprehensive summary of this conversation."""
"""Set user language for the service center"""
self.user.language = language
def writeTraceLog(self, contextText: str, data: Any) -> None:
"""Write trace data to configured trace file if in debug mode"""
try:
import logging
import os
from datetime import datetime, UTC
from modules.shared.configuration import APP_CONFIG
# Only write if logger is in debug mode
if logger.level > logging.DEBUG:
return
# Get log directory from configuration
logDir = APP_CONFIG.get("APP_LOGGING_LOG_DIR", "./")
if not os.path.isabs(logDir):
# If relative path, make it relative to the gateway directory
gatewayDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
logDir = os.path.join(gatewayDir, logDir)
# Ensure log directory exists
os.makedirs(logDir, exist_ok=True)
# Create trace file path
trace_file = os.path.join(logDir, "log_trace.log")
# Format the trace entry
timestamp = datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
trace_entry = f"[{timestamp}] {contextText}\n"
# Add data if provided
if data is not None:
if isinstance(data, (dict, list)):
import json
trace_entry += f"Data: {json.dumps(data, indent=2, default=str)}\n"
else:
trace_entry += f"Data: {str(data)}\n"
trace_entry += "-" * 80 + "\n\n"
# Write to trace file
with open(trace_file, "a", encoding="utf-8") as f:
f.write(trace_entry)
except Exception as e:
# Don't log trace errors to avoid recursion
pass
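# Editorial sketch (hedged): with the logger at DEBUG level, a call such as
#     self.writeTraceLog("Action Result", {"method": "mail", "success": True})
# appends an entry of this shape to <APP_LOGGING_LOG_DIR>/log_trace.log:
#     [2025-09-22 00:39:15.123] Action Result
#     Data: {
#       "method": "mail",
#       "success": true
#     }
# followed by an 80-dash separator line and a blank line.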
def clearTraceLog(self) -> None:
"""Clear the trace log file"""
try:
import logging
import os
from modules.shared.configuration import APP_CONFIG
# Get log directory from configuration
logDir = APP_CONFIG.get("APP_LOGGING_LOG_DIR", "./")
if not os.path.isabs(logDir):
# If relative path, make it relative to the gateway directory
gatewayDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
logDir = os.path.join(gatewayDir, logDir)
# Create trace file path
trace_file = os.path.join(logDir, "log_trace.log")
# Only clear if logger is in debug mode
if logger.level > logging.DEBUG:
# Delete file if not in debug mode
if os.path.exists(trace_file):
os.remove(trace_file)
return
# Create empty file if in debug mode
with open(trace_file, "w", encoding="utf-8") as f:
f.write("")
except Exception as e:
# Don't log trace errors to avoid recursion
pass
# ===== Functions for Manager: Workflow Tools =====
def setWorkflowContext(self, round_number: int = None, task_number: int = None, action_number: int = None):

View file

@@ -29,10 +29,10 @@ class ConnectorGoogleSpeech:
"""
try:
# Get JSON key from config.ini
api_key = APP_CONFIG.get("Connector_GoogleSpeech_API_KEY")
api_key = APP_CONFIG.get("Connector_GoogleSpeech_API_KEY_SECRET")
if not api_key or api_key == "YOUR_GOOGLE_SERVICE_ACCOUNT_JSON_KEY_HERE":
raise ValueError("Google Speech API key not configured. Please set Connector_GoogleSpeech_API_KEY in config.ini with the full service account JSON key")
raise ValueError("Google Speech API key not configured. Please set Connector_GoogleSpeech_API_KEY_SECRET in config.ini with the full service account JSON key")
# Parse the JSON key and set up authentication
try:

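Since the renamed key holds the full service-account JSON, the truncated parsing step presumably continues along these lines (a hedged sketch; google-auth is the assumed client library):

    import json
    from google.oauth2 import service_account  # assumed dependency

    info = json.loads(api_key)
    credentials = service_account.Credentials.from_service_account_info(info)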
View file

@@ -69,9 +69,9 @@ class ConnectorTavily(WebSearchBase, WebCrawlBase, WebScrapeBase):
@classmethod
async def create(cls):
api_key = APP_CONFIG.get("Connector_WebTavily_API_KEY")
api_key = APP_CONFIG.get("Connector_WebTavily_API_KEY_SECRET")
if not api_key:
raise ValueError("Tavily API key not configured. Please set Connector_WebTavily_API_KEY in config.ini")
raise ValueError("Tavily API key not configured. Please set Connector_WebTavily_API_KEY_SECRET in config.ini")
return cls(client=AsyncTavilyClient(api_key=api_key))
async def search_urls(self, request: WebSearchRequest) -> WebSearchActionResult:

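Because create() is an async classmethod factory, construction is awaited; a minimal usage sketch under that assumption:

    import asyncio

    async def demoTavily() -> None:
        connector = await ConnectorTavily.create()  # raises ValueError if the key is missing
        # connector.search_urls(...) would follow here

    asyncio.run(demoTavily())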
View file

@@ -30,7 +30,7 @@ class WorkflowManager:
await self.chatManager.initialize(workflow)
# Set user language
self.chatManager.service.setUserLanguage(userInput.userLanguage)
self.chatManager.handlingTasks.service.setUserLanguage(userInput.userLanguage)
# Send first message
message = await self._sendFirstMessage(userInput, workflow)
@@ -170,10 +170,13 @@ class WorkflowManager:
if message:
workflow.messages.append(message)
# Clear trace log for new workflow session
self.chatManager.handlingTasks.service.clearTraceLog()
# Add documents if any, now with messageId
if userInput.listFileId:
# Process file IDs and add to message data
documents = await self.chatManager.service.processFileIds(userInput.listFileId, message.id)
documents = await self.chatManager.handlingTasks.service.processFileIds(userInput.listFileId, message.id)
message.documents = documents
# Update the message with documents in database
self.chatInterface.updateMessage(message.id, {"documents": [doc.to_dict() for doc in documents]})

View file

@ -1155,7 +1155,7 @@ class ChatObjects:
# Remove the 'Workflow started' log entry
# Start workflow processing
from modules.services.serviceValueonChat import WorkflowManager
from modules.features.featureChatPlayground import WorkflowManager
workflowManager = WorkflowManager(self, currentUser)
# Start the workflow processing asynchronously

View file

@ -7,7 +7,7 @@ import logging
from typing import Dict, Any, List, Optional
from datetime import datetime, UTC
from modules.chat.methodBase import MethodBase, action
from modules.methods.methodBase import MethodBase, action
from modules.interfaces.interfaceChatModel import ActionResult
from modules.shared.timezoneUtils import get_utc_timestamp

View file

@ -9,7 +9,7 @@ import re
from typing import Dict, Any, List, Optional
from datetime import datetime, UTC
from modules.chat.methodBase import MethodBase, action
from modules.methods.methodBase import MethodBase, action
from modules.interfaces.interfaceChatModel import ActionResult
from modules.shared.timezoneUtils import get_utc_timestamp

View file

@ -81,7 +81,7 @@ from datetime import datetime, UTC
import json
import uuid
from modules.chat.methodBase import MethodBase, action
from modules.methods.methodBase import MethodBase, action
from modules.interfaces.interfaceChatModel import ActionResult
from modules.interfaces.interfaceAppModel import ConnectionStatus
from modules.shared.timezoneUtils import get_utc_timestamp

View file

@ -13,7 +13,7 @@ from urllib.parse import urlparse
import aiohttp
import asyncio
from modules.chat.methodBase import MethodBase, action
from modules.methods.methodBase import MethodBase, action
from modules.interfaces.interfaceChatModel import ActionResult
from modules.shared.timezoneUtils import get_utc_timestamp

View file

@ -2,7 +2,7 @@ import logging
import csv
import io
from typing import Any, Dict
from modules.chat.methodBase import MethodBase, action
from modules.methods.methodBase import MethodBase, action
from modules.interfaces.interfaceChatModel import ActionResult, ActionDocument
from modules.interfaces.interfaceWebObjects import WebInterface
from modules.interfaces.interfaceWebModel import (

View file

@ -4,481 +4,109 @@ Supports TXT, JSON, CSV, Excel and Word files
Multilingual: DE, EN, FR, IT
"""
import re
import json
import pandas as pd
import docx
from pathlib import Path
from typing import Dict, List, Tuple, Any, Union, Optional
from dataclasses import dataclass
import uuid
import logging
import traceback
import csv
from datetime import datetime
import xml.etree.ElementTree as ET
import os
import random
from io import StringIO
from modules.neutralizer.patterns import Pattern, HeaderPatterns, DataPatterns, get_pattern_for_header, find_patterns_in_text, TextTablePatterns
import base64
from typing import Dict, List, Any
# Import all necessary classes and functions
from modules.neutralizer.subProcessCommon import ProcessResult, CommonUtils
from modules.neutralizer.subProcessText import TextProcessor, PlainText
from modules.neutralizer.subProcessList import ListProcessor, TableData
from modules.neutralizer.subProcessBinary import BinaryProcessor, BinaryData
from modules.neutralizer.subParseString import StringParser
from modules.neutralizer.subPatterns import Pattern, HeaderPatterns, DataPatterns, TextTablePatterns
# Configure logging
logger = logging.getLogger(__name__)
@dataclass
class TableData:
"""Repräsentiert Tabellendaten"""
headers: List[str]
rows: List[List[str]]
source_type: str # 'csv', 'json', 'xml', 'text_table'
@dataclass
class PlainText:
"""Repräsentiert normalen Text"""
content: str
source_type: str # 'txt', 'docx', 'text_plain'
@dataclass
class ProcessResult:
"""Result of content processing"""
data: Any
mapping: Dict[str, str]
replaced_fields: List[str]
processed_info: Dict[str, Any] # Additional processing information
# Export all classes and functions for external use
__all__ = [
'DataAnonymizer',
'ProcessResult',
'CommonUtils',
'TextProcessor',
'PlainText',
'ListProcessor',
'TableData',
'BinaryProcessor',
'BinaryData',
'StringParser',
'Pattern',
'HeaderPatterns',
'DataPatterns',
'TextTablePatterns'
]
class DataAnonymizer:
"""Hauptklasse für die Datenanonymisierung"""
def __init__(self, names_to_parse: List[str] = None):
"""Initialize the anonymizer with patterns and custom names
"""Initialize the anonymizer with specialized processors
Args:
names_to_parse: List of names to parse and replace (case-insensitive)
"""
self.header_patterns = HeaderPatterns.patterns
self.data_patterns = DataPatterns.patterns
self.names_to_parse = names_to_parse or []
self.replaced_fields = set()
self.mapping = {}
self.processing_info = []
def _normalize_whitespace(self, text: str) -> str:
"""Normalize whitespace in text"""
text = re.sub(r'\s+', ' ', text)
text = text.replace('\r\n', '\n').replace('\r', '\n')
return text.strip()
# Initialize specialized processors
self.text_processor = TextProcessor(names_to_parse)
self.list_processor = ListProcessor(names_to_parse)
self.binary_processor = BinaryProcessor()
# Common utilities
self.common_utils = CommonUtils()
def _is_table_line(self, line: str) -> bool:
"""Check if a line represents a table row"""
return bool(re.match(r'^\s*[^:]+:\s*[^:]+$', line) or
re.match(r'^\s*[^\t]+\t[^\t]+$', line))
def _extract_tables_from_text(self, content: str) -> Tuple[List[TableData], List[PlainText]]:
"""
Extract tables and plain text from content
Args:
content: Content to process
Returns:
Tuple of (list of tables, list of plain text sections)
"""
tables = []
plain_texts = []
# Process the entire content as plain text
plain_texts.append(PlainText(content=content, source_type='text_plain'))
return tables, plain_texts
def _anonymize_table(self, table: TableData) -> TableData:
"""Anonymize table data"""
try:
anonymized_table = TableData(
headers=table.headers.copy(),
rows=[row.copy() for row in table.rows],
source_type=table.source_type
)
for i, header in enumerate(anonymized_table.headers):
pattern = get_pattern_for_header(header, self.header_patterns)
if pattern:
for row in anonymized_table.rows:
if row[i] is not None:
original = str(row[i])
if original not in self.mapping:
self.mapping[original] = pattern.replacement_template.format(len(self.mapping) + 1)
row[i] = self.mapping[original]
return anonymized_table
except Exception as e:
logger.error(f"Error anonymizing table: {str(e)}")
raise
def _anonymize_plain_text(self, text: PlainText) -> PlainText:
"""Anonymize plain text content using simple search-and-replace approach"""
try:
current_text = text.content
# Step 1: Replace custom names first (simple regex search-and-replace)
for name in self.names_to_parse:
if not name.strip():
continue
# Create case-insensitive regex pattern with word boundaries
pattern = re.compile(r'\b' + re.escape(name.strip()) + r'\b', re.IGNORECASE)
# Find all matches for this name
matches = list(pattern.finditer(current_text))
# Replace each match with a placeholder
for match in reversed(matches): # Process from right to left to avoid position shifts
matched_text = match.group()
if matched_text not in self.mapping:
# Generate a UUID for the placeholder
import uuid
placeholder_id = str(uuid.uuid4())
self.mapping[matched_text] = f"[name.{placeholder_id}]"
replacement = self.mapping[matched_text]
start, end = match.span()
current_text = current_text[:start] + replacement + current_text[end:]
# Step 2: Replace pattern-based matches (emails, phones, etc.)
# Use the same simple approach for patterns
pattern_matches = find_patterns_in_text(current_text, self.data_patterns)
# Process pattern matches from right to left to avoid position shifts
for pattern_name, matched_text, start, end in reversed(pattern_matches):
# Skip if already a placeholder
if re.match(r'\[[a-z]+\.[a-f0-9-]+\]', matched_text):
continue
# Skip if contains placeholder characters
if '[' in matched_text or ']' in matched_text:
continue
if matched_text not in self.mapping:
# Generate a UUID for the placeholder
import uuid
placeholder_id = str(uuid.uuid4())
# Create placeholder in format [type.uuid]
type_mapping = {
'email': 'email',
'phone': 'phone',
'address': 'address',
'id': 'id'
}
placeholder_type = type_mapping.get(pattern_name, 'data')
self.mapping[matched_text] = f"[{placeholder_type}.{placeholder_id}]"
replacement = self.mapping[matched_text]
current_text = current_text[:start] + replacement + current_text[end:]
return PlainText(content=current_text, source_type=text.source_type)
except Exception as e:
logger.error(f"Error anonymizing plain text: {str(e)}")
raise
def _anonymize_json_value(self, value: Any, key: str = None) -> Any:
"""
Recursively anonymize JSON values based on their keys and content
Args:
value: Value to anonymize
key: Key name (if part of a key-value pair)
Returns:
Anonymized value
"""
if isinstance(value, dict):
return {k: self._anonymize_json_value(v, k) for k, v in value.items()}
elif isinstance(value, list):
return [self._anonymize_json_value(item) for item in value]
elif isinstance(value, str):
# Check if this is a key we should process
if key:
pattern = get_pattern_for_header(key, self.header_patterns)
if pattern:
if value not in self.mapping:
# Generate a UUID for the placeholder
import uuid
placeholder_id = str(uuid.uuid4())
# Create placeholder in format [type.uuid]
type_mapping = {
'email': 'email',
'phone': 'phone',
'name': 'name',
'address': 'address',
'id': 'id'
}
placeholder_type = type_mapping.get(pattern.name, 'data')
self.mapping[value] = f"[{placeholder_type}.{placeholder_id}]"
return self.mapping[value]
# Check if the value itself matches any patterns
pattern_matches = find_patterns_in_text(value, self.data_patterns)
custom_name_matches = self._find_custom_names(value)
if pattern_matches or custom_name_matches:
# Use the first match's pattern or custom name
if pattern_matches:
pattern_name = pattern_matches[0][0]
if value not in self.mapping:
# Generate a UUID for the placeholder
import uuid
placeholder_id = str(uuid.uuid4())
# Create placeholder in format [type.uuid]
type_mapping = {
'email': 'email',
'phone': 'phone',
'name': 'name',
'address': 'address',
'id': 'id'
}
placeholder_type = type_mapping.get(pattern_name, 'data')
self.mapping[value] = f"[{placeholder_type}.{placeholder_id}]"
elif custom_name_matches:
if value not in self.mapping:
# Generate a UUID for the placeholder
import uuid
placeholder_id = str(uuid.uuid4())
self.mapping[value] = f"[name.{placeholder_id}]"
return self.mapping[value]
return value
else:
return value
def _anonymize_xml_element(self, element: ET.Element, indent: str = '') -> str:
"""
Recursively process XML element and return formatted string
Args:
element: XML element to process
indent: Current indentation level
Returns:
Formatted XML string
"""
# Process attributes
processed_attrs = {}
for attr_name, attr_value in element.attrib.items():
# Check if attribute name matches any header patterns
pattern = get_pattern_for_header(attr_name, self.header_patterns)
if pattern:
if attr_value not in self.mapping:
# Generate a UUID for the placeholder
import uuid
placeholder_id = str(uuid.uuid4())
# Create placeholder in format [type.uuid]
type_mapping = {
'email': 'email',
'phone': 'phone',
'name': 'name',
'address': 'address',
'id': 'id'
}
placeholder_type = type_mapping.get(pattern.name, 'data')
self.mapping[attr_value] = f"[{placeholder_type}.{placeholder_id}]"
processed_attrs[attr_name] = self.mapping[attr_value]
else:
# Check if attribute value matches any data patterns
matches = find_patterns_in_text(attr_value, self.data_patterns)
if matches:
pattern_name = matches[0][0]
pattern = next((p for p in self.data_patterns if p.name == pattern_name), None)
if pattern:
if attr_value not in self.mapping:
# Generate a UUID for the placeholder
import uuid
placeholder_id = str(uuid.uuid4())
# Create placeholder in format [type.uuid]
type_mapping = {
'email': 'email',
'phone': 'phone',
'name': 'name',
'address': 'address',
'id': 'id'
}
placeholder_type = type_mapping.get(pattern_name, 'data')
self.mapping[attr_value] = f"[{placeholder_type}.{placeholder_id}]"
processed_attrs[attr_name] = self.mapping[attr_value]
else:
processed_attrs[attr_name] = attr_value
else:
processed_attrs[attr_name] = attr_value
attrs = ' '.join(f'{k}="{v}"' for k, v in processed_attrs.items())
attrs = f' {attrs}' if attrs else ''
# Process text content
text = element.text.strip() if element.text and element.text.strip() else ''
if text:
# Check if text matches any patterns or custom names
pattern_matches = find_patterns_in_text(text, self.data_patterns)
custom_name_matches = self._find_custom_names(text)
if pattern_matches or custom_name_matches:
if pattern_matches:
pattern_name = pattern_matches[0][0]
pattern = next((p for p in self.data_patterns if p.name == pattern_name), None)
if pattern:
if text not in self.mapping:
# Generate a UUID for the placeholder
import uuid
placeholder_id = str(uuid.uuid4())
# Create placeholder in format [type.uuid]
type_mapping = {
'email': 'email',
'phone': 'phone',
'name': 'name',
'address': 'address',
'id': 'id'
}
placeholder_type = type_mapping.get(pattern_name, 'data')
self.mapping[text] = f"[{placeholder_type}.{placeholder_id}]"
text = self.mapping[text]
elif custom_name_matches:
if text not in self.mapping:
# Generate a UUID for the placeholder
import uuid
placeholder_id = str(uuid.uuid4())
self.mapping[text] = f"[name.{placeholder_id}]"
text = self.mapping[text]
# Process child elements
children = []
for child in element:
child_str = self._anonymize_xml_element(child, indent + ' ')
children.append(child_str)
# Build element string
if not children and not text:
return f"{indent}<{element.tag}{attrs}/>"
elif not children:
return f"{indent}<{element.tag}{attrs}>{text}</{element.tag}>"
else:
result = [f"{indent}<{element.tag}{attrs}>"]
if text:
result.append(f"{indent} {text}")
result.extend(children)
result.append(f"{indent}</{element.tag}>")
return '\n'.join(result)
def process_content(self, content: str, content_type: str) -> ProcessResult:
def process_content(self, content: str, content_type: str = None) -> ProcessResult:
"""
Process content and return anonymized data
Args:
content: Content to process
content_type: Type of content ('csv', 'json', 'xml', 'text')
content_type: Type of content ('csv', 'json', 'xml', 'text', 'binary')
If None, will auto-detect
Returns:
ProcessResult: Contains anonymized data, mapping, replaced fields and processing info
"""
try:
# Auto-detect content type if not provided
if content_type is None:
content_type = self.common_utils.detect_content_type(content)
# Check if content is binary data
is_binary = False
try:
# First, check if content looks like base64 (contains only base64 characters)
if re.match(r'^[A-Za-z0-9+/]*={0,2}$', content.strip()):
# Try to decode base64 if it looks like base64
try:
decoded = base64.b64decode(content)
# If it's not valid text, consider it binary
decoded.decode('utf-8')
is_binary = True
except (base64.binascii.Error, UnicodeDecodeError):
is_binary = False
else:
is_binary = False
except Exception as e:
is_binary = False
if is_binary:
# TODO: Implement binary data neutralization
# This would require:
# 1. Detecting binary data types (images, audio, video, etc.)
# 2. Implementing specific neutralization for each type
# 3. Handling metadata and embedded content
# 4. Preserving binary integrity while removing sensitive data
return ProcessResult(content, self.mapping, [], {'type': 'binary', 'status': 'not_implemented'})
replaced_fields = []
processed_info = {}
if self.binary_processor.is_binary_content(content):
return self.binary_processor.process_binary_content(content)
# Route to appropriate processor based on content type
if content_type in ['csv', 'json', 'xml']:
# Handle as table
if content_type == 'csv':
df = pd.read_csv(StringIO(content), encoding='utf-8')
table = TableData(
headers=df.columns.tolist(),
rows=df.values.tolist(),
source_type='csv'
)
processed_info['type'] = 'table'
processed_info['headers'] = table.headers
processed_info['row_count'] = len(table.rows)
result, mapping, replaced_fields, processed_info = self.list_processor.process_csv_content(content)
elif content_type == 'json':
data = json.loads(content)
# Process JSON recursively
result = self._anonymize_json_value(data)
processed_info['type'] = 'json'
return ProcessResult(result, self.mapping, replaced_fields, processed_info)
result, mapping, replaced_fields, processed_info = self.list_processor.process_json_content(content)
else: # xml
root = ET.fromstring(content)
# Process XML recursively with proper formatting
result = self._anonymize_xml_element(root)
processed_info['type'] = 'xml'
return ProcessResult(result, self.mapping, replaced_fields, processed_info)
result, mapping, replaced_fields, processed_info = self.list_processor.process_xml_content(content)
if not table.rows:
return ProcessResult(None, self.mapping, [], processed_info)
anonymized_table = self._anonymize_table(table)
# Track replaced fields
for i, header in enumerate(anonymized_table.headers):
for orig_row, anon_row in zip(table.rows, anonymized_table.rows):
if anon_row[i] != orig_row[i]:
replaced_fields.append(header)
# Convert back to original format
if content_type == 'csv':
result = pd.DataFrame(anonymized_table.rows, columns=anonymized_table.headers)
elif content_type == 'json':
if len(anonymized_table.headers) == 1 and anonymized_table.headers[0] == 'value':
result = anonymized_table.rows[0][0]
else:
result = dict(zip(anonymized_table.headers, anonymized_table.rows[0]))
else: # xml
result = ET.tostring(root, encoding='unicode')
return ProcessResult(result, self.mapping, replaced_fields, processed_info)
return ProcessResult(result, mapping, replaced_fields, processed_info)
else:
# Handle as text
# First, identify what needs to be replaced using table detection
tables, plain_texts = self._extract_tables_from_text(content)
processed_info['type'] = 'text'
processed_info['tables'] = [{'headers': t.headers, 'row_count': len(t.rows)} for t in tables]
# Process plain text sections
anonymized_texts = [self._anonymize_plain_text(text) for text in plain_texts]
# Combine all processed content
result = content
for i, (text, anonymized_text) in enumerate(zip(plain_texts, anonymized_texts)):
if text.content != anonymized_text.content:
result = result.replace(text.content, anonymized_text.content)
return ProcessResult(result, self.mapping, replaced_fields, processed_info)
result, mapping, replaced_fields, processed_info = self.text_processor.process_text_content(content)
return ProcessResult(result, mapping, replaced_fields, processed_info)
except Exception as e:
logger.error(f"Error processing content: {str(e)}")
return ProcessResult(None, self.mapping, [], {'type': 'error', 'error': str(e)})
return ProcessResult(None, {}, [], {'type': 'error', 'error': str(e)})
def get_mapping(self) -> Dict[str, str]:
"""
Get the combined mapping from all processors
Returns:
Dict[str, str]: Combined mapping dictionary
"""
text_mapping = self.text_processor.get_mapping()
list_mapping = self.list_processor.get_mapping()
return self.common_utils.merge_mappings(text_mapping, list_mapping)
def clear_mapping(self):
"""Clear the mapping in all processors"""
self.text_processor.clear_mapping()
self.list_processor.clear_mapping()

View file

@ -0,0 +1,91 @@
# Neutralizer Module Structure
This module provides GDPR-compliant (DSGVO) data anonymization for AI agent systems. The code has been refactored into specialized sub-modules for better maintainability and code reuse.
## Module Overview
### Core Module
- **`neutralizer.py`** - Main DataAnonymizer class that orchestrates all processing
### Specialized Processors
- **`subProcessText.py`** - Handles plain text processing without header information
- **`subProcessList.py`** - Handles structured data with headers (CSV, JSON, XML)
- **`subProcessBinary.py`** - Handles binary data types (images, audio, video, etc.)
### Utility Modules
- **`subParseString.py`** - String parsing and replacement utilities for emails, phones, addresses, IDs and names
- **`subProcessCommon.py`** - Common utilities and data structures shared across modules
- **`subPatterns.py`** - Pattern definitions for data anonymization
## Key Features
### 1. Modular Architecture
- **Separation of Concerns**: Each module handles a specific type of data processing
- **Code Reuse**: Common functionality is centralized in utility modules
- **Maintainability**: Easier to modify and extend individual components
### 2. Processing Order
1. **Pattern-based matches** (emails, phones, addresses, etc.) are processed FIRST
2. **Custom names** from the user list are processed SECOND
3. **Already anonymized content** (placeholders) is skipped (see the sketch below)
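A minimal sketch of this order, using `StringParser` from `subParseString.py` (the sample text and output are illustrative, and an email pattern is assumed to exist in `DataPatterns`):
```python
from modules.neutralizer.subParseString import StringParser

parser = StringParser(names_to_parse=["John Doe"])
text = "Mail John Doe at john.doe@example.com"
# Pattern matches (the email) are replaced first, then the custom name;
# running process_string again is a no-op because placeholders are skipped.
anonymized = parser.process_string(text)
# e.g. "Mail [name.<uuid>] at [email.<uuid>]"
```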
### 3. Supported Data Types
- **Text**: Plain text documents, emails, etc.
- **Structured Data**: CSV, JSON, XML with headers
- **Binary Data**: Images, audio, video (framework ready, implementation pending)
### 4. Placeholder Protection
- Prevents re-anonymization of already processed content
- Uses format `[tag.uuid]` for placeholders
- Validates placeholder format before processing (see the sketch below)
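The helpers in `subProcessCommon.py` implement this protection; a short sketch (the UUID is made up):
```python
from modules.neutralizer.subProcessCommon import CommonUtils

ph = CommonUtils.create_placeholder("email", "1b9d6bcd-bbfd-4b2d-9b5d-ab8dfbbd4bed")
assert CommonUtils.validate_placeholder(ph)                    # matches [email.<uuid>]
assert CommonUtils.extract_placeholder_info(ph)[0] == "email"  # -> ('email', '<uuid>')
```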
## Usage Example
```python
from modules.neutralizer import DataAnonymizer
# Initialize with custom names
anonymizer = DataAnonymizer(names_to_parse=['John Doe', 'Jane Smith'])
# Process content (auto-detects type)
result = anonymizer.process_content(content)
# Or specify content type explicitly
result = anonymizer.process_content(content, content_type='csv')
# Get mapping of original values to placeholders
mapping = anonymizer.get_mapping()
```
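`process_content` returns a `ProcessResult`; its fields can be inspected directly (the sample content is illustrative):
```python
result = anonymizer.process_content("Call me: john.doe@example.com")
print(result.data)             # anonymized content
print(result.mapping)          # original value -> placeholder
print(result.replaced_fields)  # affected headers (filled for tables only)
print(result.processed_info)   # e.g. {'type': 'text', 'tables': []}
```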
## Module Dependencies
```
neutralizer.py
├── subProcessCommon.py (ProcessResult, CommonUtils)
├── subProcessText.py (TextProcessor)
├── subProcessList.py (ListProcessor)
├── subProcessBinary.py (BinaryProcessor)
└── subPatterns.py (Pattern definitions)
subProcessText.py
└── subParseString.py (StringParser)
subProcessList.py
├── subParseString.py (StringParser)
└── subPatterns.py (HeaderPatterns)
subProcessBinary.py
└── (standalone)
subParseString.py
└── subPatterns.py (DataPatterns)
```
## Benefits of New Structure
1. **Single Responsibility**: Each module has one clear purpose
2. **DRY Principle**: No code duplication across modules
3. **Testability**: Individual modules can be tested in isolation
4. **Extensibility**: Easy to add new data types or processing methods
5. **Maintainability**: Changes to one module don't affect others
6. **Performance**: Specialized processors are optimized for their data types

View file

@ -0,0 +1,162 @@
"""
String parsing and replacement utilities for data anonymization
Handles pattern matching and replacement for emails, phones, addresses, IDs and names
"""
import re
import uuid
from typing import Dict, List, Tuple, Any
from modules.neutralizer.subPatterns import DataPatterns, find_patterns_in_text
class StringParser:
"""Handles string parsing and replacement operations"""
def __init__(self, names_to_parse: List[str] = None):
"""
Initialize the string parser
Args:
names_to_parse: List of names to parse and replace (case-insensitive)
"""
self.data_patterns = DataPatterns.patterns
self.names_to_parse = names_to_parse or []
self.mapping = {}
def is_placeholder(self, text: str) -> bool:
"""
Check if text is already a placeholder in format [tag.uuid]
Args:
text: Text to check
Returns:
bool: True if text is a placeholder
"""
return bool(re.match(r'^\[[a-z]+\.[a-f0-9-]+\]$', text))
def replace_pattern_matches(self, text: str) -> str:
"""
Replace pattern-based matches (emails, phones, etc.) in text
Args:
text: Text to process
Returns:
str: Text with pattern matches replaced
"""
pattern_matches = find_patterns_in_text(text, self.data_patterns)
# Process pattern matches from right to left to avoid position shifts
for pattern_name, matched_text, start, end in reversed(pattern_matches):
# Skip if already a placeholder
if self.is_placeholder(matched_text):
continue
# Skip if contains placeholder characters
if '[' in matched_text or ']' in matched_text:
continue
if matched_text not in self.mapping:
# Generate a UUID for the placeholder
placeholder_id = str(uuid.uuid4())
# Create placeholder in format [type.uuid]
type_mapping = {
'email': 'email',
'phone': 'phone',
'address': 'address',
'id': 'id'
}
placeholder_type = type_mapping.get(pattern_name, 'data')
self.mapping[matched_text] = f"[{placeholder_type}.{placeholder_id}]"
replacement = self.mapping[matched_text]
text = text[:start] + replacement + text[end:]
return text
def replace_custom_names(self, text: str) -> str:
"""
Replace custom names from the user list in text
Args:
text: Text to process
Returns:
str: Text with custom names replaced
"""
for name in self.names_to_parse:
if not name.strip():
continue
# Create case-insensitive regex pattern with word boundaries
pattern = re.compile(r'\b' + re.escape(name.strip()) + r'\b', re.IGNORECASE)
# Find all matches for this name
matches = list(pattern.finditer(text))
# Replace each match with a placeholder
for match in reversed(matches): # Process from right to left to avoid position shifts
matched_text = match.group()
if matched_text not in self.mapping:
# Generate a UUID for the placeholder
placeholder_id = str(uuid.uuid4())
self.mapping[matched_text] = f"[name.{placeholder_id}]"
replacement = self.mapping[matched_text]
start, end = match.span()
text = text[:start] + replacement + text[end:]
return text
def process_string(self, text: str) -> str:
"""
Process a string by replacing patterns first, then custom names
Args:
text: Text to process
Returns:
str: Processed text with replacements
"""
if self.is_placeholder(text):
return text
# Step 1: Replace pattern-based matches FIRST
text = self.replace_pattern_matches(text)
# Step 2: Replace custom names SECOND
text = self.replace_custom_names(text)
return text
def process_json_value(self, value: Any) -> Any:
"""
Process a JSON value for anonymization
Args:
value: Value to process
Returns:
Any: Processed value
"""
if isinstance(value, str):
return self.process_string(value)
elif isinstance(value, dict):
return {k: self.process_json_value(v) for k, v in value.items()}
elif isinstance(value, list):
return [self.process_json_value(item) for item in value]
else:
return value
def get_mapping(self) -> Dict[str, str]:
"""
Get the current mapping of original values to placeholders
Returns:
Dict[str, str]: Mapping dictionary
"""
return self.mapping.copy()
def clear_mapping(self):
"""Clear the current mapping"""
self.mapping.clear()

View file

@ -0,0 +1,101 @@
"""
Binary data processing module for data anonymization
Handles binary data types (images, audio, video, etc.)
"""
import base64
import re
from typing import Dict, Any, Tuple
from dataclasses import dataclass
@dataclass
class BinaryData:
"""Repräsentiert Binärdaten"""
content: str
data_type: str # 'image', 'audio', 'video', 'document', 'unknown'
encoding: str # 'base64', 'hex', 'raw'
class BinaryProcessor:
"""Handles binary data processing for anonymization"""
def __init__(self):
"""Initialize the binary processor"""
self.supported_types = {
'image': ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.webp'],
'audio': ['.mp3', '.wav', '.flac', '.aac', '.ogg', '.m4a'],
'video': ['.mp4', '.avi', '.mov', '.wmv', '.flv', '.mkv', '.webm'],
'document': ['.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx']
}
def detect_binary_type(self, content: str) -> str:
"""
Detect if content is binary data and determine type
Args:
content: Content to analyze
Returns:
str: Binary type or 'text' if not binary
"""
# Check if content looks like base64
if re.match(r'^[A-Za-z0-9+/]*={0,2}$', content.strip()):
try:
decoded = base64.b64decode(content)
# Try to decode as text
decoded.decode('utf-8')
return 'text' # It's base64 encoded text
except (base64.binascii.Error, UnicodeDecodeError):
# It's binary data
return 'binary'
# Check for binary patterns
if len(content) > 100 and '\x00' in content:
return 'binary'
return 'text'
def is_binary_content(self, content: str) -> bool:
"""
Check if content is binary data
Args:
content: Content to check
Returns:
bool: True if content is binary
"""
return self.detect_binary_type(content) == 'binary'
def process_binary_content(self, content: str) -> Tuple[Any, Dict[str, str], list, Dict[str, Any]]:
"""
Process binary content for anonymization
Args:
content: Binary content to process
Returns:
Tuple of (processed_data, mapping, replaced_fields, processed_info)
"""
# TODO: Implement binary data neutralization
# This would require:
# 1. Detecting binary data types (images, audio, video, etc.)
# 2. Implementing specific neutralization for each type
# 3. Handling metadata and embedded content
# 4. Preserving binary integrity while removing sensitive data
processed_info = {
'type': 'binary',
'status': 'not_implemented',
'message': 'Binary data neutralization not yet implemented'
}
return content, {}, [], processed_info
def get_supported_types(self) -> Dict[str, list]:
"""
Get list of supported binary file types
Returns:
Dict[str, list]: Dictionary of supported types and their extensions
"""
return self.supported_types.copy()
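# Usage sketch (names from this module; binary payloads currently pass
# through unchanged with status 'not_implemented'):
# bp = BinaryProcessor()
# if bp.is_binary_content(content):
#     data, mapping, fields, info = bp.process_binary_content(content)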

View file

@ -0,0 +1,143 @@
"""
Common processing utilities for data anonymization
Shared functions and data structures
"""
import re
from typing import Dict, List, Any, Union, Optional
from dataclasses import dataclass
@dataclass
class ProcessResult:
"""Result of content processing"""
data: Any
mapping: Dict[str, str]
replaced_fields: List[str]
processed_info: Dict[str, Any] # Additional processing information
class CommonUtils:
"""Common utility functions for data processing"""
@staticmethod
def normalize_whitespace(text: str) -> str:
"""
Normalize whitespace in text
Args:
text: Text to normalize
Returns:
str: Normalized text
"""
text = re.sub(r'\s+', ' ', text)
text = text.replace('\r\n', '\n').replace('\r', '\n')
return text.strip()
@staticmethod
def is_table_line(line: str) -> bool:
"""
Check if a line represents a table row
Args:
line: Line to check
Returns:
bool: True if line is a table row
"""
return bool(re.match(r'^\s*[^:]+:\s*[^:]+$', line) or
re.match(r'^\s*[^\t]+\t[^\t]+$', line))
@staticmethod
def detect_content_type(content: str) -> str:
"""
Detect the type of content based on its structure
Args:
content: Content to analyze
Returns:
str: Content type ('csv', 'json', 'xml', 'text', 'binary')
"""
content = content.strip()
# Check for JSON
if content.startswith('{') and content.endswith('}'):
return 'json'
if content.startswith('[') and content.endswith(']'):
return 'json'
# Check for XML
if content.startswith('<') and content.endswith('>'):
return 'xml'
# Check for CSV (has commas and newlines)
if ',' in content and '\n' in content:
lines = content.split('\n')
if len(lines) > 1 and all(',' in line for line in lines[:3]):
return 'csv'
# Check for binary
if len(content) > 100 and '\x00' in content:
return 'binary'
# Default to text
return 'text'
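# Illustrative inputs for detect_content_type (sketch):
# '{"a": 1}' -> 'json', '<root/>' -> 'xml',
# 'name,email\nA,a@b.ch' -> 'csv', ordinary prose -> 'text'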
@staticmethod
def merge_mappings(*mappings: Dict[str, str]) -> Dict[str, str]:
"""
Merge multiple mapping dictionaries
Args:
*mappings: Mapping dictionaries to merge
Returns:
Dict[str, str]: Merged mapping dictionary
"""
merged = {}
for mapping in mappings:
merged.update(mapping)
return merged
@staticmethod
def create_placeholder(placeholder_type: str, placeholder_id: str) -> str:
"""
Create a placeholder string in the format [type.uuid]
Args:
placeholder_type: Type of placeholder (email, phone, name, etc.)
placeholder_id: Unique identifier for the placeholder
Returns:
str: Formatted placeholder string
"""
return f"[{placeholder_type}.{placeholder_id}]"
@staticmethod
def validate_placeholder(placeholder: str) -> bool:
"""
Validate if a string is a valid placeholder
Args:
placeholder: String to validate
Returns:
bool: True if valid placeholder
"""
return bool(re.match(r'^\[[a-z]+\.[a-f0-9-]+\]$', placeholder))
@staticmethod
def extract_placeholder_info(placeholder: str) -> Optional[tuple]:
"""
Extract type and ID from a placeholder
Args:
placeholder: Placeholder string
Returns:
Optional[tuple]: (type, id) or None if invalid
"""
match = re.match(r'^\[([a-z]+)\.([a-f0-9-]+)\]$', placeholder)
if match:
return match.group(1), match.group(2)
return None

View file

@ -0,0 +1,279 @@
"""
List processing module for data anonymization
Handles structured data with headers (CSV, JSON, XML)
"""
import json
import pandas as pd
import xml.etree.ElementTree as ET
from typing import Dict, List, Any, Union
from dataclasses import dataclass
from io import StringIO
from modules.neutralizer.subParseString import StringParser
from modules.neutralizer.subPatterns import get_pattern_for_header, HeaderPatterns
@dataclass
class TableData:
"""Repräsentiert Tabellendaten"""
headers: List[str]
rows: List[List[str]]
source_type: str # 'csv', 'json', 'xml', 'text_table'
class ListProcessor:
"""Handles structured data processing with headers for anonymization"""
def __init__(self, names_to_parse: List[str] = None):
"""
Initialize the list processor
Args:
names_to_parse: List of names to parse and replace
"""
self.string_parser = StringParser(names_to_parse)
self.header_patterns = HeaderPatterns.patterns
def anonymize_table(self, table: TableData) -> TableData:
"""
Anonymize table data based on headers
Args:
table: TableData object to anonymize
Returns:
TableData: Anonymized table
"""
anonymized_table = TableData(
headers=table.headers.copy(),
rows=[row.copy() for row in table.rows],
source_type=table.source_type
)
for i, header in enumerate(anonymized_table.headers):
pattern = get_pattern_for_header(header, self.header_patterns)
if pattern:
for row in anonymized_table.rows:
if row[i] is not None:
original = str(row[i])
if original not in self.string_parser.mapping:
# Placeholders here are numbered sequentially via the pattern's replacement template
self.string_parser.mapping[original] = pattern.replacement_template.format(len(self.string_parser.mapping) + 1)
row[i] = self.string_parser.mapping[original]
return anonymized_table
def process_csv_content(self, content: str) -> tuple:
"""
Process CSV content
Args:
content: CSV content to process
Returns:
Tuple of (processed_data, mapping, replaced_fields, processed_info)
"""
df = pd.read_csv(StringIO(content), encoding='utf-8')
table = TableData(
headers=df.columns.tolist(),
rows=df.values.tolist(),
source_type='csv'
)
if not table.rows:
return None, self.string_parser.get_mapping(), [], {'type': 'table', 'headers': table.headers, 'row_count': 0}
anonymized_table = self.anonymize_table(table)
# Track replaced fields
replaced_fields = []
for i, header in enumerate(anonymized_table.headers):
for orig_row, anon_row in zip(table.rows, anonymized_table.rows):
if anon_row[i] != orig_row[i]:
replaced_fields.append(header)
# Convert back to DataFrame
result = pd.DataFrame(anonymized_table.rows, columns=anonymized_table.headers)
processed_info = {
'type': 'table',
'headers': table.headers,
'row_count': len(table.rows)
}
return result, self.string_parser.get_mapping(), replaced_fields, processed_info
def process_json_content(self, content: str) -> tuple:
"""
Process JSON content
Args:
content: JSON content to process
Returns:
Tuple of (processed_data, mapping, replaced_fields, processed_info)
"""
data = json.loads(content)
# Process JSON recursively using string parser
result = self.string_parser.process_json_value(data)
processed_info = {'type': 'json'}
return result, self.string_parser.get_mapping(), [], processed_info
def anonymize_xml_element(self, element: ET.Element, indent: str = '') -> str:
"""
Recursively process XML element and return formatted string
Args:
element: XML element to process
indent: Current indentation level
Returns:
Formatted XML string
"""
# Process attributes
processed_attrs = {}
for attr_name, attr_value in element.attrib.items():
# Check if attribute name matches any header patterns
pattern = get_pattern_for_header(attr_name, self.header_patterns)
if pattern:
if attr_value not in self.string_parser.mapping:
# Generate a UUID for the placeholder
import uuid
placeholder_id = str(uuid.uuid4())
# Create placeholder in format [type.uuid]
type_mapping = {
'email': 'email',
'phone': 'phone',
'name': 'name',
'address': 'address',
'id': 'id'
}
placeholder_type = type_mapping.get(pattern.name, 'data')
self.string_parser.mapping[attr_value] = f"[{placeholder_type}.{placeholder_id}]"
processed_attrs[attr_name] = self.string_parser.mapping[attr_value]
else:
# Check if attribute value matches any data patterns
from modules.neutralizer.subPatterns import find_patterns_in_text, DataPatterns
matches = find_patterns_in_text(attr_value, DataPatterns.patterns)
if matches:
pattern_name = matches[0][0]
pattern = next((p for p in DataPatterns.patterns if p.name == pattern_name), None)
if pattern:
if attr_value not in self.string_parser.mapping:
# Generate a UUID for the placeholder
import uuid
placeholder_id = str(uuid.uuid4())
# Create placeholder in format [type.uuid]
type_mapping = {
'email': 'email',
'phone': 'phone',
'name': 'name',
'address': 'address',
'id': 'id'
}
placeholder_type = type_mapping.get(pattern_name, 'data')
self.string_parser.mapping[attr_value] = f"[{placeholder_type}.{placeholder_id}]"
processed_attrs[attr_name] = self.string_parser.mapping[attr_value]
else:
processed_attrs[attr_name] = attr_value
else:
processed_attrs[attr_name] = attr_value
attrs = ' '.join(f'{k}="{v}"' for k, v in processed_attrs.items())
attrs = f' {attrs}' if attrs else ''
# Process text content
text = element.text.strip() if element.text and element.text.strip() else ''
if text:
# Skip if already a placeholder
if not self.string_parser.is_placeholder(text):
# Check if text matches any patterns
from modules.neutralizer.subPatterns import find_patterns_in_text, DataPatterns
pattern_matches = find_patterns_in_text(text, DataPatterns.patterns)
if pattern_matches:
pattern_name = pattern_matches[0][0]
pattern = next((p for p in DataPatterns.patterns if p.name == pattern_name), None)
if pattern:
if text not in self.string_parser.mapping:
# Generate a UUID for the placeholder
import uuid
placeholder_id = str(uuid.uuid4())
# Create placeholder in format [type.uuid]
type_mapping = {
'email': 'email',
'phone': 'phone',
'name': 'name',
'address': 'address',
'id': 'id'
}
placeholder_type = type_mapping.get(pattern_name, 'data')
self.string_parser.mapping[text] = f"[{placeholder_type}.{placeholder_id}]"
text = self.string_parser.mapping[text]
else:
# Check if text matches any custom names from the user list
for name in self.string_parser.names_to_parse:
if not name.strip():
continue
if text.lower().strip() == name.lower().strip():
if text not in self.string_parser.mapping:
# Generate a UUID for the placeholder
import uuid
placeholder_id = str(uuid.uuid4())
self.string_parser.mapping[text] = f"[name.{placeholder_id}]"
text = self.string_parser.mapping[text]
break
# Process child elements
children = []
for child in element:
child_str = self.anonymize_xml_element(child, indent + ' ')
children.append(child_str)
# Build element string
if not children and not text:
return f"{indent}<{element.tag}{attrs}/>"
elif not children:
return f"{indent}<{element.tag}{attrs}>{text}</{element.tag}>"
else:
result = [f"{indent}<{element.tag}{attrs}>"]
if text:
result.append(f"{indent} {text}")
result.extend(children)
result.append(f"{indent}</{element.tag}>")
return '\n'.join(result)
def process_xml_content(self, content: str) -> tuple:
"""
Process XML content
Args:
content: XML content to process
Returns:
Tuple of (processed_data, mapping, replaced_fields, processed_info)
"""
root = ET.fromstring(content)
# Process XML recursively with proper formatting
result = self.anonymize_xml_element(root)
processed_info = {'type': 'xml'}
return result, self.string_parser.get_mapping(), [], processed_info
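# Sketch: attributes and text nodes whose header name or value matches a
# pattern are replaced in place, e.g. <user email="a@b.ch"/> becomes
# <user email="[email.<uuid>]"/> (the UUID is generated per value).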
def get_mapping(self) -> Dict[str, str]:
"""
Get the current mapping of original values to placeholders
Returns:
Dict[str, str]: Mapping dictionary
"""
return self.string_parser.get_mapping()
def clear_mapping(self):
"""Clear the current mapping"""
self.string_parser.clear_mapping()

View file

@ -0,0 +1,101 @@
"""
Text processing module for data anonymization
Handles plain text processing without header information
"""
from typing import Dict, List, Any
from dataclasses import dataclass
from modules.neutralizer.subParseString import StringParser
@dataclass
class PlainText:
"""Repräsentiert normalen Text"""
content: str
source_type: str # 'txt', 'docx', 'text_plain'
class TextProcessor:
"""Handles plain text processing for anonymization"""
def __init__(self, names_to_parse: List[str] = None):
"""
Initialize the text processor
Args:
names_to_parse: List of names to parse and replace
"""
self.string_parser = StringParser(names_to_parse)
def extract_tables_from_text(self, content: str) -> tuple:
"""
Extract tables and plain text from content
Args:
content: Content to process
Returns:
Tuple of (list of tables, list of plain text sections)
"""
# For now, process the entire content as plain text
# This can be extended later to detect table-like structures
tables = []
plain_texts = [PlainText(content=content, source_type='text_plain')]
return tables, plain_texts
def anonymize_plain_text(self, text: PlainText) -> PlainText:
"""
Anonymize plain text content
Args:
text: PlainText object to anonymize
Returns:
PlainText: Anonymized text
"""
# Use the string parser to process the content
anonymized_content = self.string_parser.process_string(text.content)
return PlainText(content=anonymized_content, source_type=text.source_type)
def process_text_content(self, content: str) -> tuple:
"""
Process text content and return anonymized data
Args:
content: Text content to process
Returns:
Tuple of (anonymized_content, mapping, replaced_fields, processed_info)
"""
# Extract tables and plain text sections
tables, plain_texts = self.extract_tables_from_text(content)
# Process plain text sections
anonymized_texts = [self.anonymize_plain_text(text) for text in plain_texts]
# Combine all processed content
result = content
for text, anonymized_text in zip(plain_texts, anonymized_texts):
if text.content != anonymized_text.content:
result = result.replace(text.content, anonymized_text.content)
# Get processing information
processed_info = {
'type': 'text',
'tables': [{'headers': t.headers, 'row_count': len(t.rows)} for t in tables if hasattr(t, 'headers')]
}
return result, self.string_parser.get_mapping(), [], processed_info
def get_mapping(self) -> Dict[str, str]:
"""
Get the current mapping of original values to placeholders
Returns:
Dict[str, str]: Mapping dictionary
"""
return self.string_parser.get_mapping()
def clear_mapping(self):
"""Clear the current mapping"""
self.string_parser.clear_mapping()

View file

@ -18,7 +18,7 @@ import modules.interfaces.interfaceComponentObjects as interfaceComponentObjects
from modules.interfaces.interfaceComponentModel import FileItem, FilePreview
from modules.shared.attributeUtils import getModelAttributeDefinitions, AttributeResponse, AttributeDefinition
from modules.interfaces.interfaceAppModel import User, DataNeutraliserConfig, DataNeutralizerAttributes
from modules.services.serviceNeutralization import NeutralizationService
from modules.features.featureNeutralizePlayground import NeutralizationService
# Configure logger
logger = logging.getLogger(__name__)

View file

@ -590,6 +590,20 @@ async def logout(
try:
appInterface = getInterface(currentUser)
appInterface.logout()
# Log successful logout
try:
from modules.shared.auditLogger import audit_logger
audit_logger.log_user_access(
user_id=str(currentUser.id),
mandate_id=str(currentUser.mandateId),
action="logout",
success_info="google_auth_logout"
)
except Exception:
# Don't fail if audit logging fails
pass
return {"message": "Logged out successfully"}
except Exception as e:
logger.error(f"Error during logout: {str(e)}")

View file

@ -124,6 +124,19 @@ async def login(
# Save access token
userInterface.saveAccessToken(token)
# Log successful login
try:
from modules.shared.auditLogger import audit_logger
audit_logger.log_user_access(
user_id=str(user.id),
mandate_id=str(user.mandateId),
action="login",
success_info="local_auth_success"
)
except Exception:
# Don't fail if audit logging fails
pass
# Create response data
response_data = {
"type": "local_auth_success",
@ -138,6 +151,20 @@ async def login(
# Handle authentication errors
error_msg = str(e)
logger.warning(f"Authentication failed for user {formData.username}: {error_msg}")
# Log failed login attempt
try:
from modules.shared.auditLogger import audit_logger
audit_logger.log_user_access(
user_id="unknown",
mandate_id="unknown",
action="login",
success_info=f"failed: {error_msg}"
)
except Exception:
# Don't fail if audit logging fails
pass
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED,
detail=error_msg,
@ -253,6 +280,19 @@ async def logout(request: Request, currentUser: User = Depends(getCurrentUser))
appInterface.revokeTokenById(jti, revokedBy=currentUser.id, reason="logout")
revoked = 1
# Log successful logout
try:
from modules.shared.auditLogger import audit_logger
audit_logger.log_user_access(
user_id=str(currentUser.id),
mandate_id=str(currentUser.mandateId),
action="logout",
success_info=f"revoked_tokens: {revoked}"
)
except Exception:
# Don't fail if audit logging fails
pass
return JSONResponse({
"message": "Successfully logged out",
"revokedTokens": revoked

View file

@ -463,6 +463,20 @@ async def logout(
try:
appInterface = getInterface(currentUser)
appInterface.logout()
# Log successful logout
try:
from modules.shared.auditLogger import audit_logger
audit_logger.log_user_access(
user_id=str(currentUser.id),
mandate_id=str(currentUser.mandateId),
action="logout",
success_info="microsoft_auth_logout"
)
except Exception:
# Don't fail if audit logging fails
pass
return {"message": "Logged out successfully"}
except Exception as e:
logger.error(f"Error during logout: {str(e)}")

View file

@ -161,12 +161,12 @@ async def realtime_interpreter(
logger.info(f"📊 Audio file size: {len(audio_content)} bytes")
# Save audio file for debugging with correct extension
file_extension = "webm" if audio_file.filename.endswith('.webm') else "wav"
debug_filename = f"debug_audio/audio_google_{audio_file.filename.replace('.wav', '.webm')}"
os.makedirs("debug_audio", exist_ok=True)
with open(debug_filename, "wb") as f:
f.write(audio_content)
logger.info(f"💾 Saved audio file for debugging: {debug_filename}")
# file_extension = "webm" if audio_file.filename.endswith('.webm') else "wav"
# debug_filename = f"debug_audio/audio_google_{audio_file.filename.replace('.wav', '.webm')}"
# os.makedirs("debug_audio", exist_ok=True)
# with open(debug_filename, "wb") as f:
# f.write(audio_content)
# logger.info(f"💾 Saved audio file for debugging: {debug_filename}")
# Validate audio format
connector = get_google_speech_connector()

View file

@ -19,7 +19,7 @@ from modules.interfaces.interfaceAppObjects import getRootInterface
from modules.interfaces.interfaceAppModel import User, AuthAuthority, Token
# Get Config Data
SECRET_KEY = APP_CONFIG.get("APP_JWT_SECRET_SECRET")
SECRET_KEY = APP_CONFIG.get("APP_JWT_KEY_SECRET")
ALGORITHM = APP_CONFIG.get("Auth_ALGORITHM")
ACCESS_TOKEN_EXPIRE_MINUTES = int(APP_CONFIG.get("APP_TOKEN_EXPIRY"))
REFRESH_TOKEN_EXPIRE_DAYS = int(APP_CONFIG.get("APP_REFRESH_TOKEN_EXPIRY", "7"))

View file

@ -0,0 +1,202 @@
"""
Audit Logging System for PowerOn Gateway
This module provides centralized audit logging functionality for security events,
user actions, and system access patterns.
"""
import logging
import os
from datetime import datetime
from typing import Optional, Dict, Any
from logging.handlers import RotatingFileHandler
from modules.shared.configuration import APP_CONFIG
class DailyRotatingFileHandler(RotatingFileHandler):
"""
A rotating file handler that automatically switches to a new file when the date changes.
The log file name includes the current date and switches at midnight.
"""
def __init__(self, log_dir, filename_prefix, max_bytes=10485760, backup_count=5, **kwargs):
self.log_dir = log_dir
self.filename_prefix = filename_prefix
self.current_date = None
self.current_file = None
# Initialize with today's file
self._update_file_if_needed()
# Call parent constructor with current file
super().__init__(self.current_file, maxBytes=max_bytes, backupCount=backup_count, **kwargs)
def _update_file_if_needed(self):
"""Update the log file if the date has changed"""
today = datetime.now().strftime("%Y%m%d")
if self.current_date != today:
self.current_date = today
new_file = os.path.join(self.log_dir, f"{self.filename_prefix}_{today}.log")
if self.current_file != new_file:
self.current_file = new_file
return True
return False
def emit(self, record):
"""Emit a log record, switching files if date has changed"""
# Check if we need to switch to a new file
if self._update_file_if_needed():
# Close current file and open new one
if self.stream:
self.stream.close()
self.stream = None
# Update the baseFilename for the parent class
self.baseFilename = self.current_file
# Reopen the stream
if not self.delay:
self.stream = self._open()
# Call parent emit method
super().emit(record)
class AuditLogger:
"""Centralized audit logging system"""
def __init__(self):
self.logger = None
self._setup_audit_logger()
def _setup_audit_logger(self):
"""Setup the audit logger with daily file rotation"""
try:
# Get log directory from config
logDir = APP_CONFIG.get("APP_LOGGING_LOG_DIR", "./")
if not os.path.isabs(logDir):
# If relative path, make it relative to the gateway directory
gatewayDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
logDir = os.path.join(gatewayDir, logDir)
# Ensure log directory exists
os.makedirs(logDir, exist_ok=True)
# Create audit logger
self.logger = logging.getLogger('audit')
self.logger.setLevel(logging.INFO)
# Remove any existing handlers to avoid duplicates
for handler in self.logger.handlers[:]:
self.logger.removeHandler(handler)
# Create daily rotating file handler for audit log
rotationSize = int(APP_CONFIG.get("APP_LOGGING_ROTATION_SIZE", 10485760)) # Default: 10MB
backupCount = int(APP_CONFIG.get("APP_LOGGING_BACKUP_COUNT", 5))
fileHandler = DailyRotatingFileHandler(
log_dir=logDir,
filename_prefix="log_audit",
max_bytes=rotationSize,
backup_count=backupCount
)
# Create formatter for audit log
auditFormatter = logging.Formatter(
fmt="%(asctime)s | %(message)s",
datefmt="%Y-%m-%d %H:%M:%S"
)
fileHandler.setFormatter(auditFormatter)
# Add handler to logger
self.logger.addHandler(fileHandler)
# Prevent propagation to root logger
self.logger.propagate = False
except Exception as e:
# Fallback to standard logger if audit setup fails
self.logger = logging.getLogger(__name__)
self.logger.error(f"Failed to setup audit logger: {str(e)}")
def log_event(self,
user_id: str,
mandate_id: str,
category: str,
action: str,
details: str = "",
timestamp: Optional[datetime] = None) -> None:
"""
Log an audit event
Args:
user_id: User identifier
mandate_id: Mandate identifier (can be empty if not applicable)
category: Event category (e.g., 'key', 'access', 'data')
action: Specific action (e.g., 'decode', 'login', 'logout')
details: Additional details about the event
timestamp: Optional custom timestamp (defaults to current time)
"""
try:
if not self.logger:
return
# Use provided timestamp or current time (note: the handler's formatter
# prepends %(asctime)s, so a custom timestamp is accepted but not applied
# to the written entry)
if timestamp is None:
timestamp = datetime.now()
# Format the audit log entry
# Format: timestamp | userid | mandateid | category | action | details
audit_entry = f"{user_id} | {mandate_id} | {category} | {action} | {details}"
# Log the event
self.logger.info(audit_entry)
except Exception as e:
# Use standard logger as fallback
logging.getLogger(__name__).error(f"Failed to log audit event: {str(e)}")
def log_key_access(self, user_id: str, mandate_id: str, key_name: str, action: str) -> None:
"""Log key access events (decode/encode)"""
self.log_event(
user_id=user_id,
mandate_id=mandate_id,
category="key",
action=action,
details=key_name
)
def log_user_access(self, user_id: str, mandate_id: str, action: str, success_info: str = "") -> None:
"""Log user access events (login/logout)"""
self.log_event(
user_id=user_id,
mandate_id=mandate_id,
category="access",
action=action,
details=success_info
)
def log_data_access(self, user_id: str, mandate_id: str, action: str, details: str = "") -> None:
"""Log data access events"""
self.log_event(
user_id=user_id,
mandate_id=mandate_id,
category="data",
action=action,
details=details
)
def log_security_event(self, user_id: str, mandate_id: str, action: str, details: str = "") -> None:
"""Log security-related events"""
self.log_event(
user_id=user_id,
mandate_id=mandate_id,
category="security",
action=action,
details=details
)
# Global audit logger instance
audit_logger = AuditLogger()
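# Usage sketch (values are illustrative): the call below would append a line
# like "2025-09-22 00:39:15 | 42 | 7 | access | login | local_auth_success"
# to log_audit_<YYYYMMDD>.log in the configured log directory.
# audit_logger.log_user_access(user_id="42", mandate_id="7", action="login", success_info="local_auth_success")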

View file

@ -7,8 +7,14 @@ config.ini files and environment variables stored in .env files, using a flat st
import os
import logging
import json
import base64
import time
from typing import Any, Dict, Optional
from pathlib import Path
from cryptography.fernet import Fernet
from cryptography.hazmat.primitives import hashes
from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC
# Set up basic logging for configuration loading
logging.basicConfig(
@ -119,21 +125,44 @@ class Configuration:
try:
with open(envPath, 'r') as f:
for line in f:
line = line.strip()
# Skip empty lines and comments
if not line or line.startswith('#'):
continue
lines = f.readlines()
# Parse key-value pairs
if '=' in line:
key, value = line.split('=', 1)
key = key.strip()
value = value.strip()
i = 0
while i < len(lines):
line = lines[i].strip()
# Add directly to data dictionary
# Skip empty lines and comments
if not line or line.startswith('#'):
i += 1
continue
# Parse key-value pairs
if '=' in line:
key, value = line.split('=', 1)
key = key.strip()
value = value.strip()
# Check if value starts with { (JSON object)
if value.startswith('{'):
# Collect all lines until we find the closing }
json_lines = [value]
i += 1
brace_count = value.count('{') - value.count('}')
while i < len(lines) and brace_count > 0:
json_lines.append(lines[i].rstrip('\n'))
brace_count += lines[i].count('{') - lines[i].count('}')
i += 1
# Join all lines and create the full JSON value
full_json_value = '\n'.join(json_lines)
self._data[key] = full_json_value
else:
# Single line value
self._data[key] = value
i += 1
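# Example .env entry this loop accepts (the key name appears in this repo;
# the JSON body is illustrative and may span multiple lines):
# Connector_GoogleSpeech_API_KEY_SECRET={
#   "type": "service_account",
#   "project_id": "my-project"
# }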
logger.info(f"Loaded environment variables from {envPath.absolute()}")
# Also load system environment variables (don't override existing)
@ -158,7 +187,7 @@ class Configuration:
logger.info("Environment file has changed, reloading...")
self._loadEnv()
def get(self, key: str, default: Any = None) -> Any:
def get(self, key: str, default: Any = None, user_id: str = "system") -> Any:
"""Get configuration value with optional default"""
self.checkForUpdates() # Check for file changes
@ -166,10 +195,24 @@ class Configuration:
value = self._data[key]
# Handle secrets (keys ending with _SECRET)
if key.endswith("_SECRET"):
return handleSecret(value)
# Handle JSON secrets (keys ending with _API_KEY that contain JSON)
elif key.endswith("_API_KEY") and value.startswith("{"):
return handleJsonSecret(value)
# Log audit event for secret key access
try:
from modules.shared.auditLogger import audit_logger
audit_logger.log_key_access(
user_id=user_id,
mandate_id="system",
key_name=key,
action="decode"
)
except Exception:
# Don't fail if audit logging fails
pass
if value.startswith("{") and value.endswith("}"):
# JSON-shaped secret values (e.g. full service account JSON keys)
return handleSecretJson(value, user_id, key)
else:
return handleSecretText(value, user_id, key)
return value
return default
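# Usage sketch: secrets are decrypted (and audited) transparently on access.
# api_key = APP_CONFIG.get("Connector_WebTavily_API_KEY_SECRET", user_id="42")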
@ -177,7 +220,7 @@ class Configuration:
"""Enable attribute-style access to configuration"""
self.checkForUpdates() # Check for file changes
value = self.get(name)
value = self.get(name, user_id="system")
if value is None:
raise AttributeError(f"Configuration key '{name}' not found")
return value
@ -191,42 +234,306 @@ class Configuration:
"""Set a configuration value (for testing/overrides)"""
self._data[key] = value
def handleSecretText(value: str, user_id: str = "system", key_name: str = "unknown") -> str:
    """
    Handle secret values with encryption/decryption support.

    Args:
        value: The secret value to handle (may be encrypted)
        user_id: The user ID making the request (default: "system")
        key_name: The name of the key being decrypted (default: "unknown")

    Returns:
        str: Processed secret value (decrypted if encrypted)
    """
    if _is_encrypted_value(value):
        return decrypt_value(value, user_id, key_name)
    return value
def handleSecretJson(value: str, user_id: str = "system", key_name: str = "unknown") -> str:
    """
    Handle JSON secret values (like Google service account keys) with encryption/decryption support.
    Validates that the value is valid JSON after decryption.

    Args:
        value: The JSON secret value to handle (may be encrypted)
        user_id: The user ID making the request (default: "system")
        key_name: The name of the key being decrypted (default: "unknown")

    Returns:
        str: Processed JSON secret value (decrypted if encrypted)

    Raises:
        ValueError: If the value is not valid JSON after decryption
    """
    import json
    # Decrypt if encrypted
    if _is_encrypted_value(value):
        decrypted_value = decrypt_value(value, user_id, key_name)
    else:
        decrypted_value = value
    try:
        # Validate that it's valid JSON
        json.loads(decrypted_value)
        return decrypted_value
    except json.JSONDecodeError as e:
        raise ValueError(f"Invalid JSON in secret value: {e}")
# Global rate limiting tracking
# Structure: {user_id: {key_name: [timestamps]}}
_decryption_attempts = {}
def _get_master_key() -> bytes:
"""
Get the master key for the current environment.
Returns:
bytes: The master key for encryption/decryption
Raises:
ValueError: If no master key is found
"""
# Get the key location from config
key_location = APP_CONFIG.get('APP_KEY_SYSVAR')
env_type = APP_CONFIG.get('APP_ENV_TYPE', 'dev')
if not key_location:
raise ValueError("APP_KEY_SYSVAR not configured")
# First try to get from environment variable
master_key = os.environ.get(key_location)
if master_key:
# If found in environment, use it directly
return master_key.encode('utf-8')
# If not in environment, try to read from file
if os.path.exists(key_location):
try:
with open(key_location, 'r') as f:
content = f.read().strip()
# Parse the key file format: env = key
lines = content.split('\n')
for line in lines:
line = line.strip()
if not line or line.startswith('#'):
continue
if '=' in line:
key_env, key_value = line.split('=', 1)
key_env = key_env.strip()
key_value = key_value.strip()
if key_env == env_type:
return key_value.encode('utf-8')
raise ValueError(f"No key found for environment '{env_type}' in {key_location}")
except Exception as e:
raise ValueError(f"Error reading key file {key_location}: {e}")
raise ValueError(f"Master key not found. Checked environment variable '{key_location}' and file path")
def _derive_encryption_key(master_key: bytes) -> bytes:
"""
Derive a 32-byte encryption key from the master key using PBKDF2.
Args:
master_key: The master key bytes
Returns:
bytes: 32-byte derived key suitable for Fernet
"""
# Use a fixed salt for consistency (in production, consider using a random salt stored separately)
salt = b'poweron_config_salt_2025'
kdf = PBKDF2HMAC(
algorithm=hashes.SHA256(),
length=32,
salt=salt,
iterations=100000,
)
return base64.urlsafe_b64encode(kdf.derive(master_key))
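# Sketch of the derivation round trip (placeholder master key; Fernet accepts
# the urlsafe-base64 32-byte key produced above):
#
#     key = _derive_encryption_key(b"example-master-key")
#     f = Fernet(key)
#     f.decrypt(f.encrypt(b"hello"))  # -> b'hello'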
def _is_encrypted_value(value: str) -> bool:
"""
Check if a value is encrypted (starts with environment-specific prefix).
Args:
value: The value to check
Returns:
bool: True if encrypted, False otherwise
"""
if not value or not isinstance(value, str):
return False
# Check for environment-specific encryption prefixes
env_type = APP_CONFIG.get('APP_ENV_TYPE', 'dev').upper()
expected_prefix = f"{env_type}_ENC:"
return value.startswith(expected_prefix)
def _get_encryption_prefix(env_type: str) -> str:
"""
Get the encryption prefix for the given environment type.
Args:
env_type: The environment type (dev, int, prod, etc.)
Returns:
str: The encryption prefix
"""
return f"{env_type.upper()}_ENC:"
def _check_decryption_rate_limit(user_id: str, key_name: str, max_per_second: int = 10) -> bool:
"""
Check if decryption is allowed based on rate limiting (max 10 per second per user per key).
Args:
user_id: The user ID making the request
key_name: The name of the key being decrypted
max_per_second: Maximum decryptions per second (default: 10)
Returns:
bool: True if allowed, False if rate limited
"""
current_time = time.time()
# Initialize tracking for this user if not exists
if user_id not in _decryption_attempts:
_decryption_attempts[user_id] = {}
# Initialize tracking for this key if not exists
if key_name not in _decryption_attempts[user_id]:
_decryption_attempts[user_id][key_name] = []
# Clean old attempts (older than 1 second)
_decryption_attempts[user_id][key_name] = [
timestamp for timestamp in _decryption_attempts[user_id][key_name]
if current_time - timestamp < 1.0
]
# Check if we're within rate limit
if len(_decryption_attempts[user_id][key_name]) >= max_per_second:
logger.warning(f"Decryption rate limit exceeded for user '{user_id}' key '{key_name}' ({max_per_second}/sec)")
return False
# Record this attempt
_decryption_attempts[user_id][key_name].append(current_time)
return True
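# Behaviour sketch, assuming all twelve calls land within one second
# (user and key names are illustrative):
#
#     results = [_check_decryption_rate_limit("alice", "DEMO_SECRET") for _ in range(12)]
#     results.count(True)   # -> 10, the per-second budget
#     results.count(False)  # -> 2, rejected until the one-second window rolls over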
def encrypt_value(value: str, env_type: str = None, user_id: str = "system", key_name: str = "unknown") -> str:
"""
Encrypt a value using the master key for the specified environment.
Args:
value: The plain text value to encrypt
env_type: The environment type (dev, int, prod). If None, uses current environment.
user_id: The user ID making the request (default: "system")
key_name: The name of the key being encrypted (default: "unknown")
Returns:
str: The encrypted value with prefix
Raises:
ValueError: If encryption fails
"""
if env_type is None:
env_type = APP_CONFIG.get('APP_ENV_TYPE', 'dev')
try:
master_key = _get_master_key()
derived_key = _derive_encryption_key(master_key)
fernet = Fernet(derived_key)
# Encrypt the value
encrypted_bytes = fernet.encrypt(value.encode('utf-8'))
encrypted_b64 = base64.urlsafe_b64encode(encrypted_bytes).decode('utf-8')
# Add environment prefix
prefix = _get_encryption_prefix(env_type)
encrypted_value = f"{prefix}{encrypted_b64}"
# Log audit event for encryption
try:
from modules.shared.auditLogger import audit_logger
audit_logger.log_key_access(
user_id=user_id,
mandate_id="system",
key_name=key_name,
action="encrypt"
)
except Exception:
# Don't fail if audit logging fails
pass
return encrypted_value
except Exception as e:
raise ValueError(f"Encryption failed: {e}")
def decrypt_value(encrypted_value: str, user_id: str = "system", key_name: str = "unknown") -> str:
"""
Decrypt a value using the master key for the current environment.
Args:
encrypted_value: The encrypted value with prefix
user_id: The user ID making the request (default: "system")
key_name: The name of the key being decrypted (default: "unknown")
Returns:
str: The decrypted plain text value
Raises:
ValueError: If decryption fails
"""
if not _is_encrypted_value(encrypted_value):
return encrypted_value # Return as-is if not encrypted
# Check rate limiting (10 per second per user per key)
if not _check_decryption_rate_limit(user_id, key_name, max_per_second=10):
raise ValueError(f"Decryption rate limit exceeded for user '{user_id}' key '{key_name}' (10/sec)")
try:
# Extract the encrypted part (remove prefix)
env_type = APP_CONFIG.get('APP_ENV_TYPE', 'dev')
env_type_upper = env_type.upper()
expected_prefix = f"{env_type_upper}_ENC:"
if not encrypted_value.startswith(expected_prefix):
raise ValueError(f"Invalid encryption prefix. Expected {expected_prefix}")
encrypted_part = encrypted_value[len(expected_prefix):]
# Get master key and derive encryption key
master_key = _get_master_key()
derived_key = _derive_encryption_key(master_key)
fernet = Fernet(derived_key)
# Decode and decrypt
encrypted_bytes = base64.urlsafe_b64decode(encrypted_part.encode('utf-8'))
decrypted_bytes = fernet.decrypt(encrypted_bytes)
decrypted_value = decrypted_bytes.decode('utf-8')
# Log audit event for decryption
try:
from modules.shared.auditLogger import audit_logger
audit_logger.log_key_access(
user_id=user_id,
mandate_id="system",
key_name=key_name,
action="decrypt"
)
except Exception:
# Don't fail if audit logging fails
pass
return decrypted_value
except Exception as e:
raise ValueError(f"Decryption failed: {e}")
# Create the global APP_CONFIG instance
APP_CONFIG = Configuration()
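Putting the pieces together, a minimal round-trip sketch; it assumes APP_KEY_SYSVAR resolves to a master key for the current environment, and the user and key names are illustrative audit metadata:

from modules.shared.configuration import encrypt_value, decrypt_value

token = encrypt_value("s3cret", user_id="alice", key_name="DEMO_SECRET")
print(token.split(":", 1)[0])  # e.g. "DEV_ENC" in a dev environment
print(decrypt_value(token, user_id="alice", key_name="DEMO_SECRET"))  # "s3cret"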
File diff suppressed because it is too large
@ -1,48 +0,0 @@
MERMAID DIAGRAM:
Can you create the chart "wiki/diagramm_komponenten.mermaid": produce a component diagram based on the current code in poweron/*.
If the document exists, add missing components and remove obsolete components.
In box texts, use <br> instead of \n.
For all subgraphs, add the path on a separate line so the module can be located in the code.
Read all code modules carefully to identify all components and their relations.
Connectors without text, only lines.
Add a connector between frontend and backend (apiCalls.js -> app.py).
Connect app.py (main application module) with the route*.py modules.
Put all items of the frontend into subgraph "Frontend".
Put all items of the gateway into subgraph "Gateway".
Put the following boxes into a dedicated subgraph within their existing subgraph:
- workflowManager.py, workflowAgentsRegistry.py, documentProcessor.py --> "Workflow"
- mimeUtils.py, defAttributes.py, configuration.py, autho.py --> "Shared"
- agent*.py --> "Agents"
- workflow*.js --> "Workflow"
- all *.js in js/modules/ not starting with workflow* --> "Administration"
- formGeneric.js: not into subgraph "Shared", but into a separate subgraph "Shared
Connect main.js (the main app in the frontend) to navigation.js, globalState.js, login.js, register.js, msftCall.js, config.js.
Connect navigation.js to moduleLoader.js.
Connect moduleLoader.js to workflow.js and all *.js in js/modules/ not starting with workflow*.
Connect all *.js in js/modules/ not starting with workflow* --> formGeneric.js.
Connect formGeneric.js --> apiCalls.js.
Use underscores (e.g. Backend_Python, Workflow_Modules, etc.) for all subgraph titles.
If adding a legend, give the legend entries the same colors as the items they reference.
@ -1,39 +0,0 @@
### Launch APP
cd .\frontend_agents\
cls; python ./server.py
conda activate C:\Users\pmots\anaconda3\envs\poweron
cd .\gateway\
cls; uvicorn app:app --host 0.0.0.0 --port 8000
### Git permanent login with VS Code
git remote set-url origin https://valueon@github.com/valueonag/gateway
git remote set-url origin https://valueon@github.com/valueonag/frontend_agents
git remote set-url origin https://valueon@github.com/valueonag/wiki
git remote set-url origin https://valueon@github.com/valueonag/customer-svbe
git remote set-url origin https://valueon@github.com/valueonag/customer-althaus
### git delete workflow runs (cleanup)
gh auth login
Navigate to your repository folder (if not already there):
bash: cd /path/to/your/repository
List workflow runs:
bash: gh run list
Delete a specific workflow run:
bash: gh run delete [RUN_ID]
Delete all completed workflow runs (to clear up space):
bash: gh run list --status completed --json databaseId -q '.[].databaseId' | xargs -I{} gh run delete {}
powershell:
$runs = gh run list --status completed --json databaseId -q ".[].databaseId" | ConvertFrom-Json
foreach ($run in $runs) {
Write-Host "Deleting run $run"
echo "y" | gh run delete $run
}
query
@ -1 +0,0 @@
postgresql
@ -10,6 +10,7 @@ slowapi==0.1.8 # For rate limiting
## Authentication & Security
python-jose[cryptography]==3.3.0 # For JWT tokens
cryptography>=41.0.0 # For encryption/decryption of configuration values
passlib==1.7.4
argon2-cffi>=21.3.0 # For password hashing in gateway_interface.py
google-auth-oauthlib==1.2.0 # For Google OAuth
@ -1,77 +0,0 @@
#!/usr/bin/env python3
"""
Test script to verify the Excel header parsing fix
"""
import sys
import os
import pandas as pd
from io import BytesIO
# Add the gateway modules to the path
sys.path.append(os.path.join(os.path.dirname(__file__), 'modules'))
from modules.interfaces.interfaceTicketObjects import TicketSharepointSyncInterface
def test_excel_header_parsing():
"""Test the Excel header parsing fix"""
print("=== Testing Excel Header Parsing Fix ===\n")
# Create a mock interface instance
interface = TicketSharepointSyncInterface(
connector_ticket=None,
connector_sharepoint=None,
task_sync_definition={
"ID": ["get", ["id"]],
"Summary": ["get", ["fields", "summary"]],
"Status": ["get", ["fields", "status", "name"]],
"Assignee": ["put", ["fields", "assignee", "displayName"]]
},
sync_folder="test",
sync_file="test.xlsx",
backup_folder="backup",
audit_folder="audit",
site_id="test"
)
# Test data
test_data = [
{"ID": "TEST-1", "Summary": "Test Issue 1", "Status": "Open", "Assignee": "John Doe"},
{"ID": "TEST-2", "Summary": "Test Issue 2", "Status": "Closed", "Assignee": "Jane Smith"},
]
# Create Excel content
print("1. Creating Excel content...")
excel_content = interface._create_excel_content(test_data)
print(f" ✓ Created Excel content: {len(excel_content)} bytes")
# Parse it back
print("2. Parsing Excel content...")
try:
parsed_data, parsed_headers = interface._parse_excel_content(excel_content)
print(f" ✓ Parsed Excel content: {len(parsed_data)} records")
print(f" ✓ Headers type: header1={type(parsed_headers['header1'])}, header2={type(parsed_headers['header2'])}")
print(f" ✓ Headers content: header1='{parsed_headers['header1']}', header2='{parsed_headers['header2']}'")
# Test creating content with the parsed headers
print("3. Testing round-trip with parsed headers...")
new_excel_content = interface._create_excel_content(test_data, parsed_headers)
print(f" ✓ Created new Excel content: {len(new_excel_content)} bytes")
# Parse the new content
final_data, final_headers = interface._parse_excel_content(new_excel_content)
print(f" ✓ Final parse successful: {len(final_data)} records")
print(f" ✓ Final headers: header1='{final_headers['header1']}', header2='{final_headers['header2']}'")
print("\n✅ All tests passed! The header parsing fix works correctly.")
return True
except Exception as e:
print(f" ✗ Error during parsing: {e}")
import traceback
traceback.print_exc()
return False
if __name__ == "__main__":
success = test_excel_header_parsing()
exit(0 if success else 1)
@ -0,0 +1,375 @@
#!/usr/bin/env python3
"""
Tool for encrypting configuration values.
This tool allows developers to encrypt secret values for use in configuration files.
It supports both text and JSON values and automatically determines the environment.
It can also encrypt all *_SECRET keys in an environment file at once.
Usage:
# Encrypt a single value
python tool_encrypt_config_value.py --value "my_secret_value" --env dev
python tool_encrypt_config_value.py --file "path/to/file.json" --env prod
# Encrypt all secrets in a file
python tool_encrypt_config_value.py --encrypt-all env_dev.env --env dev
python tool_encrypt_config_value.py --encrypt-all env_prod.env --env prod --dry-run
# Decrypt a value (for testing)
python tool_encrypt_config_value.py --decrypt "DEV_ENC:encrypted_value"
"""
import sys
import os
import json
import argparse
import shutil
from pathlib import Path
from datetime import datetime
# Add the modules directory to the Python path
sys.path.insert(0, str(Path(__file__).parent / 'modules'))
from shared.configuration import encrypt_value, decrypt_value, _is_encrypted_value
def find_secret_keys_in_file(file_path: Path) -> list:
"""
Find all *_SECRET keys in an environment file that are not encrypted.
Args:
file_path: Path to the environment file
Returns:
list: List of tuples (line_number, key, value, full_line)
"""
secret_keys = []
if not file_path.exists():
return secret_keys
try:
with open(file_path, 'r', encoding='utf-8') as f:
lines = f.readlines()
i = 0
while i < len(lines):
line = lines[i].strip()
# Skip empty lines and comments
if not line or line.startswith('#'):
i += 1
continue
# Check if line contains a key-value pair
if '=' in line:
key, value = line.split('=', 1)
key = key.strip()
value = value.strip()
# Check if it's a secret key and not already encrypted
if key.endswith('_SECRET') and value and not _is_encrypted_value(value):
# Check if value starts with { (JSON object)
if value.startswith('{'):
# Collect all lines until we find the closing }
json_lines = [value]
start_line = i + 1
i += 1
brace_count = value.count('{') - value.count('}')
while i < len(lines) and brace_count > 0:
json_lines.append(lines[i].rstrip('\n'))
brace_count += lines[i].count('{') - lines[i].count('}')
i += 1
# Join all lines and create the full JSON value
full_json_value = '\n'.join(json_lines)
secret_keys.append((start_line, key, full_json_value, line))
i -= 1 # Adjust for the loop increment
else:
# Single line value
secret_keys.append((i + 1, key, value, line))
# Check if it's a secret key with multiline JSON (value is just "{")
elif key.endswith('_SECRET') and value == '{' and not _is_encrypted_value(value):
# Collect all lines until we find the closing }
json_lines = [value]
start_line = i + 1
i += 1
brace_count = 1 # We already have one opening brace
while i < len(lines) and brace_count > 0:
json_lines.append(lines[i].rstrip('\n'))
brace_count += lines[i].count('{') - lines[i].count('}')
i += 1
# Join all lines and create the full JSON value
full_json_value = '\n'.join(json_lines)
secret_keys.append((start_line, key, full_json_value, line))
i -= 1 # Adjust for the loop increment
i += 1
except Exception as e:
print(f"Error reading {file_path}: {e}")
return secret_keys
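# Illustrative example: given an env file containing
#
#     API_TOKEN_SECRET = plain-token
#     GOOGLE_SA_SECRET = {
#         "type": "service_account"
#     }
#     DONE_SECRET = DEV_ENC:gAAAA...
#
# the scanner reports API_TOKEN_SECRET as a single-line secret and
# GOOGLE_SA_SECRET as one multi-line JSON value, while DONE_SECRET is
# skipped because it already carries the current environment's prefix.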
def backup_file(file_path: Path) -> Path:
"""
Create a backup of the file before modification.
Args:
file_path: Path to the file to backup
Returns:
Path: Path to the backup file
"""
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
backup_path = file_path.with_suffix(f'.{timestamp}.backup')
shutil.copy2(file_path, backup_path)
return backup_path
def encrypt_all_secrets_in_file(file_path: Path, env_type: str, dry_run: bool = False, create_backup: bool = True) -> dict:
"""
Encrypt all non-encrypted secrets in a file.
Args:
file_path: Path to the environment file
env_type: The environment type
dry_run: If True, only show what would be changed
create_backup: If True, create a backup before modifying
Returns:
dict: Results of the encryption process
"""
results = {
'file': str(file_path),
'env_type': env_type,
'secrets_found': 0,
'secrets_encrypted': 0,
'errors': [],
'backup_created': None
}
# Find all secret keys
secret_keys = find_secret_keys_in_file(file_path)
results['secrets_found'] = len(secret_keys)
if not secret_keys:
return results
print(f"\n📁 Processing {file_path.name} ({env_type}):")
print(f" Found {len(secret_keys)} non-encrypted secrets")
if dry_run:
print(" [DRY RUN] Would encrypt the following secrets:")
for line_num, key, value, full_line in secret_keys:
print(f" Line {line_num}: {key} = {value[:50]}{'...' if len(value) > 50 else ''}")
return results
# Create backup if requested
if create_backup:
try:
backup_path = backup_file(file_path)
results['backup_created'] = str(backup_path)
print(f" 📋 Backup created: {backup_path.name}")
except Exception as e:
results['errors'].append(f"Failed to create backup: {e}")
print(f" ⚠️ Warning: Could not create backup: {e}")
# Read the file content
try:
with open(file_path, 'r', encoding='utf-8') as f:
lines = f.readlines()
except Exception as e:
results['errors'].append(f"Failed to read file: {e}")
return results
# Process each secret key
for line_num, key, value, full_line in secret_keys:
try:
print(f" 🔐 Encrypting {key}...")
# Encrypt the value using the existing function
encrypted_value = encrypt_value(value, env_type)
# Replace the line in the file content
new_line = f"{key} = {encrypted_value}\n"
lines[line_num - 1] = new_line
# If this was a multiline JSON, we need to remove the remaining lines
if value.startswith('{') and '\n' in value:
# Count how many lines the original JSON spanned
json_lines = value.split('\n')
lines_to_remove = len(json_lines) - 1 # -1 because we already replaced the first line
# Remove the remaining lines
for i in range(line_num, line_num + lines_to_remove):
if i < len(lines):
lines[i] = ""
results['secrets_encrypted'] += 1
print(f" ✓ Encrypted successfully")
except Exception as e:
error_msg = f"Failed to encrypt {key}: {e}"
results['errors'].append(error_msg)
print(f"{error_msg}")
# Write the modified content back to the file
if results['secrets_encrypted'] > 0:
try:
with open(file_path, 'w', encoding='utf-8') as f:
f.writelines(lines)
print(f" 💾 File updated successfully")
except Exception as e:
results['errors'].append(f"Failed to write file: {e}")
print(f" ✗ Failed to write file: {e}")
return results
def main():
parser = argparse.ArgumentParser(description='Encrypt configuration values')
parser.add_argument('--value', '-v', help='Plain text value to encrypt')
parser.add_argument('--file', '-f', help='File containing the value to encrypt')
parser.add_argument('--env', '-e', choices=['dev', 'int', 'prod'],
help='Environment type (default: current environment)')
parser.add_argument('--decrypt', '-d', help='Decrypt an encrypted value (for testing)')
parser.add_argument('--interactive', '-i', action='store_true',
help='Interactive mode - prompt for value')
parser.add_argument('--encrypt-all', '-a', help='Encrypt all *_SECRET keys in the specified file')
parser.add_argument('--dry-run', action='store_true',
help='Show what would be changed without making changes (for --encrypt-all)')
parser.add_argument('--no-backup', action='store_true',
help='Skip creating backup files (for --encrypt-all)')
args = parser.parse_args()
try:
# Handle encrypt-all functionality
if args.encrypt_all:
file_path = Path(args.encrypt_all)
if not file_path.exists():
print(f"Error: File not found: {file_path}")
return 1
if not args.env:
print("Error: --env is required when using --encrypt-all")
return 1
print("🔐 PowerOn Secret Encryption Tool")
print("=" * 50)
if args.dry_run:
print("🔍 DRY RUN MODE - No changes will be made")
print()
results = encrypt_all_secrets_in_file(
file_path,
args.env,
dry_run=args.dry_run,
create_backup=not args.no_backup
)
# Summary
print("\n" + "=" * 50)
print("📊 SUMMARY")
print("=" * 50)
print(f"File processed: {file_path.name}")
print(f"Secrets found: {results['secrets_found']}")
if not args.dry_run:
print(f"Secrets encrypted: {results['secrets_encrypted']}")
print(f"Errors: {len(results['errors'])}")
if len(results['errors']) == 0 and results['secrets_encrypted'] > 0:
print("\n🎉 All secrets encrypted successfully!")
elif len(results['errors']) > 0:
print(f"\n⚠️ Completed with {len(results['errors'])} errors")
else:
print("\n✅ No secrets needed encryption")
else:
print(f"Secrets that would be encrypted: {results['secrets_found']}")
# Show backup information
if results['backup_created']:
print(f"\n📋 Backup created: {Path(results['backup_created']).name}")
# Show errors if any
if results['errors']:
print(f"\n❌ Errors encountered:")
for error in results['errors']:
print(f" - {error}")
return 0 if len(results['errors']) == 0 else 1
# Handle decryption
if args.decrypt:
if _is_encrypted_value(args.decrypt):
decrypted = decrypt_value(args.decrypt)
print(f"Decrypted value: {decrypted}")
else:
print("Error: Value does not appear to be encrypted (missing ENV_ENC: prefix)")
return
# Determine the value to encrypt
value_to_encrypt = None
if args.value:
value_to_encrypt = args.value
elif args.file:
if not os.path.exists(args.file):
print(f"Error: File not found: {args.file}")
return
with open(args.file, 'r', encoding='utf-8') as f:
value_to_encrypt = f.read().strip()
elif args.interactive:
print("Enter the value to encrypt (press Ctrl+D when done):")
try:
value_to_encrypt = sys.stdin.read().strip()
except EOFError:
print("Error: No input provided")
return
else:
# Interactive mode by default
print("Enter the value to encrypt (press Ctrl+D when done):")
try:
value_to_encrypt = sys.stdin.read().strip()
except EOFError:
print("Error: No input provided")
return
if not value_to_encrypt:
print("Error: No value provided to encrypt")
return
# Validate JSON if it looks like JSON
if value_to_encrypt.strip().startswith('{'):
try:
json.loads(value_to_encrypt)
print("✓ Valid JSON detected")
except json.JSONDecodeError as e:
print(f"Warning: Value looks like JSON but is invalid: {e}")
response = input("Continue anyway? (y/N): ")
if response.lower() != 'y':
return
# Encrypt the value
encrypted_value = encrypt_value(value_to_encrypt, args.env)
print(f"\n✓ Encryption successful!")
print(f"Environment: {args.env or 'current'}")
print(f"Encrypted value:")
print(f"{encrypted_value}")
print(f"\nCopy the above value to your configuration file.")
# Show usage example
print(f"\nUsage in config file:")
print(f"MY_SECRET_KEY = {encrypted_value}")
except Exception as e:
print(f"Error: {e}")
sys.exit(1)
if __name__ == '__main__':
main()
@ -0,0 +1,89 @@
#!/usr/bin/env python3
"""
Generate secure master keys for all environments.
This tool generates cryptographically secure 256-bit master keys for all environments
and updates the key.txt file with the new keys.
Usage:
python generate_master_keys.py
python generate_master_keys.py --output "path/to/key.txt"
"""
import sys
import os
import secrets
import base64
import argparse
from datetime import datetime
from pathlib import Path
def generate_master_key():
"""Generate a secure 256-bit master key."""
# Generate 32 random bytes (256 bits)
key_bytes = secrets.token_bytes(32)
# Encode as base64 for easy storage
return base64.urlsafe_b64encode(key_bytes).decode('utf-8')
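# For example (illustrative): each generated key decodes back to exactly
# 32 random bytes.
#
#     key = generate_master_key()
#     len(base64.urlsafe_b64decode(key))  # -> 32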
def main():
parser = argparse.ArgumentParser(description='Generate secure master keys for all environments')
parser.add_argument('--output', '-o',
default='../local/key.txt',
help='Output file path (default: ../local/key.txt)')
parser.add_argument('--force', '-f', action='store_true',
help='Overwrite existing key file without confirmation')
args = parser.parse_args()
# Convert to absolute path
output_path = Path(args.output).resolve()
# Check if file exists and get confirmation
if output_path.exists() and not args.force:
response = input(f"File {output_path} already exists. Overwrite? (y/N): ")
if response.lower() != 'y':
print("Operation cancelled.")
return
try:
# Generate keys for all environments
keys = {
'prod': generate_master_key(),
'int': generate_master_key(),
'dev': generate_master_key()
}
# Create output content
content = []
content.append("# PowerOn Master Keys")
content.append("# Generated on: " + str(Path(__file__).stat().st_mtime))
content.append("# WARNING: Keep this file secure and never commit to version control!")
content.append("")
for env, key in keys.items():
content.append(f"{env} = {key}")
# Ensure output directory exists
output_path.parent.mkdir(parents=True, exist_ok=True)
# Write to file
with open(output_path, 'w', encoding='utf-8') as f:
f.write('\n'.join(content))
print("✓ Master keys generated successfully!")
print(f"Output file: {output_path}")
print("\nGenerated keys:")
for env, key in keys.items():
print(f" {env}: {key[:20]}...")
print(f"\n⚠️ IMPORTANT SECURITY NOTES:")
print(f" - Keep this file secure and never commit to version control")
print(f" - Store production keys in Azure environment variables")
print(f" - Share development keys securely with team members")
print(f" - Consider rotating keys regularly")
except Exception as e:
print(f"Error generating keys: {e}")
sys.exit(1)
if __name__ == '__main__':
main()