cleaned key handling and security
parent 875b188238
commit 168d66d167
51 changed files with 2468 additions and 2119 deletions
app.py (96 changed lines)
@@ -8,19 +8,79 @@ from zoneinfo import ZoneInfo
import logging
from logging.handlers import RotatingFileHandler
from datetime import timedelta
from datetime import timedelta, datetime
import pathlib

from modules.shared.configuration import APP_CONFIG
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.triggers.cron import CronTrigger


class DailyRotatingFileHandler(RotatingFileHandler):
    """
    A rotating file handler that automatically switches to a new file when the date changes.
    The log file name includes the current date and switches at midnight.
    """

    def __init__(self, log_dir, filename_prefix, max_bytes=10485760, backup_count=5, **kwargs):
        self.log_dir = log_dir
        self.filename_prefix = filename_prefix
        self.current_date = None
        self.current_file = None

        # Initialize with today's file
        self._update_file_if_needed()

        # Call parent constructor with current file
        super().__init__(self.current_file, maxBytes=max_bytes, backupCount=backup_count, **kwargs)

    def _update_file_if_needed(self):
        """Update the log file if the date has changed"""
        today = datetime.now().strftime("%Y%m%d")

        if self.current_date != today:
            self.current_date = today
            new_file = os.path.join(self.log_dir, f"{self.filename_prefix}_{today}.log")

            if self.current_file != new_file:
                self.current_file = new_file
                return True
        return False

    def emit(self, record):
        """Emit a log record, switching files if date has changed"""
        # Check if we need to switch to a new file
        if self._update_file_if_needed():
            # Close current file and open new one
            if self.stream:
                self.stream.close()
                self.stream = None

            # Update the baseFilename for the parent class
            self.baseFilename = self.current_file
            # Reopen the stream
            if not self.delay:
                self.stream = self._open()

        # Call parent emit method
        super().emit(record)


def initLogging():
    """Initialize logging with configuration from APP_CONFIG"""
    # Get log level from config (default to INFO if not found)
    logLevelName = APP_CONFIG.get("APP_LOGGING_LOG_LEVEL", "WARNING")
    logLevel = getattr(logging, logLevelName)

    # Get log directory from config
    logDir = APP_CONFIG.get("APP_LOGGING_LOG_DIR", "./")
    if not os.path.isabs(logDir):
        # If relative path, make it relative to the gateway directory
        gatewayDir = os.path.dirname(os.path.abspath(__file__))
        logDir = os.path.join(gatewayDir, logDir)

    # Ensure log directory exists
    os.makedirs(logDir, exist_ok=True)

    # Create formatters - using single line format
    consoleFormatter = logging.Formatter(
        fmt="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
@@ -89,25 +149,15 @@ def initLogging():
    # Add file handler if enabled
    if APP_CONFIG.get("APP_LOGGING_FILE_ENABLED", True):
        # Get log file path and ensure it's absolute
        logFile = APP_CONFIG.get("APP_LOGGING_LOG_FILE", "app.log")
        if not os.path.isabs(logFile):
            # If relative path, make it relative to the gateway directory
            gatewayDir = os.path.dirname(os.path.abspath(__file__))
            logFile = os.path.join(gatewayDir, logFile)

        # Ensure log directory exists
        logDir = os.path.dirname(logFile)
        if logDir:
            os.makedirs(logDir, exist_ok=True)

        # Create daily application log file with automatic date switching
        rotationSize = int(APP_CONFIG.get("APP_LOGGING_ROTATION_SIZE", 10485760)) # Default: 10MB
        backupCount = int(APP_CONFIG.get("APP_LOGGING_BACKUP_COUNT", 5))

        fileHandler = RotatingFileHandler(
            logFile,
            maxBytes=rotationSize,
            backupCount=backupCount
        fileHandler = DailyRotatingFileHandler(
            log_dir=logDir,
            filename_prefix="log_app",
            max_bytes=rotationSize,
            backup_count=backupCount
        )
        fileHandler.setFormatter(fileFormatter)
        fileHandler.addFilter(ChromeDevToolsFilter())
@@ -133,7 +183,15 @@ def initLogging():
    # Log the current logging configuration
    logger = logging.getLogger(__name__)
    logger.info(f"Logging initialized with level {logLevelName}")
    logger.info(f"Log file: {logFile if APP_CONFIG.get('APP_LOGGING_FILE_ENABLED', True) else 'disabled'}")
    logger.info(f"Log directory: {logDir}")

    if APP_CONFIG.get('APP_LOGGING_FILE_ENABLED', True):
        today = datetime.now().strftime("%Y%m%d")
        appLogFile = os.path.join(logDir, f"log_app_{today}.log")
        logger.info(f"Application log file: {appLogFile} (auto-switches daily)")
    else:
        logger.info("Application log file: disabled")

    logger.info(f"Console logging: {'enabled' if APP_CONFIG.get('APP_LOGGING_CONSOLE_ENABLED', True) else 'disabled'}")

# Initialize logging
@@ -154,7 +212,7 @@ async def lifespan(app: FastAPI):
    # Setup APScheduler for JIRA sync
    scheduler = AsyncIOScheduler(timezone=ZoneInfo("Europe/Zurich"))
    try:
        from modules.services.serviceDeltaSync import perform_sync_jira_delta_group
        from modules.features.featureSyncDelta import perform_sync_jira_delta_group
        # Schedule sync every 20 minutes (at minutes 00, 20, 40)
        scheduler.add_job(
            perform_sync_jira_delta_group,
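A quick, hedged illustration of the new handler (not part of the commit): wiring DailyRotatingFileHandler up by hand would look roughly like the sketch below. The "./logs" directory is a placeholder; the "log_app" prefix matches what initLogging uses above.

import logging
import os

# Sketch only: attach the DailyRotatingFileHandler defined in app.py to a logger.
os.makedirs("./logs", exist_ok=True)  # placeholder directory, not from the commit
handler = DailyRotatingFileHandler(log_dir="./logs", filename_prefix="log_app")
handler.setFormatter(logging.Formatter("%(asctime)s - %(levelname)s - %(name)s - %(message)s"))

logger = logging.getLogger("example")
logger.addHandler(handler)
logger.setLevel(logging.INFO)

# Today's records land in ./logs/log_app_YYYYMMDD.log; after midnight the first
# emit() switches to the next day's file, and the size-based rollover inherited
# from RotatingFileHandler still applies per file.
logger.info("hello")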
config.ini (42 changed lines)
@ -5,21 +5,6 @@
|
|||
Auth_ALGORITHM = HS256
|
||||
Auth_TOKEN_TYPE = bearer
|
||||
|
||||
# OpenAI configuration
|
||||
Connector_AiOpenai_API_URL = https://api.openai.com/v1/chat/completions
|
||||
Connector_AiOpenai_API_SECRET = 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
|
||||
Connector_AiOpenai_MODEL_NAME = gpt-4o
|
||||
Connector_AiOpenai_TEMPERATURE = 0.2
|
||||
Connector_AiOpenai_MAX_TOKENS = 2000
|
||||
|
||||
# Anthropic configuration
|
||||
Connector_AiAnthropic_API_URL = https://api.anthropic.com/v1/messages
|
||||
Connector_AiAnthropic_API_SECRET_OLD = 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
|
||||
Connector_AiAnthropic_API_SECRET = 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
|
||||
Connector_AiAnthropic_MODEL_NAME = claude-3-5-sonnet-20241022
|
||||
Connector_AiAnthropic_TEMPERATURE = 0.2
|
||||
Connector_AiAnthropic_MAX_TOKENS = 2000
|
||||
|
||||
# File management configuration
|
||||
File_Management_MAX_UPLOAD_SIZE_MB = 50
|
||||
File_Management_CLEANUP_INTERVAL = 240
|
||||
|
|
@ -36,33 +21,6 @@ Security_LOCK_DURATION_MINUTES = 30
|
|||
# Content Neutralization configuration
|
||||
Content_Neutralization_ENABLED = False
|
||||
|
||||
# Agent Mail configuration
|
||||
Service_MSFT_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
|
||||
Service_MSFT_CLIENT_SECRET = Kxf8Q~2lJIteZ~JaI32kMf1lfaWKATqxXiNiFbzV
|
||||
Service_MSFT_TENANT_ID = common
|
||||
|
||||
# Google Service configuration
|
||||
Service_GOOGLE_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
|
||||
Service_GOOGLE_CLIENT_SECRET = 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
|
||||
|
||||
# Tavily Web Search configuration
|
||||
Connector_WebTavily_API_KEY = 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
|
||||
|
||||
# Google Cloud Speech Services configuration
|
||||
Connector_GoogleSpeech_API_KEY = {
|
||||
"type": "service_account",
|
||||
"project_id": "poweronid",
|
||||
"private_key_id": "88db66e4248326e9baeac4231bc196fd46a9a441",
|
||||
"private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQDTnJuxA+xBL3LA\nPgFILYCsGuppkkdO6d153Q36f2jTj6zpH3OhKMVsaaTBknG2o2+D0Whlk6Yh5rOw\nkWzpMC3y81leRLm5kucERMkBUgd2GL4v16k6m+QGuC3BFlt/XeyuckJNW0V6v/Dy\n3+bSYM7/5o1ftPNWJeAIEWoE/V4wKCYde8RE4Vp1LO5YwhgcM4rRuPmF2OhekpA+\npteYwkY/8/gTTRpZIc8OTsBYRbaMwsjoDj5riuL3boVtkwZwKRb+ZLvupXeU7Ds7\n1305odTcZUwnImHiHfuq83ZJViQiLRNhUAFnQIXPrYLwEpCmzRBGzYHaRlb69ga/\nzqUbKnclAgMBAAECggEAH6W9qHehubioPMAJM7Y6bC2KU/JLNS4csBZd+idb52gG\nwBwIEFjR+H4ZjymhAA4+pe7c4h7MKyh0RI/l7eoFX98Cb+rEq/r1udm1BhGH3s2h\n2UiI8qRQh1YRjF2/nrN5VjhDBOFa6W9opaopZy/l8AzsT8f21zIgPen8z8o6GpFg\n64fJFcbqCGk2ykN2+x2pIOT04tmCszrfbXZP8LEs4xrUB/XwlHL1vT/M3EWIKbnj\njDaIMjw7q/KRgNUvmKS6SU9b3fnOLcQCz9f5cKdiWACKIU/UvuiWhWJ9ou6BWLWU\nva1A6Fi4XJjhW7s3po58/ioQfl0A9p/L92lGg4ST8QKBgQDx8LIM1g0dh9Ql6LmH\nBUGCOewNNXTs+y3ZznUfvVMoyyZK5w/pzeUvkmOwzbRGnZJ9WyCghq8aezyEpo2D\nPL7Odf988IeHmvhyZIM4PLJYgDvSwGXyf/gh6gJkf/4wpx+tx/yQYNBm3Rht7sA0\npSaLehK0E0kW1uyBzHGKgyQOhwKBgQDf6LiZ7hSQqh54vIU1XMDRth0UOo/s/HGi\nDoij29KjmHjLkm8vOlCo83e79X0WhcnyB5kM7nWFegwcM1PJ0Dl8gidUuTlOVDtM\n5u2AaxDoyXAUL457U5dGFAIW+R653ZDkzMfCglacP8HixXEyIpL1cTLqiCAgzszS\nLcSWwoAr8wKBgQC4CGm3X97sFpTmHSd6sCHLaDnJNl9xoAKZifUHpqCqCBVhpm8x\nXp+11vmj1GULzfJPDlE8Khbp4tH+6R39tOhC7fjgVaoSGWxgv1odHfZfYXOf9R/X\nHUZmrbUSM1XsNkPfkZ7pR+teQ1HA1Xo40WMHd1zgw0a2a9fNR/EZ9nUn4wKBgGaK\nUEgGNRrPHadTRnnaoV8o1IZYD2OLdIqvtzm7SOqsv90SkaKCRUAqR5InaYKwAHy7\nqAa5Cc73xqX/h4arujff7x0ouiq5/nJIa0ndPmAtKAvGf6zQ6j0ompBkxAKAioON\nmInmYL2roSI2I5G/LagDkDrB3lzH+Brk5NvZ9RKrAoGAGox462GGGb/NbGdDkahN\ndifzYYvq4FPiWFFo0ynKAulxCBWLXO/N45XNuAyen433d8eREcAYz1Dzax44+MdQ\nHo9dU7YcZvFyt6iZsYeQF8dluHui3vzMpUe0KbqpZC5KMOSw53ZdNIwzo8NTAK59\n+uv3dHGj7sS8fhDo3yCifzc=\n-----END PRIVATE KEY-----\n",
|
||||
"client_email": "poweron-voice-services@poweronid.iam.gserviceaccount.com",
|
||||
"client_id": "116641749406798186404",
|
||||
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
|
||||
"token_uri": "https://oauth2.googleapis.com/token",
|
||||
"auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
|
||||
"client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/poweron-voice-services%40poweronid.iam.gserviceaccount.com",
|
||||
"universe_domain": "googleapis.com"
|
||||
}
|
||||
|
||||
# Web Search configuration
|
||||
Web_Search_MAX_QUERY_LENGTH = 400
|
||||
Web_Search_MAX_RESULTS = 20
|
||||
|
|
|
|||
Binary file not shown.
env_dev.env (67 changed lines)
@ -4,51 +4,31 @@
|
|||
APP_ENV_TYPE = dev
|
||||
APP_ENV_LABEL = Development Instance Patrick
|
||||
APP_API_URL = http://localhost:8000
|
||||
|
||||
# Database Configuration for Application
|
||||
# JSON File Storage (current)
|
||||
# DB_APP_HOST=D:/Temp/_powerondb
|
||||
# DB_APP_DATABASE=app
|
||||
# DB_APP_USER=dev_user
|
||||
# DB_APP_PASSWORD_SECRET=dev_password
|
||||
APP_KEY_SYSVAR = D:/Athi/Local/Web/poweron/local/key.txt
|
||||
|
||||
# PostgreSQL Storage (new)
|
||||
DB_APP_HOST=localhost
|
||||
DB_APP_DATABASE=poweron_app_dev
|
||||
DB_APP_DATABASE=poweron_app
|
||||
DB_APP_USER=poweron_dev
|
||||
DB_APP_PASSWORD_SECRET=dev_password
|
||||
DB_APP_PASSWORD_SECRET = DEV_ENC:Z0FBQUFBQm8wSFJNU2ZKVk41bU5HbmJOREJ6ZmZ1cTcwZ3ZXQlcxY0dTcjVTUEgxemlRVmtUYWlmWXdicW1JcDFUQkRHamFZVUJSUlg4ZTlHaWZIUGhzVUUtTEFiYkxZeXN6NEtrSjZubjFzN0g2OG5SdjdnQm89
|
||||
DB_APP_PORT=5432
|
||||
|
||||
# Database Configuration Chat
|
||||
# JSON File Storage (current)
|
||||
# DB_CHAT_HOST=D:/Temp/_powerondb
|
||||
# DB_CHAT_DATABASE=chat
|
||||
# DB_CHAT_USER=dev_user
|
||||
# DB_CHAT_PASSWORD_SECRET=dev_password
|
||||
|
||||
# PostgreSQL Storage (new)
|
||||
DB_CHAT_HOST=localhost
|
||||
DB_CHAT_DATABASE=poweron_chat_dev
|
||||
DB_CHAT_DATABASE=poweron_chat
|
||||
DB_CHAT_USER=poweron_dev
|
||||
DB_CHAT_PASSWORD_SECRET=dev_password
|
||||
DB_CHAT_PASSWORD_SECRET = DEV_ENC:Z0FBQUFBQm8wSFJNZk1fUE1Pa01QM1c0WDdaTnQ0ZWRhTExmZm5iR2R0SEZlMDI2VmJvQ2Nrc0RDY1Z3NG9CSVJucUxkX1B4Qk45bkxvN05XYmZXY1NGa2gtWWxuaFg5bmFnR3d0ZmdYS1A5V2xSeFFYTm5ialE9
|
||||
DB_CHAT_PORT=5432
|
||||
|
||||
# Database Configuration Management
|
||||
# JSON File Storage (current)
|
||||
# DB_MANAGEMENT_HOST=D:/Temp/_powerondb
|
||||
# DB_MANAGEMENT_DATABASE=management
|
||||
# DB_MANAGEMENT_USER=dev_user
|
||||
# DB_MANAGEMENT_PASSWORD_SECRET=dev_password
|
||||
|
||||
# PostgreSQL Storage (new)
|
||||
DB_MANAGEMENT_HOST=localhost
|
||||
DB_MANAGEMENT_DATABASE=poweron_management_dev
|
||||
DB_MANAGEMENT_DATABASE=poweron_management
|
||||
DB_MANAGEMENT_USER=poweron_dev
|
||||
DB_MANAGEMENT_PASSWORD_SECRET=dev_password
|
||||
DB_MANAGEMENT_PASSWORD_SECRET = DEV_ENC:Z0FBQUFBQm8wSFJNVFd0WkdsZGlLYjcxOUpaM2szUGZyWkZseHBCM1JaYm5fMnJNQ1hVLUIwVVlMaTAtZlBkZ0hsTVM5eXVjZkoxamdmWU00dUU5TEs5Zzlhd0RXYVJGR2twV2hLbjFoN2RsUkVjSGd5NExqV1U9
|
||||
DB_MANAGEMENT_PORT=5432
|
||||
|
||||
# Security Configuration
|
||||
APP_JWT_SECRET_SECRET=rotated_jwt_secret_2025_09_17_f8a3b6c2-7d4e-45b6-9a1f-3c0b9a1d2e7f
|
||||
APP_JWT_KEY_SECRET = DEV_ENC:Z0FBQUFBQm8wSFJNMnRzSGtvR1Uyd1RmVm01MTJUTkFlQVRYVHJNVmVhSEpaY2k4YTdIUUtvalhLXzJaeDJVQkhlRHZ2MnExR2k4b09ScnF5U2xubnZtWmRUNmx1b2c4bmItbmdMWmc2eVU2X1pFVmE0UzR0d0xzOG52SkVlSi1uZGZoYVdqMGN3Y0tIVUR1bGtyLW9hNEdRemwtSlJJc1RGbWxJdlpxdHhtMldJTjRDWTE4MFhjPQ==
|
||||
APP_TOKEN_EXPIRY=300
|
||||
|
||||
# CORS Configuration
|
||||
|
|
@ -56,7 +36,7 @@ APP_ALLOWED_ORIGINS=http://localhost:8080,https://playground.poweron-center.net
|
|||
|
||||
# Logging configuration
|
||||
APP_LOGGING_LOG_LEVEL = DEBUG
|
||||
APP_LOGGING_LOG_FILE = poweron.log
|
||||
APP_LOGGING_LOG_DIR = D:/Athi/Local/Web/poweron/local/logs
|
||||
APP_LOGGING_FORMAT = %(asctime)s - %(levelname)s - %(name)s - %(message)s
|
||||
APP_LOGGING_DATE_FORMAT = %Y-%m-%d %H:%M:%S
|
||||
APP_LOGGING_CONSOLE_ENABLED = True
|
||||
|
|
@ -67,3 +47,32 @@ APP_LOGGING_BACKUP_COUNT = 5
|
|||
# Service Redirects
|
||||
Service_MSFT_REDIRECT_URI = http://localhost:8000/api/msft/auth/callback
|
||||
Service_GOOGLE_REDIRECT_URI = http://localhost:8000/api/google/auth/callback
|
||||
|
||||
# OpenAI configuration
|
||||
Connector_AiOpenai_API_URL = https://api.openai.com/v1/chat/completions
|
||||
Connector_AiOpenai_API_SECRET = DEV_ENC:Z0FBQUFBQm8wSFJNdndUSzRSc0l6UGdRYzNkVlJWZjF1ZG1Id3RwTFhPRnBkX3BhN0NlMHk0a2NkQmk2bmhnemNwY1FtanFEemZUd21zcVFYUTRGWUhpeTlOSEgyUWdZVVBneTYxT2RZQTEyZk1XQ3Y5MDhDd3JnMXRwbVVfaVpDOWF2TDU3Mjl2YURvR0daLW92dDdmUktkQ2VOei0tdHdBPT0=
|
||||
Connector_AiOpenai_MODEL_NAME = gpt-4o
|
||||
Connector_AiOpenai_TEMPERATURE = 0.2
|
||||
Connector_AiOpenai_MAX_TOKENS = 2000
|
||||
|
||||
# Anthropic configuration
|
||||
Connector_AiAnthropic_API_URL = https://api.anthropic.com/v1/messages
|
||||
Connector_AiAnthropic_API_SECRET = DEV_ENC:Z0FBQUFBQm8wSFJNRW9tR094QlB6bU9na1lGc0RIYzZOX3g5ZGh4dC1NaXZnUExFWDhnWURQdmNRTi1vc2F6RExGZTFZRU5BUjVjV1NTb3hURS1UY1NYdVhBUVRPemptZXZIclRhOG8wLVkxTGc4R01RTG95THFET2ZJRGlSeWMzcVdwejdVcjIyR0VoUzRaVUsyLVVsQ0sxckxoc2MwWmFfSjBvOTNMaGtCajFpRGpqYm5Sc0Zud08xb2dWdXhOYzQ3ZXZySUNrRVZmYnpyQ0tQdjNjbVExelA2UXNzOENzQT09
|
||||
Connector_AiAnthropic_MODEL_NAME = claude-3-5-sonnet-20241022
|
||||
Connector_AiAnthropic_TEMPERATURE = 0.2
|
||||
Connector_AiAnthropic_MAX_TOKENS = 2000
|
||||
|
||||
# Agent Mail configuration
|
||||
Service_MSFT_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
|
||||
Service_MSFT_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQm8wSFJNay04TEUzbmRHR29jNWp0Nm1MZEtjUkNKRVVTU1p0QUNHMC1vWHpFcTR5eHNDMDBYbnVzRWpEdWVQeE1FRkJDMGlWRWNXZHZfc3M5aG1UdmRYd1J0cElWZGY0aVZ1OWNUMndZTWNXNm9fQ0hCemNwMWdUQW9ya0owOEVUMG1kLUk=
|
||||
Service_MSFT_TENANT_ID = common
|
||||
|
||||
# Google Service configuration
|
||||
Service_GOOGLE_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
|
||||
Service_GOOGLE_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQm8wSFJNOVNWNjV1SXllM1ZnWVRmQTdXZmY1YnBjXzl6Q2lKR3R6SzA4SHFvWU96QWVyWG4wc2tLaGQ2SkVOM0tNMUpXaHNNTjEyOWRGeWVtSjdycHBOSjFlRU5XWVFKV0o5Z2l3THU5SHJLaHJXZC1ST1FGdVhwdXBaMFFmQ0lzUmplQmo=
|
||||
|
||||
# Tavily Web Search configuration
|
||||
Connector_WebTavily_API_KEY_SECRET = DEV_ENC:Z0FBQUFBQm8wSFJNSEJ2YmVieFRaWk5yR1kwVzJ2ajlvTVVZN3dzV2pBT25nTmRsa0NXZEM2eHhqRXhBZ19VMTlFWkQ4ZzlnTUY1M0h0SUpWenZLR3JtZDBVOXZuT1JFV3UxMkJCdjZ2YjB1cE1jYlBOVzZsSHVXa19kcTNiVzZIRUZFdVZCeXJ5YUQ=
|
||||
|
||||
# Google Cloud Speech Services configuration
|
||||
Connector_GoogleSpeech_API_KEY_SECRET = DEV_ENC:Z0FBQUFBQm8wSFJNSFFITGlUVzF3NE5Ldk10d3o5MS10Q2o4aEJGM250WF9CeWxFQVNaNHBhMk1hS3E5YXRrakh5dmx0VDJuZ3BsWGVMTC0tbU9wWFRWZWM1N25ibWpkeF84enJ1Y2ViMVd1V0plUWdxN3VId1VRUzBhN3MzLVBkSXEwM1BHT2Z2c3JBalh6eXVKMUNFX2pfbGdGYUg1ZUFfcXhSRnJyT0tzdWVVdG1HSHBZOUgwLUVPMVQ4YkZUc3dMcFlLWjRxQUM1X05OWm5ndmJGcjFETV9UM1FoLWt2RVVEem92UGhvZlRFXzNxOVRzQkhyV0hqeTRWQXdMdDVDbEMwOWFkTnV3UXpsYWZwRENaRzd4QjlwTjJUWHhHLVZPTzd1eXNhSWh5ajNwelgxSDRlNUx0N05yTlI1N1RjSzdIZGhFLXBOMjEwMkxsT0daSVhiWVpQZUtfNVdwdGVrazVMM2NkUGZPOHBuNjM3YXdFcGFPdlVtY01ReGhsVENwNnRvNGhJejNHd3hFOFA0bWgxalFFNDVoQ2xYTG5VN1dDZGhndEdWRlFjYzBRMUgwbzRfS2N3VVgyaXJpYmJfZzNadmx5cTFxS2Vja1I5Qm1UT0hDM1FuNk5JRmYtT2p3RWp2SWxTWGZuU1psOUN4NEJTOHkweWIzY2NjbTJRZG5oRjVxNGh4LTUwZE1zZi1zLU43Ulk4UGtmR0N6dU5RcVVvRF9DQlE5Sk1FR1YtOE84WnVuTDlOUHhQR1JLT2g0VkNIT2ctWTBuMXIwNHhSSjcxNnNWRFhQc18zSm1UR1M0Mm54TGxsRG5uX2tDSWhBNDRGaHFObkhuVmtnVVlQU1FhVWhTdnpGUDRfcDQ1OWpERklHMmN5Y0RVWC1JYlItTUozaWY1dmxZUW12NXAtUEtsQWpqUFk4NzFwWVNfSUNqeDNkc25wMnJHN3c5NTB1dmxmUFZfU0NWS1hQMTc1NmdOTmEyREZRVXB0cmlyaldkT3B0Q3FQMFdpdWQ3WU1RZDZKYlFneDdnQ2NWWHFHSXl1c2xRN21LbDdyUGFUcWFxeVVTOWoxSkVJaFZiUHI2VFBHWEdvM2Q1cXdIVGYyc3Y2cVdRd00ydHdrME8tcDVqSmNLV193R291VElTNWFNa2pMQi1zX21VdnZ1R0tTbEJndndvbWRrVE52eW1aTFFzRURtdGItc3FJeXJDenVTWTlIZ0E1eG1yX2N1SHJSUWIxdm8wakdzaDIyaDQ0cE9UdDlhclp2MzVVamQ2em0zbmdLUzBJa1ZaRFpQaTBnZGpTWnRhRGZxUVNZWDg5VDFndWFmZlZnVG5SUEhlWkpfQnREWS0xbEZfNXd5OUpEUkZHa1NZNWtPbnBadFFialgzazlyM0dTb3ctR2x5LUozT3VDc3F1Tk5TbGN2MnRRS1hTb1gzWUNVSlJuUl85azhxaGxCMzVNQUQzVGg1cDZHalRaOUFrM1JPSGJKaGlKRTAwbnV4TmxIZnhkMF9FODVKUk1GZGlWZk1ScnhmQnJXWmRxMTk3SWhIdnBjSVJJOElkalRUWXFRTFNvQXZpdFpFOUdDWkhHOTRLVmN2cEh0X2JpYjNvRjhvUHFVQVNQdXY4OWxQSWNvcUNfZW5HYy10dEFicldhRHZLS1ktY2RGczQta2lGWXkxb2RhNUZMNExabWx0dXdhR3BSWGpSYVUxRXJZVTNBYmdNVFd5NW1vY2s0T0RlV3hqZjNSMHhJakY1TDBackV5bmM2V1o2SEJlT3RSbnpPR0VXbmhQTUtPMzYyU1RjbFRmQUlWTUZjVGRheXBuekZJN3NNZVFFZ3JHenNnOFdQVWxsbFBoYTVvQUd1NGx2SDdYcGhrdUpSWlRIRWVVUkpxdjJSZV9zb0J3N3o4QnRpYXpTRHdkZ1pqSWswSjdJMjVEZDZUNzZuWDVXWkNxUDRtQ1p1dnk2ZEx0S0NKT2ZUc3B5eEdRdEpnTlZQMkt5OHFjQ3FfcHpzUFZEY3Z5WDdEQkt4cEN2MFg2eXF4bDZFeHZFWk5tMFpUR0xDZi1JVjN4eUtRaXlNXzBJUFV2N19MVTRhMWtxWnd6d0Y2bVNFQUJSdEU5Z01FTjEtZDJmWkpEYUlsTVJnTEJYdU1iVFoySEttd3libURrSUNJelVic2Mzb0t5ZzNDX0hjZUtfOFQ1QkxRWmx2dmhnbDhNZllla1dNa0Y5akVpNDRKdHRSUU9fTE9sYVUzdzZtTkJEYTBWdkxkRURSa01TOGxWcVZkUmxkWTA1QjJjS1pOUjJEQTZxeDdSVXhNWldXbnE1V1J2STVCNkt2VHRuNEdtaHUweWdEbUZyMlhWd09FWWI0UUFyQVpUeDE3QXdfQkMtcjdpUU5GUTQzUEczNWg1Wm5rVEgwRW11RFowVnFxYnpGNUYwYks1Y3JPbTdUc2ZXS1ZfYzdhcno3U1ZXZUVkblRoOVl5XzZpTUgwRXFZeFd6NXdqTGlvNm1QeXgxS2ZFTVJSV1JVejliWFBVRGU1MWVudEZzRDFwSW94YlU1Y3JmallsVldXcHdvTmFQdnU5UE0tNHNHMXhPWE1JQUxCNC1WVVRJNmNJcTM3a1dUWWwzSVptTFg3OXlWLWxITkdiR0MyTmRzRWFOeHBMZEVzbms3RC1MTFo1TVhKeURhUW9peHk1bHhJbHphVzR4RmxiUkJwcmkzcWZ3S3dWV0Jkb2VaZ3pMTXdUNUJmZjZfVEVXeDFNMnBvemM0TUJNeUQ2SE1aeWczc0V6M0NUMHFGdURMbTRka3AzZ1d1TUh2V1c5RzBKQVVlTEstWEthOTdaWUZHTlRHaVNmbEFJRFU3M0l2TWlBNF9kaFpJUXlxMHJYa2lxOGFRbDNqMTA1RDFFclFTcGxmb0g2WVI3Z0NrLWN4cUNzNWVuR2VMaE41dWRqMnR5eWNuM0gwUmIwcTFEQ09qbmJCUFIwbjM4MGF6TlhxQWpKOFZXWGNKdnl2Wi1zU1BsZU5NYWpsbzVKMGxTLUJKckd6enJnZWhXemstenN3NGNqUk9HeGlGaFNhSl83TlUzLTVZWW9zYVZZTTZzSjNfd3JkVDNaZVp4dk1GQVMxblJBRW1BWUZLU1VKUFkyQ1dPbndUNjYwdll2U0JxN1FQNk5OaGVYR3U5TXdGNGFVZGVXcS1tS2dwbVc1V3hEeXhVNkJ2cjdGX2FpY1NvOTJhcWFyOUVGOFpOdmd0R29Rb2RIaU01R05LeWRxUE00WlhOQVlMbkZxZDNyUFRXdUFGZ0lOUmp2RzIyaDlzMGxNQk40VzFzYjAwMEhjRVlrNWJ5cFhpVWYxQkxYQ25rUDJ3RTY1VlVFLThiNG1nY1hkdnZTMGoyVlN6dkJleFhndDNCODhlOVl1ZHBkci1hd3l0NGNXeWZ6aUp4S3pHS1c4aDM3WElBTjBwYlNSbmJoMk5SNF81VVNqd0dXY1JUejVsZnpGS1Z5dHFPNUVVM1I5eGhjblZjMV9idFJ
kc3NZaUdHRlIzQWJQdHhzT01qVW8xUUwxNHZmY3Q1aHBnNHhXTGRjb1BmTmM2X0NmdkpxNS1JMHNQNVg1N0xsd0pmdE8wNktkUGpuX0F3LURyaGhyajg3eWNDdkozUFZIYmpJTTZ3WWVCVFZUd1AtRklFUUxTNXkzalpfdlc4VE1tOHU1Q0MtUWdLbEdYRzdVU1RkM3gyeEY3eXBWLUhXVVo4VkZoUHVkakJPNk0tNTJKTU1JZjVISlR3SmJBQkVhRW51UHg3UjBOMVRPRnF2dzIwRkgxczBBUWZpemFFMzFTeDJfWHZhSkhsTzBhcFIzVmZRODEzRUl1b1ZDUGFqYUxjN2JsbkhYdHVPT00yYlUwbmpVbkU0RkJXbWx5UVFJdHNvNUdxQzMyQnQycDJpMjlnd2xwb3huRUJiZUg5dkhaMjhMV2R5T0NsU0N4WjdBX2ZfODhOdTZOZ0x6WlRIUGI3MzR1ZkJicHN6NzUzRzlsUmVkNlR6MjZjTTA3c290Qzh4ejRiWERHbmFtV1BQV2ZKb2pGU0F1OGsySG9hNHdtSkkxTWpwV2gyaVpWcFpsRWs5a0hSY3UzMk4wQ0dkZWtMbG4xOFZ6TXdEOXBob3I0NjNkT28tZk5IcW5FUkg4YnBtUVFLY1Q5M1lzYzhrRGZOaDF6SnpnejRuM1Y3SW1xMUJmLXpJdEM0UjNHU0t5OEhoamxxLXRmWmtyOS1ud09XeGFzc3VFXzNPWWNGcXFwdHN2cVFEZ0dWdUNKbF9Lc3d6dVhPb3NLMlNEaW1xd3JPLUViYV9GTnNRPT0=
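Note on the DEV_ENC values above: the commit replaces plaintext dev secrets with DEV_ENC:-prefixed blobs and points APP_KEY_SYSVAR at a local key file, but the decryption code itself is not part of this diff. The sketch below is an assumption-laden illustration only: it assumes the blob is a base64-wrapped Fernet token (the decoded payloads start with the Fernet marker gAAAA) and that the key file holds a standard Fernet key; the helper name and call are hypothetical.

import base64
from cryptography.fernet import Fernet  # assumed scheme, not confirmed by the commit

def decrypt_dev_value(value: str, key_path: str) -> str:
    # Hypothetical reader for DEV_ENC: values; names and scheme are assumptions.
    token = base64.b64decode(value.removeprefix("DEV_ENC:"))
    with open(key_path, "rb") as fh:
        key = fh.read().strip()
    return Fernet(key).decrypt(token).decode("utf-8")

# e.g. decrypt_dev_value(DB_APP_PASSWORD_SECRET, APP_KEY_SYSVAR) for local debugging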
env_int.env (47 changed lines)
@ -4,6 +4,7 @@
|
|||
APP_ENV_TYPE = int
|
||||
APP_ENV_LABEL = Integration Instance
|
||||
APP_API_URL = https://gateway-int.poweron-center.net
|
||||
APP_KEY_SYSVAR = CONFIG_KEY
|
||||
|
||||
# PostgreSQL Storage (new)
|
||||
DB_APP_HOST=gateway-int-server.postgres.database.azure.com
|
||||
|
|
@ -27,7 +28,7 @@ DB_MANAGEMENT_PASSWORD_SECRET=VkAjgECESbEVQ$Tu
|
|||
DB_MANAGEMENT_PORT=5432
|
||||
|
||||
# Security Configuration
|
||||
APP_JWT_SECRET_SECRET=rotated_jwt_secret_2025_09_17_2c5f8e7a-1b3d-49c7-ae5d-9f0a2c3d4b5e
|
||||
APP_JWT_KEY_SECRET=rotated_jwt_secret_2025_09_17_2c5f8e7a-1b3d-49c7-ae5d-9f0a2c3d4b5e
|
||||
APP_TOKEN_EXPIRY=300
|
||||
|
||||
# CORS Configuration
|
||||
|
|
@ -35,7 +36,7 @@ APP_ALLOWED_ORIGINS=http://localhost:8080,https://playground.poweron-center.net,
|
|||
|
||||
# Logging configuration
|
||||
APP_LOGGING_LOG_LEVEL = DEBUG
|
||||
APP_LOGGING_LOG_FILE = /home/site/wwwroot/poweron.log
|
||||
APP_LOGGING_LOG_DIR = /home/site/wwwroot/
|
||||
APP_LOGGING_FORMAT = %(asctime)s - %(levelname)s - %(name)s - %(message)s
|
||||
APP_LOGGING_DATE_FORMAT = %Y-%m-%d %H:%M:%S
|
||||
APP_LOGGING_CONSOLE_ENABLED = True
|
||||
|
|
@ -46,3 +47,45 @@ APP_LOGGING_BACKUP_COUNT = 5
|
|||
# Service Redirects
|
||||
Service_MSFT_REDIRECT_URI = https://gateway-int.poweron-center.net/api/msft/auth/callback
|
||||
Service_GOOGLE_REDIRECT_URI = https://gateway-int.poweron-center.net/api/google/auth/callback
|
||||
|
||||
|
||||
# OpenAI configuration
|
||||
Connector_AiOpenai_API_URL = https://api.openai.com/v1/chat/completions
|
||||
Connector_AiOpenai_API_SECRET = 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
|
||||
Connector_AiOpenai_MODEL_NAME = gpt-4o
|
||||
Connector_AiOpenai_TEMPERATURE = 0.2
|
||||
Connector_AiOpenai_MAX_TOKENS = 2000
|
||||
|
||||
# Anthropic configuration
|
||||
Connector_AiAnthropic_API_URL = https://api.anthropic.com/v1/messages
|
||||
Connector_AiAnthropic_API_SECRET = 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
|
||||
Connector_AiAnthropic_MODEL_NAME = claude-3-5-sonnet-20241022
|
||||
Connector_AiAnthropic_TEMPERATURE = 0.2
|
||||
Connector_AiAnthropic_MAX_TOKENS = 2000
|
||||
|
||||
# Agent Mail configuration
|
||||
Service_MSFT_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
|
||||
Service_MSFT_CLIENT_SECRET = Kxf8Q~2lJIteZ~JaI32kMf1lfaWKATqxXiNiFbzV
|
||||
Service_MSFT_TENANT_ID = common
|
||||
|
||||
# Google Service configuration
|
||||
Service_GOOGLE_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
|
||||
Service_GOOGLE_CLIENT_SECRET = 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
|
||||
|
||||
# Tavily Web Search configuration
|
||||
Connector_WebTavily_API_KEY_SECRET = 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
|
||||
|
||||
# Google Cloud Speech Services configuration
|
||||
Connector_GoogleSpeech_API_KEY_SECRET = {
|
||||
"type": "service_account",
|
||||
"project_id": "poweronid",
|
||||
"private_key_id": "88db66e4248326e9baeac4231bc196fd46a9a441",
|
||||
"private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQDTnJuxA+xBL3LA\nPgFILYCsGuppkkdO6d153Q36f2jTj6zpH3OhKMVsaaTBknG2o2+D0Whlk6Yh5rOw\nkWzpMC3y81leRLm5kucERMkBUgd2GL4v16k6m+QGuC3BFlt/XeyuckJNW0V6v/Dy\n3+bSYM7/5o1ftPNWJeAIEWoE/V4wKCYde8RE4Vp1LO5YwhgcM4rRuPmF2OhekpA+\npteYwkY/8/gTTRpZIc8OTsBYRbaMwsjoDj5riuL3boVtkwZwKRb+ZLvupXeU7Ds7\n1305odTcZUwnImHiHfuq83ZJViQiLRNhUAFnQIXPrYLwEpCmzRBGzYHaRlb69ga/\nzqUbKnclAgMBAAECggEAH6W9qHehubioPMAJM7Y6bC2KU/JLNS4csBZd+idb52gG\nwBwIEFjR+H4ZjymhAA4+pe7c4h7MKyh0RI/l7eoFX98Cb+rEq/r1udm1BhGH3s2h\n2UiI8qRQh1YRjF2/nrN5VjhDBOFa6W9opaopZy/l8AzsT8f21zIgPen8z8o6GpFg\n64fJFcbqCGk2ykN2+x2pIOT04tmCszrfbXZP8LEs4xrUB/XwlHL1vT/M3EWIKbnj\njDaIMjw7q/KRgNUvmKS6SU9b3fnOLcQCz9f5cKdiWACKIU/UvuiWhWJ9ou6BWLWU\nva1A6Fi4XJjhW7s3po58/ioQfl0A9p/L92lGg4ST8QKBgQDx8LIM1g0dh9Ql6LmH\nBUGCOewNNXTs+y3ZznUfvVMoyyZK5w/pzeUvkmOwzbRGnZJ9WyCghq8aezyEpo2D\nPL7Odf988IeHmvhyZIM4PLJYgDvSwGXyf/gh6gJkf/4wpx+tx/yQYNBm3Rht7sA0\npSaLehK0E0kW1uyBzHGKgyQOhwKBgQDf6LiZ7hSQqh54vIU1XMDRth0UOo/s/HGi\nDoij29KjmHjLkm8vOlCo83e79X0WhcnyB5kM7nWFegwcM1PJ0Dl8gidUuTlOVDtM\n5u2AaxDoyXAUL457U5dGFAIW+R653ZDkzMfCglacP8HixXEyIpL1cTLqiCAgzszS\nLcSWwoAr8wKBgQC4CGm3X97sFpTmHSd6sCHLaDnJNl9xoAKZifUHpqCqCBVhpm8x\nXp+11vmj1GULzfJPDlE8Khbp4tH+6R39tOhC7fjgVaoSGWxgv1odHfZfYXOf9R/X\nHUZmrbUSM1XsNkPfkZ7pR+teQ1HA1Xo40WMHd1zgw0a2a9fNR/EZ9nUn4wKBgGaK\nUEgGNRrPHadTRnnaoV8o1IZYD2OLdIqvtzm7SOqsv90SkaKCRUAqR5InaYKwAHy7\nqAa5Cc73xqX/h4arujff7x0ouiq5/nJIa0ndPmAtKAvGf6zQ6j0ompBkxAKAioON\nmInmYL2roSI2I5G/LagDkDrB3lzH+Brk5NvZ9RKrAoGAGox462GGGb/NbGdDkahN\ndifzYYvq4FPiWFFo0ynKAulxCBWLXO/N45XNuAyen433d8eREcAYz1Dzax44+MdQ\nHo9dU7YcZvFyt6iZsYeQF8dluHui3vzMpUe0KbqpZC5KMOSw53ZdNIwzo8NTAK59\n+uv3dHGj7sS8fhDo3yCifzc=\n-----END PRIVATE KEY-----\n",
|
||||
"client_email": "poweron-voice-services@poweronid.iam.gserviceaccount.com",
|
||||
"client_id": "116641749406798186404",
|
||||
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
|
||||
"token_uri": "https://oauth2.googleapis.com/token",
|
||||
"auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
|
||||
"client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/poweron-voice-services%40poweronid.iam.gserviceaccount.com",
|
||||
"universe_domain": "googleapis.com"
|
||||
}
|
||||
env_prod.env (46 changed lines)
@ -4,6 +4,7 @@
|
|||
APP_ENV_TYPE = prod
|
||||
APP_ENV_LABEL = Production Instance
|
||||
APP_API_URL = https://gateway.poweron-center.net
|
||||
APP_KEY_SYSVAR = CONFIG_KEY
|
||||
|
||||
# PostgreSQL Storage (new)
|
||||
DB_APP_HOST=gateway-prod-server.postgres.database.azure.com
|
||||
|
|
@ -27,7 +28,7 @@ DB_MANAGEMENT_PASSWORD_SECRET=prod_password_very_secure.2025
|
|||
DB_MANAGEMENT_PORT=5432
|
||||
|
||||
# Security Configuration
|
||||
APP_JWT_SECRET_SECRET=rotated_jwt_secret_2025_09_17_prod_e1a9c4d7-6b8f-4f2e-9c1a-7e3d2a1b9c5f
|
||||
APP_JWT_KEY_SECRET=rotated_jwt_secret_2025_09_17_prod_e1a9c4d7-6b8f-4f2e-9c1a-7e3d2a1b9c5f
|
||||
APP_TOKEN_EXPIRY=300
|
||||
|
||||
# CORS Configuration
|
||||
|
|
@ -35,7 +36,7 @@ APP_ALLOWED_ORIGINS=http://localhost:8080,https://playground.poweron-center.net,
|
|||
|
||||
# Logging configuration
|
||||
APP_LOGGING_LOG_LEVEL = DEBUG
|
||||
APP_LOGGING_LOG_FILE = /home/site/wwwroot/poweron.log
|
||||
APP_LOGGING_LOG_DIR = /home/site/wwwroot/
|
||||
APP_LOGGING_FORMAT = %(asctime)s - %(levelname)s - %(name)s - %(message)s
|
||||
APP_LOGGING_DATE_FORMAT = %Y-%m-%d %H:%M:%S
|
||||
APP_LOGGING_CONSOLE_ENABLED = True
|
||||
|
|
@ -46,3 +47,44 @@ APP_LOGGING_BACKUP_COUNT = 5
|
|||
# Service Redirects
|
||||
Service_MSFT_REDIRECT_URI = https://gateway-prod.poweron-center.net/api/msft/auth/callback
|
||||
Service_GOOGLE_REDIRECT_URI = https://gateway-prod.poweron-center.net/api/google/auth/callback
|
||||
|
||||
# OpenAI configuration
|
||||
Connector_AiOpenai_API_URL = https://api.openai.com/v1/chat/completions
|
||||
Connector_AiOpenai_API_SECRET = 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
|
||||
Connector_AiOpenai_MODEL_NAME = gpt-4o
|
||||
Connector_AiOpenai_TEMPERATURE = 0.2
|
||||
Connector_AiOpenai_MAX_TOKENS = 2000
|
||||
|
||||
# Anthropic configuration
|
||||
Connector_AiAnthropic_API_URL = https://api.anthropic.com/v1/messages
|
||||
Connector_AiAnthropic_API_SECRET = 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
|
||||
Connector_AiAnthropic_MODEL_NAME = claude-3-5-sonnet-20241022
|
||||
Connector_AiAnthropic_TEMPERATURE = 0.2
|
||||
Connector_AiAnthropic_MAX_TOKENS = 2000
|
||||
|
||||
# Agent Mail configuration
|
||||
Service_MSFT_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
|
||||
Service_MSFT_CLIENT_SECRET = Kxf8Q~2lJIteZ~JaI32kMf1lfaWKATqxXiNiFbzV
|
||||
Service_MSFT_TENANT_ID = common
|
||||
|
||||
# Google Service configuration
|
||||
Service_GOOGLE_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
|
||||
Service_GOOGLE_CLIENT_SECRET = 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
|
||||
|
||||
# Tavily Web Search configuration
|
||||
Connector_WebTavily_API_KEY_SECRET = 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
|
||||
|
||||
# Google Cloud Speech Services configuration
|
||||
Connector_GoogleSpeech_API_KEY_SECRET = {
|
||||
"type": "service_account",
|
||||
"project_id": "poweronid",
|
||||
"private_key_id": "88db66e4248326e9baeac4231bc196fd46a9a441",
|
||||
"private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQDTnJuxA+xBL3LA\nPgFILYCsGuppkkdO6d153Q36f2jTj6zpH3OhKMVsaaTBknG2o2+D0Whlk6Yh5rOw\nkWzpMC3y81leRLm5kucERMkBUgd2GL4v16k6m+QGuC3BFlt/XeyuckJNW0V6v/Dy\n3+bSYM7/5o1ftPNWJeAIEWoE/V4wKCYde8RE4Vp1LO5YwhgcM4rRuPmF2OhekpA+\npteYwkY/8/gTTRpZIc8OTsBYRbaMwsjoDj5riuL3boVtkwZwKRb+ZLvupXeU7Ds7\n1305odTcZUwnImHiHfuq83ZJViQiLRNhUAFnQIXPrYLwEpCmzRBGzYHaRlb69ga/\nzqUbKnclAgMBAAECggEAH6W9qHehubioPMAJM7Y6bC2KU/JLNS4csBZd+idb52gG\nwBwIEFjR+H4ZjymhAA4+pe7c4h7MKyh0RI/l7eoFX98Cb+rEq/r1udm1BhGH3s2h\n2UiI8qRQh1YRjF2/nrN5VjhDBOFa6W9opaopZy/l8AzsT8f21zIgPen8z8o6GpFg\n64fJFcbqCGk2ykN2+x2pIOT04tmCszrfbXZP8LEs4xrUB/XwlHL1vT/M3EWIKbnj\njDaIMjw7q/KRgNUvmKS6SU9b3fnOLcQCz9f5cKdiWACKIU/UvuiWhWJ9ou6BWLWU\nva1A6Fi4XJjhW7s3po58/ioQfl0A9p/L92lGg4ST8QKBgQDx8LIM1g0dh9Ql6LmH\nBUGCOewNNXTs+y3ZznUfvVMoyyZK5w/pzeUvkmOwzbRGnZJ9WyCghq8aezyEpo2D\nPL7Odf988IeHmvhyZIM4PLJYgDvSwGXyf/gh6gJkf/4wpx+tx/yQYNBm3Rht7sA0\npSaLehK0E0kW1uyBzHGKgyQOhwKBgQDf6LiZ7hSQqh54vIU1XMDRth0UOo/s/HGi\nDoij29KjmHjLkm8vOlCo83e79X0WhcnyB5kM7nWFegwcM1PJ0Dl8gidUuTlOVDtM\n5u2AaxDoyXAUL457U5dGFAIW+R653ZDkzMfCglacP8HixXEyIpL1cTLqiCAgzszS\nLcSWwoAr8wKBgQC4CGm3X97sFpTmHSd6sCHLaDnJNl9xoAKZifUHpqCqCBVhpm8x\nXp+11vmj1GULzfJPDlE8Khbp4tH+6R39tOhC7fjgVaoSGWxgv1odHfZfYXOf9R/X\nHUZmrbUSM1XsNkPfkZ7pR+teQ1HA1Xo40WMHd1zgw0a2a9fNR/EZ9nUn4wKBgGaK\nUEgGNRrPHadTRnnaoV8o1IZYD2OLdIqvtzm7SOqsv90SkaKCRUAqR5InaYKwAHy7\nqAa5Cc73xqX/h4arujff7x0ouiq5/nJIa0ndPmAtKAvGf6zQ6j0ompBkxAKAioON\nmInmYL2roSI2I5G/LagDkDrB3lzH+Brk5NvZ9RKrAoGAGox462GGGb/NbGdDkahN\ndifzYYvq4FPiWFFo0ynKAulxCBWLXO/N45XNuAyen433d8eREcAYz1Dzax44+MdQ\nHo9dU7YcZvFyt6iZsYeQF8dluHui3vzMpUe0KbqpZC5KMOSw53ZdNIwzo8NTAK59\n+uv3dHGj7sS8fhDo3yCifzc=\n-----END PRIVATE KEY-----\n",
|
||||
"client_email": "poweron-voice-services@poweronid.iam.gserviceaccount.com",
|
||||
"client_id": "116641749406798186404",
|
||||
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
|
||||
"token_uri": "https://oauth2.googleapis.com/token",
|
||||
"auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
|
||||
"client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/poweron-voice-services%40poweronid.iam.gserviceaccount.com",
|
||||
"universe_domain": "googleapis.com"
|
||||
}

@@ -9,7 +9,7 @@ from pathlib import Path
import xml.etree.ElementTree as ET
from bs4 import BeautifulSoup
import uuid
from .documentUtility import (
from modules.chat.documents.documentUtility import (
    getFileExtension,
    getMimeTypeFromExtension,
    detectMimeTypeFromContent,
@@ -3,7 +3,7 @@ from typing import Any, Dict, List, Optional
from datetime import datetime, UTC
import re
from modules.shared.timezoneUtils import get_utc_timestamp
from .documentUtility import (
from modules.chat.documents.documentUtility import (
    getFileExtension,
    getMimeTypeFromExtension,
    detectMimeTypeFromContent,
@@ -10,9 +10,10 @@ from datetime import datetime, UTC
from modules.interfaces.interfaceChatModel import (
    TaskStatus, TaskStep, TaskContext, TaskAction, ReviewResult, TaskPlan, WorkflowResult, TaskResult, ReviewContext, ActionResult
)
from modules.interfaces.interfaceAppObjects import getInterface as getAppObjects
from modules.shared.timezoneUtils import get_utc_timestamp
from .executionState import TaskExecutionState
from .promptFactory import (
from modules.chat.handling.executionState import TaskExecutionState
from modules.chat.handling.promptFactory import (
    createTaskPlanningPrompt,
    createActionDefinitionPrompt,
    createResultReviewPrompt
@@ -27,11 +28,13 @@ class WorkflowStoppedException(Exception):
    pass

class HandlingTasks:
    def __init__(self, chatInterface, service, workflow=None):
    def __init__(self, chatInterface, currentUser, workflow=None):
        self.chatInterface = chatInterface
        self.service = service
        self.currentUser = currentUser
        self.workflow = workflow
        self.documentGenerator = DocumentGenerator(service)
        from modules.chat.serviceCenter import ServiceCenter
        self.service = ServiceCenter(currentUser, workflow)
        self.documentGenerator = DocumentGenerator(self.service)

    def _checkWorkflowStopped(self):
        """
@@ -63,7 +66,6 @@ class HandlingTasks:
        logger.info(f"=== STARTING TASK PLAN GENERATION ===")
        logger.info(f"Workflow ID: {workflow.id}")
        logger.info(f"User Input: {userInput}")
        available_docs = self.service.getAvailableDocuments(workflow)

        # Check workflow status before calling AI service
        self._checkWorkflowStopped()
@@ -83,8 +85,8 @@ class HandlingTasks:
            task_step=planning_task_step,
            workflow=workflow,
            workflow_id=workflow.id,
            available_documents=available_docs,
            available_connections=[],
            available_documents=None,
            available_connections=None,
            previous_results=[],
            previous_handover=None,
            improvements=[],
@@ -105,10 +107,10 @@ class HandlingTasks:
        # Generate the task planning prompt
        task_planning_prompt = createTaskPlanningPrompt(task_planning_context, self.service)

        # Log the full task planning prompt being sent to AI for debugging
        # Log task planning prompt sent to AI
        logger.info("=== TASK PLANNING PROMPT SENT TO AI ===")
        logger.info(f"User Input: {userInput}")
        logger.info(f"Available Documents: {available_docs}")
        # Trace task planning prompt
        self.service.writeTraceLog("Task Plan Prompt", task_planning_prompt)

        prompt = await self.service.callAiTextAdvanced(task_planning_prompt)

@@ -116,12 +118,11 @@ class HandlingTasks:
        if not prompt:
            raise ValueError("AI service returned no response for task planning")

        # Log the full AI response for task planning
        # Log task planning response received
        logger.info("=== TASK PLANNING AI RESPONSE RECEIVED ===")
        logger.info(f"Response length: {len(prompt) if prompt else 0}")
        logger.debug("=== FULL TASK PLANNING AI RESPONSE ===")
        logger.debug(prompt)
        logger.debug("=== END TASK PLANNING AI RESPONSE ===")
        # Trace task planning response
        self.service.writeTraceLog("Task Plan Response", prompt)

        # Inline _parseTaskPlanResponse logic
        try:
@@ -297,27 +298,15 @@ class HandlingTasks:
        if enhanced_context and enhanced_context.retry_count > 0:
            logger.info("=== RETRY CONTEXT FOR ACTION GENERATION ===")
            logger.info(f"Retry Count: {enhanced_context.retry_count}")
            logger.info(f"Previous Improvements: {enhanced_context.improvements}")
            logger.info(f"Previous Review Result: {enhanced_context.previous_review_result}")
            logger.info(f"Failure Patterns: {enhanced_context.failure_patterns}")
            logger.info(f"Failed Actions: {enhanced_context.failed_actions}")
            logger.info(f"Successful Actions: {enhanced_context.successful_actions}")
            logger.debug(f"Previous Improvements: {enhanced_context.improvements}")
            logger.debug(f"Previous Review Result: {enhanced_context.previous_review_result}")
            logger.debug(f"Failure Patterns: {enhanced_context.failure_patterns}")
            logger.debug(f"Failed Actions: {enhanced_context.failed_actions}")
            logger.debug(f"Successful Actions: {enhanced_context.successful_actions}")
            logger.info("=== END RETRY CONTEXT ===")

        available_docs = self.service.getAvailableDocuments(workflow)
        available_connections = self.service.getConnectionReferenceList()

        # Log available resources for debugging
        logger.info("=== AVAILABLE RESOURCES FOR ACTION GENERATION ===")
        logger.info(f"Available Documents: {available_docs}")
        # Note: available_docs is now a string description, not a list
        logger.info(f"Available Connections: {len(available_connections) if available_connections else 0}")
        if available_connections:
            for i, conn in enumerate(available_connections[:5]): # Show first 5
                logger.info(f" Conn {i+1}: {conn}")
            if len(available_connections) > 5:
                logger.info(f" ... and {len(available_connections) - 5} more connections")
        logger.info("=== END AVAILABLE RESOURCES ===")
        # Log that we're starting action generation
        logger.info("=== STARTING ACTION GENERATION ===")

        # Create proper context object for action definition
        if enhanced_context and isinstance(enhanced_context, TaskContext):
@@ -326,8 +315,8 @@ class HandlingTasks:
                task_step=enhanced_context.task_step,
                workflow=enhanced_context.workflow,
                workflow_id=enhanced_context.workflow_id,
                available_documents=enhanced_context.available_documents or available_docs,
                available_connections=enhanced_context.available_connections or available_connections,
                available_documents=enhanced_context.available_documents,
                available_connections=enhanced_context.available_connections,
                previous_results=enhanced_context.previous_results or previous_results or [],
                previous_handover=enhanced_context.previous_handover,
                improvements=enhanced_context.improvements or [],
@@ -346,8 +335,8 @@ class HandlingTasks:
                task_step=task_step,
                workflow=workflow,
                workflow_id=workflow.id,
                available_documents=available_docs,
                available_connections=available_connections,
                available_documents=None,
                available_connections=None,
                previous_results=previous_results or [],
                previous_handover=None,
                improvements=[],
@@ -364,30 +353,22 @@ class HandlingTasks:
        # Check workflow status before calling AI service
        self._checkWorkflowStopped()

        # Log the final action context being sent to AI
        logger.info("=== FINAL ACTION CONTEXT FOR AI ===")
        logger.info(f"Task Step ID: {action_context.task_step.id if action_context.task_step else 'None'}")
        logger.info(f"Task Step Objective: {action_context.task_step.objective if action_context.task_step else 'None'}")
        logger.info(f"Workflow ID: {action_context.workflow_id}")
        logger.info(f"Available Documents: {action_context.available_documents or 'No documents available'}")
        logger.info(f"Available Connections Count: {len(action_context.available_connections) if action_context.available_connections else 0}")
        logger.info(f"Previous Results Count: {len(action_context.previous_results) if action_context.previous_results else 0}")
        logger.info(f"Retry Count: {action_context.retry_count}")
        logger.info(f"Is Regeneration: {action_context.is_regeneration}")
        logger.info("=== END ACTION CONTEXT ===")

        # Generate the action definition prompt
        action_prompt = await createActionDefinitionPrompt(action_context, self.service)
        # Trace action planning prompt
        self.service.writeTraceLog("Action Plan Prompt", action_prompt)

        prompt = await self.service.callAiTextAdvanced(action_prompt)

        # Check if AI response is valid
        if not prompt:
            raise ValueError("AI service returned no response")

        # Log the full AI response for debugging
        logger.debug("=== FULL AI RESPONSE ===")
        logger.debug(prompt)
        logger.debug("=== END AI RESPONSE ===")
        # Log action response received
        logger.info("=== ACTION PLAN AI RESPONSE RECEIVED ===")
        logger.info(f"Response length: {len(prompt) if prompt else 0}")
        # Trace action planning response
        self.service.writeTraceLog("Action Plan Response", prompt)

        # Inline parseActionResponse logic here
        json_start = prompt.find('{')
@@ -875,23 +856,21 @@ class HandlingTasks:
        # Use promptFactory for review prompt
        prompt = createResultReviewPrompt(review_context, self.service)

        # Log the full result review prompt being sent to AI for debugging
        # Log result review prompt sent to AI
        logger.info("=== RESULT REVIEW PROMPT SENT TO AI ===")
        logger.info(f"Task: {task_step.objective}")
        logger.info(f"Action Results Count: {len(review_context.action_results) if review_context.action_results else 0}")
        logger.info(f"Task Actions Count: {len(review_context.task_actions) if review_context.task_actions else 0}")
        logger.info("=== FULL RESULT REVIEW PROMPT ===")
        logger.info(prompt)
        logger.info("=== END RESULT REVIEW PROMPT ===")
        # Trace result review prompt
        self.service.writeTraceLog("Result Review Prompt", prompt)

        response = await self.service.callAiTextAdvanced(prompt)

        # Log the full AI response for result review
        # Log result review response received
        logger.info("=== RESULT REVIEW AI RESPONSE RECEIVED ===")
        logger.info(f"Response length: {len(response) if response else 0}")
        logger.debug("=== FULL RESULT REVIEW AI RESPONSE ===")
        logger.debug(response)
        logger.debug("=== END RESULT REVIEW AI RESPONSE ===")
        # Trace result review response
        self.service.writeTraceLog("Result Review Response", response)

        # Inline parseReviewResponse logic here
        json_start = response.find('{')
@@ -1095,6 +1074,17 @@ class HandlingTasks:
            )
            result_label = action.execResultLabel

            # Trace action result (without document data)
            action_result_trace = {
                "method": action.execMethod,
                "action": action.execAction,
                "success": result.success,
                "error": result.error,
                "resultLabel": result_label,
                "documentsCount": len(result.documents) if result.documents else 0
            }
            self.service.writeTraceLog("Action Result", action_result_trace)

            # Process documents from the action result
            created_documents = []
            if result.success:
@@ -3,14 +3,68 @@

import json
import logging
from typing import Any, Dict
from typing import Any, Dict, List
from modules.interfaces.interfaceChatModel import TaskContext, ReviewContext
from modules.chat.documents.documentUtility import getFileExtension

# Set up logger
logger = logging.getLogger(__name__)

# Prompt creation helpers extracted from managerChat.py

def _getAvailableDocuments(workflow) -> str:
    """
    Get simple description of available documents for task planning.

    Args:
        workflow: ChatWorkflow object

    Returns:
        str: Simple description of document availability
    """
    total_documents = 0
    document_types = set()

    for message in workflow.messages:
        if message.documents:
            total_documents += len(message.documents)
            for doc in message.documents:
                try:
                    file_extension = getFileExtension(doc.fileName)
                    if file_extension:
                        document_types.add(file_extension.upper())
                except:
                    pass

    if total_documents == 0:
        return "No documents available"
    elif len(document_types) == 0:
        return f"{total_documents} document(s) available"
    else:
        types_str = ", ".join(sorted(document_types))
        return f"{total_documents} document(s) available ({types_str} files)"

def _getConnectionReferenceList(service) -> List[str]:
    """Get list of all UserConnection objects as references with enhanced state information"""
    connections = []
    # Get user connections through AppObjects interface
    user_connections = service.interfaceApp.getUserConnections(service.user.id)

    refreshed_count = 0
    for conn in user_connections:
        # Get enhanced connection reference with state information
        enhanced_ref = service.getConnectionReferenceFromUserConnection(conn)
        connections.append(enhanced_ref)

        # Count refreshed tokens
        if "refreshed" in enhanced_ref:
            refreshed_count += 1

    # Sort by connection reference
    if refreshed_count > 0:
        logger.info(f"Refreshed {refreshed_count} connection tokens while building action planning prompt")
    return sorted(connections)

def _getPreviousRoundContext(service, workflow) -> str:
    """Get context from previous workflow rounds to help understand follow-up prompts"""
    try:
@@ -98,8 +152,8 @@ def createTaskPlanningPrompt(context: TaskContext, service) -> str:
    # Extract user request from context - use Pydantic model directly
    user_request = context.task_step.objective if context.task_step else 'No request specified'

    # Extract available documents from context - use Pydantic model directly
    available_documents = context.available_documents or "No documents available"
    # Get available documents using generic function
    available_documents = _getAvailableDocuments(context.workflow) if context.workflow else "No documents available"

    # Get previous workflow round context for better understanding of follow-up prompts
    previous_round_context = _getPreviousRoundContext(service, context.workflow)
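To make the new helper's output concrete, here is a small illustrative check (not from the commit): with a made-up workflow carrying one PDF and two XLSX attachments, _getAvailableDocuments returns a one-line summary rather than a document list. The SimpleNamespace stand-ins and file names are hypothetical.

from types import SimpleNamespace

# Hypothetical stand-ins for workflow/message/document objects, for illustration only.
doc = lambda name: SimpleNamespace(fileName=name)
workflow = SimpleNamespace(messages=[
    SimpleNamespace(documents=[doc("report.pdf")]),
    SimpleNamespace(documents=[doc("q1.xlsx"), doc("q2.xlsx")]),
])

print(_getAvailableDocuments(workflow))
# Assuming getFileExtension returns "pdf"/"xlsx", this prints:
# 3 document(s) available (PDF, XLSX files)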
@@ -226,7 +280,9 @@ async def createActionDefinitionPrompt(context: TaskContext, service) -> str:
    # Get enhanced document context using the new method
    available_documents_str = service.getEnhancedDocumentContext()

    connRefs = service.getConnectionReferenceList()
    # Get available documents and connections using generic functions
    available_docs_summary = _getAvailableDocuments(context.workflow)
    connRefs = _getConnectionReferenceList(service)

    # Create a structured JSON format for better AI parsing
    # This replaces the old hard-to-read format with a clean JSON structure
@@ -603,7 +659,8 @@ IMPORTANT NOTES:
- Always include a user-friendly userMessage for each action in the user's language ({user_language}).
- The examples above show German user messages as reference - adapt the language to match the USER LANGUAGE specified above."""

    logging.debug(f"[ACTION PLAN PROMPT] Enhanced Document Context:\n{available_documents_str}\nUser Connections Section:\n{available_connections_str}\nAvailable Methods (detailed):\n{available_methods_str}")
    # Removed sensitive data from debug logging
    logging.debug(f"[ACTION PLAN PROMPT] Document context and methods prepared")

    return prompt
@@ -2,9 +2,8 @@ import logging
from typing import Dict, Any, List
from modules.interfaces.interfaceAppModel import User
from modules.interfaces.interfaceChatModel import ChatWorkflow, UserInputRequest, TaskStep, TaskAction, ActionResult, ReviewResult, TaskPlan, WorkflowResult, TaskContext
from modules.chat.serviceCenter import ServiceCenter
from modules.interfaces.interfaceChatObjects import ChatObjects
from .handling.handlingTasks import HandlingTasks, WorkflowStoppedException
from modules.chat.handling.handlingTasks import HandlingTasks, WorkflowStoppedException

logger = logging.getLogger(__name__)

@@ -16,21 +15,19 @@ class ChatManager:
    def __init__(self, currentUser: User, chatInterface: ChatObjects):
        self.currentUser = currentUser
        self.chatInterface = chatInterface
        self.service: ServiceCenter = None
        self.workflow: ChatWorkflow = None
        self.handlingTasks: HandlingTasks = None

    async def initialize(self, workflow: ChatWorkflow) -> None:
        """Initialize chat manager with workflow"""
        self.workflow = workflow
        self.service = ServiceCenter(self.currentUser, self.workflow)
        self.handlingTasks = HandlingTasks(self.chatInterface, self.service, self.workflow)
        self.handlingTasks = HandlingTasks(self.chatInterface, self.currentUser, self.workflow)


    async def executeUnifiedWorkflow(self, userInput: UserInputRequest, workflow: ChatWorkflow) -> WorkflowResult:
        """Unified Workflow Execution"""
        try:
            logger.info(f"Starting unified workflow execution for workflow {workflow.id}")
            logger.debug(f"User request: {userInput.prompt}")

            # Phase 1: High-Level Task Planning
            logger.info("Phase 1: Generating task plan")
@@ -54,8 +51,8 @@ class ChatManager:
                task_step=task_step,
                workflow=workflow,
                workflow_id=workflow.id,
                available_documents=self.service.getAvailableDocuments(workflow),
                available_connections=self.service.getConnectionReferenceList(),
                available_documents=None,
                available_connections=None,
                previous_results=previous_results,
                previous_handover=None,
                improvements=[],
@@ -15,7 +15,7 @@ from modules.interfaces.interfaceComponentObjects import getInterface as getComp
from modules.interfaces.interfaceAppObjects import getInterface as getAppObjects
from modules.chat.documents.documentExtraction import DocumentExtraction
from modules.chat.documents.documentUtility import getFileExtension, getMimeTypeFromExtension, detectContentTypeFromData
from modules.chat.methodBase import MethodBase
from modules.methods.methodBase import MethodBase
from modules.shared.timezoneUtils import get_utc_timestamp
import uuid

@@ -586,37 +586,6 @@ Please provide a comprehensive summary of this conversation."""

    # ===== Functions for Prompts + Actions: Connection References generation and resolution =====

    def getConnectionReferenceList(self) -> List[str]:
        """Get list of all UserConnection objects as references with enhanced state information"""
        connections = []
        # Get user connections through AppObjects interface
        logger.debug(f"getConnectionReferenceList: Service center user ID: {self.user.id}")
        logger.debug(f"getConnectionReferenceList: Service center user type: {type(self.user)}")
        logger.debug(f"getConnectionReferenceList: Service center user object: {self.user}")

        user_connections = self.interfaceApp.getUserConnections(self.user.id)
        logger.debug(f"getConnectionReferenceList: User ID: {self.user.id}")
        logger.debug(f"getConnectionReferenceList: Raw user connections: {user_connections}")
        logger.debug(f"getConnectionReferenceList: User connections type: {type(user_connections)}")
        logger.debug(f"getConnectionReferenceList: User connections length: {len(user_connections) if user_connections else 0}")

        refreshed_count = 0
        for conn in user_connections:
            # Get enhanced connection reference with state information
            enhanced_ref = self.getConnectionReferenceFromUserConnection(conn)
            logger.debug(f"getConnectionReferenceList: Enhanced ref for connection {conn.id}: {enhanced_ref}")
            connections.append(enhanced_ref)

            # Count refreshed tokens
            if "refreshed" in enhanced_ref:
                refreshed_count += 1

        # Sort by connection reference
        logger.debug(f"getConnectionReferenceList: Final connections list: {connections}")
        if refreshed_count > 0:
            logger.info(f"Refreshed {refreshed_count} connection tokens while building action planning prompt")
        return sorted(connections)

    def getConnectionReferenceFromUserConnection(self, connection: UserConnection) -> str:
        """Get connection reference from UserConnection with enhanced state information"""
        # Get token information to check if it's expired
@@ -692,12 +661,12 @@ Please provide a comprehensive summary of this conversation."""
        # Try advanced AI first, with retries
        for attempt in range(max_retries):
            try:
                prompt_size = self.calculateObjectSize(prompt)
                prompt_size = self._calculateObjectSize(prompt)
                if context:
                    prompt_size += self.calculateObjectSize(context)
                    prompt_size += self._calculateObjectSize(context)
                response = await self.interfaceAiCalls.callAiTextAdvanced(prompt, context)
                response_size = self.calculateObjectSize(response)
                self.updateWorkflowStats(eventLabel="aicall.anthropic.text", bytesSent=prompt_size, bytesReceived=response_size)
                response_size = self._calculateObjectSize(response)
                self._updateWorkflowStats(eventLabel="aicall.anthropic.text", bytesSent=prompt_size, bytesReceived=response_size)
                return response
            except Exception as e:
                last_error = e

@@ -726,12 +695,12 @@ Please provide a comprehensive summary of this conversation."""
        last_error = None
        for attempt in range(max_retries):
            try:
                prompt_size = self.calculateObjectSize(prompt)
                prompt_size = self._calculateObjectSize(prompt)
                if context:
                    prompt_size += self.calculateObjectSize(context)
                    prompt_size += self._calculateObjectSize(context)
                response = await self.interfaceAiCalls.callAiTextBasic(prompt, context)
                response_size = self.calculateObjectSize(response)
                self.updateWorkflowStats(eventLabel="aicall.openai.text", bytesSent=prompt_size, bytesReceived=response_size)
                response_size = self._calculateObjectSize(response)
                self._updateWorkflowStats(eventLabel="aicall.openai.text", bytesSent=prompt_size, bytesReceived=response_size)
                return response
            except Exception as e:
                last_error = e
@@ -745,34 +714,34 @@ Please provide a comprehensive summary of this conversation."""
    async def callAiImageBasic(self, prompt: str, imageData: str, mimeType: str) -> str:
        """Basic image processing using OpenAI"""
        # Calculate prompt size for stats
        prompt_size = self.calculateObjectSize(prompt)
        prompt_size += self.calculateObjectSize(imageData)
        prompt_size = self._calculateObjectSize(prompt)
        prompt_size += self._calculateObjectSize(imageData)

        # Call AI
        response = await self.interfaceAiCalls.callAiImageBasic(prompt, imageData, mimeType)

        # Calculate response size for stats
        response_size = self.calculateObjectSize(response)
        response_size = self._calculateObjectSize(response)

        # Update stats
        self.updateWorkflowStats(eventLabel="aicall.openai.image", bytesSent=prompt_size, bytesReceived=response_size)
        self._updateWorkflowStats(eventLabel="aicall.openai.image", bytesSent=prompt_size, bytesReceived=response_size)

        return response

    async def callAiImageAdvanced(self, prompt: str, imageData: str, mimeType: str) -> str:
        """Advanced image processing using Anthropic"""
        # Calculate prompt size for stats
        prompt_size = self.calculateObjectSize(prompt)
        prompt_size += self.calculateObjectSize(imageData)
        prompt_size = self._calculateObjectSize(prompt)
        prompt_size += self._calculateObjectSize(imageData)

        # Call AI
        response = await self.interfaceAiCalls.callAiImageAdvanced(prompt, imageData, mimeType)

        # Calculate response size for stats
        response_size = self.calculateObjectSize(response)
        response_size = self._calculateObjectSize(response)

        # Update stats
        self.updateWorkflowStats(eventLabel="aicall.anthropic.image", bytesSent=prompt_size, bytesReceived=response_size)
        self._updateWorkflowStats(eventLabel="aicall.anthropic.image", bytesSent=prompt_size, bytesReceived=response_size)

        return response

@@ -957,9 +926,9 @@ Please provide a comprehensive summary of this conversation."""
|
|||
|
||||
return document
|
||||
|
||||
# ===== Internal public helper functions =====
|
||||
# ===== Internal helper functions =====
|
||||
|
||||
def updateWorkflowStats(self, eventLabel: str = None, bytesSent: int = 0, bytesReceived: int = 0, tokenCount: int = 0) -> None:
|
||||
def _updateWorkflowStats(self, eventLabel: str = None, bytesSent: int = 0, bytesReceived: int = 0, tokenCount: int = 0) -> None:
|
||||
"""
|
||||
Centralized function to update workflow statistics in database and running workflow.
|
||||
|
||||
|
|
@@ -983,7 +952,7 @@ Please provide a comprehensive summary of this conversation."""
|
|||
except Exception as e:
|
||||
logger.error(f"Error updating workflow stats: {str(e)}")
|
||||
|
||||
def calculateObjectSize(self, obj: Any) -> int:
|
||||
def _calculateObjectSize(self, obj: Any) -> int:
|
||||
"""
|
||||
Calculate the size of an object in bytes.
|
||||
|
||||
|
|
@@ -1008,38 +977,6 @@ Please provide a comprehensive summary of this conversation."""
|
|||
logger.error(f"Error calculating object size: {str(e)}")
|
||||
return 0
|
||||
|
||||
def getAvailableDocuments(self, workflow) -> str:
|
||||
"""
|
||||
Get simple description of available documents for task planning.
|
||||
|
||||
Args:
|
||||
workflow: ChatWorkflow object
|
||||
|
||||
Returns:
|
||||
str: Simple description of document availability
|
||||
"""
|
||||
total_documents = 0
|
||||
document_types = set()
|
||||
|
||||
for message in workflow.messages:
|
||||
if message.documents:
|
||||
total_documents += len(message.documents)
|
||||
for doc in message.documents:
|
||||
try:
|
||||
file_extension = getFileExtension(doc.fileName)
|
||||
if file_extension:
|
||||
document_types.add(file_extension.upper())
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if total_documents == 0:
|
||||
return "No documents available"
|
||||
elif len(document_types) == 0:
|
||||
return f"{total_documents} document(s) available"
|
||||
else:
|
||||
types_str = ", ".join(sorted(document_types))
|
||||
return f"{total_documents} document(s) available ({types_str} files)"
|
||||
|
||||
# ===== Functions for Manager: Execution Tools =====
|
||||
|
||||
async def executeAction(self, methodName: str, actionName: str, parameters: Dict[str, Any]) -> ActionResult:
|
||||
|
|
@@ -1090,6 +1027,85 @@ Please provide a comprehensive summary of this conversation."""
|
|||
"""Set user language for the service center"""
|
||||
self.user.language = language
|
||||
|
||||
def writeTraceLog(self, contextText: str, data: Any) -> None:
|
||||
"""Write trace data to configured trace file if in debug mode"""
|
||||
try:
|
||||
import logging
|
||||
import os
|
||||
from datetime import datetime, UTC
|
||||
from modules.shared.configuration import APP_CONFIG
|
||||
|
||||
# Only write if logger is in debug mode
|
||||
if logger.level > logging.DEBUG:
|
||||
return
|
||||
|
||||
# Get log directory from configuration
|
||||
logDir = APP_CONFIG.get("APP_LOGGING_LOG_DIR", "./")
|
||||
if not os.path.isabs(logDir):
|
||||
# If relative path, make it relative to the gateway directory
|
||||
gatewayDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
logDir = os.path.join(gatewayDir, logDir)
|
||||
|
||||
# Ensure log directory exists
|
||||
os.makedirs(logDir, exist_ok=True)
|
||||
|
||||
# Create trace file path
|
||||
trace_file = os.path.join(logDir, "log_trace.log")
|
||||
|
||||
# Format the trace entry
|
||||
timestamp = datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
|
||||
trace_entry = f"[{timestamp}] {contextText}\n"
|
||||
|
||||
# Add data if provided
|
||||
if data is not None:
|
||||
if isinstance(data, (dict, list)):
|
||||
import json
|
||||
trace_entry += f"Data: {json.dumps(data, indent=2, default=str)}\n"
|
||||
else:
|
||||
trace_entry += f"Data: {str(data)}\n"
|
||||
|
||||
trace_entry += "-" * 80 + "\n\n"
|
||||
|
||||
# Write to trace file
|
||||
with open(trace_file, "a", encoding="utf-8") as f:
|
||||
f.write(trace_entry)
|
||||
|
||||
except Exception as e:
|
||||
# Don't log trace errors to avoid recursion
|
||||
pass
|
||||
|
||||
def clearTraceLog(self) -> None:
|
||||
"""Clear the trace log file"""
|
||||
try:
|
||||
import logging
|
||||
import os
|
||||
from modules.shared.configuration import APP_CONFIG
|
||||
|
||||
# Get log directory from configuration
|
||||
logDir = APP_CONFIG.get("APP_LOGGING_LOG_DIR", "./")
|
||||
if not os.path.isabs(logDir):
|
||||
# If relative path, make it relative to the gateway directory
|
||||
gatewayDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
logDir = os.path.join(gatewayDir, logDir)
|
||||
|
||||
# Create trace file path
|
||||
trace_file = os.path.join(logDir, "log_trace.log")
|
||||
|
||||
# Only clear if logger is in debug mode
|
||||
if logger.level > logging.DEBUG:
|
||||
# Delete file if not in debug mode
|
||||
if os.path.exists(trace_file):
|
||||
os.remove(trace_file)
|
||||
return
|
||||
|
||||
# Create empty file if in debug mode
|
||||
with open(trace_file, "w", encoding="utf-8") as f:
|
||||
f.write("")
|
||||
|
||||
except Exception as e:
|
||||
# Don't log trace errors to avoid recursion
|
||||
pass
|
||||
|
||||
# ===== Functions for Manager: Workflow Tools =====
|
||||
|
||||
def setWorkflowContext(self, round_number: int = None, task_number: int = None, action_number: int = None):
|
||||
|
|
|
|||
|
|
@@ -29,10 +29,10 @@ class ConnectorGoogleSpeech:
|
|||
"""
|
||||
try:
|
||||
# Get JSON key from config.ini
|
||||
api_key = APP_CONFIG.get("Connector_GoogleSpeech_API_KEY")
|
||||
api_key = APP_CONFIG.get("Connector_GoogleSpeech_API_KEY_SECRET")
|
||||
|
||||
if not api_key or api_key == "YOUR_GOOGLE_SERVICE_ACCOUNT_JSON_KEY_HERE":
|
||||
raise ValueError("Google Speech API key not configured. Please set Connector_GoogleSpeech_API_KEY in config.ini with the full service account JSON key")
|
||||
raise ValueError("Google Speech API key not configured. Please set Connector_GoogleSpeech_API_KEY_SECRET in config.ini with the full service account JSON key")
|
||||
|
||||
# Parse the JSON key and set up authentication
|
||||
try:
|
||||
|
|
|
|||
|
|
@@ -69,9 +69,9 @@ class ConnectorTavily(WebSearchBase, WebCrawlBase, WebScrapeBase):
|
|||
|
||||
@classmethod
|
||||
async def create(cls):
|
||||
api_key = APP_CONFIG.get("Connector_WebTavily_API_KEY")
|
||||
api_key = APP_CONFIG.get("Connector_WebTavily_API_KEY_SECRET")
|
||||
if not api_key:
|
||||
raise ValueError("Tavily API key not configured. Please set Connector_WebTavily_API_KEY in config.ini")
|
||||
raise ValueError("Tavily API key not configured. Please set Connector_WebTavily_API_KEY_SECRET in config.ini")
|
||||
return cls(client=AsyncTavilyClient(api_key=api_key))
|
||||
|
||||
async def search_urls(self, request: WebSearchRequest) -> WebSearchActionResult:
|
||||
|
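For reference, both renamed secret keys above are read from config.ini via APP_CONFIG. A minimal sketch of the corresponding entries, assuming a flat key/value layout; the values (and any section headers the real file may use) are placeholders, not part of this change:

```ini
; Hypothetical excerpt of config.ini after this commit's key renaming.
; Values are placeholders; the actual layout of config.ini may differ.
Connector_WebTavily_API_KEY_SECRET = tvly-xxxxxxxxxxxxxxxx
; The Google Speech connector expects the full service account JSON as the value:
Connector_GoogleSpeech_API_KEY_SECRET = {"type": "service_account", "project_id": "my-project"}
```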
|
|
|||
|
|
@@ -30,7 +30,7 @@ class WorkflowManager:
|
|||
await self.chatManager.initialize(workflow)
|
||||
|
||||
# Set user language
|
||||
self.chatManager.service.setUserLanguage(userInput.userLanguage)
|
||||
self.chatManager.handlingTasks.service.setUserLanguage(userInput.userLanguage)
|
||||
|
||||
# Send first message
|
||||
message = await self._sendFirstMessage(userInput, workflow)
|
||||
|
|
@@ -170,10 +170,13 @@ class WorkflowManager:
|
|||
if message:
|
||||
workflow.messages.append(message)
|
||||
|
||||
# Clear trace log for new workflow session
|
||||
self.chatManager.handlingTasks.service.clearTraceLog()
|
||||
|
||||
# Add documents if any, now with messageId
|
||||
if userInput.listFileId:
|
||||
# Process file IDs and add to message data
|
||||
documents = await self.chatManager.service.processFileIds(userInput.listFileId, message.id)
|
||||
documents = await self.chatManager.handlingTasks.service.processFileIds(userInput.listFileId, message.id)
|
||||
message.documents = documents
|
||||
# Update the message with documents in database
|
||||
self.chatInterface.updateMessage(message.id, {"documents": [doc.to_dict() for doc in documents]})
|
||||
|
|
@@ -1155,7 +1155,7 @@ class ChatObjects:
|
|||
# Remove the 'Workflow started' log entry
|
||||
|
||||
# Start workflow processing
|
||||
from modules.services.serviceValueonChat import WorkflowManager
|
||||
from modules.features.featureChatPlayground import WorkflowManager
|
||||
workflowManager = WorkflowManager(self, currentUser)
|
||||
|
||||
# Start the workflow processing asynchronously
|
||||
|
|
|
|||
|
|
@@ -7,7 +7,7 @@ import logging
|
|||
from typing import Dict, Any, List, Optional
|
||||
from datetime import datetime, UTC
|
||||
|
||||
from modules.chat.methodBase import MethodBase, action
|
||||
from modules.methods.methodBase import MethodBase, action
|
||||
from modules.interfaces.interfaceChatModel import ActionResult
|
||||
from modules.shared.timezoneUtils import get_utc_timestamp
|
||||
|
||||
|
|
|
|||
|
|
@@ -9,7 +9,7 @@ import re
|
|||
from typing import Dict, Any, List, Optional
|
||||
from datetime import datetime, UTC
|
||||
|
||||
from modules.chat.methodBase import MethodBase, action
|
||||
from modules.methods.methodBase import MethodBase, action
|
||||
from modules.interfaces.interfaceChatModel import ActionResult
|
||||
from modules.shared.timezoneUtils import get_utc_timestamp
|
||||
|
||||
|
|
|
|||
|
|
@@ -81,7 +81,7 @@ from datetime import datetime, UTC
|
|||
import json
|
||||
import uuid
|
||||
|
||||
from modules.chat.methodBase import MethodBase, action
|
||||
from modules.methods.methodBase import MethodBase, action
|
||||
from modules.interfaces.interfaceChatModel import ActionResult
|
||||
from modules.interfaces.interfaceAppModel import ConnectionStatus
|
||||
from modules.shared.timezoneUtils import get_utc_timestamp
|
||||
|
|
|
|||
|
|
@@ -13,7 +13,7 @@ from urllib.parse import urlparse
|
|||
import aiohttp
|
||||
import asyncio
|
||||
|
||||
from modules.chat.methodBase import MethodBase, action
|
||||
from modules.methods.methodBase import MethodBase, action
|
||||
from modules.interfaces.interfaceChatModel import ActionResult
|
||||
from modules.shared.timezoneUtils import get_utc_timestamp
|
||||
|
||||
|
|
|
|||
|
|
@@ -2,7 +2,7 @@ import logging
|
|||
import csv
|
||||
import io
|
||||
from typing import Any, Dict
|
||||
from modules.chat.methodBase import MethodBase, action
|
||||
from modules.methods.methodBase import MethodBase, action
|
||||
from modules.interfaces.interfaceChatModel import ActionResult, ActionDocument
|
||||
from modules.interfaces.interfaceWebObjects import WebInterface
|
||||
from modules.interfaces.interfaceWebModel import (
|
||||
|
|
|
|||
|
|
@@ -4,481 +4,109 @@ Supports TXT, JSON, CSV, Excel and Word files
|
|||
Multilingual: DE, EN, FR, IT
|
||||
"""
|
||||
|
||||
import re
|
||||
import json
|
||||
import pandas as pd
|
||||
import docx
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Tuple, Any, Union, Optional
|
||||
from dataclasses import dataclass
|
||||
import uuid
|
||||
import logging
|
||||
import traceback
|
||||
import csv
|
||||
from datetime import datetime
|
||||
import xml.etree.ElementTree as ET
|
||||
import os
|
||||
import random
|
||||
from io import StringIO
|
||||
from modules.neutralizer.patterns import Pattern, HeaderPatterns, DataPatterns, get_pattern_for_header, find_patterns_in_text, TextTablePatterns
|
||||
import base64
|
||||
from typing import Dict, List, Any
|
||||
|
||||
# Import all necessary classes and functions
|
||||
from modules.neutralizer.subProcessCommon import ProcessResult, CommonUtils
|
||||
from modules.neutralizer.subProcessText import TextProcessor, PlainText
|
||||
from modules.neutralizer.subProcessList import ListProcessor, TableData
|
||||
from modules.neutralizer.subProcessBinary import BinaryProcessor, BinaryData
|
||||
from modules.neutralizer.subParseString import StringParser
|
||||
from modules.neutralizer.subPatterns import Pattern, HeaderPatterns, DataPatterns, TextTablePatterns
|
||||
|
||||
# Configure logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@dataclass
|
||||
class TableData:
|
||||
"""Repräsentiert Tabellendaten"""
|
||||
headers: List[str]
|
||||
rows: List[List[str]]
|
||||
source_type: str # 'csv', 'json', 'xml', 'text_table'
|
||||
|
||||
@dataclass
|
||||
class PlainText:
|
||||
"""Repräsentiert normalen Text"""
|
||||
content: str
|
||||
source_type: str # 'txt', 'docx', 'text_plain'
|
||||
|
||||
@dataclass
|
||||
class ProcessResult:
|
||||
"""Result of content processing"""
|
||||
data: Any
|
||||
mapping: Dict[str, str]
|
||||
replaced_fields: List[str]
|
||||
processed_info: Dict[str, Any] # Additional processing information
|
||||
# Export all classes and functions for external use
|
||||
__all__ = [
|
||||
'DataAnonymizer',
|
||||
'ProcessResult',
|
||||
'CommonUtils',
|
||||
'TextProcessor',
|
||||
'PlainText',
|
||||
'ListProcessor',
|
||||
'TableData',
|
||||
'BinaryProcessor',
|
||||
'BinaryData',
|
||||
'StringParser',
|
||||
'Pattern',
|
||||
'HeaderPatterns',
|
||||
'DataPatterns',
|
||||
'TextTablePatterns'
|
||||
]
|
||||
|
||||
class DataAnonymizer:
|
||||
"""Hauptklasse für die Datenanonymisierung"""
|
||||
|
||||
def __init__(self, names_to_parse: List[str] = None):
|
||||
"""Initialize the anonymizer with patterns and custom names
|
||||
"""Initialize the anonymizer with specialized processors
|
||||
|
||||
Args:
|
||||
names_to_parse: List of names to parse and replace (case-insensitive)
|
||||
"""
|
||||
self.header_patterns = HeaderPatterns.patterns
|
||||
self.data_patterns = DataPatterns.patterns
|
||||
self.names_to_parse = names_to_parse or []
|
||||
self.replaced_fields = set()
|
||||
self.mapping = {}
|
||||
self.processing_info = []
|
||||
|
||||
def _normalize_whitespace(self, text: str) -> str:
|
||||
"""Normalize whitespace in text"""
|
||||
text = re.sub(r'\s+', ' ', text)
|
||||
text = text.replace('\r\n', '\n').replace('\r', '\n')
|
||||
return text.strip()
|
||||
# Initialize specialized processors
|
||||
self.text_processor = TextProcessor(names_to_parse)
|
||||
self.list_processor = ListProcessor(names_to_parse)
|
||||
self.binary_processor = BinaryProcessor()
|
||||
|
||||
# Common utilities
|
||||
self.common_utils = CommonUtils()
|
||||
|
||||
def _is_table_line(self, line: str) -> bool:
|
||||
"""Check if a line represents a table row"""
|
||||
return bool(re.match(r'^\s*[^:]+:\s*[^:]+$', line) or
|
||||
re.match(r'^\s*[^\t]+\t[^\t]+$', line))
|
||||
|
||||
def _extract_tables_from_text(self, content: str) -> Tuple[List[TableData], List[PlainText]]:
|
||||
"""
|
||||
Extract tables and plain text from content
|
||||
|
||||
Args:
|
||||
content: Content to process
|
||||
|
||||
Returns:
|
||||
Tuple of (list of tables, list of plain text sections)
|
||||
"""
|
||||
tables = []
|
||||
plain_texts = []
|
||||
|
||||
# Process the entire content as plain text
|
||||
plain_texts.append(PlainText(content=content, source_type='text_plain'))
|
||||
|
||||
return tables, plain_texts
|
||||
|
||||
def _anonymize_table(self, table: TableData) -> TableData:
|
||||
"""Anonymize table data"""
|
||||
try:
|
||||
anonymized_table = TableData(
|
||||
headers=table.headers.copy(),
|
||||
rows=[row.copy() for row in table.rows],
|
||||
source_type=table.source_type
|
||||
)
|
||||
|
||||
for i, header in enumerate(anonymized_table.headers):
|
||||
pattern = get_pattern_for_header(header, self.header_patterns)
|
||||
if pattern:
|
||||
for row in anonymized_table.rows:
|
||||
if row[i] is not None:
|
||||
original = str(row[i])
|
||||
if original not in self.mapping:
|
||||
self.mapping[original] = pattern.replacement_template.format(len(self.mapping) + 1)
|
||||
row[i] = self.mapping[original]
|
||||
|
||||
return anonymized_table
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error anonymizing table: {str(e)}")
|
||||
raise
|
||||
|
||||
def _anonymize_plain_text(self, text: PlainText) -> PlainText:
|
||||
"""Anonymize plain text content using simple search-and-replace approach"""
|
||||
try:
|
||||
current_text = text.content
|
||||
|
||||
# Step 1: Replace custom names first (simple regex search-and-replace)
|
||||
for name in self.names_to_parse:
|
||||
if not name.strip():
|
||||
continue
|
||||
|
||||
# Create case-insensitive regex pattern with word boundaries
|
||||
pattern = re.compile(r'\b' + re.escape(name.strip()) + r'\b', re.IGNORECASE)
|
||||
|
||||
# Find all matches for this name
|
||||
matches = list(pattern.finditer(current_text))
|
||||
|
||||
# Replace each match with a placeholder
|
||||
for match in reversed(matches): # Process from right to left to avoid position shifts
|
||||
matched_text = match.group()
|
||||
if matched_text not in self.mapping:
|
||||
# Generate a UUID for the placeholder
|
||||
import uuid
|
||||
placeholder_id = str(uuid.uuid4())
|
||||
self.mapping[matched_text] = f"[name.{placeholder_id}]"
|
||||
|
||||
replacement = self.mapping[matched_text]
|
||||
start, end = match.span()
|
||||
current_text = current_text[:start] + replacement + current_text[end:]
|
||||
|
||||
# Step 2: Replace pattern-based matches (emails, phones, etc.)
|
||||
# Use the same simple approach for patterns
|
||||
pattern_matches = find_patterns_in_text(current_text, self.data_patterns)
|
||||
|
||||
# Process pattern matches from right to left to avoid position shifts
|
||||
for pattern_name, matched_text, start, end in reversed(pattern_matches):
|
||||
# Skip if already a placeholder
|
||||
if re.match(r'\[[a-z]+\.[a-f0-9-]+\]', matched_text):
|
||||
continue
|
||||
|
||||
# Skip if contains placeholder characters
|
||||
if '[' in matched_text or ']' in matched_text:
|
||||
continue
|
||||
|
||||
if matched_text not in self.mapping:
|
||||
# Generate a UUID for the placeholder
|
||||
import uuid
|
||||
placeholder_id = str(uuid.uuid4())
|
||||
# Create placeholder in format [type.uuid]
|
||||
type_mapping = {
|
||||
'email': 'email',
|
||||
'phone': 'phone',
|
||||
'address': 'address',
|
||||
'id': 'id'
|
||||
}
|
||||
placeholder_type = type_mapping.get(pattern_name, 'data')
|
||||
self.mapping[matched_text] = f"[{placeholder_type}.{placeholder_id}]"
|
||||
|
||||
replacement = self.mapping[matched_text]
|
||||
current_text = current_text[:start] + replacement + current_text[end:]
|
||||
|
||||
return PlainText(content=current_text, source_type=text.source_type)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error anonymizing plain text: {str(e)}")
|
||||
raise
|
||||
|
||||
def _anonymize_json_value(self, value: Any, key: str = None) -> Any:
|
||||
"""
|
||||
Recursively anonymize JSON values based on their keys and content
|
||||
|
||||
Args:
|
||||
value: Value to anonymize
|
||||
key: Key name (if part of a key-value pair)
|
||||
|
||||
Returns:
|
||||
Anonymized value
|
||||
"""
|
||||
if isinstance(value, dict):
|
||||
return {k: self._anonymize_json_value(v, k) for k, v in value.items()}
|
||||
elif isinstance(value, list):
|
||||
return [self._anonymize_json_value(item) for item in value]
|
||||
elif isinstance(value, str):
|
||||
# Check if this is a key we should process
|
||||
if key:
|
||||
pattern = get_pattern_for_header(key, self.header_patterns)
|
||||
if pattern:
|
||||
if value not in self.mapping:
|
||||
# Generate a UUID for the placeholder
|
||||
import uuid
|
||||
placeholder_id = str(uuid.uuid4())
|
||||
# Create placeholder in format [type.uuid]
|
||||
type_mapping = {
|
||||
'email': 'email',
|
||||
'phone': 'phone',
|
||||
'name': 'name',
|
||||
'address': 'address',
|
||||
'id': 'id'
|
||||
}
|
||||
placeholder_type = type_mapping.get(pattern.name, 'data')
|
||||
self.mapping[value] = f"[{placeholder_type}.{placeholder_id}]"
|
||||
return self.mapping[value]
|
||||
|
||||
# Check if the value itself matches any patterns
|
||||
pattern_matches = find_patterns_in_text(value, self.data_patterns)
|
||||
custom_name_matches = self._find_custom_names(value)
|
||||
|
||||
if pattern_matches or custom_name_matches:
|
||||
# Use the first match's pattern or custom name
|
||||
if pattern_matches:
|
||||
pattern_name = pattern_matches[0][0]
|
||||
if value not in self.mapping:
|
||||
# Generate a UUID for the placeholder
|
||||
import uuid
|
||||
placeholder_id = str(uuid.uuid4())
|
||||
# Create placeholder in format [type.uuid]
|
||||
type_mapping = {
|
||||
'email': 'email',
|
||||
'phone': 'phone',
|
||||
'name': 'name',
|
||||
'address': 'address',
|
||||
'id': 'id'
|
||||
}
|
||||
placeholder_type = type_mapping.get(pattern_name, 'data')
|
||||
self.mapping[value] = f"[{placeholder_type}.{placeholder_id}]"
|
||||
elif custom_name_matches:
|
||||
if value not in self.mapping:
|
||||
# Generate a UUID for the placeholder
|
||||
import uuid
|
||||
placeholder_id = str(uuid.uuid4())
|
||||
self.mapping[value] = f"[name.{placeholder_id}]"
|
||||
return self.mapping[value]
|
||||
|
||||
return value
|
||||
else:
|
||||
return value
|
||||
|
||||
def _anonymize_xml_element(self, element: ET.Element, indent: str = '') -> str:
|
||||
"""
|
||||
Recursively process XML element and return formatted string
|
||||
|
||||
Args:
|
||||
element: XML element to process
|
||||
indent: Current indentation level
|
||||
|
||||
Returns:
|
||||
Formatted XML string
|
||||
"""
|
||||
# Process attributes
|
||||
processed_attrs = {}
|
||||
for attr_name, attr_value in element.attrib.items():
|
||||
# Check if attribute name matches any header patterns
|
||||
pattern = get_pattern_for_header(attr_name, self.header_patterns)
|
||||
if pattern:
|
||||
if attr_value not in self.mapping:
|
||||
# Generate a UUID for the placeholder
|
||||
import uuid
|
||||
placeholder_id = str(uuid.uuid4())
|
||||
# Create placeholder in format [type.uuid]
|
||||
type_mapping = {
|
||||
'email': 'email',
|
||||
'phone': 'phone',
|
||||
'name': 'name',
|
||||
'address': 'address',
|
||||
'id': 'id'
|
||||
}
|
||||
placeholder_type = type_mapping.get(pattern.name, 'data')
|
||||
self.mapping[attr_value] = f"[{placeholder_type}.{placeholder_id}]"
|
||||
processed_attrs[attr_name] = self.mapping[attr_value]
|
||||
else:
|
||||
# Check if attribute value matches any data patterns
|
||||
matches = find_patterns_in_text(attr_value, self.data_patterns)
|
||||
if matches:
|
||||
pattern_name = matches[0][0]
|
||||
pattern = next((p for p in self.data_patterns if p.name == pattern_name), None)
|
||||
if pattern:
|
||||
if attr_value not in self.mapping:
|
||||
# Generate a UUID for the placeholder
|
||||
import uuid
|
||||
placeholder_id = str(uuid.uuid4())
|
||||
# Create placeholder in format [type.uuid]
|
||||
type_mapping = {
|
||||
'email': 'email',
|
||||
'phone': 'phone',
|
||||
'name': 'name',
|
||||
'address': 'address',
|
||||
'id': 'id'
|
||||
}
|
||||
placeholder_type = type_mapping.get(pattern_name, 'data')
|
||||
self.mapping[attr_value] = f"[{placeholder_type}.{placeholder_id}]"
|
||||
processed_attrs[attr_name] = self.mapping[attr_value]
|
||||
else:
|
||||
processed_attrs[attr_name] = attr_value
|
||||
else:
|
||||
processed_attrs[attr_name] = attr_value
|
||||
|
||||
attrs = ' '.join(f'{k}="{v}"' for k, v in processed_attrs.items())
|
||||
attrs = f' {attrs}' if attrs else ''
|
||||
|
||||
# Process text content
|
||||
text = element.text.strip() if element.text and element.text.strip() else ''
|
||||
if text:
|
||||
# Check if text matches any patterns or custom names
|
||||
pattern_matches = find_patterns_in_text(text, self.data_patterns)
|
||||
custom_name_matches = self._find_custom_names(text)
|
||||
|
||||
if pattern_matches or custom_name_matches:
|
||||
if pattern_matches:
|
||||
pattern_name = pattern_matches[0][0]
|
||||
pattern = next((p for p in self.data_patterns if p.name == pattern_name), None)
|
||||
if pattern:
|
||||
if text not in self.mapping:
|
||||
# Generate a UUID for the placeholder
|
||||
import uuid
|
||||
placeholder_id = str(uuid.uuid4())
|
||||
# Create placeholder in format [type.uuid]
|
||||
type_mapping = {
|
||||
'email': 'email',
|
||||
'phone': 'phone',
|
||||
'name': 'name',
|
||||
'address': 'address',
|
||||
'id': 'id'
|
||||
}
|
||||
placeholder_type = type_mapping.get(pattern_name, 'data')
|
||||
self.mapping[text] = f"[{placeholder_type}.{placeholder_id}]"
|
||||
text = self.mapping[text]
|
||||
elif custom_name_matches:
|
||||
if text not in self.mapping:
|
||||
# Generate a UUID for the placeholder
|
||||
import uuid
|
||||
placeholder_id = str(uuid.uuid4())
|
||||
self.mapping[text] = f"[name.{placeholder_id}]"
|
||||
text = self.mapping[text]
|
||||
|
||||
# Process child elements
|
||||
children = []
|
||||
for child in element:
|
||||
child_str = self._anonymize_xml_element(child, indent + ' ')
|
||||
children.append(child_str)
|
||||
|
||||
# Build element string
|
||||
if not children and not text:
|
||||
return f"{indent}<{element.tag}{attrs}/>"
|
||||
elif not children:
|
||||
return f"{indent}<{element.tag}{attrs}>{text}</{element.tag}>"
|
||||
else:
|
||||
result = [f"{indent}<{element.tag}{attrs}>"]
|
||||
if text:
|
||||
result.append(f"{indent} {text}")
|
||||
result.extend(children)
|
||||
result.append(f"{indent}</{element.tag}>")
|
||||
return '\n'.join(result)
|
||||
|
||||
def process_content(self, content: str, content_type: str) -> ProcessResult:
|
||||
def process_content(self, content: str, content_type: str = None) -> ProcessResult:
|
||||
"""
|
||||
Process content and return anonymized data
|
||||
|
||||
Args:
|
||||
content: Content to process
|
||||
content_type: Type of content ('csv', 'json', 'xml', 'text')
|
||||
content_type: Type of content ('csv', 'json', 'xml', 'text', 'binary')
|
||||
If None, will auto-detect
|
||||
|
||||
Returns:
|
||||
ProcessResult: Contains anonymized data, mapping, replaced fields and processing info
|
||||
"""
|
||||
try:
|
||||
# Auto-detect content type if not provided
|
||||
if content_type is None:
|
||||
content_type = self.common_utils.detect_content_type(content)
|
||||
|
||||
# Check if content is binary data
|
||||
is_binary = False
|
||||
try:
|
||||
# First, check if content looks like base64 (contains only base64 characters)
|
||||
if re.match(r'^[A-Za-z0-9+/]*={0,2}$', content.strip()):
|
||||
# Try to decode base64 if it looks like base64
|
||||
try:
|
||||
decoded = base64.b64decode(content)
|
||||
# If it's not valid text, consider it binary
|
||||
decoded.decode('utf-8')
|
||||
is_binary = True
|
||||
except (base64.binascii.Error, UnicodeDecodeError):
|
||||
is_binary = False
|
||||
else:
|
||||
is_binary = False
|
||||
except Exception as e:
|
||||
is_binary = False
|
||||
|
||||
if is_binary:
|
||||
# TODO: Implement binary data neutralization
|
||||
# This would require:
|
||||
# 1. Detecting binary data types (images, audio, video, etc.)
|
||||
# 2. Implementing specific neutralization for each type
|
||||
# 3. Handling metadata and embedded content
|
||||
# 4. Preserving binary integrity while removing sensitive data
|
||||
return ProcessResult(content, self.mapping, [], {'type': 'binary', 'status': 'not_implemented'})
|
||||
|
||||
replaced_fields = []
|
||||
processed_info = {}
|
||||
if self.binary_processor.is_binary_content(content):
|
||||
return self.binary_processor.process_binary_content(content)
|
||||
|
||||
# Route to appropriate processor based on content type
|
||||
if content_type in ['csv', 'json', 'xml']:
|
||||
# Handle as table
|
||||
if content_type == 'csv':
|
||||
df = pd.read_csv(StringIO(content), encoding='utf-8')
|
||||
table = TableData(
|
||||
headers=df.columns.tolist(),
|
||||
rows=df.values.tolist(),
|
||||
source_type='csv'
|
||||
)
|
||||
processed_info['type'] = 'table'
|
||||
processed_info['headers'] = table.headers
|
||||
processed_info['row_count'] = len(table.rows)
|
||||
result, mapping, replaced_fields, processed_info = self.list_processor.process_csv_content(content)
|
||||
elif content_type == 'json':
|
||||
data = json.loads(content)
|
||||
# Process JSON recursively
|
||||
result = self._anonymize_json_value(data)
|
||||
processed_info['type'] = 'json'
|
||||
return ProcessResult(result, self.mapping, replaced_fields, processed_info)
|
||||
result, mapping, replaced_fields, processed_info = self.list_processor.process_json_content(content)
|
||||
else: # xml
|
||||
root = ET.fromstring(content)
|
||||
# Process XML recursively with proper formatting
|
||||
result = self._anonymize_xml_element(root)
|
||||
processed_info['type'] = 'xml'
|
||||
return ProcessResult(result, self.mapping, replaced_fields, processed_info)
|
||||
result, mapping, replaced_fields, processed_info = self.list_processor.process_xml_content(content)
|
||||
|
||||
if not table.rows:
|
||||
return ProcessResult(None, self.mapping, [], processed_info)
|
||||
|
||||
anonymized_table = self._anonymize_table(table)
|
||||
|
||||
# Track replaced fields
|
||||
for i, header in enumerate(anonymized_table.headers):
|
||||
for orig_row, anon_row in zip(table.rows, anonymized_table.rows):
|
||||
if anon_row[i] != orig_row[i]:
|
||||
replaced_fields.append(header)
|
||||
|
||||
# Convert back to original format
|
||||
if content_type == 'csv':
|
||||
result = pd.DataFrame(anonymized_table.rows, columns=anonymized_table.headers)
|
||||
elif content_type == 'json':
|
||||
if len(anonymized_table.headers) == 1 and anonymized_table.headers[0] == 'value':
|
||||
result = anonymized_table.rows[0][0]
|
||||
else:
|
||||
result = dict(zip(anonymized_table.headers, anonymized_table.rows[0]))
|
||||
else: # xml
|
||||
result = ET.tostring(root, encoding='unicode')
|
||||
|
||||
return ProcessResult(result, self.mapping, replaced_fields, processed_info)
|
||||
return ProcessResult(result, mapping, replaced_fields, processed_info)
|
||||
else:
|
||||
# Handle as text
|
||||
# First, identify what needs to be replaced using table detection
|
||||
tables, plain_texts = self._extract_tables_from_text(content)
|
||||
processed_info['type'] = 'text'
|
||||
processed_info['tables'] = [{'headers': t.headers, 'row_count': len(t.rows)} for t in tables]
|
||||
|
||||
# Process plain text sections
|
||||
anonymized_texts = [self._anonymize_plain_text(text) for text in plain_texts]
|
||||
|
||||
# Combine all processed content
|
||||
result = content
|
||||
for i, (text, anonymized_text) in enumerate(zip(plain_texts, anonymized_texts)):
|
||||
if text.content != anonymized_text.content:
|
||||
result = result.replace(text.content, anonymized_text.content)
|
||||
|
||||
return ProcessResult(result, self.mapping, replaced_fields, processed_info)
|
||||
result, mapping, replaced_fields, processed_info = self.text_processor.process_text_content(content)
|
||||
return ProcessResult(result, mapping, replaced_fields, processed_info)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error processing content: {str(e)}")
|
||||
return ProcessResult(None, self.mapping, [], {'type': 'error', 'error': str(e)})
|
||||
return ProcessResult(None, {}, [], {'type': 'error', 'error': str(e)})
|
||||
|
||||
def get_mapping(self) -> Dict[str, str]:
|
||||
"""
|
||||
Get the combined mapping from all processors
|
||||
|
||||
Returns:
|
||||
Dict[str, str]: Combined mapping dictionary
|
||||
"""
|
||||
text_mapping = self.text_processor.get_mapping()
|
||||
list_mapping = self.list_processor.get_mapping()
|
||||
return self.common_utils.merge_mappings(text_mapping, list_mapping)
|
||||
|
||||
def clear_mapping(self):
|
||||
"""Clear the mapping in all processors"""
|
||||
self.text_processor.clear_mapping()
|
||||
self.list_processor.clear_mapping()
|
||||
modules/neutralizer/readme.md (new file, 91 lines)
@@ -0,0 +1,91 @@
# Neutralizer Module Structure

This module provides DSGVO-compliant data anonymization for AI agent systems. The code has been refactored into specialized sub-modules for better maintainability and code reuse.

## Module Overview

### Core Module
- **`neutralizer.py`** - Main DataAnonymizer class that orchestrates all processing

### Specialized Processors
- **`subProcessText.py`** - Handles plain text processing without header information
- **`subProcessList.py`** - Handles structured data with headers (CSV, JSON, XML)
- **`subProcessBinary.py`** - Handles binary data types (images, audio, video, etc.)

### Utility Modules
- **`subParseString.py`** - String parsing and replacement utilities for emails, phones, addresses, IDs and names
- **`subProcessCommon.py`** - Common utilities and data structures shared across modules
- **`patterns.py`** - Pattern definitions for data anonymization

## Key Features

### 1. Modular Architecture
- **Separation of Concerns**: Each module handles a specific type of data processing
- **Code Reuse**: Common functionality is centralized in utility modules
- **Maintainability**: Easier to modify and extend individual components

### 2. Processing Order
1. **Pattern-based matches** (emails, phones, addresses, etc.) are processed FIRST
2. **Custom names** from the user list are processed SECOND
3. **Already anonymized content** (placeholders) is skipped (see the sketch below)
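A minimal sketch of this ordering using the `StringParser` from `subParseString.py`; the input text and custom name are made up, and it assumes the email pattern in `DataPatterns` matches standard addresses:

```python
from modules.neutralizer.subParseString import StringParser

# Illustrative input containing an e-mail address and a custom name
text = "Contact John Doe at john.doe@example.com"

parser = StringParser(names_to_parse=["John Doe"])

# Pattern-based matches (the e-mail) are replaced first, custom names second;
# placeholders created in the first pass are skipped by the second.
anonymized = parser.process_string(text)

print(anonymized)            # e.g. "Contact [name.<uuid>] at [email.<uuid>]"
print(parser.get_mapping())  # {original value: placeholder}
```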
### 3. Supported Data Types
- **Text**: Plain text documents, emails, etc.
- **Structured Data**: CSV, JSON, XML with headers
- **Binary Data**: Images, audio, video (framework ready, implementation pending)

### 4. Placeholder Protection
- Prevents re-anonymization of already processed content
- Uses format `[tag.uuid]` for placeholders
- Validates placeholder format before processing (sketched below)
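A small sketch of that check, based on the helpers in `subProcessCommon.py`; the UUID value below is illustrative:

```python
from modules.neutralizer.subProcessCommon import CommonUtils

placeholder = "[email.3f2b8c1e-0d4a-4b7e-9c2d-1a2b3c4d5e6f]"

# Values already in [tag.uuid] form are recognized and skipped on later passes
if CommonUtils.validate_placeholder(placeholder):
    tag, uid = CommonUtils.extract_placeholder_info(placeholder)
    print(tag, uid)  # "email", "3f2b8c1e-..."
```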
## Usage Example

```python
from modules.neutralizer import DataAnonymizer

# Initialize with custom names
anonymizer = DataAnonymizer(names_to_parse=['John Doe', 'Jane Smith'])

# Process content (auto-detects type when content_type is omitted)
result = anonymizer.process_content(content)

# Or specify content type explicitly
result = anonymizer.process_content(content, content_type='csv')

# Get mapping of original values to placeholders
mapping = anonymizer.get_mapping()
```
## Module Dependencies

```
neutralizer.py
├── subProcessCommon.py (ProcessResult, CommonUtils)
├── subProcessText.py (TextProcessor)
├── subProcessList.py (ListProcessor)
├── subProcessBinary.py (BinaryProcessor)
└── patterns.py (Pattern definitions)

subProcessText.py
└── subParseString.py (StringParser)

subProcessList.py
├── subParseString.py (StringParser)
└── patterns.py (HeaderPatterns)

subProcessBinary.py
└── (standalone)

subParseString.py
└── patterns.py (DataPatterns)
```
## Benefits of New Structure

1. **Single Responsibility**: Each module has one clear purpose
2. **DRY Principle**: No code duplication across modules
3. **Testability**: Individual modules can be tested in isolation
4. **Extensibility**: Easy to add new data types or processing methods
5. **Maintainability**: Changes to one module don't affect others
6. **Performance**: Specialized processors are optimized for their data types
modules/neutralizer/subParseString.py (new file, 162 lines)
@@ -0,0 +1,162 @@
"""
|
||||
String parsing and replacement utilities for data anonymization
|
||||
Handles pattern matching and replacement for emails, phones, addresses, IDs and names
|
||||
"""
|
||||
|
||||
import re
|
||||
import uuid
|
||||
from typing import Dict, List, Tuple, Any
|
||||
from modules.neutralizer.subPatterns import DataPatterns, find_patterns_in_text
|
||||
|
||||
class StringParser:
|
||||
"""Handles string parsing and replacement operations"""
|
||||
|
||||
def __init__(self, names_to_parse: List[str] = None):
|
||||
"""
|
||||
Initialize the string parser
|
||||
|
||||
Args:
|
||||
names_to_parse: List of names to parse and replace (case-insensitive)
|
||||
"""
|
||||
self.data_patterns = DataPatterns.patterns
|
||||
self.names_to_parse = names_to_parse or []
|
||||
self.mapping = {}
|
||||
|
||||
def is_placeholder(self, text: str) -> bool:
|
||||
"""
|
||||
Check if text is already a placeholder in format [tag.uuid]
|
||||
|
||||
Args:
|
||||
text: Text to check
|
||||
|
||||
Returns:
|
||||
bool: True if text is a placeholder
|
||||
"""
|
||||
return bool(re.match(r'^\[[a-z]+\.[a-f0-9-]+\]$', text))
|
||||
|
||||
def replace_pattern_matches(self, text: str) -> str:
|
||||
"""
|
||||
Replace pattern-based matches (emails, phones, etc.) in text
|
||||
|
||||
Args:
|
||||
text: Text to process
|
||||
|
||||
Returns:
|
||||
str: Text with pattern matches replaced
|
||||
"""
|
||||
pattern_matches = find_patterns_in_text(text, self.data_patterns)
|
||||
|
||||
# Process pattern matches from right to left to avoid position shifts
|
||||
for pattern_name, matched_text, start, end in reversed(pattern_matches):
|
||||
# Skip if already a placeholder
|
||||
if self.is_placeholder(matched_text):
|
||||
continue
|
||||
|
||||
# Skip if contains placeholder characters
|
||||
if '[' in matched_text or ']' in matched_text:
|
||||
continue
|
||||
|
||||
if matched_text not in self.mapping:
|
||||
# Generate a UUID for the placeholder
|
||||
placeholder_id = str(uuid.uuid4())
|
||||
# Create placeholder in format [type.uuid]
|
||||
type_mapping = {
|
||||
'email': 'email',
|
||||
'phone': 'phone',
|
||||
'address': 'address',
|
||||
'id': 'id'
|
||||
}
|
||||
placeholder_type = type_mapping.get(pattern_name, 'data')
|
||||
self.mapping[matched_text] = f"[{placeholder_type}.{placeholder_id}]"
|
||||
|
||||
replacement = self.mapping[matched_text]
|
||||
text = text[:start] + replacement + text[end:]
|
||||
|
||||
return text
|
||||
|
||||
def replace_custom_names(self, text: str) -> str:
|
||||
"""
|
||||
Replace custom names from the user list in text
|
||||
|
||||
Args:
|
||||
text: Text to process
|
||||
|
||||
Returns:
|
||||
str: Text with custom names replaced
|
||||
"""
|
||||
for name in self.names_to_parse:
|
||||
if not name.strip():
|
||||
continue
|
||||
|
||||
# Create case-insensitive regex pattern with word boundaries
|
||||
pattern = re.compile(r'\b' + re.escape(name.strip()) + r'\b', re.IGNORECASE)
|
||||
|
||||
# Find all matches for this name
|
||||
matches = list(pattern.finditer(text))
|
||||
|
||||
# Replace each match with a placeholder
|
||||
for match in reversed(matches): # Process from right to left to avoid position shifts
|
||||
matched_text = match.group()
|
||||
if matched_text not in self.mapping:
|
||||
# Generate a UUID for the placeholder
|
||||
placeholder_id = str(uuid.uuid4())
|
||||
self.mapping[matched_text] = f"[name.{placeholder_id}]"
|
||||
|
||||
replacement = self.mapping[matched_text]
|
||||
start, end = match.span()
|
||||
text = text[:start] + replacement + text[end:]
|
||||
|
||||
return text
|
||||
|
||||
def process_string(self, text: str) -> str:
|
||||
"""
|
||||
Process a string by replacing patterns first, then custom names
|
||||
|
||||
Args:
|
||||
text: Text to process
|
||||
|
||||
Returns:
|
||||
str: Processed text with replacements
|
||||
"""
|
||||
if self.is_placeholder(text):
|
||||
return text
|
||||
|
||||
# Step 1: Replace pattern-based matches FIRST
|
||||
text = self.replace_pattern_matches(text)
|
||||
|
||||
# Step 2: Replace custom names SECOND
|
||||
text = self.replace_custom_names(text)
|
||||
|
||||
return text
|
||||
|
||||
def process_json_value(self, value: Any) -> Any:
|
||||
"""
|
||||
Process a JSON value for anonymization
|
||||
|
||||
Args:
|
||||
value: Value to process
|
||||
|
||||
Returns:
|
||||
Any: Processed value
|
||||
"""
|
||||
if isinstance(value, str):
|
||||
return self.process_string(value)
|
||||
elif isinstance(value, dict):
|
||||
return {k: self.process_json_value(v) for k, v in value.items()}
|
||||
elif isinstance(value, list):
|
||||
return [self.process_json_value(item) for item in value]
|
||||
else:
|
||||
return value
|
||||
|
||||
def get_mapping(self) -> Dict[str, str]:
|
||||
"""
|
||||
Get the current mapping of original values to placeholders
|
||||
|
||||
Returns:
|
||||
Dict[str, str]: Mapping dictionary
|
||||
"""
|
||||
return self.mapping.copy()
|
||||
|
||||
def clear_mapping(self):
|
||||
"""Clear the current mapping"""
|
||||
self.mapping.clear()
|
||||
modules/neutralizer/subProcessBinary.py (new file, 101 lines)
@@ -0,0 +1,101 @@
"""
|
||||
Binary data processing module for data anonymization
|
||||
Handles binary data types (images, audio, video, etc.)
|
||||
"""
|
||||
|
||||
import base64
|
||||
import re
|
||||
from typing import Dict, Any, Tuple
|
||||
from dataclasses import dataclass
|
||||
|
||||
@dataclass
|
||||
class BinaryData:
|
||||
"""Repräsentiert Binärdaten"""
|
||||
content: str
|
||||
data_type: str # 'image', 'audio', 'video', 'document', 'unknown'
|
||||
encoding: str # 'base64', 'hex', 'raw'
|
||||
|
||||
class BinaryProcessor:
|
||||
"""Handles binary data processing for anonymization"""
|
||||
|
||||
def __init__(self):
|
||||
"""Initialize the binary processor"""
|
||||
self.supported_types = {
|
||||
'image': ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.webp'],
|
||||
'audio': ['.mp3', '.wav', '.flac', '.aac', '.ogg', '.m4a'],
|
||||
'video': ['.mp4', '.avi', '.mov', '.wmv', '.flv', '.mkv', '.webm'],
|
||||
'document': ['.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx']
|
||||
}
|
||||
|
||||
def detect_binary_type(self, content: str) -> str:
|
||||
"""
|
||||
Detect if content is binary data and determine type
|
||||
|
||||
Args:
|
||||
content: Content to analyze
|
||||
|
||||
Returns:
|
||||
str: Binary type or 'text' if not binary
|
||||
"""
|
||||
# Check if content looks like base64
|
||||
if re.match(r'^[A-Za-z0-9+/]*={0,2}$', content.strip()):
|
||||
try:
|
||||
decoded = base64.b64decode(content)
|
||||
# Try to decode as text
|
||||
decoded.decode('utf-8')
|
||||
return 'text' # It's base64 encoded text
|
||||
except (base64.binascii.Error, UnicodeDecodeError):
|
||||
# It's binary data
|
||||
return 'binary'
|
||||
|
||||
# Check for binary patterns
|
||||
if len(content) > 100 and '\x00' in content:
|
||||
return 'binary'
|
||||
|
||||
return 'text'
|
||||
|
||||
def is_binary_content(self, content: str) -> bool:
|
||||
"""
|
||||
Check if content is binary data
|
||||
|
||||
Args:
|
||||
content: Content to check
|
||||
|
||||
Returns:
|
||||
bool: True if content is binary
|
||||
"""
|
||||
return self.detect_binary_type(content) == 'binary'
|
||||
|
||||
def process_binary_content(self, content: str) -> Tuple[Any, Dict[str, str], list, Dict[str, Any]]:
|
||||
"""
|
||||
Process binary content for anonymization
|
||||
|
||||
Args:
|
||||
content: Binary content to process
|
||||
|
||||
Returns:
|
||||
Tuple of (processed_data, mapping, replaced_fields, processed_info)
|
||||
"""
|
||||
# TODO: Implement binary data neutralization
|
||||
# This would require:
|
||||
# 1. Detecting binary data types (images, audio, video, etc.)
|
||||
# 2. Implementing specific neutralization for each type
|
||||
# 3. Handling metadata and embedded content
|
||||
# 4. Preserving binary integrity while removing sensitive data
|
||||
|
||||
processed_info = {
|
||||
'type': 'binary',
|
||||
'status': 'not_implemented',
|
||||
'message': 'Binary data neutralization not yet implemented'
|
||||
}
|
||||
|
||||
return content, {}, [], processed_info
|
||||
|
||||
def get_supported_types(self) -> Dict[str, list]:
|
||||
"""
|
||||
Get list of supported binary file types
|
||||
|
||||
Returns:
|
||||
Dict[str, list]: Dictionary of supported types and their extensions
|
||||
"""
|
||||
return self.supported_types.copy()
|
||||
modules/neutralizer/subProcessCommon.py (new file, 143 lines)
@@ -0,0 +1,143 @@
"""
|
||||
Common processing utilities for data anonymization
|
||||
Shared functions and data structures
|
||||
"""
|
||||
|
||||
import re
|
||||
from typing import Dict, List, Any, Union, Optional
|
||||
from dataclasses import dataclass
|
||||
|
||||
@dataclass
|
||||
class ProcessResult:
|
||||
"""Result of content processing"""
|
||||
data: Any
|
||||
mapping: Dict[str, str]
|
||||
replaced_fields: List[str]
|
||||
processed_info: Dict[str, Any] # Additional processing information
|
||||
|
||||
class CommonUtils:
|
||||
"""Common utility functions for data processing"""
|
||||
|
||||
@staticmethod
|
||||
def normalize_whitespace(text: str) -> str:
|
||||
"""
|
||||
Normalize whitespace in text
|
||||
|
||||
Args:
|
||||
text: Text to normalize
|
||||
|
||||
Returns:
|
||||
str: Normalized text
|
||||
"""
|
||||
text = re.sub(r'\s+', ' ', text)
|
||||
text = text.replace('\r\n', '\n').replace('\r', '\n')
|
||||
return text.strip()
|
||||
|
||||
@staticmethod
|
||||
def is_table_line(line: str) -> bool:
|
||||
"""
|
||||
Check if a line represents a table row
|
||||
|
||||
Args:
|
||||
line: Line to check
|
||||
|
||||
Returns:
|
||||
bool: True if line is a table row
|
||||
"""
|
||||
return bool(re.match(r'^\s*[^:]+:\s*[^:]+$', line) or
|
||||
re.match(r'^\s*[^\t]+\t[^\t]+$', line))
|
||||
|
||||
@staticmethod
|
||||
def detect_content_type(content: str) -> str:
|
||||
"""
|
||||
Detect the type of content based on its structure
|
||||
|
||||
Args:
|
||||
content: Content to analyze
|
||||
|
||||
Returns:
|
||||
str: Content type ('csv', 'json', 'xml', 'text', 'binary')
|
||||
"""
|
||||
content = content.strip()
|
||||
|
||||
# Check for JSON
|
||||
if content.startswith('{') and content.endswith('}'):
|
||||
return 'json'
|
||||
if content.startswith('[') and content.endswith(']'):
|
||||
return 'json'
|
||||
|
||||
# Check for XML
|
||||
if content.startswith('<') and content.endswith('>'):
|
||||
return 'xml'
|
||||
|
||||
# Check for CSV (has commas and newlines)
|
||||
if ',' in content and '\n' in content:
|
||||
lines = content.split('\n')
|
||||
if len(lines) > 1 and all(',' in line for line in lines[:3]):
|
||||
return 'csv'
|
||||
|
||||
# Check for binary
|
||||
if len(content) > 100 and '\x00' in content:
|
||||
return 'binary'
|
||||
|
||||
# Default to text
|
||||
return 'text'
|
||||
|
||||
@staticmethod
|
||||
def merge_mappings(*mappings: Dict[str, str]) -> Dict[str, str]:
|
||||
"""
|
||||
Merge multiple mapping dictionaries
|
||||
|
||||
Args:
|
||||
*mappings: Mapping dictionaries to merge
|
||||
|
||||
Returns:
|
||||
Dict[str, str]: Merged mapping dictionary
|
||||
"""
|
||||
merged = {}
|
||||
for mapping in mappings:
|
||||
merged.update(mapping)
|
||||
return merged
|
||||
|
||||
@staticmethod
|
||||
def create_placeholder(placeholder_type: str, placeholder_id: str) -> str:
|
||||
"""
|
||||
Create a placeholder string in the format [type.uuid]
|
||||
|
||||
Args:
|
||||
placeholder_type: Type of placeholder (email, phone, name, etc.)
|
||||
placeholder_id: Unique identifier for the placeholder
|
||||
|
||||
Returns:
|
||||
str: Formatted placeholder string
|
||||
"""
|
||||
return f"[{placeholder_type}.{placeholder_id}]"
|
||||
|
||||
@staticmethod
|
||||
def validate_placeholder(placeholder: str) -> bool:
|
||||
"""
|
||||
Validate if a string is a valid placeholder
|
||||
|
||||
Args:
|
||||
placeholder: String to validate
|
||||
|
||||
Returns:
|
||||
bool: True if valid placeholder
|
||||
"""
|
||||
return bool(re.match(r'^\[[a-z]+\.[a-f0-9-]+\]$', placeholder))
|
||||
|
||||
@staticmethod
|
||||
def extract_placeholder_info(placeholder: str) -> Optional[tuple]:
|
||||
"""
|
||||
Extract type and ID from a placeholder
|
||||
|
||||
Args:
|
||||
placeholder: Placeholder string
|
||||
|
||||
Returns:
|
||||
Optional[tuple]: (type, id) or None if invalid
|
||||
"""
|
||||
match = re.match(r'^\[([a-z]+)\.([a-f0-9-]+)\]$', placeholder)
|
||||
if match:
|
||||
return match.group(1), match.group(2)
|
||||
return None
|
||||
modules/neutralizer/subProcessList.py (new file, 279 lines)
@@ -0,0 +1,279 @@
"""
|
||||
List processing module for data anonymization
|
||||
Handles structured data with headers (CSV, JSON, XML)
|
||||
"""
|
||||
|
||||
import json
|
||||
import pandas as pd
|
||||
import xml.etree.ElementTree as ET
|
||||
from typing import Dict, List, Any, Union
|
||||
from dataclasses import dataclass
|
||||
from io import StringIO
|
||||
from modules.neutralizer.subParseString import StringParser
|
||||
from modules.neutralizer.subPatterns import get_pattern_for_header, HeaderPatterns
|
||||
|
||||
@dataclass
|
||||
class TableData:
|
||||
"""Repräsentiert Tabellendaten"""
|
||||
headers: List[str]
|
||||
rows: List[List[str]]
|
||||
source_type: str # 'csv', 'json', 'xml', 'text_table'
|
||||
|
||||
class ListProcessor:
|
||||
"""Handles structured data processing with headers for anonymization"""
|
||||
|
||||
def __init__(self, names_to_parse: List[str] = None):
|
||||
"""
|
||||
Initialize the list processor
|
||||
|
||||
Args:
|
||||
names_to_parse: List of names to parse and replace
|
||||
"""
|
||||
self.string_parser = StringParser(names_to_parse)
|
||||
self.header_patterns = HeaderPatterns.patterns
|
||||
|
||||
def anonymize_table(self, table: TableData) -> TableData:
|
||||
"""
|
||||
Anonymize table data based on headers
|
||||
|
||||
Args:
|
||||
table: TableData object to anonymize
|
||||
|
||||
Returns:
|
||||
TableData: Anonymized table
|
||||
"""
|
||||
anonymized_table = TableData(
|
||||
headers=table.headers.copy(),
|
||||
rows=[row.copy() for row in table.rows],
|
||||
source_type=table.source_type
|
||||
)
|
||||
|
||||
for i, header in enumerate(anonymized_table.headers):
|
||||
pattern = get_pattern_for_header(header, self.header_patterns)
|
||||
if pattern:
|
||||
for row in anonymized_table.rows:
|
||||
if row[i] is not None:
|
||||
original = str(row[i])
|
||||
if original not in self.string_parser.mapping:
|
||||
# Generate a UUID for the placeholder
|
||||
import uuid
|
||||
placeholder_id = str(uuid.uuid4())
|
||||
self.string_parser.mapping[original] = pattern.replacement_template.format(len(self.string_parser.mapping) + 1)
|
||||
row[i] = self.string_parser.mapping[original]
|
||||
|
||||
return anonymized_table
|
||||
|
||||
def process_csv_content(self, content: str) -> tuple:
|
||||
"""
|
||||
Process CSV content
|
||||
|
||||
Args:
|
||||
content: CSV content to process
|
||||
|
||||
Returns:
|
||||
Tuple of (processed_data, mapping, replaced_fields, processed_info)
|
||||
"""
|
||||
df = pd.read_csv(StringIO(content), encoding='utf-8')
|
||||
table = TableData(
|
||||
headers=df.columns.tolist(),
|
||||
rows=df.values.tolist(),
|
||||
source_type='csv'
|
||||
)
|
||||
|
||||
if not table.rows:
|
||||
return None, self.string_parser.get_mapping(), [], {'type': 'table', 'headers': table.headers, 'row_count': 0}
|
||||
|
||||
anonymized_table = self.anonymize_table(table)
|
||||
|
||||
# Track replaced fields
|
||||
replaced_fields = []
|
||||
for i, header in enumerate(anonymized_table.headers):
|
||||
for orig_row, anon_row in zip(table.rows, anonymized_table.rows):
|
||||
if anon_row[i] != orig_row[i]:
|
||||
replaced_fields.append(header)
|
||||
|
||||
# Convert back to DataFrame
|
||||
result = pd.DataFrame(anonymized_table.rows, columns=anonymized_table.headers)
|
||||
|
||||
processed_info = {
|
||||
'type': 'table',
|
||||
'headers': table.headers,
|
||||
'row_count': len(table.rows)
|
||||
}
|
||||
|
||||
return result, self.string_parser.get_mapping(), replaced_fields, processed_info
|
||||
|
||||
def process_json_content(self, content: str) -> tuple:
|
||||
"""
|
||||
Process JSON content
|
||||
|
||||
Args:
|
||||
content: JSON content to process
|
||||
|
||||
Returns:
|
||||
Tuple of (processed_data, mapping, replaced_fields, processed_info)
|
||||
"""
|
||||
data = json.loads(content)
|
||||
|
||||
# Process JSON recursively using string parser
|
||||
result = self.string_parser.process_json_value(data)
|
||||
|
||||
processed_info = {'type': 'json'}
|
||||
|
||||
return result, self.string_parser.get_mapping(), [], processed_info
|
||||
|
||||
def anonymize_xml_element(self, element: ET.Element, indent: str = '') -> str:
|
||||
"""
|
||||
Recursively process XML element and return formatted string
|
||||
|
||||
Args:
|
||||
element: XML element to process
|
||||
indent: Current indentation level
|
||||
|
||||
Returns:
|
||||
Formatted XML string
|
||||
"""
|
||||
# Process attributes
|
||||
processed_attrs = {}
|
||||
for attr_name, attr_value in element.attrib.items():
|
||||
# Check if attribute name matches any header patterns
|
            pattern = get_pattern_for_header(attr_name, self.header_patterns)
            if pattern:
                if attr_value not in self.string_parser.mapping:
                    # Generate a UUID for the placeholder
                    import uuid
                    placeholder_id = str(uuid.uuid4())
                    # Create placeholder in format [type.uuid]
                    type_mapping = {
                        'email': 'email',
                        'phone': 'phone',
                        'name': 'name',
                        'address': 'address',
                        'id': 'id'
                    }
                    placeholder_type = type_mapping.get(pattern.name, 'data')
                    self.string_parser.mapping[attr_value] = f"[{placeholder_type}.{placeholder_id}]"
                processed_attrs[attr_name] = self.string_parser.mapping[attr_value]
            else:
                # Check if attribute value matches any data patterns
                from modules.neutralizer.subPatterns import find_patterns_in_text, DataPatterns
                matches = find_patterns_in_text(attr_value, DataPatterns.patterns)
                if matches:
                    pattern_name = matches[0][0]
                    pattern = next((p for p in DataPatterns.patterns if p.name == pattern_name), None)
                    if pattern:
                        if attr_value not in self.string_parser.mapping:
                            # Generate a UUID for the placeholder
                            import uuid
                            placeholder_id = str(uuid.uuid4())
                            # Create placeholder in format [type.uuid]
                            type_mapping = {
                                'email': 'email',
                                'phone': 'phone',
                                'name': 'name',
                                'address': 'address',
                                'id': 'id'
                            }
                            placeholder_type = type_mapping.get(pattern_name, 'data')
                            self.string_parser.mapping[attr_value] = f"[{placeholder_type}.{placeholder_id}]"
                        processed_attrs[attr_name] = self.string_parser.mapping[attr_value]
                    else:
                        processed_attrs[attr_name] = attr_value
                else:
                    processed_attrs[attr_name] = attr_value

        attrs = ' '.join(f'{k}="{v}"' for k, v in processed_attrs.items())
        attrs = f' {attrs}' if attrs else ''

        # Process text content
        text = element.text.strip() if element.text and element.text.strip() else ''
        if text:
            # Skip if already a placeholder
            if not self.string_parser.is_placeholder(text):
                # Check if text matches any patterns
                from modules.neutralizer.subPatterns import find_patterns_in_text, DataPatterns
                pattern_matches = find_patterns_in_text(text, DataPatterns.patterns)

                if pattern_matches:
                    pattern_name = pattern_matches[0][0]
                    pattern = next((p for p in DataPatterns.patterns if p.name == pattern_name), None)
                    if pattern:
                        if text not in self.string_parser.mapping:
                            # Generate a UUID for the placeholder
                            import uuid
                            placeholder_id = str(uuid.uuid4())
                            # Create placeholder in format [type.uuid]
                            type_mapping = {
                                'email': 'email',
                                'phone': 'phone',
                                'name': 'name',
                                'address': 'address',
                                'id': 'id'
                            }
                            placeholder_type = type_mapping.get(pattern_name, 'data')
                            self.string_parser.mapping[text] = f"[{placeholder_type}.{placeholder_id}]"
                        text = self.string_parser.mapping[text]
                else:
                    # Check if text matches any custom names from the user list
                    for name in self.string_parser.names_to_parse:
                        if not name.strip():
                            continue
                        if text.lower().strip() == name.lower().strip():
                            if text not in self.string_parser.mapping:
                                # Generate a UUID for the placeholder
                                import uuid
                                placeholder_id = str(uuid.uuid4())
                                self.string_parser.mapping[text] = f"[name.{placeholder_id}]"
                            text = self.string_parser.mapping[text]
                            break

        # Process child elements
        children = []
        for child in element:
            child_str = self.anonymize_xml_element(child, indent + '  ')
            children.append(child_str)

        # Build element string
        if not children and not text:
            return f"{indent}<{element.tag}{attrs}/>"
        elif not children:
            return f"{indent}<{element.tag}{attrs}>{text}</{element.tag}>"
        else:
            result = [f"{indent}<{element.tag}{attrs}>"]
            if text:
                result.append(f"{indent}  {text}")
            result.extend(children)
            result.append(f"{indent}</{element.tag}>")
            return '\n'.join(result)

    def process_xml_content(self, content: str) -> tuple:
        """
        Process XML content

        Args:
            content: XML content to process

        Returns:
            Tuple of (processed_data, mapping, replaced_fields, processed_info)
        """
        root = ET.fromstring(content)

        # Process XML recursively with proper formatting
        result = self.anonymize_xml_element(root)

        processed_info = {'type': 'xml'}

        return result, self.string_parser.get_mapping(), [], processed_info

    def get_mapping(self) -> Dict[str, str]:
        """
        Get the current mapping of original values to placeholders

        Returns:
            Dict[str, str]: Mapping dictionary
        """
        return self.string_parser.get_mapping()

    def clear_mapping(self):
        """Clear the current mapping"""
        self.string_parser.clear_mapping()
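For orientation only (not part of the commit): a sketch of what the element-wise anonymization above is expected to produce for a tiny input. The processor class name is not visible in this hunk, so the call is left as a comment; the sample XML and the UUIDs are illustrative.

import xml.etree.ElementTree as ET

sample = '<customer email="jane@example.org"><name>Jane Doe</name></customer>'
root = ET.fromstring(sample)
# xml_processor.anonymize_xml_element(root) should return roughly:
#   <customer email="[email.3f2b...]">
#     [name.9c1d...]
#   </customer>
# with get_mapping() containing
#   {"jane@example.org": "[email.3f2b...]", "Jane Doe": "[name.9c1d...]"}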
101 modules/neutralizer/subProcessText.py Normal file
@ -0,0 +1,101 @@
"""
Text processing module for data anonymization
Handles plain text processing without header information
"""

from typing import Dict, List, Any
from dataclasses import dataclass
from modules.neutralizer.subParseString import StringParser


@dataclass
class PlainText:
    """Represents plain text"""
    content: str
    source_type: str  # 'txt', 'docx', 'text_plain'


class TextProcessor:
    """Handles plain text processing for anonymization"""

    def __init__(self, names_to_parse: List[str] = None):
        """
        Initialize the text processor

        Args:
            names_to_parse: List of names to parse and replace
        """
        self.string_parser = StringParser(names_to_parse)

    def extract_tables_from_text(self, content: str) -> tuple:
        """
        Extract tables and plain text from content

        Args:
            content: Content to process

        Returns:
            Tuple of (list of tables, list of plain text sections)
        """
        # For now, process the entire content as plain text
        # This can be extended later to detect table-like structures
        tables = []
        plain_texts = [PlainText(content=content, source_type='text_plain')]

        return tables, plain_texts

    def anonymize_plain_text(self, text: PlainText) -> PlainText:
        """
        Anonymize plain text content

        Args:
            text: PlainText object to anonymize

        Returns:
            PlainText: Anonymized text
        """
        # Use the string parser to process the content
        anonymized_content = self.string_parser.process_string(text.content)

        return PlainText(content=anonymized_content, source_type=text.source_type)

    def process_text_content(self, content: str) -> tuple:
        """
        Process text content and return anonymized data

        Args:
            content: Text content to process

        Returns:
            Tuple of (anonymized_content, mapping, replaced_fields, processed_info)
        """
        # Extract tables and plain text sections
        tables, plain_texts = self.extract_tables_from_text(content)

        # Process plain text sections
        anonymized_texts = [self.anonymize_plain_text(text) for text in plain_texts]

        # Combine all processed content
        result = content
        for text, anonymized_text in zip(plain_texts, anonymized_texts):
            if text.content != anonymized_text.content:
                result = result.replace(text.content, anonymized_text.content)

        # Get processing information (guard against the empty tables list so the
        # hasattr check cannot raise an IndexError)
        processed_info = {
            'type': 'text',
            'tables': [{'headers': t.headers, 'row_count': len(t.rows)} for t in tables] if tables and hasattr(tables[0], 'headers') else []
        }

        return result, self.string_parser.get_mapping(), [], processed_info

    def get_mapping(self) -> Dict[str, str]:
        """
        Get the current mapping of original values to placeholders

        Returns:
            Dict[str, str]: Mapping dictionary
        """
        return self.string_parser.get_mapping()

    def clear_mapping(self):
        """Clear the current mapping"""
        self.string_parser.clear_mapping()
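A minimal usage sketch (not part of the diff) of the TextProcessor defined above; the sample name and text are made up, and the exact replacement depends on StringParser.

from modules.neutralizer.subProcessText import TextProcessor

processor = TextProcessor(names_to_parse=["Jane Doe"])
anonymized, mapping, replaced_fields, info = processor.process_text_content(
    "Please contact Jane Doe for details."
)
# anonymized: known names/patterns replaced by placeholders such as [name.<uuid>]
# mapping:    {original value -> placeholder}
# info:       {'type': 'text', 'tables': []}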
@ -18,7 +18,7 @@ import modules.interfaces.interfaceComponentObjects as interfaceComponentObjects
from modules.interfaces.interfaceComponentModel import FileItem, FilePreview
from modules.shared.attributeUtils import getModelAttributeDefinitions, AttributeResponse, AttributeDefinition
from modules.interfaces.interfaceAppModel import User, DataNeutraliserConfig, DataNeutralizerAttributes
from modules.services.serviceNeutralization import NeutralizationService
from modules.features.featureNeutralizePlayground import NeutralizationService

# Configure logger
logger = logging.getLogger(__name__)

@ -590,6 +590,20 @@ async def logout(
    try:
        appInterface = getInterface(currentUser)
        appInterface.logout()

        # Log successful logout
        try:
            from modules.shared.auditLogger import audit_logger
            audit_logger.log_user_access(
                user_id=str(currentUser.id),
                mandate_id=str(currentUser.mandateId),
                action="logout",
                success_info="google_auth_logout"
            )
        except Exception:
            # Don't fail if audit logging fails
            pass

        return {"message": "Logged out successfully"}
    except Exception as e:
        logger.error(f"Error during logout: {str(e)}")

@ -124,6 +124,19 @@ async def login(
        # Save access token
        userInterface.saveAccessToken(token)

        # Log successful login
        try:
            from modules.shared.auditLogger import audit_logger
            audit_logger.log_user_access(
                user_id=str(user.id),
                mandate_id=str(user.mandateId),
                action="login",
                success_info="local_auth_success"
            )
        except Exception:
            # Don't fail if audit logging fails
            pass

        # Create response data
        response_data = {
            "type": "local_auth_success",

@ -138,6 +151,20 @@ async def login(
        # Handle authentication errors
        error_msg = str(e)
        logger.warning(f"Authentication failed for user {formData.username}: {error_msg}")

        # Log failed login attempt
        try:
            from modules.shared.auditLogger import audit_logger
            audit_logger.log_user_access(
                user_id="unknown",
                mandate_id="unknown",
                action="login",
                success_info=f"failed: {error_msg}"
            )
        except Exception:
            # Don't fail if audit logging fails
            pass

        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail=error_msg,

@ -253,6 +280,19 @@ async def logout(request: Request, currentUser: User = Depends(getCurrentUser))
            appInterface.revokeTokenById(jti, revokedBy=currentUser.id, reason="logout")
            revoked = 1

        # Log successful logout
        try:
            from modules.shared.auditLogger import audit_logger
            audit_logger.log_user_access(
                user_id=str(currentUser.id),
                mandate_id=str(currentUser.mandateId),
                action="logout",
                success_info=f"revoked_tokens: {revoked}"
            )
        except Exception:
            # Don't fail if audit logging fails
            pass

        return JSONResponse({
            "message": "Successfully logged out",
            "revokedTokens": revoked

@ -463,6 +463,20 @@ async def logout(
    try:
        appInterface = getInterface(currentUser)
        appInterface.logout()

        # Log successful logout
        try:
            from modules.shared.auditLogger import audit_logger
            audit_logger.log_user_access(
                user_id=str(currentUser.id),
                mandate_id=str(currentUser.mandateId),
                action="logout",
                success_info="microsoft_auth_logout"
            )
        except Exception:
            # Don't fail if audit logging fails
            pass

        return {"message": "Logged out successfully"}
    except Exception as e:
        logger.error(f"Error during logout: {str(e)}")

@ -161,12 +161,12 @@ async def realtime_interpreter(
    logger.info(f"📊 Audio file size: {len(audio_content)} bytes")

    # Save audio file for debugging with correct extension
    file_extension = "webm" if audio_file.filename.endswith('.webm') else "wav"
    debug_filename = f"debug_audio/audio_google_{audio_file.filename.replace('.wav', '.webm')}"
    os.makedirs("debug_audio", exist_ok=True)
    with open(debug_filename, "wb") as f:
        f.write(audio_content)
    logger.info(f"💾 Saved audio file for debugging: {debug_filename}")
    # file_extension = "webm" if audio_file.filename.endswith('.webm') else "wav"
    # debug_filename = f"debug_audio/audio_google_{audio_file.filename.replace('.wav', '.webm')}"
    # os.makedirs("debug_audio", exist_ok=True)
    # with open(debug_filename, "wb") as f:
    #     f.write(audio_content)
    # logger.info(f"💾 Saved audio file for debugging: {debug_filename}")

    # Validate audio format
    connector = get_google_speech_connector()

@ -19,7 +19,7 @@ from modules.interfaces.interfaceAppObjects import getRootInterface
from modules.interfaces.interfaceAppModel import User, AuthAuthority, Token

# Get Config Data
SECRET_KEY = APP_CONFIG.get("APP_JWT_SECRET_SECRET")
SECRET_KEY = APP_CONFIG.get("APP_JWT_KEY_SECRET")
ALGORITHM = APP_CONFIG.get("Auth_ALGORITHM")
ACCESS_TOKEN_EXPIRE_MINUTES = int(APP_CONFIG.get("APP_TOKEN_EXPIRY"))
REFRESH_TOKEN_EXPIRE_DAYS = int(APP_CONFIG.get("APP_REFRESH_TOKEN_EXPIRY", "7"))
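For context, a sketch (not from this commit) of how the renamed APP_JWT_KEY_SECRET value is typically consumed when issuing a token with python-jose, which is already in the requirements; the claim set and helper name are illustrative.

from datetime import datetime, timedelta, timezone
from jose import jwt

def create_access_token(subject: str) -> str:
    # SECRET_KEY is resolved via APP_CONFIG.get("APP_JWT_KEY_SECRET") above and,
    # if stored with an <ENV>_ENC: prefix, decrypted by the configuration layer.
    expires = datetime.now(timezone.utc) + timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
    return jwt.encode({"sub": subject, "exp": int(expires.timestamp())}, SECRET_KEY, algorithm=ALGORITHM)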
202 modules/shared/auditLogger.py Normal file
@ -0,0 +1,202 @@
"""
Audit Logging System for PowerOn Gateway

This module provides centralized audit logging functionality for security events,
user actions, and system access patterns.
"""

import logging
import os
from datetime import datetime
from typing import Optional, Dict, Any
from logging.handlers import RotatingFileHandler
from modules.shared.configuration import APP_CONFIG


class DailyRotatingFileHandler(RotatingFileHandler):
    """
    A rotating file handler that automatically switches to a new file when the date changes.
    The log file name includes the current date and switches at midnight.
    """

    def __init__(self, log_dir, filename_prefix, max_bytes=10485760, backup_count=5, **kwargs):
        self.log_dir = log_dir
        self.filename_prefix = filename_prefix
        self.current_date = None
        self.current_file = None

        # Initialize with today's file
        self._update_file_if_needed()

        # Call parent constructor with current file
        super().__init__(self.current_file, maxBytes=max_bytes, backupCount=backup_count, **kwargs)

    def _update_file_if_needed(self):
        """Update the log file if the date has changed"""
        today = datetime.now().strftime("%Y%m%d")

        if self.current_date != today:
            self.current_date = today
            new_file = os.path.join(self.log_dir, f"{self.filename_prefix}_{today}.log")

            if self.current_file != new_file:
                self.current_file = new_file
                return True
        return False

    def emit(self, record):
        """Emit a log record, switching files if date has changed"""
        # Check if we need to switch to a new file
        if self._update_file_if_needed():
            # Close current file and open new one
            if self.stream:
                self.stream.close()
                self.stream = None

            # Update the baseFilename for the parent class
            self.baseFilename = self.current_file
            # Reopen the stream
            if not self.delay:
                self.stream = self._open()

        # Call parent emit method
        super().emit(record)


class AuditLogger:
    """Centralized audit logging system"""

    def __init__(self):
        self.logger = None
        self._setup_audit_logger()

    def _setup_audit_logger(self):
        """Setup the audit logger with daily file rotation"""
        try:
            # Get log directory from config
            logDir = APP_CONFIG.get("APP_LOGGING_LOG_DIR", "./")
            if not os.path.isabs(logDir):
                # If relative path, make it relative to the gateway directory
                gatewayDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
                logDir = os.path.join(gatewayDir, logDir)

            # Ensure log directory exists
            os.makedirs(logDir, exist_ok=True)

            # Create audit logger
            self.logger = logging.getLogger('audit')
            self.logger.setLevel(logging.INFO)

            # Remove any existing handlers to avoid duplicates
            for handler in self.logger.handlers[:]:
                self.logger.removeHandler(handler)

            # Create daily rotating file handler for audit log
            rotationSize = int(APP_CONFIG.get("APP_LOGGING_ROTATION_SIZE", 10485760))  # Default: 10MB
            backupCount = int(APP_CONFIG.get("APP_LOGGING_BACKUP_COUNT", 5))

            fileHandler = DailyRotatingFileHandler(
                log_dir=logDir,
                filename_prefix="log_audit",
                max_bytes=rotationSize,
                backup_count=backupCount
            )

            # Create formatter for audit log
            auditFormatter = logging.Formatter(
                fmt="%(asctime)s | %(message)s",
                datefmt="%Y-%m-%d %H:%M:%S"
            )
            fileHandler.setFormatter(auditFormatter)

            # Add handler to logger
            self.logger.addHandler(fileHandler)

            # Prevent propagation to root logger
            self.logger.propagate = False

        except Exception as e:
            # Fallback to standard logger if audit setup fails
            self.logger = logging.getLogger(__name__)
            self.logger.error(f"Failed to setup audit logger: {str(e)}")

    def log_event(self,
                  user_id: str,
                  mandate_id: str,
                  category: str,
                  action: str,
                  details: str = "",
                  timestamp: Optional[datetime] = None) -> None:
        """
        Log an audit event

        Args:
            user_id: User identifier
            mandate_id: Mandate identifier (can be empty if not applicable)
            category: Event category (e.g., 'key', 'access', 'data')
            action: Specific action (e.g., 'decode', 'login', 'logout')
            details: Additional details about the event
            timestamp: Optional custom timestamp (defaults to current time)
        """
        try:
            if not self.logger:
                return

            # Use provided timestamp or current time
            if timestamp is None:
                timestamp = datetime.now()

            # Format the audit log entry
            # Format: timestamp | userid | mandateid | category | action | details
            audit_entry = f"{user_id} | {mandate_id} | {category} | {action} | {details}"

            # Log the event
            self.logger.info(audit_entry)

        except Exception as e:
            # Use standard logger as fallback
            logging.getLogger(__name__).error(f"Failed to log audit event: {str(e)}")

    def log_key_access(self, user_id: str, mandate_id: str, key_name: str, action: str) -> None:
        """Log key access events (decode/encode)"""
        self.log_event(
            user_id=user_id,
            mandate_id=mandate_id,
            category="key",
            action=action,
            details=key_name
        )

    def log_user_access(self, user_id: str, mandate_id: str, action: str, success_info: str = "") -> None:
        """Log user access events (login/logout)"""
        self.log_event(
            user_id=user_id,
            mandate_id=mandate_id,
            category="access",
            action=action,
            details=success_info
        )

    def log_data_access(self, user_id: str, mandate_id: str, action: str, details: str = "") -> None:
        """Log data access events"""
        self.log_event(
            user_id=user_id,
            mandate_id=mandate_id,
            category="data",
            action=action,
            details=details
        )

    def log_security_event(self, user_id: str, mandate_id: str, action: str, details: str = "") -> None:
        """Log security-related events"""
        self.log_event(
            user_id=user_id,
            mandate_id=mandate_id,
            category="security",
            action=action,
            details=details
        )


# Global audit logger instance
audit_logger = AuditLogger()
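Usage sketch for the shared instance (the IDs and timestamp below are illustrative): each call appends one pipe-delimited line to log_audit_<YYYYMMDD>.log in the configured log directory.

from modules.shared.auditLogger import audit_logger

audit_logger.log_user_access(
    user_id="42",
    mandate_id="7",
    action="login",
    success_info="local_auth_success"
)
# resulting line (timestamp prepended by the formatter):
# 2025-06-30 12:00:00 | 42 | 7 | access | login | local_auth_success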
@ -7,8 +7,14 @@ config.ini files and environment variables stored in .env files, using a flat st

import os
import logging
import json
import base64
import time
from typing import Any, Dict, Optional
from pathlib import Path
from cryptography.fernet import Fernet
from cryptography.hazmat.primitives import hashes
from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC

# Set up basic logging for configuration loading
logging.basicConfig(
@ -119,21 +125,44 @@ class Configuration:

        try:
            with open(envPath, 'r') as f:
                for line in f:
                    line = line.strip()
                    # Skip empty lines and comments
                    if not line or line.startswith('#'):
                        continue
                lines = f.readlines()

                    # Parse key-value pairs
                    if '=' in line:
                        key, value = line.split('=', 1)
                        key = key.strip()
                        value = value.strip()
            i = 0
            while i < len(lines):
                line = lines[i].strip()

                        # Add directly to data dictionary
                # Skip empty lines and comments
                if not line or line.startswith('#'):
                    i += 1
                    continue

                # Parse key-value pairs
                if '=' in line:
                    key, value = line.split('=', 1)
                    key = key.strip()
                    value = value.strip()

                    # Check if value starts with { (JSON object)
                    if value.startswith('{'):
                        # Collect all lines until we find the closing }
                        json_lines = [value]
                        i += 1
                        brace_count = value.count('{') - value.count('}')

                        while i < len(lines) and brace_count > 0:
                            json_lines.append(lines[i].rstrip('\n'))
                            brace_count += lines[i].count('{') - lines[i].count('}')
                            i += 1

                        # Join all lines and create the full JSON value
                        full_json_value = '\n'.join(json_lines)
                        self._data[key] = full_json_value
                    else:
                        # Single line value
                        self._data[key] = value

                i += 1

            logger.info(f"Loaded environment variables from {envPath.absolute()}")

        # Also load system environment variables (don't override existing)
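Illustrative only (the key name is made up): with the reader above, a JSON value that spans several lines is collected until its braces balance and stored under the key as a single string.

example_env = '''
MY_SERVICE_ACCOUNT_SECRET = {
  "type": "service_account",
  "client_email": "svc@example.invalid"
}
APP_ENV_TYPE = dev
'''
# After _loadEnv() has processed such a file,
# APP_CONFIG.get("MY_SERVICE_ACCOUNT_SECRET") returns the full JSON block as one string.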
@ -158,7 +187,7 @@ class Configuration:
            logger.info("Environment file has changed, reloading...")
            self._loadEnv()

    def get(self, key: str, default: Any = None) -> Any:
    def get(self, key: str, default: Any = None, user_id: str = "system") -> Any:
        """Get configuration value with optional default"""
        self.checkForUpdates()  # Check for file changes

@ -166,10 +195,24 @@ class Configuration:
            value = self._data[key]
            # Handle secrets (keys ending with _SECRET)
            if key.endswith("_SECRET"):
                return handleSecret(value)
            # Handle JSON secrets (keys ending with _API_KEY that contain JSON)
            elif key.endswith("_API_KEY") and value.startswith("{"):
                return handleJsonSecret(value)
                # Log audit event for secret key access
                try:
                    from modules.shared.auditLogger import audit_logger
                    audit_logger.log_key_access(
                        user_id=user_id,
                        mandate_id="system",
                        key_name=key,
                        action="decode"
                    )
                except Exception:
                    # Don't fail if audit logging fails
                    pass

                if value.startswith("{") and value.endswith("}"):
                    # Handle JSON secrets (keys ending with _API_KEY that contain JSON)
                    return handleSecretJson(value, user_id, key)
                else:
                    return handleSecretText(value, user_id, key)
            return value
        return default

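A short sketch of the new call shape (key name and user ID are examples): callers that know the acting user can pass it through so the secret access is attributed in the audit log; "system" stays the default.

jwt_secret = APP_CONFIG.get("APP_JWT_KEY_SECRET", user_id="42")
# audit log receives:  42 | system | key | decode | APP_JWT_KEY_SECRET
# values carrying the <ENV>_ENC: prefix are decrypted via handleSecretText /
# handleSecretJson; plain values are returned unchanged.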
@ -177,7 +220,7 @@ class Configuration:
        """Enable attribute-style access to configuration"""
        self.checkForUpdates()  # Check for file changes

        value = self.get(name)
        value = self.get(name, user_id="system")
        if value is None:
            raise AttributeError(f"Configuration key '{name}' not found")
        return value
@ -191,42 +234,306 @@ class Configuration:
        """Set a configuration value (for testing/overrides)"""
        self._data[key] = value

def handleSecret(value: str) -> str:
def handleSecretText(value: str, user_id: str = "system", key_name: str = "unknown") -> str:
    """
    Handle secret values. Currently just returns the plain text value,
    but can be enhanced to provide actual decryption in the future.
    Handle secret values with encryption/decryption support.

    Args:
        value: The secret value to handle
        value: The secret value to handle (may be encrypted)
        user_id: The user ID making the request (default: "system")
        key_name: The name of the key being decrypted (default: "unknown")

    Returns:
        str: Processed secret value
        str: Processed secret value (decrypted if encrypted)
    """
    # For now, just return the value as-is
    # In the future, this could be enhanced to decrypt values
    if _is_encrypted_value(value):
        return decrypt_value(value, user_id, key_name)
    return value

def handleJsonSecret(value: str) -> str:
def handleSecretJson(value: str, user_id: str = "system", key_name: str = "unknown") -> str:
    """
    Handle JSON secret values (like Google service account keys).
    Validates that the value is valid JSON.
    Handle JSON secret values (like Google service account keys) with encryption/decryption support.
    Validates that the value is valid JSON after decryption.

    Args:
        value: The JSON secret value to handle
        value: The JSON secret value to handle (may be encrypted)
        user_id: The user ID making the request (default: "system")
        key_name: The name of the key being decrypted (default: "unknown")

    Returns:
        str: Processed JSON secret value
        str: Processed JSON secret value (decrypted if encrypted)

    Raises:
        ValueError: If the value is not valid JSON
        ValueError: If the value is not valid JSON after decryption
    """
    import json
    # Decrypt if encrypted
    if _is_encrypted_value(value):
        decrypted_value = decrypt_value(value, user_id, key_name)
    else:
        decrypted_value = value

    try:
        # Validate that it's valid JSON
        json.loads(value)
        return value
        json.loads(decrypted_value)
        return decrypted_value
    except json.JSONDecodeError as e:
        raise ValueError(f"Invalid JSON in secret value: {e}")

# Global rate limiting tracking
# Structure: {user_id: {key_name: [timestamps]}}
_decryption_attempts = {}

def _get_master_key() -> bytes:
    """
    Get the master key for the current environment.

    Returns:
        bytes: The master key for encryption/decryption

    Raises:
        ValueError: If no master key is found
    """
    # Get the key location from config
    key_location = APP_CONFIG.get('APP_KEY_SYSVAR')
    env_type = APP_CONFIG.get('APP_ENV_TYPE', 'dev')

    if not key_location:
        raise ValueError("APP_KEY_SYSVAR not configured")

    # First try to get from environment variable
    master_key = os.environ.get(key_location)

    if master_key:
        # If found in environment, use it directly
        return master_key.encode('utf-8')

    # If not in environment, try to read from file
    if os.path.exists(key_location):
        try:
            with open(key_location, 'r') as f:
                content = f.read().strip()

            # Parse the key file format: env = key
            lines = content.split('\n')
            for line in lines:
                line = line.strip()
                if not line or line.startswith('#'):
                    continue

                if '=' in line:
                    key_env, key_value = line.split('=', 1)
                    key_env = key_env.strip()
                    key_value = key_value.strip()

                    if key_env == env_type:
                        return key_value.encode('utf-8')

            raise ValueError(f"No key found for environment '{env_type}' in {key_location}")

        except Exception as e:
            raise ValueError(f"Error reading key file {key_location}: {e}")

    raise ValueError(f"Master key not found. Checked environment variable '{key_location}' and file path")

def _derive_encryption_key(master_key: bytes) -> bytes:
    """
    Derive a 32-byte encryption key from the master key using PBKDF2.

    Args:
        master_key: The master key bytes

    Returns:
        bytes: 32-byte derived key suitable for Fernet
    """
    # Use a fixed salt for consistency (in production, consider using a random salt stored separately)
    salt = b'poweron_config_salt_2025'

    kdf = PBKDF2HMAC(
        algorithm=hashes.SHA256(),
        length=32,
        salt=salt,
        iterations=100000,
    )

    return base64.urlsafe_b64encode(kdf.derive(master_key))

def _is_encrypted_value(value: str) -> bool:
    """
    Check if a value is encrypted (starts with environment-specific prefix).

    Args:
        value: The value to check

    Returns:
        bool: True if encrypted, False otherwise
    """
    if not value or not isinstance(value, str):
        return False

    # Check for environment-specific encryption prefixes
    env_type = APP_CONFIG.get('APP_ENV_TYPE', 'dev').upper()
    expected_prefix = f"{env_type}_ENC:"
    return value.startswith(expected_prefix)

def _get_encryption_prefix(env_type: str) -> str:
    """
    Get the encryption prefix for the given environment type.

    Args:
        env_type: The environment type (dev, int, prod, etc.)

    Returns:
        str: The encryption prefix
    """
    return f"{env_type.upper()}_ENC:"
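An illustrative key file in the "env = key" format parsed by _get_master_key(); the path comes from APP_KEY_SYSVAR and the key strings below are placeholders, not real keys.

example_key_file = """
# PowerOn master keys - do not commit
dev  = placeholder-dev-master-key
int  = placeholder-int-master-key
prod = placeholder-prod-master-key
"""
# With APP_ENV_TYPE=dev, _get_master_key() returns b"placeholder-dev-master-key".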
def _check_decryption_rate_limit(user_id: str, key_name: str, max_per_second: int = 10) -> bool:
    """
    Check if decryption is allowed based on rate limiting (max 10 per second per user per key).

    Args:
        user_id: The user ID making the request
        key_name: The name of the key being decrypted
        max_per_second: Maximum decryptions per second (default: 10)

    Returns:
        bool: True if allowed, False if rate limited
    """
    current_time = time.time()

    # Initialize tracking for this user if not exists
    if user_id not in _decryption_attempts:
        _decryption_attempts[user_id] = {}

    # Initialize tracking for this key if not exists
    if key_name not in _decryption_attempts[user_id]:
        _decryption_attempts[user_id][key_name] = []

    # Clean old attempts (older than 1 second)
    _decryption_attempts[user_id][key_name] = [
        timestamp for timestamp in _decryption_attempts[user_id][key_name]
        if current_time - timestamp < 1.0
    ]

    # Check if we're within rate limit
    if len(_decryption_attempts[user_id][key_name]) >= max_per_second:
        logger.warning(f"Decryption rate limit exceeded for user '{user_id}' key '{key_name}' ({max_per_second}/sec)")
        return False

    # Record this attempt
    _decryption_attempts[user_id][key_name].append(current_time)
    return True
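Behavioural sketch, assuming the module-level state above: within one second, the eleventh decryption of the same key by the same user is rejected.

for attempt in range(11):
    allowed = _check_decryption_rate_limit("user-1", "APP_JWT_KEY_SECRET")
    # attempts 1-10 -> True; attempt 11 -> False (a warning is logged and
    # decrypt_value() raises ValueError in that case)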
def encrypt_value(value: str, env_type: str = None, user_id: str = "system", key_name: str = "unknown") -> str:
    """
    Encrypt a value using the master key for the specified environment.

    Args:
        value: The plain text value to encrypt
        env_type: The environment type (dev, int, prod). If None, uses current environment.
        user_id: The user ID making the request (default: "system")
        key_name: The name of the key being encrypted (default: "unknown")

    Returns:
        str: The encrypted value with prefix

    Raises:
        ValueError: If encryption fails
    """
    if env_type is None:
        env_type = APP_CONFIG.get('APP_ENV_TYPE', 'dev')

    try:
        master_key = _get_master_key()
        derived_key = _derive_encryption_key(master_key)
        fernet = Fernet(derived_key)

        # Encrypt the value
        encrypted_bytes = fernet.encrypt(value.encode('utf-8'))
        encrypted_b64 = base64.urlsafe_b64encode(encrypted_bytes).decode('utf-8')

        # Add environment prefix
        prefix = _get_encryption_prefix(env_type)
        encrypted_value = f"{prefix}{encrypted_b64}"

        # Log audit event for encryption
        try:
            from modules.shared.auditLogger import audit_logger
            audit_logger.log_key_access(
                user_id=user_id,
                mandate_id="system",
                key_name=key_name,
                action="encrypt"
            )
        except Exception:
            # Don't fail if audit logging fails
            pass

        return encrypted_value

    except Exception as e:
        raise ValueError(f"Encryption failed: {e}")

def decrypt_value(encrypted_value: str, user_id: str = "system", key_name: str = "unknown") -> str:
    """
    Decrypt a value using the master key for the current environment.

    Args:
        encrypted_value: The encrypted value with prefix
        user_id: The user ID making the request (default: "system")
        key_name: The name of the key being decrypted (default: "unknown")

    Returns:
        str: The decrypted plain text value

    Raises:
        ValueError: If decryption fails
    """
    if not _is_encrypted_value(encrypted_value):
        return encrypted_value  # Return as-is if not encrypted

    # Check rate limiting (10 per second per user per key)
    if not _check_decryption_rate_limit(user_id, key_name, max_per_second=10):
        raise ValueError(f"Decryption rate limit exceeded for user '{user_id}' key '{key_name}' (10/sec)")

    try:
        # Extract the encrypted part (remove prefix)
        env_type = APP_CONFIG.get('APP_ENV_TYPE', 'dev')
        env_type_upper = env_type.upper()
        expected_prefix = f"{env_type_upper}_ENC:"

        if not encrypted_value.startswith(expected_prefix):
            raise ValueError(f"Invalid encryption prefix. Expected {expected_prefix}")

        encrypted_part = encrypted_value[len(expected_prefix):]

        # Get master key and derive encryption key
        master_key = _get_master_key()
        derived_key = _derive_encryption_key(master_key)
        fernet = Fernet(derived_key)

        # Decode and decrypt
        encrypted_bytes = base64.urlsafe_b64decode(encrypted_part.encode('utf-8'))
        decrypted_bytes = fernet.decrypt(encrypted_bytes)
        decrypted_value = decrypted_bytes.decode('utf-8')

        # Log audit event for decryption
        try:
            from modules.shared.auditLogger import audit_logger
            audit_logger.log_key_access(
                user_id=user_id,
                mandate_id="system",
                key_name=key_name,
                action="decrypt"
            )
        except Exception:
            # Don't fail if audit logging fails
            pass

        return decrypted_value

    except Exception as e:
        raise ValueError(f"Decryption failed: {e}")

# Create the global APP_CONFIG instance
APP_CONFIG = Configuration()
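A round-trip sketch (values, key name and environment are illustrative; it assumes APP_ENV_TYPE=dev and a configured master key): encrypt_value() prefixes the Fernet ciphertext with "<ENV>_ENC:", decrypt_value() strips the prefix, applies the rate limit and writes an audit entry.

token = encrypt_value("my-database-password", env_type="dev",
                      user_id="42", key_name="APP_DB_PASSWORD_SECRET")
# token looks like: DEV_ENC:Z0FBQUFB...
plain = decrypt_value(token, user_id="42", key_name="APP_DB_PASSWORD_SECRET")
assert plain == "my-database-password"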
1206 notes/changelog.txt
File diff suppressed because it is too large
@ -1,48 +0,0 @@
MERMAID DIAGRAM:

Can you make the chart "wiki/diagramm_komponenten.mermaid": produce a component diagram based on the current code in poweron/*.
If the document exists, add missing components and remove obsolete components.

In box texts use <br> instead of \n.

For all subgraphs, add the path on a separate line so the module can be found in the code.

Read all code modules carefully to identify all components and their relations.

Connectors without texts, only lines.

Add a connector between frontend and backend (apiCalls.js -> app.py).

Connect app.py (the main application module) with the route*.py files.

Put all items of the frontend into subgraph "Frontend".
Put all items of the gateway into subgraph "Gateway".

Put the following boxes into a dedicated subgraph within their existing subgraph:
- workflowManager.py, workflowAgentsRegistry.py, documentProcessor.py --> "Workflow"
- mimeUtils.py, defAttributes.py, configuration.py, autho.py --> "Shared"
- agent*.py --> "Agents"
- workflow*.js --> "Workflow"
- all *.js in js/modules/ not starting with workflow* --> "Administration"
- formGeneric.js is not put into subgraph "Shared", but into a separate subgraph "Shared

Connect main.js (the main app in the frontend) to navigation.js, globalState.js, login.js, register.js, msftCall.js, config.js.

Connect navigation.js to moduleLoader.js.

Connect moduleLoader.js to workflow.js and to all *.js in js/modules/ not starting with workflow*.

Connect all *.js in js/modules/ not starting with workflow* --> formGeneric.js.

Connect formGeneric.js --> apiCalls.js.

Use underscores (e.g. Backend_Python, Workflow_Modules, etc.) for all subgraph titles.

If adding a legend, give the references the same colors as the legend.
@ -1,39 +0,0 @@
### Launch APP

cd .\frontend_agents\
cls; python ./server.py

conda activate C:\Users\pmots\anaconda3\envs\poweron
cd .\gateway\
cls; uvicorn app:app --host 0.0.0.0 --port 8000


### git permanent login with vs code
git remote set-url origin https://valueon@github.com/valueonag/gateway
git remote set-url origin https://valueon@github.com/valueonag/frontend_agents
git remote set-url origin https://valueon@github.com/valueonag/wiki
git remote set-url origin https://valueon@github.com/valueonag/customer-svbe
git remote set-url origin https://valueon@github.com/valueonag/customer-althaus

### git delete workflow runs (cleanup)

gh auth login

Navigate to your repository folder (if not already there):
bash: cd /path/to/your/repository

List workflow runs:
bash: gh run list

Delete a specific workflow run:
bash: gh run delete [RUN_ID]

Delete all completed workflow runs (to clear up space):
bash: gh run list --status completed --json databaseId -q '.[].databaseId' | xargs -I{} gh run delete {}
powershell:

$runs = gh run list --status completed --json databaseId -q ".[].databaseId" | ConvertFrom-Json
foreach ($run in $runs) {
    Write-Host "Deleting run $run"
    echo "y" | gh run delete $run
}

1 query
@ -1 +0,0 @@
postgresql
@ -10,6 +10,7 @@ slowapi==0.1.8 # For rate limiting

## Authentication & Security
python-jose[cryptography]==3.3.0 # For JWT tokens
cryptography>=41.0.0 # For encryption/decryption of configuration values
passlib==1.7.4
argon2-cffi>=21.3.0 # For password hashing in gateway_interface.py
google-auth-oauthlib==1.2.0 # For Google OAuth
@ -1,77 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Test script to verify the Excel header parsing fix
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
import pandas as pd
|
||||
from io import BytesIO
|
||||
|
||||
# Add the gateway modules to the path
|
||||
sys.path.append(os.path.join(os.path.dirname(__file__), 'modules'))
|
||||
|
||||
from modules.interfaces.interfaceTicketObjects import TicketSharepointSyncInterface
|
||||
|
||||
def test_excel_header_parsing():
|
||||
"""Test the Excel header parsing fix"""
|
||||
print("=== Testing Excel Header Parsing Fix ===\n")
|
||||
|
||||
# Create a mock interface instance
|
||||
interface = TicketSharepointSyncInterface(
|
||||
connector_ticket=None,
|
||||
connector_sharepoint=None,
|
||||
task_sync_definition={
|
||||
"ID": ["get", ["id"]],
|
||||
"Summary": ["get", ["fields", "summary"]],
|
||||
"Status": ["get", ["fields", "status", "name"]],
|
||||
"Assignee": ["put", ["fields", "assignee", "displayName"]]
|
||||
},
|
||||
sync_folder="test",
|
||||
sync_file="test.xlsx",
|
||||
backup_folder="backup",
|
||||
audit_folder="audit",
|
||||
site_id="test"
|
||||
)
|
||||
|
||||
# Test data
|
||||
test_data = [
|
||||
{"ID": "TEST-1", "Summary": "Test Issue 1", "Status": "Open", "Assignee": "John Doe"},
|
||||
{"ID": "TEST-2", "Summary": "Test Issue 2", "Status": "Closed", "Assignee": "Jane Smith"},
|
||||
]
|
||||
|
||||
# Create Excel content
|
||||
print("1. Creating Excel content...")
|
||||
excel_content = interface._create_excel_content(test_data)
|
||||
print(f" ✓ Created Excel content: {len(excel_content)} bytes")
|
||||
|
||||
# Parse it back
|
||||
print("2. Parsing Excel content...")
|
||||
try:
|
||||
parsed_data, parsed_headers = interface._parse_excel_content(excel_content)
|
||||
print(f" ✓ Parsed Excel content: {len(parsed_data)} records")
|
||||
print(f" ✓ Headers type: header1={type(parsed_headers['header1'])}, header2={type(parsed_headers['header2'])}")
|
||||
print(f" ✓ Headers content: header1='{parsed_headers['header1']}', header2='{parsed_headers['header2']}'")
|
||||
|
||||
# Test creating content with the parsed headers
|
||||
print("3. Testing round-trip with parsed headers...")
|
||||
new_excel_content = interface._create_excel_content(test_data, parsed_headers)
|
||||
print(f" ✓ Created new Excel content: {len(new_excel_content)} bytes")
|
||||
|
||||
# Parse the new content
|
||||
final_data, final_headers = interface._parse_excel_content(new_excel_content)
|
||||
print(f" ✓ Final parse successful: {len(final_data)} records")
|
||||
print(f" ✓ Final headers: header1='{final_headers['header1']}', header2='{final_headers['header2']}'")
|
||||
|
||||
print("\n✅ All tests passed! The header parsing fix works correctly.")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
print(f" ✗ Error during parsing: {e}")
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
return False
|
||||
|
||||
if __name__ == "__main__":
|
||||
success = test_excel_header_parsing()
|
||||
exit(0 if success else 1)
|
||||
375
tool_security_encrypt_config_value.py
Normal file
375
tool_security_encrypt_config_value.py
Normal file
|
|
@ -0,0 +1,375 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Tool for encrypting configuration values.
|
||||
|
||||
This tool allows developers to encrypt secret values for use in configuration files.
|
||||
It supports both text and JSON values and automatically determines the environment.
|
||||
It can also encrypt all *_SECRET keys in an environment file at once.
|
||||
|
||||
Usage:
|
||||
# Encrypt a single value
|
||||
python tool_encrypt_config_value.py --value "my_secret_value" --env dev
|
||||
python tool_encrypt_config_value.py --file "path/to/file.json" --env prod
|
||||
|
||||
# Encrypt all secrets in a file
|
||||
python tool_encrypt_config_value.py --encrypt-all env_dev.env --env dev
|
||||
python tool_encrypt_config_value.py --encrypt-all env_prod.env --env prod --dry-run
|
||||
|
||||
# Decrypt a value (for testing)
|
||||
python tool_encrypt_config_value.py --decrypt "DEV_ENC:encrypted_value"
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
import json
|
||||
import argparse
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
|
||||
# Add the modules directory to the Python path
|
||||
sys.path.insert(0, str(Path(__file__).parent / 'modules'))
|
||||
|
||||
from shared.configuration import encrypt_value, decrypt_value, _is_encrypted_value
|
||||
|
||||
def find_secret_keys_in_file(file_path: Path) -> list:
|
||||
"""
|
||||
Find all *_SECRET keys in an environment file that are not encrypted.
|
||||
|
||||
Args:
|
||||
file_path: Path to the environment file
|
||||
|
||||
Returns:
|
||||
list: List of tuples (line_number, key, value, full_line)
|
||||
"""
|
||||
secret_keys = []
|
||||
|
||||
if not file_path.exists():
|
||||
return secret_keys
|
||||
|
||||
try:
|
||||
with open(file_path, 'r', encoding='utf-8') as f:
|
||||
lines = f.readlines()
|
||||
|
||||
i = 0
|
||||
while i < len(lines):
|
||||
line = lines[i].strip()
|
||||
|
||||
# Skip empty lines and comments
|
||||
if not line or line.startswith('#'):
|
||||
i += 1
|
||||
continue
|
||||
|
||||
# Check if line contains a key-value pair
|
||||
if '=' in line:
|
||||
key, value = line.split('=', 1)
|
||||
key = key.strip()
|
||||
value = value.strip()
|
||||
|
||||
# Check if it's a secret key and not already encrypted
|
||||
if key.endswith('_SECRET') and value and not _is_encrypted_value(value):
|
||||
# Check if value starts with { (JSON object)
|
||||
if value.startswith('{'):
|
||||
# Collect all lines until we find the closing }
|
||||
json_lines = [value]
|
||||
start_line = i + 1
|
||||
i += 1
|
||||
brace_count = value.count('{') - value.count('}')
|
||||
|
||||
while i < len(lines) and brace_count > 0:
|
||||
json_lines.append(lines[i].rstrip('\n'))
|
||||
brace_count += lines[i].count('{') - lines[i].count('}')
|
||||
i += 1
|
||||
|
||||
# Join all lines and create the full JSON value
|
||||
full_json_value = '\n'.join(json_lines)
|
||||
secret_keys.append((start_line, key, full_json_value, line))
|
||||
i -= 1 # Adjust for the loop increment
|
||||
else:
|
||||
# Single line value
|
||||
secret_keys.append((i + 1, key, value, line))
|
||||
# Check if it's a secret key with multiline JSON (value is just "{")
|
||||
elif key.endswith('_SECRET') and value == '{' and not _is_encrypted_value(value):
|
||||
# Collect all lines until we find the closing }
|
||||
json_lines = [value]
|
||||
start_line = i + 1
|
||||
i += 1
|
||||
brace_count = 1 # We already have one opening brace
|
||||
|
||||
while i < len(lines) and brace_count > 0:
|
||||
json_lines.append(lines[i].rstrip('\n'))
|
||||
brace_count += lines[i].count('{') - lines[i].count('}')
|
||||
i += 1
|
||||
|
||||
# Join all lines and create the full JSON value
|
||||
full_json_value = '\n'.join(json_lines)
|
||||
secret_keys.append((start_line, key, full_json_value, line))
|
||||
i -= 1 # Adjust for the loop increment
|
||||
|
||||
i += 1
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error reading {file_path}: {e}")
|
||||
|
||||
return secret_keys
|
||||
|
||||
def backup_file(file_path: Path) -> Path:
|
||||
"""
|
||||
Create a backup of the file before modification.
|
||||
|
||||
Args:
|
||||
file_path: Path to the file to backup
|
||||
|
||||
Returns:
|
||||
Path: Path to the backup file
|
||||
"""
|
||||
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
backup_path = file_path.with_suffix(f'.{timestamp}.backup')
|
||||
shutil.copy2(file_path, backup_path)
|
||||
return backup_path
|
||||
|
||||
def encrypt_all_secrets_in_file(file_path: Path, env_type: str, dry_run: bool = False, create_backup: bool = True) -> dict:
|
||||
"""
|
||||
Encrypt all non-encrypted secrets in a file.
|
||||
|
||||
Args:
|
||||
file_path: Path to the environment file
|
||||
env_type: The environment type
|
||||
dry_run: If True, only show what would be changed
|
||||
create_backup: If True, create a backup before modifying
|
||||
|
||||
Returns:
|
||||
dict: Results of the encryption process
|
||||
"""
|
||||
results = {
|
||||
'file': str(file_path),
|
||||
'env_type': env_type,
|
||||
'secrets_found': 0,
|
||||
'secrets_encrypted': 0,
|
||||
'errors': [],
|
||||
'backup_created': None
|
||||
}
|
||||
|
||||
# Find all secret keys
|
||||
secret_keys = find_secret_keys_in_file(file_path)
|
||||
results['secrets_found'] = len(secret_keys)
|
||||
|
||||
if not secret_keys:
|
||||
return results
|
||||
|
||||
print(f"\n📁 Processing {file_path.name} ({env_type}):")
|
||||
print(f" Found {len(secret_keys)} non-encrypted secrets")
|
||||
|
||||
if dry_run:
|
||||
print(" [DRY RUN] Would encrypt the following secrets:")
|
||||
for line_num, key, value, full_line in secret_keys:
|
||||
print(f" Line {line_num}: {key} = {value[:50]}{'...' if len(value) > 50 else ''}")
|
||||
return results
|
||||
|
||||
# Create backup if requested
|
||||
if create_backup:
|
||||
try:
|
||||
backup_path = backup_file(file_path)
|
||||
results['backup_created'] = str(backup_path)
|
||||
print(f" 📋 Backup created: {backup_path.name}")
|
||||
except Exception as e:
|
||||
results['errors'].append(f"Failed to create backup: {e}")
|
||||
print(f" ⚠️ Warning: Could not create backup: {e}")
|
||||
|
||||
# Read the file content
|
||||
try:
|
||||
with open(file_path, 'r', encoding='utf-8') as f:
|
||||
lines = f.readlines()
|
||||
except Exception as e:
|
||||
results['errors'].append(f"Failed to read file: {e}")
|
||||
return results
|
||||
|
||||
# Process each secret key
|
||||
for line_num, key, value, full_line in secret_keys:
|
||||
try:
|
||||
print(f" 🔐 Encrypting {key}...")
|
||||
|
||||
# Encrypt the value using the existing function
|
||||
encrypted_value = encrypt_value(value, env_type)
|
||||
|
||||
# Replace the line in the file content
|
||||
new_line = f"{key} = {encrypted_value}\n"
|
||||
lines[line_num - 1] = new_line
|
||||
|
||||
# If this was a multiline JSON, we need to remove the remaining lines
|
||||
if value.startswith('{') and '\n' in value:
|
||||
# Count how many lines the original JSON spanned
|
||||
json_lines = value.split('\n')
|
||||
lines_to_remove = len(json_lines) - 1 # -1 because we already replaced the first line
|
||||
|
||||
# Remove the remaining lines
|
||||
for i in range(line_num, line_num + lines_to_remove):
|
||||
if i < len(lines):
|
||||
lines[i] = ""
|
||||
|
||||
results['secrets_encrypted'] += 1
|
||||
print(f" ✓ Encrypted successfully")
|
||||
|
||||
except Exception as e:
|
||||
error_msg = f"Failed to encrypt {key}: {e}"
|
||||
results['errors'].append(error_msg)
|
||||
print(f" ✗ {error_msg}")
|
||||
|
||||
# Write the modified content back to the file
|
||||
if results['secrets_encrypted'] > 0:
|
||||
try:
|
||||
with open(file_path, 'w', encoding='utf-8') as f:
|
||||
f.writelines(lines)
|
||||
print(f" 💾 File updated successfully")
|
||||
except Exception as e:
|
||||
results['errors'].append(f"Failed to write file: {e}")
|
||||
print(f" ✗ Failed to write file: {e}")
|
||||
|
||||
return results
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Encrypt configuration values')
|
||||
parser.add_argument('--value', '-v', help='Plain text value to encrypt')
|
||||
parser.add_argument('--file', '-f', help='File containing the value to encrypt')
|
||||
parser.add_argument('--env', '-e', choices=['dev', 'int', 'prod'],
|
||||
help='Environment type (default: current environment)')
|
||||
parser.add_argument('--decrypt', '-d', help='Decrypt an encrypted value (for testing)')
|
||||
parser.add_argument('--interactive', '-i', action='store_true',
|
||||
help='Interactive mode - prompt for value')
|
||||
parser.add_argument('--encrypt-all', '-a', help='Encrypt all *_SECRET keys in the specified file')
|
||||
parser.add_argument('--dry-run', action='store_true',
|
||||
help='Show what would be changed without making changes (for --encrypt-all)')
|
||||
parser.add_argument('--no-backup', action='store_true',
|
||||
help='Skip creating backup files (for --encrypt-all)')
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
try:
|
||||
# Handle encrypt-all functionality
|
||||
if args.encrypt_all:
|
||||
file_path = Path(args.encrypt_all)
|
||||
if not file_path.exists():
|
||||
print(f"Error: File not found: {file_path}")
|
||||
return 1
|
||||
|
||||
if not args.env:
|
||||
print("Error: --env is required when using --encrypt-all")
|
||||
return 1
|
||||
|
||||
print("🔐 PowerOn Secret Encryption Tool")
|
||||
print("=" * 50)
|
||||
|
||||
if args.dry_run:
|
||||
print("🔍 DRY RUN MODE - No changes will be made")
|
||||
print()
|
||||
|
||||
results = encrypt_all_secrets_in_file(
|
||||
file_path,
|
||||
args.env,
|
||||
dry_run=args.dry_run,
|
||||
create_backup=not args.no_backup
|
||||
)
|
||||
|
||||
# Summary
|
||||
print("\n" + "=" * 50)
|
||||
print("📊 SUMMARY")
|
||||
print("=" * 50)
|
||||
print(f"File processed: {file_path.name}")
|
||||
print(f"Secrets found: {results['secrets_found']}")
|
||||
|
||||
if not args.dry_run:
|
||||
print(f"Secrets encrypted: {results['secrets_encrypted']}")
|
||||
print(f"Errors: {len(results['errors'])}")
|
||||
|
||||
if len(results['errors']) == 0 and results['secrets_encrypted'] > 0:
|
||||
print("\n🎉 All secrets encrypted successfully!")
|
||||
elif len(results['errors']) > 0:
|
||||
print(f"\n⚠️ Completed with {len(results['errors'])} errors")
|
||||
else:
|
||||
print("\n✅ No secrets needed encryption")
|
||||
else:
|
||||
print(f"Secrets that would be encrypted: {results['secrets_found']}")
|
||||
|
||||
# Show backup information
|
||||
if results['backup_created']:
|
||||
print(f"\n📋 Backup created: {Path(results['backup_created']).name}")
|
||||
|
||||
# Show errors if any
|
||||
if results['errors']:
|
||||
print(f"\n❌ Errors encountered:")
|
||||
for error in results['errors']:
|
||||
print(f" - {error}")
|
||||
|
||||
return 0 if len(results['errors']) == 0 else 1
|
||||
|
||||
# Handle decryption
|
||||
if args.decrypt:
|
||||
if _is_encrypted_value(args.decrypt):
|
||||
decrypted = decrypt_value(args.decrypt)
|
||||
print(f"Decrypted value: {decrypted}")
|
||||
else:
|
||||
print("Error: Value does not appear to be encrypted (missing ENV_ENC: prefix)")
|
||||
return
|
||||
|
||||
# Determine the value to encrypt
|
||||
value_to_encrypt = None
|
||||
|
||||
if args.value:
|
||||
value_to_encrypt = args.value
|
||||
elif args.file:
|
||||
if not os.path.exists(args.file):
|
||||
print(f"Error: File not found: {args.file}")
|
||||
return
|
||||
|
||||
with open(args.file, 'r', encoding='utf-8') as f:
|
||||
value_to_encrypt = f.read().strip()
|
||||
elif args.interactive:
|
||||
print("Enter the value to encrypt (press Ctrl+D when done):")
|
||||
try:
|
||||
value_to_encrypt = sys.stdin.read().strip()
|
||||
except EOFError:
|
||||
print("Error: No input provided")
|
||||
return
|
||||
else:
|
||||
# Interactive mode by default
|
||||
print("Enter the value to encrypt (press Ctrl+D when done):")
|
||||
try:
|
||||
value_to_encrypt = sys.stdin.read().strip()
|
||||
except EOFError:
|
||||
print("Error: No input provided")
|
||||
return
|
||||
|
||||
if not value_to_encrypt:
|
||||
print("Error: No value provided to encrypt")
|
||||
return
|
||||
|
||||
# Validate JSON if it looks like JSON
|
||||
if value_to_encrypt.strip().startswith('{'):
|
||||
try:
|
||||
json.loads(value_to_encrypt)
|
||||
print("✓ Valid JSON detected")
|
||||
except json.JSONDecodeError as e:
|
||||
print(f"Warning: Value looks like JSON but is invalid: {e}")
|
||||
response = input("Continue anyway? (y/N): ")
|
||||
if response.lower() != 'y':
|
||||
return
|
||||
|
||||
# Encrypt the value
|
||||
encrypted_value = encrypt_value(value_to_encrypt, args.env)
|
||||
|
||||
print(f"\n✓ Encryption successful!")
|
||||
print(f"Environment: {args.env or 'current'}")
|
||||
print(f"Encrypted value:")
|
||||
print(f"{encrypted_value}")
|
||||
print(f"\nCopy the above value to your configuration file.")
|
||||
|
||||
# Show usage example
|
||||
print(f"\nUsage in config file:")
|
||||
print(f"MY_SECRET_KEY = {encrypted_value}")
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error: {e}")
|
||||
sys.exit(1)
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
89
tools_security_generate_master_keys.py
Normal file
89
tools_security_generate_master_keys.py
Normal file
|
|
@ -0,0 +1,89 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Generate secure master keys for all environments.
|
||||
|
||||
This tool generates cryptographically secure 256-bit master keys for all environments
|
||||
and updates the key.txt file with the new keys.
|
||||
|
||||
Usage:
|
||||
python generate_master_keys.py
|
||||
python generate_master_keys.py --output "path/to/key.txt"
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
import secrets
|
||||
import base64
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
def generate_master_key():
|
||||
"""Generate a secure 256-bit master key."""
|
||||
# Generate 32 random bytes (256 bits)
|
||||
key_bytes = secrets.token_bytes(32)
|
||||
# Encode as base64 for easy storage
|
||||
return base64.urlsafe_b64encode(key_bytes).decode('utf-8')
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Generate secure master keys for all environments')
|
||||
parser.add_argument('--output', '-o',
|
||||
default='../local/key.txt',
|
||||
help='Output file path (default: ../local/key.txt)')
|
||||
parser.add_argument('--force', '-f', action='store_true',
|
||||
help='Overwrite existing key file without confirmation')
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# Convert to absolute path
|
||||
output_path = Path(args.output).resolve()
|
||||
|
||||
# Check if file exists and get confirmation
|
||||
if output_path.exists() and not args.force:
|
||||
response = input(f"File {output_path} already exists. Overwrite? (y/N): ")
|
||||
if response.lower() != 'y':
|
||||
print("Operation cancelled.")
|
||||
return
|
||||
|
||||
try:
|
||||
# Generate keys for all environments
|
||||
keys = {
|
||||
'prod': generate_master_key(),
|
||||
'int': generate_master_key(),
|
||||
'dev': generate_master_key()
|
||||
}
|
||||
|
||||
# Create output content
|
||||
content = []
|
||||
content.append("# PowerOn Master Keys")
|
||||
content.append("# Generated on: " + str(Path(__file__).stat().st_mtime))
|
||||
content.append("# WARNING: Keep this file secure and never commit to version control!")
|
||||
content.append("")
|
||||
|
||||
for env, key in keys.items():
|
||||
content.append(f"{env} = {key}")
|
||||
|
||||
# Ensure output directory exists
|
||||
output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Write to file
|
||||
with open(output_path, 'w', encoding='utf-8') as f:
|
||||
f.write('\n'.join(content))
|
||||
|
||||
print("✓ Master keys generated successfully!")
|
||||
print(f"Output file: {output_path}")
|
||||
print("\nGenerated keys:")
|
||||
for env, key in keys.items():
|
||||
print(f" {env}: {key[:20]}...")
|
||||
|
||||
print(f"\n⚠️ IMPORTANT SECURITY NOTES:")
|
||||
print(f" - Keep this file secure and never commit to version control")
|
||||
print(f" - Store production keys in Azure environment variables")
|
||||
print(f" - Share development keys securely with team members")
|
||||
print(f" - Consider rotating keys regularly")
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error generating keys: {e}")
|
||||
sys.exit(1)
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||