diff --git a/app.py b/app.py
index ad932e9a..cade2e7c 100644
--- a/app.py
+++ b/app.py
@@ -8,19 +8,79 @@
 from zoneinfo import ZoneInfo
 import logging
 from logging.handlers import RotatingFileHandler
-from datetime import timedelta
+from datetime import timedelta, datetime
 import pathlib
 from modules.shared.configuration import APP_CONFIG
 from apscheduler.schedulers.asyncio import AsyncIOScheduler
 from apscheduler.triggers.cron import CronTrigger
+
+class DailyRotatingFileHandler(RotatingFileHandler):
+    """
+    A rotating file handler that automatically switches to a new file when the date changes.
+    The log file name includes the current date; the switch happens on the first record
+    emitted after midnight.
+    """
+
+    def __init__(self, log_dir, filename_prefix, max_bytes=10485760, backup_count=5, **kwargs):
+        self.log_dir = log_dir
+        self.filename_prefix = filename_prefix
+        self.current_date = None
+        self.current_file = None
+
+        # Initialize with today's file
+        self._update_file_if_needed()
+
+        # Call parent constructor with current file
+        super().__init__(self.current_file, maxBytes=max_bytes, backupCount=backup_count, **kwargs)
+
+    def _update_file_if_needed(self):
+        """Update the log file if the date has changed"""
+        today = datetime.now().strftime("%Y%m%d")
+
+        if self.current_date != today:
+            self.current_date = today
+            new_file = os.path.join(self.log_dir, f"{self.filename_prefix}_{today}.log")
+
+            if self.current_file != new_file:
+                self.current_file = new_file
+                return True
+        return False
+
+    def emit(self, record):
+        """Emit a log record, switching files if the date has changed"""
+        # Check if we need to switch to a new file
+        if self._update_file_if_needed():
+            # Close current file and open new one
+            if self.stream:
+                self.stream.close()
+                self.stream = None
+
+            # Update the baseFilename for the parent class
+            self.baseFilename = self.current_file
+            # Reopen the stream
+            if not self.delay:
+                self.stream = self._open()
+
+        # Call parent emit method
+        super().emit(record)
 
 
 def initLogging():
     """Initialize logging with configuration from APP_CONFIG"""
     # Get log level from config (default to WARNING if not found)
     logLevelName = APP_CONFIG.get("APP_LOGGING_LOG_LEVEL", "WARNING")
     logLevel = getattr(logging, logLevelName)
 
+    # Get log directory from config
+    logDir = APP_CONFIG.get("APP_LOGGING_LOG_DIR", "./")
+    if not os.path.isabs(logDir):
+        # If relative path, make it relative to the gateway directory
+        gatewayDir = os.path.dirname(os.path.abspath(__file__))
+        logDir = os.path.join(gatewayDir, logDir)
+
+    # Ensure log directory exists
+    os.makedirs(logDir, exist_ok=True)
+
     # Create formatters - using single line format
     consoleFormatter = logging.Formatter(
         fmt="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
@@ -89,25 +149,15 @@ def initLogging():
 
     # Add file handler if enabled
     if APP_CONFIG.get("APP_LOGGING_FILE_ENABLED", True):
-        # Get log file path and ensure it's absolute
-        logFile = APP_CONFIG.get("APP_LOGGING_LOG_FILE", "app.log")
-        if not os.path.isabs(logFile):
-            # If relative path, make it relative to the gateway directory
-            gatewayDir = os.path.dirname(os.path.abspath(__file__))
-            logFile = os.path.join(gatewayDir, logFile)
-
-        # Ensure log directory exists
-        logDir = os.path.dirname(logFile)
-        if logDir:
-            os.makedirs(logDir, exist_ok=True)
-
+        # Create daily application log file with automatic date switching
        rotationSize = int(APP_CONFIG.get("APP_LOGGING_ROTATION_SIZE", 10485760))  # Default: 10MB
        backupCount = int(APP_CONFIG.get("APP_LOGGING_BACKUP_COUNT", 5))
 
-        fileHandler = RotatingFileHandler(
-            logFile,
-            maxBytes=rotationSize,
-            backupCount=backupCount
+        fileHandler = DailyRotatingFileHandler(
+            log_dir=logDir,
+            filename_prefix="log_app",
+            max_bytes=rotationSize,
+            backup_count=backupCount
         )
         fileHandler.setFormatter(fileFormatter)
         fileHandler.addFilter(ChromeDevToolsFilter())
@@ -133,7 +183,15 @@ def initLogging():
     # Log the current logging configuration
     logger = logging.getLogger(__name__)
     logger.info(f"Logging initialized with level {logLevelName}")
-    logger.info(f"Log file: {logFile if APP_CONFIG.get('APP_LOGGING_FILE_ENABLED', True) else 'disabled'}")
+    logger.info(f"Log directory: {logDir}")
+
+    if APP_CONFIG.get('APP_LOGGING_FILE_ENABLED', True):
+        today = datetime.now().strftime("%Y%m%d")
+        appLogFile = os.path.join(logDir, f"log_app_{today}.log")
+        logger.info(f"Application log file: {appLogFile} (auto-switches daily)")
+    else:
+        logger.info("Application log file: disabled")
+
     logger.info(f"Console logging: {'enabled' if APP_CONFIG.get('APP_LOGGING_CONSOLE_ENABLED', True) else 'disabled'}")
 
 # Initialize logging
@@ -154,7 +212,7 @@ async def lifespan(app: FastAPI):
     # Setup APScheduler for JIRA sync
     scheduler = AsyncIOScheduler(timezone=ZoneInfo("Europe/Zurich"))
     try:
-        from modules.services.serviceDeltaSync import perform_sync_jira_delta_group
+        from modules.features.featureSyncDelta import perform_sync_jira_delta_group
         # Schedule sync every 20 minutes (at minutes 00, 20, 40)
         scheduler.add_job(
             perform_sync_jira_delta_group,
diff --git a/config.ini b/config.ini
index bc8aeb7f..780a9e08 100644
--- a/config.ini
+++ b/config.ini
@@ -5,21 +5,6 @@
 Auth_ALGORITHM = HS256
 Auth_TOKEN_TYPE = bearer
 
-# OpenAI configuration
-Connector_AiOpenai_API_URL = https://api.openai.com/v1/chat/completions
-Connector_AiOpenai_API_SECRET = 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
-Connector_AiOpenai_MODEL_NAME = gpt-4o
-Connector_AiOpenai_TEMPERATURE = 0.2
-Connector_AiOpenai_MAX_TOKENS = 2000
-
-# Anthropic configuration
-Connector_AiAnthropic_API_URL = https://api.anthropic.com/v1/messages
-Connector_AiAnthropic_API_SECRET_OLD = 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
-Connector_AiAnthropic_API_SECRET = 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
-Connector_AiAnthropic_MODEL_NAME = claude-3-5-sonnet-20241022
-Connector_AiAnthropic_TEMPERATURE = 0.2
-Connector_AiAnthropic_MAX_TOKENS = 2000
-
 # File management configuration
 File_Management_MAX_UPLOAD_SIZE_MB = 50
 File_Management_CLEANUP_INTERVAL = 240
@@ -36,33 +21,6 @@ Security_LOCK_DURATION_MINUTES = 30
 # Content Neutralization configuration
 Content_Neutralization_ENABLED = False
 
-# Agent Mail configuration
-Service_MSFT_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
-Service_MSFT_CLIENT_SECRET = Kxf8Q~2lJIteZ~JaI32kMf1lfaWKATqxXiNiFbzV
-Service_MSFT_TENANT_ID = common
-
-# Google Service configuration
-Service_GOOGLE_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
-Service_GOOGLE_CLIENT_SECRET = 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
-
-# Tavily Web Search configuration
-Connector_WebTavily_API_KEY = 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
-
-# Google Cloud Speech Services configuration
-Connector_GoogleSpeech_API_KEY = {
-  "type": "service_account",
-  "project_id": "poweronid",
-  "private_key_id": "88db66e4248326e9baeac4231bc196fd46a9a441",
-  "private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQDTnJuxA+xBL3LA\nPgFILYCsGuppkkdO6d153Q36f2jTj6zpH3OhKMVsaaTBknG2o2+D0Whlk6Yh5rOw\nkWzpMC3y81leRLm5kucERMkBUgd2GL4v16k6m+QGuC3BFlt/XeyuckJNW0V6v/Dy\n3+bSYM7/5o1ftPNWJeAIEWoE/V4wKCYde8RE4Vp1LO5YwhgcM4rRuPmF2OhekpA+\npteYwkY/8/gTTRpZIc8OTsBYRbaMwsjoDj5riuL3boVtkwZwKRb+ZLvupXeU7Ds7\n1305odTcZUwnImHiHfuq83ZJViQiLRNhUAFnQIXPrYLwEpCmzRBGzYHaRlb69ga/\nzqUbKnclAgMBAAECggEAH6W9qHehubioPMAJM7Y6bC2KU/JLNS4csBZd+idb52gG\nwBwIEFjR+H4ZjymhAA4+pe7c4h7MKyh0RI/l7eoFX98Cb+rEq/r1udm1BhGH3s2h\n2UiI8qRQh1YRjF2/nrN5VjhDBOFa6W9opaopZy/l8AzsT8f21zIgPen8z8o6GpFg\n64fJFcbqCGk2ykN2+x2pIOT04tmCszrfbXZP8LEs4xrUB/XwlHL1vT/M3EWIKbnj\njDaIMjw7q/KRgNUvmKS6SU9b3fnOLcQCz9f5cKdiWACKIU/UvuiWhWJ9ou6BWLWU\nva1A6Fi4XJjhW7s3po58/ioQfl0A9p/L92lGg4ST8QKBgQDx8LIM1g0dh9Ql6LmH\nBUGCOewNNXTs+y3ZznUfvVMoyyZK5w/pzeUvkmOwzbRGnZJ9WyCghq8aezyEpo2D\nPL7Odf988IeHmvhyZIM4PLJYgDvSwGXyf/gh6gJkf/4wpx+tx/yQYNBm3Rht7sA0\npSaLehK0E0kW1uyBzHGKgyQOhwKBgQDf6LiZ7hSQqh54vIU1XMDRth0UOo/s/HGi\nDoij29KjmHjLkm8vOlCo83e79X0WhcnyB5kM7nWFegwcM1PJ0Dl8gidUuTlOVDtM\n5u2AaxDoyXAUL457U5dGFAIW+R653ZDkzMfCglacP8HixXEyIpL1cTLqiCAgzszS\nLcSWwoAr8wKBgQC4CGm3X97sFpTmHSd6sCHLaDnJNl9xoAKZifUHpqCqCBVhpm8x\nXp+11vmj1GULzfJPDlE8Khbp4tH+6R39tOhC7fjgVaoSGWxgv1odHfZfYXOf9R/X\nHUZmrbUSM1XsNkPfkZ7pR+teQ1HA1Xo40WMHd1zgw0a2a9fNR/EZ9nUn4wKBgGaK\nUEgGNRrPHadTRnnaoV8o1IZYD2OLdIqvtzm7SOqsv90SkaKCRUAqR5InaYKwAHy7\nqAa5Cc73xqX/h4arujff7x0ouiq5/nJIa0ndPmAtKAvGf6zQ6j0ompBkxAKAioON\nmInmYL2roSI2I5G/LagDkDrB3lzH+Brk5NvZ9RKrAoGAGox462GGGb/NbGdDkahN\ndifzYYvq4FPiWFFo0ynKAulxCBWLXO/N45XNuAyen433d8eREcAYz1Dzax44+MdQ\nHo9dU7YcZvFyt6iZsYeQF8dluHui3vzMpUe0KbqpZC5KMOSw53ZdNIwzo8NTAK59\n+uv3dHGj7sS8fhDo3yCifzc=\n-----END PRIVATE KEY-----\n",
-  "client_email": "poweron-voice-services@poweronid.iam.gserviceaccount.com",
-  "client_id": "116641749406798186404",
-  "auth_uri": "https://accounts.google.com/o/oauth2/auth",
-  "token_uri": "https://oauth2.googleapis.com/token",
-  "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
-  "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/poweron-voice-services%40poweronid.iam.gserviceaccount.com",
-  "universe_domain": "googleapis.com"
-}
-
 # Web Search configuration
 Web_Search_MAX_QUERY_LENGTH = 400
 Web_Search_MAX_RESULTS = 20
diff --git a/debug_audio/audio_google_interpreter_recording.webm b/debug_audio/audio_google_interpreter_recording.webm
deleted file mode 100644
index 862174f4..00000000
Binary files a/debug_audio/audio_google_interpreter_recording.webm and /dev/null differ
diff --git a/env_dev.env b/env_dev.env
index 24a15187..4348fe37 100644
--- a/env_dev.env
+++ b/env_dev.env
@@ -4,51 +4,31 @@
 APP_ENV_TYPE = dev
 APP_ENV_LABEL = Development Instance Patrick
 APP_API_URL = http://localhost:8000
-
-# Database Configuration for Application
-# JSON File Storage (current)
-# DB_APP_HOST=D:/Temp/_powerondb
-# DB_APP_DATABASE=app
-# DB_APP_USER=dev_user
-# DB_APP_PASSWORD_SECRET=dev_password
+APP_KEY_SYSVAR = D:/Athi/Local/Web/poweron/local/key.txt
 
 # PostgreSQL Storage (new)
 DB_APP_HOST=localhost
-DB_APP_DATABASE=poweron_app_dev
+DB_APP_DATABASE=poweron_app
 DB_APP_USER=poweron_dev
-DB_APP_PASSWORD_SECRET=dev_password
+DB_APP_PASSWORD_SECRET = DEV_ENC:Z0FBQUFBQm8wSFJNU2ZKVk41bU5HbmJOREJ6ZmZ1cTcwZ3ZXQlcxY0dTcjVTUEgxemlRVmtUYWlmWXdicW1JcDFUQkRHamFZVUJSUlg4ZTlHaWZIUGhzVUUtTEFiYkxZeXN6NEtrSjZubjFzN0g2OG5SdjdnQm89
 DB_APP_PORT=5432
 
-# Database Configuration Chat
-# JSON File Storage (current)
-# DB_CHAT_HOST=D:/Temp/_powerondb
-# DB_CHAT_DATABASE=chat
-# DB_CHAT_USER=dev_user
-# DB_CHAT_PASSWORD_SECRET=dev_password
-
 # PostgreSQL Storage (new)
 DB_CHAT_HOST=localhost
-DB_CHAT_DATABASE=poweron_chat_dev
+DB_CHAT_DATABASE=poweron_chat
 DB_CHAT_USER=poweron_dev
-DB_CHAT_PASSWORD_SECRET=dev_password
+DB_CHAT_PASSWORD_SECRET = DEV_ENC:Z0FBQUFBQm8wSFJNZk1fUE1Pa01QM1c0WDdaTnQ0ZWRhTExmZm5iR2R0SEZlMDI2VmJvQ2Nrc0RDY1Z3NG9CSVJucUxkX1B4Qk45bkxvN05XYmZXY1NGa2gtWWxuaFg5bmFnR3d0ZmdYS1A5V2xSeFFYTm5ialE9
 DB_CHAT_PORT=5432
 
-# Database Configuration Management
-# JSON File Storage (current)
-# DB_MANAGEMENT_HOST=D:/Temp/_powerondb
-# DB_MANAGEMENT_DATABASE=management
-# DB_MANAGEMENT_USER=dev_user
-# DB_MANAGEMENT_PASSWORD_SECRET=dev_password
-
 # PostgreSQL Storage (new)
 DB_MANAGEMENT_HOST=localhost
-DB_MANAGEMENT_DATABASE=poweron_management_dev
+DB_MANAGEMENT_DATABASE=poweron_management
 DB_MANAGEMENT_USER=poweron_dev
-DB_MANAGEMENT_PASSWORD_SECRET=dev_password
+DB_MANAGEMENT_PASSWORD_SECRET = DEV_ENC:Z0FBQUFBQm8wSFJNVFd0WkdsZGlLYjcxOUpaM2szUGZyWkZseHBCM1JaYm5fMnJNQ1hVLUIwVVlMaTAtZlBkZ0hsTVM5eXVjZkoxamdmWU00dUU5TEs5Zzlhd0RXYVJGR2twV2hLbjFoN2RsUkVjSGd5NExqV1U9
 DB_MANAGEMENT_PORT=5432
 
 # Security Configuration
-APP_JWT_SECRET_SECRET=rotated_jwt_secret_2025_09_17_f8a3b6c2-7d4e-45b6-9a1f-3c0b9a1d2e7f
+APP_JWT_KEY_SECRET = DEV_ENC:Z0FBQUFBQm8wSFJNMnRzSGtvR1Uyd1RmVm01MTJUTkFlQVRYVHJNVmVhSEpaY2k4YTdIUUtvalhLXzJaeDJVQkhlRHZ2MnExR2k4b09ScnF5U2xubnZtWmRUNmx1b2c4bmItbmdMWmc2eVU2X1pFVmE0UzR0d0xzOG52SkVlSi1uZGZoYVdqMGN3Y0tIVUR1bGtyLW9hNEdRemwtSlJJc1RGbWxJdlpxdHhtMldJTjRDWTE4MFhjPQ==
 APP_TOKEN_EXPIRY=300
 
 # CORS Configuration
@@ -56,7 +36,7 @@ APP_ALLOWED_ORIGINS=http://localhost:8080,https://playground.poweron-center.net
 
 # Logging configuration
 APP_LOGGING_LOG_LEVEL = DEBUG
-APP_LOGGING_LOG_FILE = poweron.log
+APP_LOGGING_LOG_DIR = D:/Athi/Local/Web/poweron/local/logs
 APP_LOGGING_FORMAT = %(asctime)s - %(levelname)s - %(name)s - %(message)s
 APP_LOGGING_DATE_FORMAT = %Y-%m-%d %H:%M:%S
 APP_LOGGING_CONSOLE_ENABLED = True
@@ -66,4 +46,33 @@ APP_LOGGING_BACKUP_COUNT = 5
 
 # Service Redirects
 Service_MSFT_REDIRECT_URI = http://localhost:8000/api/msft/auth/callback
-Service_GOOGLE_REDIRECT_URI = http://localhost:8000/api/google/auth/callback
\ No newline at end of file
+Service_GOOGLE_REDIRECT_URI = http://localhost:8000/api/google/auth/callback
+
+# OpenAI configuration
+Connector_AiOpenai_API_URL = https://api.openai.com/v1/chat/completions
+Connector_AiOpenai_API_SECRET = DEV_ENC:Z0FBQUFBQm8wSFJNdndUSzRSc0l6UGdRYzNkVlJWZjF1ZG1Id3RwTFhPRnBkX3BhN0NlMHk0a2NkQmk2bmhnemNwY1FtanFEemZUd21zcVFYUTRGWUhpeTlOSEgyUWdZVVBneTYxT2RZQTEyZk1XQ3Y5MDhDd3JnMXRwbVVfaVpDOWF2TDU3Mjl2YURvR0daLW92dDdmUktkQ2VOei0tdHdBPT0=
+Connector_AiOpenai_MODEL_NAME = gpt-4o
+Connector_AiOpenai_TEMPERATURE = 0.2
+Connector_AiOpenai_MAX_TOKENS = 2000
+
+# Anthropic configuration
+Connector_AiAnthropic_API_URL = https://api.anthropic.com/v1/messages
+Connector_AiAnthropic_API_SECRET = DEV_ENC:Z0FBQUFBQm8wSFJNRW9tR094QlB6bU9na1lGc0RIYzZOX3g5ZGh4dC1NaXZnUExFWDhnWURQdmNRTi1vc2F6RExGZTFZRU5BUjVjV1NTb3hURS1UY1NYdVhBUVRPemptZXZIclRhOG8wLVkxTGc4R01RTG95THFET2ZJRGlSeWMzcVdwejdVcjIyR0VoUzRaVUsyLVVsQ0sxckxoc2MwWmFfSjBvOTNMaGtCajFpRGpqYm5Sc0Zud08xb2dWdXhOYzQ3ZXZySUNrRVZmYnpyQ0tQdjNjbVExelA2UXNzOENzQT09
+Connector_AiAnthropic_MODEL_NAME = claude-3-5-sonnet-20241022
+Connector_AiAnthropic_TEMPERATURE = 0.2
+Connector_AiAnthropic_MAX_TOKENS = 2000
+
+# Agent Mail configuration
+Service_MSFT_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
+Service_MSFT_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQm8wSFJNay04TEUzbmRHR29jNWp0Nm1MZEtjUkNKRVVTU1p0QUNHMC1vWHpFcTR5eHNDMDBYbnVzRWpEdWVQeE1FRkJDMGlWRWNXZHZfc3M5aG1UdmRYd1J0cElWZGY0aVZ1OWNUMndZTWNXNm9fQ0hCemNwMWdUQW9ya0owOEVUMG1kLUk=
+Service_MSFT_TENANT_ID = common
+
+# Google Service configuration
+Service_GOOGLE_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
+Service_GOOGLE_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQm8wSFJNOVNWNjV1SXllM1ZnWVRmQTdXZmY1YnBjXzl6Q2lKR3R6SzA4SHFvWU96QWVyWG4wc2tLaGQ2SkVOM0tNMUpXaHNNTjEyOWRGeWVtSjdycHBOSjFlRU5XWVFKV0o5Z2l3THU5SHJLaHJXZC1ST1FGdVhwdXBaMFFmQ0lzUmplQmo=
+
+# Tavily Web Search configuration
+Connector_WebTavily_API_KEY_SECRET = DEV_ENC:Z0FBQUFBQm8wSFJNSEJ2YmVieFRaWk5yR1kwVzJ2ajlvTVVZN3dzV2pBT25nTmRsa0NXZEM2eHhqRXhBZ19VMTlFWkQ4ZzlnTUY1M0h0SUpWenZLR3JtZDBVOXZuT1JFV3UxMkJCdjZ2YjB1cE1jYlBOVzZsSHVXa19kcTNiVzZIRUZFdVZCeXJ5YUQ=
+
+# Google Cloud Speech Services configuration
+Connector_GoogleSpeech_API_KEY_SECRET = DEV_ENC:Z0FBQUFBQm8wSFJNSFFITGlUVzF3NE5Ldk10d3o5MS10Q2o4aEJGM250WF9CeWxFQVNaNHBhMk1hS3E5YXRrakh5dmx0VDJuZ3BsWGVMTC0tbU9wWFRWZWM1N25ibWpkeF84enJ1Y2ViMVd1V0plUWdxN3VId1VRUzBhN3MzLVBkSXEwM1BHT2Z2c3JBalh6eXVKMUNFX2pfbGdGYUg1ZUFfcXhSRnJyT0tzdWVVdG1HSHBZOUgwLUVPMVQ4YkZUc3dMcFlLWjRxQUM1X05OWm5ndmJGcjFETV9UM1FoLWt2RVVEem92UGhvZlRFXzNxOVRzQkhyV0hqeTRWQXdMdDVDbEMwOWFkTnV3UXpsYWZwRENaRzd4QjlwTjJUWHhHLVZPTzd1eXNhSWh5ajNwelgxSDRlNUx0N05yTlI1N1RjSzdIZGhFLXBOMjEwMkxsT0daSVhiWVpQZUtfNVdwdGVrazVMM2NkUGZPOHBuNjM3YXdFcGFPdlVtY01ReGhsVENwNnRvNGhJejNHd3hFOFA0bWgxalFFNDVoQ2xYTG5VN1dDZGhndEdWRlFjYzBRMUgwbzRfS2N3VVgyaXJpYmJfZzNadmx5cTFxS2Vja1I5Qm1UT0hDM1FuNk5JRmYtT2p3RWp2SWxTWGZuU1psOUN4NEJTOHkweWIzY2NjbTJRZG5oRjVxNGh4LTUwZE1zZi1zLU43Ulk4UGtmR0N6dU5RcVVvRF9DQlE5Sk1FR1YtOE84WnVuTDlOUHhQR1JLT2g0VkNIT2ctWTBuMXIwNHhSSjcxNnNWRFhQc18zSm1UR1M0Mm54TGxsRG5uX2tDSWhBNDRGaHFObkhuVmtnVVlQU1FhVWhTdnpGUDRfcDQ1OWpERklHMmN5Y0RVWC1JYlItTUozaWY1dmxZUW12NXAtUEtsQWpqUFk4NzFwWVNfSUNqeDNkc25wMnJHN3c5NTB1dmxmUFZfU0NWS1hQMTc1NmdOTmEyREZRVXB0cmlyaldkT3B0Q3FQMFdpdWQ3WU1RZDZKYlFneDdnQ2NWWHFHSXl1c2xRN21LbDdyUGFUcWFxeVVTOWoxSkVJaFZiUHI2VFBHWEdvM2Q1cXdIVGYyc3Y2cVdRd00ydHdrME8tcDVqSmNLV193R291VElTNWFNa2pMQi1zX21VdnZ1R0tTbEJndndvbWRrVE52eW1aTFFzRURtdGItc3FJeXJDenVTWTlIZ0E1eG1yX2N1SHJSUWIxdm8wakdzaDIyaDQ0cE9UdDlhclp2MzVVamQ2em0zbmdLUzBJa1ZaRFpQaTBnZGpTWnRhRGZxUVNZWDg5VDFndWFmZlZnVG5SUEhlWkpfQnREWS0xbEZfNXd5OUpEUkZHa1NZNWtPbnBadFFialgzazlyM0dTb3ctR2x5LUozT3VDc3F1Tk5TbGN2MnRRS1hTb1gzWUNVSlJuUl85azhxaGxCMzVNQUQzVGg1cDZHalRaOUFrM1JPSGJKaGlKRTAwbnV4TmxIZnhkMF9FODVKUk1GZGlWZk1ScnhmQnJXWmRxMTk3SWhIdnBjSVJJOElkalRUWXFRTFNvQXZpdFpFOUdDWkhHOTRLVmN2cEh0X2JpYjNvRjhvUHFVQVNQdXY4OWxQSWNvcUNfZW5HYy10dEFicldhRHZLS1ktY2RGczQta2lGWXkxb2RhNUZMNExabWx0dXdhR3BSWGpSYVUxRXJZVTNBYmdNVFd5NW1vY2s0T0RlV3hqZjNSMHhJakY1TDBackV5bmM2V1o2SEJlT3RSbnpPR0VXbmhQTUtPMzYyU1RjbFRmQUlWTUZjVGRheXBuekZJN3NNZVFFZ3JHenNnOFdQVWxsbFBoYTVvQUd1NGx2SDdYcGhrdUpSWlRIRWVVUkpxdjJSZV9zb0J3N3o4QnRpYXpTRHdkZ1pqSWswSjdJMjVEZDZUNzZuWDVXWkNxUDRtQ1p1dnk2ZEx0S0NKT2ZUc3B5eEdRdEpnTlZQMkt5OHFjQ3FfcHpzUFZEY3Z5WDdEQkt4cEN2MFg2eXF4bDZFeHZFWk5tMFpUR0xDZi1JVjN4eUtRaXlNXzBJUFV2N19MVTRhMWtxWnd6d0Y2bVNFQUJSdEU5Z01FTjEtZDJmWkpEYUlsTVJnTEJYdU1iVFoySEttd3libURrSUNJelVic2Mzb0t5ZzNDX0hjZUtfOFQ1QkxRWmx2dmhnbDhNZllla1dNa0Y5akVpNDRKdHRSUU9fTE9sYVUzdzZtTkJEYTBWdkxkRURSa01TOGxWcVZkUmxkWTA1QjJjS1pOUjJEQTZxeDdSVXhNWldXbnE1V1J2STVCNkt2VHRuNEdtaHUweWdEbUZyMlhWd09FWWI0UUFyQVpUeDE3QXdfQkMtcjdpUU5GUTQzUEczNWg1Wm5rVEgwRW11RFowVnFxYnpGNUYwYks1Y3JPbTdUc2ZXS1ZfYzdhcno3U1ZXZUVkblRoOVl5XzZpTUgwRXFZeFd6NXdqTGlvNm1QeXgxS2ZFTVJSV1JVejliWFBVRGU1MWVudEZzRDFwSW94YlU1Y3JmallsVldXcHdvTmFQdnU5UE0tNHNHMXhPWE1JQUxCNC1WVVRJNmNJcTM3a1dUWWwzSVptTFg3OXlWLWxITkdiR0MyTmRzRWFOeHBMZEVzbms3RC1MTFo1TVhKeURhUW9peHk1bHhJbHphVzR4RmxiUkJwcmkzcWZ3S3dWV0Jkb2VaZ3pMTXdUNUJmZjZfVEVXeDFNMnBvemM0TUJNeUQ2SE1aeWczc0V6M0NUMHFGdURMbTRka3AzZ1d1TUh2V1c5RzBKQVVlTEstWEthOTdaWUZHTlRHaVNmbEFJRFU3M0l2TWlBNF9kaFpJUXlxMHJYa2lxOGFRbDNqMTA1RDFFclFTcGxmb0g2WVI3Z0NrLWN4cUNzNWVuR2VMaE41dWRqMnR5eWNuM0gwUmIwcTFEQ09qbmJCUFIwbjM4MGF6TlhxQWpKOFZXWGNKdnl2Wi1zU1BsZU5NYWpsbzVKMGxTLUJKckd6enJnZWhXemstenN3NGNqUk9HeGlGaFNhSl83TlUzLTVZWW9zYVZZTTZzSjNfd3JkVDNaZVp4dk1GQVMxblJBRW1BWUZLU1VKUFkyQ1dPbndUNjYwdll2U0JxN1FQNk5OaGVYR3U5TXdGNGFVZGVXcS1tS2dwbVc1V3hEeXhVNkJ2cjdGX2FpY1NvOTJhcWFyOUVGOFpOdmd0R29Rb2RIaU01R05LeWRxUE00WlhOQVlMbkZxZDNyUFRXdUFGZ0lOUmp2RzIyaDlzMGxNQk40VzFzYjAwMEhjRVlrNWJ5cFhpVWYxQkxYQ25rUDJ3RTY1VlVFLThiNG1nY1hkdnZTMGoyVlN6dkJleFhndDNCODhlOVl1ZHBkci1hd3l0NGNXeWZ6aUp4S3pHS1c4aDM3WElBTjBwYlNSbmJoMk5SNF81VVNqd0dXY1JUejVsZnpGS1Z5dHFPNUVVM1I5eGhjblZjMV9idFJkc3NZaUdHRlIzQWJQdHhzT01qVW8xUUwxNHZmY3Q1aHBnNHhXTGRjb1BmTmM2X0NmdkpxNS1JMHNQNVg1N0xsd0pmdE8wNktkUGpuX0F3LURyaGhyajg3eWNDdkozUFZIYmpJTTZ3WWVCVFZUd1AtRklFUUxTNXkzalpfdlc4VE1tOHU1Q0MtUWdLbEdYRzdVU1RkM3gyeEY3eXBWLUhXVVo4VkZoUHVkakJPNk0tNTJKTU1JZjVISlR3SmJBQkVhRW51UHg3UjBOMVRPRnF2dzIwRkgxczBBUWZpemFFMzFTeDJfWHZhSkhsTzBhcFIzVmZRODEzRUl1b1ZDUGFqYUxjN2JsbkhYdHVPT00yYlUwbmpVbkU0RkJXbWx5UVFJdHNvNUdxQzMyQnQycDJpMjlnd2xwb3huRUJiZUg5dkhaMjhMV2R5T0NsU0N4WjdBX2ZfODhOdTZOZ0x6WlRIUGI3MzR1ZkJicHN6NzUzRzlsUmVkNlR6MjZjTTA3c290Qzh4ejRiWERHbmFtV1BQV2ZKb2pGU0F1OGsySG9hNHdtSkkxTWpwV2gyaVpWcFpsRWs5a0hSY3UzMk4wQ0dkZWtMbG4xOFZ6TXdEOXBob3I0NjNkT28tZk5IcW5FUkg4YnBtUVFLY1Q5M1lzYzhrRGZOaDF6SnpnejRuM1Y3SW1xMUJmLXpJdEM0UjNHU0t5OEhoamxxLXRmWmtyOS1ud09XeGFzc3VFXzNPWWNGcXFwdHN2cVFEZ0dWdUNKbF9Lc3d6dVhPb3NLMlNEaW1xd3JPLUViYV9GTnNRPT0=
diff --git a/env_int.env b/env_int.env
index 416a5b9e..d6d1be1d 100644
--- a/env_int.env
+++ b/env_int.env
@@ -4,6 +4,7 @@
 APP_ENV_TYPE = int
 APP_ENV_LABEL = Integration Instance
 APP_API_URL = https://gateway-int.poweron-center.net
+APP_KEY_SYSVAR = CONFIG_KEY
 
 # PostgreSQL Storage (new)
 DB_APP_HOST=gateway-int-server.postgres.database.azure.com
@@ -27,7 +28,7 @@ DB_MANAGEMENT_PASSWORD_SECRET=VkAjgECESbEVQ$Tu
 DB_MANAGEMENT_PORT=5432
 
 # Security Configuration
-APP_JWT_SECRET_SECRET=rotated_jwt_secret_2025_09_17_2c5f8e7a-1b3d-49c7-ae5d-9f0a2c3d4b5e
+APP_JWT_KEY_SECRET=rotated_jwt_secret_2025_09_17_2c5f8e7a-1b3d-49c7-ae5d-9f0a2c3d4b5e
 APP_TOKEN_EXPIRY=300
 
 # CORS Configuration
@@ -35,7 +36,7 @@ APP_ALLOWED_ORIGINS=http://localhost:8080,https://playground.poweron-center.net,
 
 # Logging configuration
 APP_LOGGING_LOG_LEVEL = DEBUG
-APP_LOGGING_LOG_FILE = /home/site/wwwroot/poweron.log
+APP_LOGGING_LOG_DIR = /home/site/wwwroot/
 APP_LOGGING_FORMAT = %(asctime)s - %(levelname)s - %(name)s - %(message)s
 APP_LOGGING_DATE_FORMAT = %Y-%m-%d %H:%M:%S
 APP_LOGGING_CONSOLE_ENABLED = True
@@ -46,3 +47,45 @@ APP_LOGGING_BACKUP_COUNT = 5
 # Service Redirects
 Service_MSFT_REDIRECT_URI = https://gateway-int.poweron-center.net/api/msft/auth/callback
 Service_GOOGLE_REDIRECT_URI = https://gateway-int.poweron-center.net/api/google/auth/callback
+
+
+# OpenAI configuration
+Connector_AiOpenai_API_URL = https://api.openai.com/v1/chat/completions
+Connector_AiOpenai_API_SECRET = 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
+Connector_AiOpenai_MODEL_NAME = gpt-4o
+Connector_AiOpenai_TEMPERATURE = 0.2
+Connector_AiOpenai_MAX_TOKENS = 2000
+
+# Anthropic configuration
+Connector_AiAnthropic_API_URL = https://api.anthropic.com/v1/messages
+Connector_AiAnthropic_API_SECRET = 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
+Connector_AiAnthropic_MODEL_NAME = claude-3-5-sonnet-20241022
+Connector_AiAnthropic_TEMPERATURE = 0.2
+Connector_AiAnthropic_MAX_TOKENS = 2000
+
+# Agent Mail configuration
+Service_MSFT_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
+Service_MSFT_CLIENT_SECRET = Kxf8Q~2lJIteZ~JaI32kMf1lfaWKATqxXiNiFbzV
+Service_MSFT_TENANT_ID = common
+
+# Google Service configuration
+Service_GOOGLE_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
+Service_GOOGLE_CLIENT_SECRET = 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
+
+# Tavily Web Search configuration
+Connector_WebTavily_API_KEY_SECRET = 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
+
+# Google Cloud Speech Services configuration
+Connector_GoogleSpeech_API_KEY_SECRET = {
+  "type": "service_account",
+  "project_id": "poweronid",
+  "private_key_id": "88db66e4248326e9baeac4231bc196fd46a9a441",
+  "private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQDTnJuxA+xBL3LA\nPgFILYCsGuppkkdO6d153Q36f2jTj6zpH3OhKMVsaaTBknG2o2+D0Whlk6Yh5rOw\nkWzpMC3y81leRLm5kucERMkBUgd2GL4v16k6m+QGuC3BFlt/XeyuckJNW0V6v/Dy\n3+bSYM7/5o1ftPNWJeAIEWoE/V4wKCYde8RE4Vp1LO5YwhgcM4rRuPmF2OhekpA+\npteYwkY/8/gTTRpZIc8OTsBYRbaMwsjoDj5riuL3boVtkwZwKRb+ZLvupXeU7Ds7\n1305odTcZUwnImHiHfuq83ZJViQiLRNhUAFnQIXPrYLwEpCmzRBGzYHaRlb69ga/\nzqUbKnclAgMBAAECggEAH6W9qHehubioPMAJM7Y6bC2KU/JLNS4csBZd+idb52gG\nwBwIEFjR+H4ZjymhAA4+pe7c4h7MKyh0RI/l7eoFX98Cb+rEq/r1udm1BhGH3s2h\n2UiI8qRQh1YRjF2/nrN5VjhDBOFa6W9opaopZy/l8AzsT8f21zIgPen8z8o6GpFg\n64fJFcbqCGk2ykN2+x2pIOT04tmCszrfbXZP8LEs4xrUB/XwlHL1vT/M3EWIKbnj\njDaIMjw7q/KRgNUvmKS6SU9b3fnOLcQCz9f5cKdiWACKIU/UvuiWhWJ9ou6BWLWU\nva1A6Fi4XJjhW7s3po58/ioQfl0A9p/L92lGg4ST8QKBgQDx8LIM1g0dh9Ql6LmH\nBUGCOewNNXTs+y3ZznUfvVMoyyZK5w/pzeUvkmOwzbRGnZJ9WyCghq8aezyEpo2D\nPL7Odf988IeHmvhyZIM4PLJYgDvSwGXyf/gh6gJkf/4wpx+tx/yQYNBm3Rht7sA0\npSaLehK0E0kW1uyBzHGKgyQOhwKBgQDf6LiZ7hSQqh54vIU1XMDRth0UOo/s/HGi\nDoij29KjmHjLkm8vOlCo83e79X0WhcnyB5kM7nWFegwcM1PJ0Dl8gidUuTlOVDtM\n5u2AaxDoyXAUL457U5dGFAIW+R653ZDkzMfCglacP8HixXEyIpL1cTLqiCAgzszS\nLcSWwoAr8wKBgQC4CGm3X97sFpTmHSd6sCHLaDnJNl9xoAKZifUHpqCqCBVhpm8x\nXp+11vmj1GULzfJPDlE8Khbp4tH+6R39tOhC7fjgVaoSGWxgv1odHfZfYXOf9R/X\nHUZmrbUSM1XsNkPfkZ7pR+teQ1HA1Xo40WMHd1zgw0a2a9fNR/EZ9nUn4wKBgGaK\nUEgGNRrPHadTRnnaoV8o1IZYD2OLdIqvtzm7SOqsv90SkaKCRUAqR5InaYKwAHy7\nqAa5Cc73xqX/h4arujff7x0ouiq5/nJIa0ndPmAtKAvGf6zQ6j0ompBkxAKAioON\nmInmYL2roSI2I5G/LagDkDrB3lzH+Brk5NvZ9RKrAoGAGox462GGGb/NbGdDkahN\ndifzYYvq4FPiWFFo0ynKAulxCBWLXO/N45XNuAyen433d8eREcAYz1Dzax44+MdQ\nHo9dU7YcZvFyt6iZsYeQF8dluHui3vzMpUe0KbqpZC5KMOSw53ZdNIwzo8NTAK59\n+uv3dHGj7sS8fhDo3yCifzc=\n-----END PRIVATE KEY-----\n",
+  "client_email": "poweron-voice-services@poweronid.iam.gserviceaccount.com",
+  "client_id": "116641749406798186404",
+  "auth_uri": "https://accounts.google.com/o/oauth2/auth",
+  "token_uri": "https://oauth2.googleapis.com/token",
+  "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
+  "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/poweron-voice-services%40poweronid.iam.gserviceaccount.com",
+  "universe_domain": "googleapis.com"
+}
diff --git a/env_prod.env b/env_prod.env
index 9410cea6..ab046d57 100644
--- a/env_prod.env
+++ b/env_prod.env
@@ -4,6 +4,7 @@
 APP_ENV_TYPE = prod
 APP_ENV_LABEL = Production Instance
 APP_API_URL = https://gateway.poweron-center.net
+APP_KEY_SYSVAR = CONFIG_KEY
 
 # PostgreSQL Storage (new)
 DB_APP_HOST=gateway-prod-server.postgres.database.azure.com
@@ -27,7 +28,7 @@ DB_MANAGEMENT_PASSWORD_SECRET=prod_password_very_secure.2025
 DB_MANAGEMENT_PORT=5432
 
 # Security Configuration
-APP_JWT_SECRET_SECRET=rotated_jwt_secret_2025_09_17_prod_e1a9c4d7-6b8f-4f2e-9c1a-7e3d2a1b9c5f
+APP_JWT_KEY_SECRET=rotated_jwt_secret_2025_09_17_prod_e1a9c4d7-6b8f-4f2e-9c1a-7e3d2a1b9c5f
 APP_TOKEN_EXPIRY=300
 
 # CORS Configuration
@@ -35,7 +36,7 @@ APP_ALLOWED_ORIGINS=http://localhost:8080,https://playground.poweron-center.net,
 
 # Logging configuration
 APP_LOGGING_LOG_LEVEL = DEBUG
-APP_LOGGING_LOG_FILE = /home/site/wwwroot/poweron.log
+APP_LOGGING_LOG_DIR = /home/site/wwwroot/
 APP_LOGGING_FORMAT = %(asctime)s - %(levelname)s - %(name)s - %(message)s
 APP_LOGGING_DATE_FORMAT = %Y-%m-%d %H:%M:%S
 APP_LOGGING_CONSOLE_ENABLED = True
@@ -46,3 +47,44 @@ APP_LOGGING_BACKUP_COUNT = 5
 # Service Redirects
 Service_MSFT_REDIRECT_URI = https://gateway-prod.poweron-center.net/api/msft/auth/callback
 Service_GOOGLE_REDIRECT_URI = https://gateway-prod.poweron-center.net/api/google/auth/callback
+
+# OpenAI configuration
+Connector_AiOpenai_API_URL = https://api.openai.com/v1/chat/completions
+Connector_AiOpenai_API_SECRET = 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
+Connector_AiOpenai_MODEL_NAME = gpt-4o
+Connector_AiOpenai_TEMPERATURE = 0.2
+Connector_AiOpenai_MAX_TOKENS = 2000
+
+# Anthropic configuration
+Connector_AiAnthropic_API_URL = https://api.anthropic.com/v1/messages
+Connector_AiAnthropic_API_SECRET = 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
+Connector_AiAnthropic_MODEL_NAME = claude-3-5-sonnet-20241022
+Connector_AiAnthropic_TEMPERATURE = 0.2
+Connector_AiAnthropic_MAX_TOKENS = 2000
+
+# Agent Mail configuration
+Service_MSFT_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
+Service_MSFT_CLIENT_SECRET = Kxf8Q~2lJIteZ~JaI32kMf1lfaWKATqxXiNiFbzV
+Service_MSFT_TENANT_ID = common
+
+# Google Service configuration
+Service_GOOGLE_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
+Service_GOOGLE_CLIENT_SECRET = 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
+
+# Tavily Web Search configuration
+Connector_WebTavily_API_KEY_SECRET = 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
+
+# Google Cloud Speech Services configuration
+Connector_GoogleSpeech_API_KEY_SECRET = {
+  "type": "service_account",
+  "project_id": "poweronid",
+  "private_key_id": "88db66e4248326e9baeac4231bc196fd46a9a441",
+  "private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQDTnJuxA+xBL3LA\nPgFILYCsGuppkkdO6d153Q36f2jTj6zpH3OhKMVsaaTBknG2o2+D0Whlk6Yh5rOw\nkWzpMC3y81leRLm5kucERMkBUgd2GL4v16k6m+QGuC3BFlt/XeyuckJNW0V6v/Dy\n3+bSYM7/5o1ftPNWJeAIEWoE/V4wKCYde8RE4Vp1LO5YwhgcM4rRuPmF2OhekpA+\npteYwkY/8/gTTRpZIc8OTsBYRbaMwsjoDj5riuL3boVtkwZwKRb+ZLvupXeU7Ds7\n1305odTcZUwnImHiHfuq83ZJViQiLRNhUAFnQIXPrYLwEpCmzRBGzYHaRlb69ga/\nzqUbKnclAgMBAAECggEAH6W9qHehubioPMAJM7Y6bC2KU/JLNS4csBZd+idb52gG\nwBwIEFjR+H4ZjymhAA4+pe7c4h7MKyh0RI/l7eoFX98Cb+rEq/r1udm1BhGH3s2h\n2UiI8qRQh1YRjF2/nrN5VjhDBOFa6W9opaopZy/l8AzsT8f21zIgPen8z8o6GpFg\n64fJFcbqCGk2ykN2+x2pIOT04tmCszrfbXZP8LEs4xrUB/XwlHL1vT/M3EWIKbnj\njDaIMjw7q/KRgNUvmKS6SU9b3fnOLcQCz9f5cKdiWACKIU/UvuiWhWJ9ou6BWLWU\nva1A6Fi4XJjhW7s3po58/ioQfl0A9p/L92lGg4ST8QKBgQDx8LIM1g0dh9Ql6LmH\nBUGCOewNNXTs+y3ZznUfvVMoyyZK5w/pzeUvkmOwzbRGnZJ9WyCghq8aezyEpo2D\nPL7Odf988IeHmvhyZIM4PLJYgDvSwGXyf/gh6gJkf/4wpx+tx/yQYNBm3Rht7sA0\npSaLehK0E0kW1uyBzHGKgyQOhwKBgQDf6LiZ7hSQqh54vIU1XMDRth0UOo/s/HGi\nDoij29KjmHjLkm8vOlCo83e79X0WhcnyB5kM7nWFegwcM1PJ0Dl8gidUuTlOVDtM\n5u2AaxDoyXAUL457U5dGFAIW+R653ZDkzMfCglacP8HixXEyIpL1cTLqiCAgzszS\nLcSWwoAr8wKBgQC4CGm3X97sFpTmHSd6sCHLaDnJNl9xoAKZifUHpqCqCBVhpm8x\nXp+11vmj1GULzfJPDlE8Khbp4tH+6R39tOhC7fjgVaoSGWxgv1odHfZfYXOf9R/X\nHUZmrbUSM1XsNkPfkZ7pR+teQ1HA1Xo40WMHd1zgw0a2a9fNR/EZ9nUn4wKBgGaK\nUEgGNRrPHadTRnnaoV8o1IZYD2OLdIqvtzm7SOqsv90SkaKCRUAqR5InaYKwAHy7\nqAa5Cc73xqX/h4arujff7x0ouiq5/nJIa0ndPmAtKAvGf6zQ6j0ompBkxAKAioON\nmInmYL2roSI2I5G/LagDkDrB3lzH+Brk5NvZ9RKrAoGAGox462GGGb/NbGdDkahN\ndifzYYvq4FPiWFFo0ynKAulxCBWLXO/N45XNuAyen433d8eREcAYz1Dzax44+MdQ\nHo9dU7YcZvFyt6iZsYeQF8dluHui3vzMpUe0KbqpZC5KMOSw53ZdNIwzo8NTAK59\n+uv3dHGj7sS8fhDo3yCifzc=\n-----END PRIVATE KEY-----\n",
+  "client_email": "poweron-voice-services@poweronid.iam.gserviceaccount.com",
+  "client_id": "116641749406798186404",
+  "auth_uri": "https://accounts.google.com/o/oauth2/auth",
+  "token_uri": "https://oauth2.googleapis.com/token",
+  "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
+  "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/poweron-voice-services%40poweronid.iam.gserviceaccount.com",
+  "universe_domain": "googleapis.com"
+}
diff --git a/modules/chat/documents/documentExtraction.py b/modules/chat/documents/documentExtraction.py
index a304cbe3..b6165b9e 100644
--- a/modules/chat/documents/documentExtraction.py
+++ b/modules/chat/documents/documentExtraction.py
@@ -9,7 +9,7 @@ from pathlib import Path
 import xml.etree.ElementTree as ET
 from bs4 import BeautifulSoup
 import uuid
-from .documentUtility import (
+from modules.chat.documents.documentUtility import (
     getFileExtension,
     getMimeTypeFromExtension,
     detectMimeTypeFromContent,
diff --git a/modules/chat/documents/documentGeneration.py b/modules/chat/documents/documentGeneration.py
index a5a9ae59..2d844ed3 100644
--- a/modules/chat/documents/documentGeneration.py
+++ b/modules/chat/documents/documentGeneration.py
@@ -3,7 +3,7 @@ from typing import Any, Dict, List, Optional
 from datetime import datetime, UTC
 import re
 from modules.shared.timezoneUtils import get_utc_timestamp
-from .documentUtility import (
+from modules.chat.documents.documentUtility import (
     getFileExtension,
     getMimeTypeFromExtension,
     detectMimeTypeFromContent,
diff --git a/modules/chat/handling/handlingTasks.py b/modules/chat/handling/handlingTasks.py
index 49d0b97c..98cca8bb 100644
--- a/modules/chat/handling/handlingTasks.py
+++ b/modules/chat/handling/handlingTasks.py
@@ -10,9 +10,10 @@ from datetime import datetime, UTC
 from modules.interfaces.interfaceChatModel import (
     TaskStatus, TaskStep, TaskContext, TaskAction, ReviewResult, TaskPlan, WorkflowResult, TaskResult, ReviewContext, ActionResult
 )
+from modules.interfaces.interfaceAppObjects import getInterface as getAppObjects
 from modules.shared.timezoneUtils import get_utc_timestamp
-from .executionState import TaskExecutionState
-from .promptFactory import (
+from modules.chat.handling.executionState import TaskExecutionState
+from modules.chat.handling.promptFactory import (
     createTaskPlanningPrompt,
     createActionDefinitionPrompt,
     createResultReviewPrompt
@@ -27,11 +28,13 @@ class WorkflowStoppedException(Exception):
     pass
 
 class HandlingTasks:
-    def __init__(self, chatInterface, service, workflow=None):
+    def __init__(self, chatInterface, currentUser, workflow=None):
         self.chatInterface = chatInterface
-        self.service = service
+        self.currentUser = currentUser
         self.workflow = workflow
-        self.documentGenerator = DocumentGenerator(service)
+        from modules.chat.serviceCenter import ServiceCenter
+        self.service = ServiceCenter(currentUser, workflow)
+        self.documentGenerator = DocumentGenerator(self.service)
 
     def _checkWorkflowStopped(self):
         """
@@ -63,7 +66,6 @@ class HandlingTasks:
         logger.info(f"=== STARTING TASK PLAN GENERATION ===")
         logger.info(f"Workflow ID: {workflow.id}")
         logger.info(f"User Input: {userInput}")
-        available_docs = self.service.getAvailableDocuments(workflow)
 
         # Check workflow status before calling AI service
         self._checkWorkflowStopped()
@@ -83,8 +85,8 @@
             task_step=planning_task_step,
             workflow=workflow,
             workflow_id=workflow.id,
-            available_documents=available_docs,
-            available_connections=[],
+            available_documents=None,
+            available_connections=None,
             previous_results=[],
             previous_handover=None,
             improvements=[],
@@ -105,10 +107,10 @@
         # Generate the task planning prompt
         task_planning_prompt = createTaskPlanningPrompt(task_planning_context, self.service)
 
-        # Log the full task planning prompt being sent to AI for debugging
+        # Log task planning prompt sent to AI
         logger.info("=== TASK PLANNING PROMPT SENT TO AI ===")
-        logger.info(f"User Input: {userInput}")
-        logger.info(f"Available Documents: {available_docs}")
+        # Trace task planning prompt
+        self.service.writeTraceLog("Task Plan Prompt", task_planning_prompt)
 
         prompt = await self.service.callAiTextAdvanced(task_planning_prompt)
 
@@ -116,12 +118,11 @@
         if not prompt:
             raise ValueError("AI service returned no response for task planning")
 
-        # Log the full AI response for task planning
+        # Log task planning response received
        logger.info("=== TASK PLANNING AI RESPONSE RECEIVED ===")
        logger.info(f"Response length: {len(prompt) if prompt else 0}")
-        logger.debug("=== FULL TASK PLANNING AI RESPONSE ===")
-        logger.debug(prompt)
-        logger.debug("=== END TASK PLANNING AI RESPONSE ===")
+        # Trace task planning response
+        self.service.writeTraceLog("Task Plan Response", prompt)
 
         # Inline _parseTaskPlanResponse logic
         try:
@@ -297,27 +298,15 @@
         if enhanced_context and enhanced_context.retry_count > 0:
             logger.info("=== RETRY CONTEXT FOR ACTION GENERATION ===")
             logger.info(f"Retry Count: {enhanced_context.retry_count}")
-            logger.info(f"Previous Improvements: {enhanced_context.improvements}")
-            logger.info(f"Previous Review Result: {enhanced_context.previous_review_result}")
-            logger.info(f"Failure Patterns: {enhanced_context.failure_patterns}")
-            logger.info(f"Failed Actions: {enhanced_context.failed_actions}")
Actions: {enhanced_context.successful_actions}") + logger.debug(f"Previous Improvements: {enhanced_context.improvements}") + logger.debug(f"Previous Review Result: {enhanced_context.previous_review_result}") + logger.debug(f"Failure Patterns: {enhanced_context.failure_patterns}") + logger.debug(f"Failed Actions: {enhanced_context.failed_actions}") + logger.debug(f"Successful Actions: {enhanced_context.successful_actions}") logger.info("=== END RETRY CONTEXT ===") - available_docs = self.service.getAvailableDocuments(workflow) - available_connections = self.service.getConnectionReferenceList() - - # Log available resources for debugging - logger.info("=== AVAILABLE RESOURCES FOR ACTION GENERATION ===") - logger.info(f"Available Documents: {available_docs}") - # Note: available_docs is now a string description, not a list - logger.info(f"Available Connections: {len(available_connections) if available_connections else 0}") - if available_connections: - for i, conn in enumerate(available_connections[:5]): # Show first 5 - logger.info(f" Conn {i+1}: {conn}") - if len(available_connections) > 5: - logger.info(f" ... and {len(available_connections) - 5} more connections") - logger.info("=== END AVAILABLE RESOURCES ===") + # Log that we're starting action generation + logger.info("=== STARTING ACTION GENERATION ===") # Create proper context object for action definition if enhanced_context and isinstance(enhanced_context, TaskContext): @@ -326,8 +315,8 @@ class HandlingTasks: task_step=enhanced_context.task_step, workflow=enhanced_context.workflow, workflow_id=enhanced_context.workflow_id, - available_documents=enhanced_context.available_documents or available_docs, - available_connections=enhanced_context.available_connections or available_connections, + available_documents=enhanced_context.available_documents, + available_connections=enhanced_context.available_connections, previous_results=enhanced_context.previous_results or previous_results or [], previous_handover=enhanced_context.previous_handover, improvements=enhanced_context.improvements or [], @@ -346,8 +335,8 @@ class HandlingTasks: task_step=task_step, workflow=workflow, workflow_id=workflow.id, - available_documents=available_docs, - available_connections=available_connections, + available_documents=None, + available_connections=None, previous_results=previous_results or [], previous_handover=None, improvements=[], @@ -364,30 +353,22 @@ class HandlingTasks: # Check workflow status before calling AI service self._checkWorkflowStopped() - # Log the final action context being sent to AI - logger.info("=== FINAL ACTION CONTEXT FOR AI ===") - logger.info(f"Task Step ID: {action_context.task_step.id if action_context.task_step else 'None'}") - logger.info(f"Task Step Objective: {action_context.task_step.objective if action_context.task_step else 'None'}") - logger.info(f"Workflow ID: {action_context.workflow_id}") - logger.info(f"Available Documents: {action_context.available_documents or 'No documents available'}") - logger.info(f"Available Connections Count: {len(action_context.available_connections) if action_context.available_connections else 0}") - logger.info(f"Previous Results Count: {len(action_context.previous_results) if action_context.previous_results else 0}") - logger.info(f"Retry Count: {action_context.retry_count}") - logger.info(f"Is Regeneration: {action_context.is_regeneration}") - logger.info("=== END ACTION CONTEXT ===") - # Generate the action definition prompt action_prompt = await 
         action_prompt = await createActionDefinitionPrompt(action_context, self.service)
 
+        # Trace action planning prompt
+        self.service.writeTraceLog("Action Plan Prompt", action_prompt)
+
         prompt = await self.service.callAiTextAdvanced(action_prompt)
 
         # Check if AI response is valid
         if not prompt:
             raise ValueError("AI service returned no response")
 
-        # Log the full AI response for debugging
-        logger.debug("=== FULL AI RESPONSE ===")
-        logger.debug(prompt)
-        logger.debug("=== END AI RESPONSE ===")
+        # Log action response received
+        logger.info("=== ACTION PLAN AI RESPONSE RECEIVED ===")
+        logger.info(f"Response length: {len(prompt) if prompt else 0}")
+        # Trace action planning response
+        self.service.writeTraceLog("Action Plan Response", prompt)
 
         # Inline parseActionResponse logic here
         json_start = prompt.find('{')
@@ -875,23 +856,21 @@
             # Use promptFactory for review prompt
             prompt = createResultReviewPrompt(review_context, self.service)
 
-            # Log the full result review prompt being sent to AI for debugging
+            # Log result review prompt sent to AI
             logger.info("=== RESULT REVIEW PROMPT SENT TO AI ===")
             logger.info(f"Task: {task_step.objective}")
             logger.info(f"Action Results Count: {len(review_context.action_results) if review_context.action_results else 0}")
             logger.info(f"Task Actions Count: {len(review_context.task_actions) if review_context.task_actions else 0}")
-            logger.info("=== FULL RESULT REVIEW PROMPT ===")
-            logger.info(prompt)
-            logger.info("=== END RESULT REVIEW PROMPT ===")
+            # Trace result review prompt
+            self.service.writeTraceLog("Result Review Prompt", prompt)
 
             response = await self.service.callAiTextAdvanced(prompt)
 
-            # Log the full AI response for result review
+            # Log result review response received
             logger.info("=== RESULT REVIEW AI RESPONSE RECEIVED ===")
             logger.info(f"Response length: {len(response) if response else 0}")
-            logger.debug("=== FULL RESULT REVIEW AI RESPONSE ===")
-            logger.debug(response)
-            logger.debug("=== END RESULT REVIEW AI RESPONSE ===")
+            # Trace result review response
+            self.service.writeTraceLog("Result Review Response", response)
 
             # Inline parseReviewResponse logic here
             json_start = response.find('{')
@@ -1095,6 +1074,17 @@
             )
             result_label = action.execResultLabel
 
+            # Trace action result (without document data)
+            action_result_trace = {
+                "method": action.execMethod,
+                "action": action.execAction,
+                "success": result.success,
+                "error": result.error,
+                "resultLabel": result_label,
+                "documentsCount": len(result.documents) if result.documents else 0
+            }
+            self.service.writeTraceLog("Action Result", action_result_trace)
+
             # Process documents from the action result
             created_documents = []
             if result.success:
diff --git a/modules/chat/handling/promptFactory.py b/modules/chat/handling/promptFactory.py
index ada386ba..2890e7bf 100644
--- a/modules/chat/handling/promptFactory.py
+++ b/modules/chat/handling/promptFactory.py
@@ -3,14 +3,68 @@
 import json
 import logging
-from typing import Any, Dict
+from typing import Any, Dict, List
 from modules.interfaces.interfaceChatModel import TaskContext, ReviewContext
+from modules.chat.documents.documentUtility import getFileExtension
 
 # Set up logger
 logger = logging.getLogger(__name__)
 
 # Prompt creation helpers extracted from managerChat.py
+def _getAvailableDocuments(workflow) -> str:
+    """
+    Get simple description of available documents for task planning.
+
+    Args:
+        workflow: ChatWorkflow object
+
+    Returns:
+        str: Simple description of document availability
+    """
+    total_documents = 0
+    document_types = set()
+
+    for message in workflow.messages:
+        if message.documents:
+            total_documents += len(message.documents)
+            for doc in message.documents:
+                try:
+                    file_extension = getFileExtension(doc.fileName)
+                    if file_extension:
+                        document_types.add(file_extension.upper())
+                except Exception:
+                    pass
+
+    if total_documents == 0:
+        return "No documents available"
+    elif len(document_types) == 0:
+        return f"{total_documents} document(s) available"
+    else:
+        types_str = ", ".join(sorted(document_types))
+        return f"{total_documents} document(s) available ({types_str} files)"
+
+def _getConnectionReferenceList(service) -> List[str]:
+    """Get list of all UserConnection objects as references with enhanced state information"""
+    connections = []
+    # Get user connections through AppObjects interface
+    user_connections = service.interfaceApp.getUserConnections(service.user.id)
+
+    refreshed_count = 0
+    for conn in user_connections:
+        # Get enhanced connection reference with state information
+        enhanced_ref = service.getConnectionReferenceFromUserConnection(conn)
+        connections.append(enhanced_ref)
+
+        # Count refreshed tokens
+        if "refreshed" in enhanced_ref:
+            refreshed_count += 1
+
+    # Sort by connection reference
+    if refreshed_count > 0:
+        logger.info(f"Refreshed {refreshed_count} connection tokens while building action planning prompt")
+    return sorted(connections)
+
 def _getPreviousRoundContext(service, workflow) -> str:
     """Get context from previous workflow rounds to help understand follow-up prompts"""
     try:
@@ -98,8 +152,8 @@
     # Extract user request from context - use Pydantic model directly
     user_request = context.task_step.objective if context.task_step else 'No request specified'
 
-    # Extract available documents from context - use Pydantic model directly
-    available_documents = context.available_documents or "No documents available"
+    # Get available documents using generic function
+    available_documents = _getAvailableDocuments(context.workflow) if context.workflow else "No documents available"
 
     # Get previous workflow round context for better understanding of follow-up prompts
     previous_round_context = _getPreviousRoundContext(service, context.workflow)
@@ -226,7 +280,9 @@
     # Get enhanced document context using the new method
     available_documents_str = service.getEnhancedDocumentContext()
 
-    connRefs = service.getConnectionReferenceList()
+    # Get available documents and connections using generic functions
+    available_docs_summary = _getAvailableDocuments(context.workflow)
+    connRefs = _getConnectionReferenceList(service)
 
     # Create a structured JSON format for better AI parsing
     # This replaces the old hard-to-read format with a clean JSON structure
@@ -603,7 +659,8 @@ IMPORTANT NOTES:
 - Always include a user-friendly userMessage for each action in the user's language ({user_language}).
 - The examples above show German user messages as reference - adapt the language to match the USER LANGUAGE specified above."""
 
-    logging.debug(f"[ACTION PLAN PROMPT] Enhanced Document Context:\n{available_documents_str}\nUser Connections Section:\n{available_connections_str}\nAvailable Methods (detailed):\n{available_methods_str}")
+    # Removed sensitive data from debug logging
+    logging.debug(f"[ACTION PLAN PROMPT] Document context and methods prepared")
 
     return prompt
 
diff --git a/modules/chat/managerChat.py b/modules/chat/managerChat.py
index 6be21739..882d46e3 100644
--- a/modules/chat/managerChat.py
+++ b/modules/chat/managerChat.py
@@ -2,9 +2,8 @@
 import logging
 from typing import Dict, Any, List
 from modules.interfaces.interfaceAppModel import User
 from modules.interfaces.interfaceChatModel import ChatWorkflow, UserInputRequest, TaskStep, TaskAction, ActionResult, ReviewResult, TaskPlan, WorkflowResult, TaskContext
-from modules.chat.serviceCenter import ServiceCenter
 from modules.interfaces.interfaceChatObjects import ChatObjects
-from .handling.handlingTasks import HandlingTasks, WorkflowStoppedException
+from modules.chat.handling.handlingTasks import HandlingTasks, WorkflowStoppedException
 
 logger = logging.getLogger(__name__)
 
@@ -16,21 +15,19 @@
     def __init__(self, currentUser: User, chatInterface: ChatObjects):
         self.currentUser = currentUser
         self.chatInterface = chatInterface
-        self.service: ServiceCenter = None
         self.workflow: ChatWorkflow = None
         self.handlingTasks: HandlingTasks = None
 
     async def initialize(self, workflow: ChatWorkflow) -> None:
         """Initialize chat manager with workflow"""
         self.workflow = workflow
-        self.service = ServiceCenter(self.currentUser, self.workflow)
-        self.handlingTasks = HandlingTasks(self.chatInterface, self.service, self.workflow)
+        self.handlingTasks = HandlingTasks(self.chatInterface, self.currentUser, self.workflow)
+
     async def executeUnifiedWorkflow(self, userInput: UserInputRequest, workflow: ChatWorkflow) -> WorkflowResult:
         """Unified Workflow Execution"""
         try:
             logger.info(f"Starting unified workflow execution for workflow {workflow.id}")
-            logger.debug(f"User request: {userInput.prompt}")
 
             # Phase 1: High-Level Task Planning
             logger.info("Phase 1: Generating task plan")
@@ -54,8 +51,8 @@
                     task_step=task_step,
                     workflow=workflow,
                     workflow_id=workflow.id,
-                    available_documents=self.service.getAvailableDocuments(workflow),
-                    available_connections=self.service.getConnectionReferenceList(),
+                    available_documents=None,
+                    available_connections=None,
                     previous_results=previous_results,
                     previous_handover=None,
                     improvements=[],
diff --git a/modules/chat/serviceCenter.py b/modules/chat/serviceCenter.py
index 9160a3ae..55648ead 100644
--- a/modules/chat/serviceCenter.py
+++ b/modules/chat/serviceCenter.py
@@ -15,7 +15,7 @@
 from modules.interfaces.interfaceComponentObjects import getInterface as getComp
 from modules.interfaces.interfaceAppObjects import getInterface as getAppObjects
 from modules.chat.documents.documentExtraction import DocumentExtraction
 from modules.chat.documents.documentUtility import getFileExtension, getMimeTypeFromExtension, detectContentTypeFromData
-from modules.chat.methodBase import MethodBase
+from modules.methods.methodBase import MethodBase
 from modules.shared.timezoneUtils import get_utc_timestamp
 import uuid
@@ -584,38 +584,7 @@ Please provide a comprehensive summary of this conversation."""
             logger.error(f"Error getting documents from document list: {str(e)}")
             return []
 
-    # ===== Functions for Prompts + Actions: Connection References generation and resolution =====
-
-    def getConnectionReferenceList(self) -> List[str]:
-        """Get list of all UserConnection objects as references with enhanced state information"""
-        connections = []
-        # Get user connections through AppObjects interface
-        logger.debug(f"getConnectionReferenceList: Service center user ID: {self.user.id}")
-        logger.debug(f"getConnectionReferenceList: Service center user type: {type(self.user)}")
-        logger.debug(f"getConnectionReferenceList: Service center user object: {self.user}")
-
-        user_connections = self.interfaceApp.getUserConnections(self.user.id)
-        logger.debug(f"getConnectionReferenceList: User ID: {self.user.id}")
-        logger.debug(f"getConnectionReferenceList: Raw user connections: {user_connections}")
-        logger.debug(f"getConnectionReferenceList: User connections type: {type(user_connections)}")
-        logger.debug(f"getConnectionReferenceList: User connections length: {len(user_connections) if user_connections else 0}")
-
-        refreshed_count = 0
-        for conn in user_connections:
-            # Get enhanced connection reference with state information
-            enhanced_ref = self.getConnectionReferenceFromUserConnection(conn)
-            logger.debug(f"getConnectionReferenceList: Enhanced ref for connection {conn.id}: {enhanced_ref}")
-            connections.append(enhanced_ref)
-
-            # Count refreshed tokens
-            if "refreshed" in enhanced_ref:
-                refreshed_count += 1
-
-        # Sort by connection reference
-        logger.debug(f"getConnectionReferenceList: Final connections list: {connections}")
-        if refreshed_count > 0:
-            logger.info(f"Refreshed {refreshed_count} connection tokens while building action planning prompt")
-        return sorted(connections)
+    # ===== Functions for Prompts + Actions: Connection References generation and resolution =====
 
     def getConnectionReferenceFromUserConnection(self, connection: UserConnection) -> str:
         """Get connection reference from UserConnection with enhanced state information"""
@@ -692,12 +661,12 @@
         # Try advanced AI first, with retries
         for attempt in range(max_retries):
             try:
-                prompt_size = self.calculateObjectSize(prompt)
+                prompt_size = self._calculateObjectSize(prompt)
                 if context:
-                    prompt_size += self.calculateObjectSize(context)
+                    prompt_size += self._calculateObjectSize(context)
                 response = await self.interfaceAiCalls.callAiTextAdvanced(prompt, context)
-                response_size = self.calculateObjectSize(response)
-                self.updateWorkflowStats(eventLabel="aicall.anthropic.text", bytesSent=prompt_size, bytesReceived=response_size)
+                response_size = self._calculateObjectSize(response)
+                self._updateWorkflowStats(eventLabel="aicall.anthropic.text", bytesSent=prompt_size, bytesReceived=response_size)
                 return response
             except Exception as e:
                 last_error = e
@@ -726,12 +695,12 @@
         last_error = None
         for attempt in range(max_retries):
             try:
-                prompt_size = self.calculateObjectSize(prompt)
+                prompt_size = self._calculateObjectSize(prompt)
                 if context:
-                    prompt_size += self.calculateObjectSize(context)
+                    prompt_size += self._calculateObjectSize(context)
                 response = await self.interfaceAiCalls.callAiTextBasic(prompt, context)
-                response_size = self.calculateObjectSize(response)
-                self.updateWorkflowStats(eventLabel="aicall.openai.text", bytesSent=prompt_size, bytesReceived=response_size)
+                response_size = self._calculateObjectSize(response)
+                self._updateWorkflowStats(eventLabel="aicall.openai.text", bytesSent=prompt_size, bytesReceived=response_size)
                 return response
             except Exception as e:
                 last_error = e
@@ -745,34 +714,34 @@
     async def callAiImageBasic(self, prompt: str, imageData: str, mimeType: str) -> str:
         """Basic image processing using OpenAI"""
         # Calculate prompt size for stats
-        prompt_size = self.calculateObjectSize(prompt)
-        prompt_size += self.calculateObjectSize(imageData)
+        prompt_size = self._calculateObjectSize(prompt)
+        prompt_size += self._calculateObjectSize(imageData)
 
         # Call AI
         response = await self.interfaceAiCalls.callAiImageBasic(prompt, imageData, mimeType)
 
         # Calculate response size for stats
-        response_size = self.calculateObjectSize(response)
+        response_size = self._calculateObjectSize(response)
 
         # Update stats
-        self.updateWorkflowStats(eventLabel="aicall.openai.image", bytesSent=prompt_size, bytesReceived=response_size)
+        self._updateWorkflowStats(eventLabel="aicall.openai.image", bytesSent=prompt_size, bytesReceived=response_size)
 
         return response
 
     async def callAiImageAdvanced(self, prompt: str, imageData: str, mimeType: str) -> str:
         """Advanced image processing using Anthropic"""
         # Calculate prompt size for stats
-        prompt_size = self.calculateObjectSize(prompt)
-        prompt_size += self.calculateObjectSize(imageData)
+        prompt_size = self._calculateObjectSize(prompt)
+        prompt_size += self._calculateObjectSize(imageData)
 
         # Call AI
         response = await self.interfaceAiCalls.callAiImageAdvanced(prompt, imageData, mimeType)
 
         # Calculate response size for stats
-        response_size = self.calculateObjectSize(response)
+        response_size = self._calculateObjectSize(response)
 
         # Update stats
-        self.updateWorkflowStats(eventLabel="aicall.anthropic.image", bytesSent=prompt_size, bytesReceived=response_size)
+        self._updateWorkflowStats(eventLabel="aicall.anthropic.image", bytesSent=prompt_size, bytesReceived=response_size)
 
         return response
 
@@ -957,9 +926,9 @@
         return document
 
-    # ===== Internal public helper functions =====
+    # ===== Internal helper functions =====
 
-    def updateWorkflowStats(self, eventLabel: str = None, bytesSent: int = 0, bytesReceived: int = 0, tokenCount: int = 0) -> None:
+    def _updateWorkflowStats(self, eventLabel: str = None, bytesSent: int = 0, bytesReceived: int = 0, tokenCount: int = 0) -> None:
         """
         Centralized function to update workflow statistics in database and running workflow.
@@ -983,7 +952,7 @@
         except Exception as e:
             logger.error(f"Error updating workflow stats: {str(e)}")
 
-    def calculateObjectSize(self, obj: Any) -> int:
+    def _calculateObjectSize(self, obj: Any) -> int:
         """
         Calculate the size of an object in bytes.
@@ -1008,38 +977,6 @@
             logger.error(f"Error calculating object size: {str(e)}")
             return 0
 
-    def getAvailableDocuments(self, workflow) -> str:
-        """
-        Get simple description of available documents for task planning.
-
-        Args:
-            workflow: ChatWorkflow object
-
-        Returns:
-            str: Simple description of document availability
-        """
-        total_documents = 0
-        document_types = set()
-
-        for message in workflow.messages:
-            if message.documents:
-                total_documents += len(message.documents)
-                for doc in message.documents:
-                    try:
-                        file_extension = getFileExtension(doc.fileName)
-                        if file_extension:
-                            document_types.add(file_extension.upper())
-                    except:
-                        pass
-
-        if total_documents == 0:
-            return "No documents available"
-        elif len(document_types) == 0:
-            return f"{total_documents} document(s) available"
-        else:
-            types_str = ", ".join(sorted(document_types))
-            return f"{total_documents} document(s) available ({types_str} files)"
-
     # ===== Functions for Manager: Execution Tools =====
 
     async def executeAction(self, methodName: str, actionName: str, parameters: Dict[str, Any]) -> ActionResult:
@@ -1090,6 +1027,85 @@
         """Set user language for the service center"""
         self.user.language = language
 
+    def writeTraceLog(self, contextText: str, data: Any) -> None:
+        """Write trace data to configured trace file if in debug mode"""
+        try:
+            import logging
+            import os
+            from datetime import datetime, UTC
+            from modules.shared.configuration import APP_CONFIG
+
+            # Only write when debug logging is effectively enabled for this logger
+            if not logger.isEnabledFor(logging.DEBUG):
+                return
+
+            # Get log directory from configuration
+            logDir = APP_CONFIG.get("APP_LOGGING_LOG_DIR", "./")
+            if not os.path.isabs(logDir):
+                # If relative path, make it relative to the gateway directory
+                gatewayDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+                logDir = os.path.join(gatewayDir, logDir)
+
+            # Ensure log directory exists
+            os.makedirs(logDir, exist_ok=True)
+
+            # Create trace file path
+            trace_file = os.path.join(logDir, "log_trace.log")
+
+            # Format the trace entry
+            timestamp = datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
+            trace_entry = f"[{timestamp}] {contextText}\n"
+
+            # Add data if provided
+            if data is not None:
+                if isinstance(data, (dict, list)):
+                    import json
+                    trace_entry += f"Data: {json.dumps(data, indent=2, default=str)}\n"
+                else:
+                    trace_entry += f"Data: {str(data)}\n"
+
+            trace_entry += "-" * 80 + "\n\n"
+
+            # Write to trace file
+            with open(trace_file, "a", encoding="utf-8") as f:
+                f.write(trace_entry)
+
+        except Exception as e:
+            # Don't log trace errors to avoid recursion
+            pass
+
+    def clearTraceLog(self) -> None:
+        """Clear the trace log file"""
+        try:
+            import logging
+            import os
+            from modules.shared.configuration import APP_CONFIG
+
+            # Get log directory from configuration
+            logDir = APP_CONFIG.get("APP_LOGGING_LOG_DIR", "./")
+            if not os.path.isabs(logDir):
+                # If relative path, make it relative to the gateway directory
+                gatewayDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+                logDir = os.path.join(gatewayDir, logDir)
+
+            # Create trace file path
+            trace_file = os.path.join(logDir, "log_trace.log")
+
+            # Remove any stale trace file when debug logging is disabled
+            if not logger.isEnabledFor(logging.DEBUG):
+                # Delete file if not in debug mode
+                if os.path.exists(trace_file):
+                    os.remove(trace_file)
+                return
+
+            # Create empty file if in debug mode
+            with open(trace_file, "w", encoding="utf-8") as f:
+                f.write("")
+
+        except Exception as e:
+            # Don't log trace errors to avoid recursion
+            pass
+
     # ===== Functions for Manager: Workflow Tools =====
 
None): diff --git a/modules/connectors/connectorGoogleSpeech.py b/modules/connectors/connectorGoogleSpeech.py index 1243b70e..ed547b2b 100644 --- a/modules/connectors/connectorGoogleSpeech.py +++ b/modules/connectors/connectorGoogleSpeech.py @@ -29,10 +29,10 @@ class ConnectorGoogleSpeech: """ try: # Get JSON key from config.ini - api_key = APP_CONFIG.get("Connector_GoogleSpeech_API_KEY") + api_key = APP_CONFIG.get("Connector_GoogleSpeech_API_KEY_SECRET") if not api_key or api_key == "YOUR_GOOGLE_SERVICE_ACCOUNT_JSON_KEY_HERE": - raise ValueError("Google Speech API key not configured. Please set Connector_GoogleSpeech_API_KEY in config.ini with the full service account JSON key") + raise ValueError("Google Speech API key not configured. Please set Connector_GoogleSpeech_API_KEY_SECRET in config.ini with the full service account JSON key") # Parse the JSON key and set up authentication try: diff --git a/modules/connectors/connectorWebTavily.py b/modules/connectors/connectorWebTavily.py index 7a9ec038..97410493 100644 --- a/modules/connectors/connectorWebTavily.py +++ b/modules/connectors/connectorWebTavily.py @@ -69,9 +69,9 @@ class ConnectorTavily(WebSearchBase, WebCrawlBase, WebScrapeBase): @classmethod async def create(cls): - api_key = APP_CONFIG.get("Connector_WebTavily_API_KEY") + api_key = APP_CONFIG.get("Connector_WebTavily_API_KEY_SECRET") if not api_key: - raise ValueError("Tavily API key not configured. Please set Connector_WebTavily_API_KEY in config.ini") + raise ValueError("Tavily API key not configured. Please set Connector_WebTavily_API_KEY_SECRET in config.ini") return cls(client=AsyncTavilyClient(api_key=api_key)) async def search_urls(self, request: WebSearchRequest) -> WebSearchActionResult: diff --git a/modules/services/serviceValueonChat.py b/modules/features/featureChatPlayground.py similarity index 98% rename from modules/services/serviceValueonChat.py rename to modules/features/featureChatPlayground.py index 1d50b134..85d8c2d4 100644 --- a/modules/services/serviceValueonChat.py +++ b/modules/features/featureChatPlayground.py @@ -30,7 +30,7 @@ class WorkflowManager: await self.chatManager.initialize(workflow) # Set user language - self.chatManager.service.setUserLanguage(userInput.userLanguage) + self.chatManager.handlingTasks.service.setUserLanguage(userInput.userLanguage) # Send first message message = await self._sendFirstMessage(userInput, workflow) @@ -170,10 +170,13 @@ class WorkflowManager: if message: workflow.messages.append(message) + # Clear trace log for new workflow session + self.chatManager.handlingTasks.service.clearTraceLog() + # Add documents if any, now with messageId if userInput.listFileId: # Process file IDs and add to message data - documents = await self.chatManager.service.processFileIds(userInput.listFileId, message.id) + documents = await self.chatManager.handlingTasks.service.processFileIds(userInput.listFileId, message.id) message.documents = documents # Update the message with documents in database self.chatInterface.updateMessage(message.id, {"documents": [doc.to_dict() for doc in documents]}) diff --git a/modules/services/serviceNeutralization.py b/modules/features/featureNeutralizePlayground.py similarity index 100% rename from modules/services/serviceNeutralization.py rename to modules/features/featureNeutralizePlayground.py diff --git a/modules/services/serviceDeltaSync.py b/modules/features/featureSyncDelta.py similarity index 100% rename from modules/services/serviceDeltaSync.py rename to modules/features/featureSyncDelta.py diff 
--git a/modules/interfaces/interfaceChatObjects.py b/modules/interfaces/interfaceChatObjects.py index 63cec9c7..7b6806da 100644 --- a/modules/interfaces/interfaceChatObjects.py +++ b/modules/interfaces/interfaceChatObjects.py @@ -1155,7 +1155,7 @@ class ChatObjects: # Remove the 'Workflow started' log entry # Start workflow processing - from modules.services.serviceValueonChat import WorkflowManager + from modules.features.featureChatPlayground import WorkflowManager workflowManager = WorkflowManager(self, currentUser) # Start the workflow processing asynchronously diff --git a/modules/methods/methodAi.py b/modules/methods/methodAi.py index e69decd9..eda36f69 100644 --- a/modules/methods/methodAi.py +++ b/modules/methods/methodAi.py @@ -7,7 +7,7 @@ import logging from typing import Dict, Any, List, Optional from datetime import datetime, UTC -from modules.chat.methodBase import MethodBase, action +from modules.methods.methodBase import MethodBase, action from modules.interfaces.interfaceChatModel import ActionResult from modules.shared.timezoneUtils import get_utc_timestamp diff --git a/modules/chat/methodBase.py b/modules/methods/methodBase.py similarity index 100% rename from modules/chat/methodBase.py rename to modules/methods/methodBase.py diff --git a/modules/methods/methodDocument.py b/modules/methods/methodDocument.py index 89e35a88..8cd3ac1c 100644 --- a/modules/methods/methodDocument.py +++ b/modules/methods/methodDocument.py @@ -9,7 +9,7 @@ import re from typing import Dict, Any, List, Optional from datetime import datetime, UTC -from modules.chat.methodBase import MethodBase, action +from modules.methods.methodBase import MethodBase, action from modules.interfaces.interfaceChatModel import ActionResult from modules.shared.timezoneUtils import get_utc_timestamp diff --git a/modules/methods/methodOutlook.py b/modules/methods/methodOutlook.py index 8a2b5b7d..23b17985 100644 --- a/modules/methods/methodOutlook.py +++ b/modules/methods/methodOutlook.py @@ -81,7 +81,7 @@ from datetime import datetime, UTC import json import uuid -from modules.chat.methodBase import MethodBase, action +from modules.methods.methodBase import MethodBase, action from modules.interfaces.interfaceChatModel import ActionResult from modules.interfaces.interfaceAppModel import ConnectionStatus from modules.shared.timezoneUtils import get_utc_timestamp diff --git a/modules/methods/methodSharepoint.py b/modules/methods/methodSharepoint.py index 21decac0..ca99c06a 100644 --- a/modules/methods/methodSharepoint.py +++ b/modules/methods/methodSharepoint.py @@ -13,7 +13,7 @@ from urllib.parse import urlparse import aiohttp import asyncio -from modules.chat.methodBase import MethodBase, action +from modules.methods.methodBase import MethodBase, action from modules.interfaces.interfaceChatModel import ActionResult from modules.shared.timezoneUtils import get_utc_timestamp diff --git a/modules/methods/methodWeb.py b/modules/methods/methodWeb.py index 96c597db..014498de 100644 --- a/modules/methods/methodWeb.py +++ b/modules/methods/methodWeb.py @@ -2,7 +2,7 @@ import logging import csv import io from typing import Any, Dict -from modules.chat.methodBase import MethodBase, action +from modules.methods.methodBase import MethodBase, action from modules.interfaces.interfaceChatModel import ActionResult, ActionDocument from modules.interfaces.interfaceWebObjects import WebInterface from modules.interfaces.interfaceWebModel import ( diff --git a/modules/neutralizer/neutralizer.py b/modules/neutralizer/neutralizer.py index 
87427611..f8677465 100644 --- a/modules/neutralizer/neutralizer.py +++ b/modules/neutralizer/neutralizer.py @@ -4,481 +4,109 @@ Unterstützt TXT, JSON, CSV, Excel und Word-Dateien Mehrsprachig: DE, EN, FR, IT """ -import re -import json -import pandas as pd -import docx -from pathlib import Path -from typing import Dict, List, Tuple, Any, Union, Optional -from dataclasses import dataclass -import uuid import logging -import traceback -import csv -from datetime import datetime -import xml.etree.ElementTree as ET -import os -import random -from io import StringIO -from modules.neutralizer.patterns import Pattern, HeaderPatterns, DataPatterns, get_pattern_for_header, find_patterns_in_text, TextTablePatterns -import base64 +from typing import Dict, List, Any + +# Import all necessary classes and functions +from modules.neutralizer.subProcessCommon import ProcessResult, CommonUtils +from modules.neutralizer.subProcessText import TextProcessor, PlainText +from modules.neutralizer.subProcessList import ListProcessor, TableData +from modules.neutralizer.subProcessBinary import BinaryProcessor, BinaryData +from modules.neutralizer.subParseString import StringParser +from modules.neutralizer.subPatterns import Pattern, HeaderPatterns, DataPatterns, TextTablePatterns # Configure logging logger = logging.getLogger(__name__) -@dataclass -class TableData: - """Repräsentiert Tabellendaten""" - headers: List[str] - rows: List[List[str]] - source_type: str # 'csv', 'json', 'xml', 'text_table' - -@dataclass -class PlainText: - """Repräsentiert normalen Text""" - content: str - source_type: str # 'txt', 'docx', 'text_plain' - -@dataclass -class ProcessResult: - """Result of content processing""" - data: Any - mapping: Dict[str, str] - replaced_fields: List[str] - processed_info: Dict[str, Any] # Additional processing information +# Export all classes and functions for external use +__all__ = [ + 'DataAnonymizer', + 'ProcessResult', + 'CommonUtils', + 'TextProcessor', + 'PlainText', + 'ListProcessor', + 'TableData', + 'BinaryProcessor', + 'BinaryData', + 'StringParser', + 'Pattern', + 'HeaderPatterns', + 'DataPatterns', + 'TextTablePatterns' +] class DataAnonymizer: """Hauptklasse für die Datenanonymisierung""" def __init__(self, names_to_parse: List[str] = None): - """Initialize the anonymizer with patterns and custom names + """Initialize the anonymizer with specialized processors Args: names_to_parse: List of names to parse and replace (case-insensitive) """ - self.header_patterns = HeaderPatterns.patterns - self.data_patterns = DataPatterns.patterns self.names_to_parse = names_to_parse or [] - self.replaced_fields = set() - self.mapping = {} - self.processing_info = [] + + # Initialize specialized processors + self.text_processor = TextProcessor(names_to_parse) + self.list_processor = ListProcessor(names_to_parse) + self.binary_processor = BinaryProcessor() + + # Common utilities + self.common_utils = CommonUtils() - def _normalize_whitespace(self, text: str) -> str: - """Normalize whitespace in text""" - text = re.sub(r'\s+', ' ', text) - text = text.replace('\r\n', '\n').replace('\r', '\n') - return text.strip() - - - def _is_table_line(self, line: str) -> bool: - """Check if a line represents a table row""" - return bool(re.match(r'^\s*[^:]+:\s*[^:]+$', line) or - re.match(r'^\s*[^\t]+\t[^\t]+$', line)) - - def _extract_tables_from_text(self, content: str) -> Tuple[List[TableData], List[PlainText]]: - """ - Extract tables and plain text from content - - Args: - content: Content to process - - Returns: - 
Tuple of (list of tables, list of plain text sections) - """ - tables = [] - plain_texts = [] - - # Process the entire content as plain text - plain_texts.append(PlainText(content=content, source_type='text_plain')) - - return tables, plain_texts - - def _anonymize_table(self, table: TableData) -> TableData: - """Anonymize table data""" - try: - anonymized_table = TableData( - headers=table.headers.copy(), - rows=[row.copy() for row in table.rows], - source_type=table.source_type - ) - - for i, header in enumerate(anonymized_table.headers): - pattern = get_pattern_for_header(header, self.header_patterns) - if pattern: - for row in anonymized_table.rows: - if row[i] is not None: - original = str(row[i]) - if original not in self.mapping: - self.mapping[original] = pattern.replacement_template.format(len(self.mapping) + 1) - row[i] = self.mapping[original] - - return anonymized_table - - except Exception as e: - logger.error(f"Error anonymizing table: {str(e)}") - raise - - def _anonymize_plain_text(self, text: PlainText) -> PlainText: - """Anonymize plain text content using simple search-and-replace approach""" - try: - current_text = text.content - - # Step 1: Replace custom names first (simple regex search-and-replace) - for name in self.names_to_parse: - if not name.strip(): - continue - - # Create case-insensitive regex pattern with word boundaries - pattern = re.compile(r'\b' + re.escape(name.strip()) + r'\b', re.IGNORECASE) - - # Find all matches for this name - matches = list(pattern.finditer(current_text)) - - # Replace each match with a placeholder - for match in reversed(matches): # Process from right to left to avoid position shifts - matched_text = match.group() - if matched_text not in self.mapping: - # Generate a UUID for the placeholder - import uuid - placeholder_id = str(uuid.uuid4()) - self.mapping[matched_text] = f"[name.{placeholder_id}]" - - replacement = self.mapping[matched_text] - start, end = match.span() - current_text = current_text[:start] + replacement + current_text[end:] - - # Step 2: Replace pattern-based matches (emails, phones, etc.) 
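Both the removed implementation here and its replacement in `subParseString.py` rely on the same splice order: matches are applied from right to left so that the `(start, end)` spans of the not-yet-processed matches are not shifted by length changes. A minimal standalone illustration (the sample text and fixed placeholder are invented for the demo):

```python
import re

text = "Contact Jane Doe or Jane Doe's assistant."
pattern = re.compile(r"\bJane Doe\b", re.IGNORECASE)
matches = list(pattern.finditer(text))

# Splicing from the right leaves the spans of the remaining (earlier)
# matches untouched, even though the placeholder is longer than the name.
for match in reversed(matches):
    start, end = match.span()
    text = text[:start] + "[name.0000]" + text[end:]

print(text)  # Contact [name.0000] or [name.0000]'s assistant.
```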
- # Use the same simple approach for patterns - pattern_matches = find_patterns_in_text(current_text, self.data_patterns) - - # Process pattern matches from right to left to avoid position shifts - for pattern_name, matched_text, start, end in reversed(pattern_matches): - # Skip if already a placeholder - if re.match(r'\[[a-z]+\.[a-f0-9-]+\]', matched_text): - continue - - # Skip if contains placeholder characters - if '[' in matched_text or ']' in matched_text: - continue - - if matched_text not in self.mapping: - # Generate a UUID for the placeholder - import uuid - placeholder_id = str(uuid.uuid4()) - # Create placeholder in format [type.uuid] - type_mapping = { - 'email': 'email', - 'phone': 'phone', - 'address': 'address', - 'id': 'id' - } - placeholder_type = type_mapping.get(pattern_name, 'data') - self.mapping[matched_text] = f"[{placeholder_type}.{placeholder_id}]" - - replacement = self.mapping[matched_text] - current_text = current_text[:start] + replacement + current_text[end:] - - return PlainText(content=current_text, source_type=text.source_type) - - except Exception as e: - logger.error(f"Error anonymizing plain text: {str(e)}") - raise - - def _anonymize_json_value(self, value: Any, key: str = None) -> Any: - """ - Recursively anonymize JSON values based on their keys and content - - Args: - value: Value to anonymize - key: Key name (if part of a key-value pair) - - Returns: - Anonymized value - """ - if isinstance(value, dict): - return {k: self._anonymize_json_value(v, k) for k, v in value.items()} - elif isinstance(value, list): - return [self._anonymize_json_value(item) for item in value] - elif isinstance(value, str): - # Check if this is a key we should process - if key: - pattern = get_pattern_for_header(key, self.header_patterns) - if pattern: - if value not in self.mapping: - # Generate a UUID for the placeholder - import uuid - placeholder_id = str(uuid.uuid4()) - # Create placeholder in format [type.uuid] - type_mapping = { - 'email': 'email', - 'phone': 'phone', - 'name': 'name', - 'address': 'address', - 'id': 'id' - } - placeholder_type = type_mapping.get(pattern.name, 'data') - self.mapping[value] = f"[{placeholder_type}.{placeholder_id}]" - return self.mapping[value] - - # Check if the value itself matches any patterns - pattern_matches = find_patterns_in_text(value, self.data_patterns) - custom_name_matches = self._find_custom_names(value) - - if pattern_matches or custom_name_matches: - # Use the first match's pattern or custom name - if pattern_matches: - pattern_name = pattern_matches[0][0] - if value not in self.mapping: - # Generate a UUID for the placeholder - import uuid - placeholder_id = str(uuid.uuid4()) - # Create placeholder in format [type.uuid] - type_mapping = { - 'email': 'email', - 'phone': 'phone', - 'name': 'name', - 'address': 'address', - 'id': 'id' - } - placeholder_type = type_mapping.get(pattern_name, 'data') - self.mapping[value] = f"[{placeholder_type}.{placeholder_id}]" - elif custom_name_matches: - if value not in self.mapping: - # Generate a UUID for the placeholder - import uuid - placeholder_id = str(uuid.uuid4()) - self.mapping[value] = f"[name.{placeholder_id}]" - return self.mapping[value] - - return value - else: - return value - - def _anonymize_xml_element(self, element: ET.Element, indent: str = '') -> str: - """ - Recursively process XML element and return formatted string - - Args: - element: XML element to process - indent: Current indentation level - - Returns: - Formatted XML string - """ - # Process attributes - 
processed_attrs = {} - for attr_name, attr_value in element.attrib.items(): - # Check if attribute name matches any header patterns - pattern = get_pattern_for_header(attr_name, self.header_patterns) - if pattern: - if attr_value not in self.mapping: - # Generate a UUID for the placeholder - import uuid - placeholder_id = str(uuid.uuid4()) - # Create placeholder in format [type.uuid] - type_mapping = { - 'email': 'email', - 'phone': 'phone', - 'name': 'name', - 'address': 'address', - 'id': 'id' - } - placeholder_type = type_mapping.get(pattern.name, 'data') - self.mapping[attr_value] = f"[{placeholder_type}.{placeholder_id}]" - processed_attrs[attr_name] = self.mapping[attr_value] - else: - # Check if attribute value matches any data patterns - matches = find_patterns_in_text(attr_value, self.data_patterns) - if matches: - pattern_name = matches[0][0] - pattern = next((p for p in self.data_patterns if p.name == pattern_name), None) - if pattern: - if attr_value not in self.mapping: - # Generate a UUID for the placeholder - import uuid - placeholder_id = str(uuid.uuid4()) - # Create placeholder in format [type.uuid] - type_mapping = { - 'email': 'email', - 'phone': 'phone', - 'name': 'name', - 'address': 'address', - 'id': 'id' - } - placeholder_type = type_mapping.get(pattern_name, 'data') - self.mapping[attr_value] = f"[{placeholder_type}.{placeholder_id}]" - processed_attrs[attr_name] = self.mapping[attr_value] - else: - processed_attrs[attr_name] = attr_value - else: - processed_attrs[attr_name] = attr_value - - attrs = ' '.join(f'{k}="{v}"' for k, v in processed_attrs.items()) - attrs = f' {attrs}' if attrs else '' - - # Process text content - text = element.text.strip() if element.text and element.text.strip() else '' - if text: - # Check if text matches any patterns or custom names - pattern_matches = find_patterns_in_text(text, self.data_patterns) - custom_name_matches = self._find_custom_names(text) - - if pattern_matches or custom_name_matches: - if pattern_matches: - pattern_name = pattern_matches[0][0] - pattern = next((p for p in self.data_patterns if p.name == pattern_name), None) - if pattern: - if text not in self.mapping: - # Generate a UUID for the placeholder - import uuid - placeholder_id = str(uuid.uuid4()) - # Create placeholder in format [type.uuid] - type_mapping = { - 'email': 'email', - 'phone': 'phone', - 'name': 'name', - 'address': 'address', - 'id': 'id' - } - placeholder_type = type_mapping.get(pattern_name, 'data') - self.mapping[text] = f"[{placeholder_type}.{placeholder_id}]" - text = self.mapping[text] - elif custom_name_matches: - if text not in self.mapping: - # Generate a UUID for the placeholder - import uuid - placeholder_id = str(uuid.uuid4()) - self.mapping[text] = f"[name.{placeholder_id}]" - text = self.mapping[text] - - # Process child elements - children = [] - for child in element: - child_str = self._anonymize_xml_element(child, indent + ' ') - children.append(child_str) - - # Build element string - if not children and not text: - return f"{indent}<{element.tag}{attrs}/>" - elif not children: - return f"{indent}<{element.tag}{attrs}>{text}" - else: - result = [f"{indent}<{element.tag}{attrs}>"] - if text: - result.append(f"{indent} {text}") - result.extend(children) - result.append(f"{indent}") - return '\n'.join(result) - - def process_content(self, content: str, content_type: str) -> ProcessResult: + def process_content(self, content: str, content_type: str = None) -> ProcessResult: """ Process content and return anonymized data Args: 
content: Content to process - content_type: Type of content ('csv', 'json', 'xml', 'text') + content_type: Type of content ('csv', 'json', 'xml', 'text', 'binary') + If None, will auto-detect Returns: ProcessResult: Contains anonymized data, mapping, replaced fields and processing info """ try: + # Auto-detect content type if not provided + if content_type is None: + content_type = self.common_utils.detect_content_type(content) # Check if content is binary data - is_binary = False - try: - # First, check if content looks like base64 (contains only base64 characters) - if re.match(r'^[A-Za-z0-9+/]*={0,2}$', content.strip()): - # Try to decode base64 if it looks like base64 - try: - decoded = base64.b64decode(content) - # If it's not valid text, consider it binary - decoded.decode('utf-8') - is_binary = True - except (base64.binascii.Error, UnicodeDecodeError): - is_binary = False - else: - is_binary = False - except Exception as e: - is_binary = False - - if is_binary: - # TODO: Implement binary data neutralization - # This would require: - # 1. Detecting binary data types (images, audio, video, etc.) - # 2. Implementing specific neutralization for each type - # 3. Handling metadata and embedded content - # 4. Preserving binary integrity while removing sensitive data - return ProcessResult(content, self.mapping, [], {'type': 'binary', 'status': 'not_implemented'}) - - replaced_fields = [] - processed_info = {} + if self.binary_processor.is_binary_content(content): + return self.binary_processor.process_binary_content(content) + # Route to appropriate processor based on content type if content_type in ['csv', 'json', 'xml']: - # Handle as table if content_type == 'csv': - df = pd.read_csv(StringIO(content), encoding='utf-8') - table = TableData( - headers=df.columns.tolist(), - rows=df.values.tolist(), - source_type='csv' - ) - processed_info['type'] = 'table' - processed_info['headers'] = table.headers - processed_info['row_count'] = len(table.rows) + result, mapping, replaced_fields, processed_info = self.list_processor.process_csv_content(content) elif content_type == 'json': - data = json.loads(content) - # Process JSON recursively - result = self._anonymize_json_value(data) - processed_info['type'] = 'json' - return ProcessResult(result, self.mapping, replaced_fields, processed_info) + result, mapping, replaced_fields, processed_info = self.list_processor.process_json_content(content) else: # xml - root = ET.fromstring(content) - # Process XML recursively with proper formatting - result = self._anonymize_xml_element(root) - processed_info['type'] = 'xml' - return ProcessResult(result, self.mapping, replaced_fields, processed_info) + result, mapping, replaced_fields, processed_info = self.list_processor.process_xml_content(content) - if not table.rows: - return ProcessResult(None, self.mapping, [], processed_info) - - anonymized_table = self._anonymize_table(table) - - # Track replaced fields - for i, header in enumerate(anonymized_table.headers): - for orig_row, anon_row in zip(table.rows, anonymized_table.rows): - if anon_row[i] != orig_row[i]: - replaced_fields.append(header) - - # Convert back to original format - if content_type == 'csv': - result = pd.DataFrame(anonymized_table.rows, columns=anonymized_table.headers) - elif content_type == 'json': - if len(anonymized_table.headers) == 1 and anonymized_table.headers[0] == 'value': - result = anonymized_table.rows[0][0] - else: - result = dict(zip(anonymized_table.headers, anonymized_table.rows[0])) - else: # xml - result = 
ET.tostring(root, encoding='unicode')
-
-                return ProcessResult(result, self.mapping, replaced_fields, processed_info)
+                return ProcessResult(result, mapping, replaced_fields, processed_info)
             else:
                 # Handle as text
-                # First, identify what needs to be replaced using table detection
-                tables, plain_texts = self._extract_tables_from_text(content)
-                processed_info['type'] = 'text'
-                processed_info['tables'] = [{'headers': t.headers, 'row_count': len(t.rows)} for t in tables]
-
-                # Process plain text sections
-                anonymized_texts = [self._anonymize_plain_text(text) for text in plain_texts]
-
-                # Combine all processed content
-                result = content
-                for i, (text, anonymized_text) in enumerate(zip(plain_texts, anonymized_texts)):
-                    if text.content != anonymized_text.content:
-                        result = result.replace(text.content, anonymized_text.content)
-
-                return ProcessResult(result, self.mapping, replaced_fields, processed_info)
+                result, mapping, replaced_fields, processed_info = self.text_processor.process_text_content(content)
+                return ProcessResult(result, mapping, replaced_fields, processed_info)

         except Exception as e:
             logger.error(f"Error processing content: {str(e)}")
-            return ProcessResult(None, self.mapping, [], {'type': 'error', 'error': str(e)})
\ No newline at end of file
+            return ProcessResult(None, {}, [], {'type': 'error', 'error': str(e)})
+
+    def get_mapping(self) -> Dict[str, str]:
+        """
+        Get the combined mapping from all processors
+
+        Returns:
+            Dict[str, str]: Combined mapping dictionary
+        """
+        text_mapping = self.text_processor.get_mapping()
+        list_mapping = self.list_processor.get_mapping()
+        return self.common_utils.merge_mappings(text_mapping, list_mapping)
+
+    def clear_mapping(self):
+        """Clear the mapping in all processors"""
+        self.text_processor.clear_mapping()
+        self.list_processor.clear_mapping()
\ No newline at end of file
diff --git a/modules/neutralizer/readme.md b/modules/neutralizer/readme.md
new file mode 100644
index 00000000..20d00816
--- /dev/null
+++ b/modules/neutralizer/readme.md
@@ -0,0 +1,91 @@
+# Neutralizer Module Structure
+
+This module provides GDPR-compliant (DSGVO) data anonymization for AI agent systems. The code has been refactored into specialized sub-modules for better maintainability and code reuse.
+
+## Module Overview
+
+### Core Module
+- **`neutralizer.py`** - Main DataAnonymizer class that orchestrates all processing
+
+### Specialized Processors
+- **`subProcessText.py`** - Handles plain text processing without header information
+- **`subProcessList.py`** - Handles structured data with headers (CSV, JSON, XML)
+- **`subProcessBinary.py`** - Handles binary data types (images, audio, video, etc.)
+
+### Utility Modules
+- **`subParseString.py`** - String parsing and replacement utilities for emails, phones, addresses, IDs and names
+- **`subProcessCommon.py`** - Common utilities and data structures shared across modules
+- **`subPatterns.py`** - Pattern definitions for data anonymization
+
+## Key Features
+
+### 1. Modular Architecture
+- **Separation of Concerns**: Each module handles a specific type of data processing
+- **Code Reuse**: Common functionality is centralized in utility modules
+- **Maintainability**: Easier to modify and extend individual components
+
+### 2. Processing Order
+1. **Pattern-based matches** (emails, phones, addresses, etc.) are processed FIRST
+2. **Custom names** from the user list are processed SECOND
+3. **Already anonymized content** (placeholders) is skipped
+
+### 3. Supported Data Types
+- **Text**: Plain text documents, emails, etc.
+- **Structured Data**: CSV, JSON, XML with headers
+- **Binary Data**: Images, audio, video (framework ready, implementation pending)
+
+### 4. Placeholder Protection
+- Prevents re-anonymization of already processed content
+- Uses format `[tag.uuid]` for placeholders
+- Validates placeholder format before processing
+
+## Usage Example
+
+```python
+from modules.neutralizer import DataAnonymizer
+
+# Initialize with custom names
+anonymizer = DataAnonymizer(names_to_parse=['John Doe', 'Jane Smith'])
+
+# Process content (auto-detects type when none is given)
+result = anonymizer.process_content(content)
+
+# Or specify content type explicitly
+result = anonymizer.process_content(content, content_type='csv')
+
+# Get mapping of original values to placeholders
+mapping = anonymizer.get_mapping()
+```
+
+## Module Dependencies
+
+```
+neutralizer.py
+├── subProcessCommon.py (ProcessResult, CommonUtils)
+├── subProcessText.py (TextProcessor)
+├── subProcessList.py (ListProcessor)
+├── subProcessBinary.py (BinaryProcessor)
+└── subPatterns.py (Pattern definitions)
+
+subProcessText.py
+└── subParseString.py (StringParser)
+
+subProcessList.py
+├── subParseString.py (StringParser)
+└── subPatterns.py (HeaderPatterns)
+
+subProcessBinary.py
+└── (standalone)
+
+subParseString.py
+└── subPatterns.py (DataPatterns)
+```
+
+## Benefits of New Structure
+
+1. **Single Responsibility**: Each module has one clear purpose
+2. **DRY Principle**: No code duplication across modules
+3. **Testability**: Individual modules can be tested in isolation
+4. **Extensibility**: Easy to add new data types or processing methods
+5. **Maintainability**: Changes to one module don't affect others
+6. **Performance**: Specialized processors are optimized for their data types
diff --git a/modules/neutralizer/subParseString.py b/modules/neutralizer/subParseString.py
new file mode 100644
index 00000000..a2b39333
--- /dev/null
+++ b/modules/neutralizer/subParseString.py
@@ -0,0 +1,162 @@
+"""
+String parsing and replacement utilities for data anonymization
+Handles pattern matching and replacement for emails, phones, addresses, IDs and names
+"""
+
+import re
+import uuid
+from typing import Dict, List, Tuple, Any
+from modules.neutralizer.subPatterns import DataPatterns, find_patterns_in_text
+
+class StringParser:
+    """Handles string parsing and replacement operations"""
+
+    def __init__(self, names_to_parse: List[str] = None):
+        """
+        Initialize the string parser
+
+        Args:
+            names_to_parse: List of names to parse and replace (case-insensitive)
+        """
+        self.data_patterns = DataPatterns.patterns
+        self.names_to_parse = names_to_parse or []
+        self.mapping = {}
+
+    def is_placeholder(self, text: str) -> bool:
+        """
+        Check if text is already a placeholder in format [tag.uuid]
+
+        Args:
+            text: Text to check
+
+        Returns:
+            bool: True if text is a placeholder
+        """
+        return bool(re.match(r'^\[[a-z]+\.[a-f0-9-]+\]$', text))
+
+    def replace_pattern_matches(self, text: str) -> str:
+        """
+        Replace pattern-based matches (emails, phones, etc.)
in text + + Args: + text: Text to process + + Returns: + str: Text with pattern matches replaced + """ + pattern_matches = find_patterns_in_text(text, self.data_patterns) + + # Process pattern matches from right to left to avoid position shifts + for pattern_name, matched_text, start, end in reversed(pattern_matches): + # Skip if already a placeholder + if self.is_placeholder(matched_text): + continue + + # Skip if contains placeholder characters + if '[' in matched_text or ']' in matched_text: + continue + + if matched_text not in self.mapping: + # Generate a UUID for the placeholder + placeholder_id = str(uuid.uuid4()) + # Create placeholder in format [type.uuid] + type_mapping = { + 'email': 'email', + 'phone': 'phone', + 'address': 'address', + 'id': 'id' + } + placeholder_type = type_mapping.get(pattern_name, 'data') + self.mapping[matched_text] = f"[{placeholder_type}.{placeholder_id}]" + + replacement = self.mapping[matched_text] + text = text[:start] + replacement + text[end:] + + return text + + def replace_custom_names(self, text: str) -> str: + """ + Replace custom names from the user list in text + + Args: + text: Text to process + + Returns: + str: Text with custom names replaced + """ + for name in self.names_to_parse: + if not name.strip(): + continue + + # Create case-insensitive regex pattern with word boundaries + pattern = re.compile(r'\b' + re.escape(name.strip()) + r'\b', re.IGNORECASE) + + # Find all matches for this name + matches = list(pattern.finditer(text)) + + # Replace each match with a placeholder + for match in reversed(matches): # Process from right to left to avoid position shifts + matched_text = match.group() + if matched_text not in self.mapping: + # Generate a UUID for the placeholder + placeholder_id = str(uuid.uuid4()) + self.mapping[matched_text] = f"[name.{placeholder_id}]" + + replacement = self.mapping[matched_text] + start, end = match.span() + text = text[:start] + replacement + text[end:] + + return text + + def process_string(self, text: str) -> str: + """ + Process a string by replacing patterns first, then custom names + + Args: + text: Text to process + + Returns: + str: Processed text with replacements + """ + if self.is_placeholder(text): + return text + + # Step 1: Replace pattern-based matches FIRST + text = self.replace_pattern_matches(text) + + # Step 2: Replace custom names SECOND + text = self.replace_custom_names(text) + + return text + + def process_json_value(self, value: Any) -> Any: + """ + Process a JSON value for anonymization + + Args: + value: Value to process + + Returns: + Any: Processed value + """ + if isinstance(value, str): + return self.process_string(value) + elif isinstance(value, dict): + return {k: self.process_json_value(v) for k, v in value.items()} + elif isinstance(value, list): + return [self.process_json_value(item) for item in value] + else: + return value + + def get_mapping(self) -> Dict[str, str]: + """ + Get the current mapping of original values to placeholders + + Returns: + Dict[str, str]: Mapping dictionary + """ + return self.mapping.copy() + + def clear_mapping(self): + """Clear the current mapping""" + self.mapping.clear() diff --git a/modules/neutralizer/patterns.py b/modules/neutralizer/subPatterns.py similarity index 100% rename from modules/neutralizer/patterns.py rename to modules/neutralizer/subPatterns.py diff --git a/modules/neutralizer/subProcessBinary.py b/modules/neutralizer/subProcessBinary.py new file mode 100644 index 00000000..67c73bc1 --- /dev/null +++ 
b/modules/neutralizer/subProcessBinary.py @@ -0,0 +1,101 @@ +""" +Binary data processing module for data anonymization +Handles binary data types (images, audio, video, etc.) +""" + +import base64 +import re +from typing import Dict, Any, Tuple +from dataclasses import dataclass + +@dataclass +class BinaryData: + """Repräsentiert Binärdaten""" + content: str + data_type: str # 'image', 'audio', 'video', 'document', 'unknown' + encoding: str # 'base64', 'hex', 'raw' + +class BinaryProcessor: + """Handles binary data processing for anonymization""" + + def __init__(self): + """Initialize the binary processor""" + self.supported_types = { + 'image': ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.webp'], + 'audio': ['.mp3', '.wav', '.flac', '.aac', '.ogg', '.m4a'], + 'video': ['.mp4', '.avi', '.mov', '.wmv', '.flv', '.mkv', '.webm'], + 'document': ['.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx'] + } + + def detect_binary_type(self, content: str) -> str: + """ + Detect if content is binary data and determine type + + Args: + content: Content to analyze + + Returns: + str: Binary type or 'text' if not binary + """ + # Check if content looks like base64 + if re.match(r'^[A-Za-z0-9+/]*={0,2}$', content.strip()): + try: + decoded = base64.b64decode(content) + # Try to decode as text + decoded.decode('utf-8') + return 'text' # It's base64 encoded text + except (base64.binascii.Error, UnicodeDecodeError): + # It's binary data + return 'binary' + + # Check for binary patterns + if len(content) > 100 and '\x00' in content: + return 'binary' + + return 'text' + + def is_binary_content(self, content: str) -> bool: + """ + Check if content is binary data + + Args: + content: Content to check + + Returns: + bool: True if content is binary + """ + return self.detect_binary_type(content) == 'binary' + + def process_binary_content(self, content: str) -> Tuple[Any, Dict[str, str], list, Dict[str, Any]]: + """ + Process binary content for anonymization + + Args: + content: Binary content to process + + Returns: + Tuple of (processed_data, mapping, replaced_fields, processed_info) + """ + # TODO: Implement binary data neutralization + # This would require: + # 1. Detecting binary data types (images, audio, video, etc.) + # 2. Implementing specific neutralization for each type + # 3. Handling metadata and embedded content + # 4. 
Preserving binary integrity while removing sensitive data + + processed_info = { + 'type': 'binary', + 'status': 'not_implemented', + 'message': 'Binary data neutralization not yet implemented' + } + + return content, {}, [], processed_info + + def get_supported_types(self) -> Dict[str, list]: + """ + Get list of supported binary file types + + Returns: + Dict[str, list]: Dictionary of supported types and their extensions + """ + return self.supported_types.copy() diff --git a/modules/neutralizer/subProcessCommon.py b/modules/neutralizer/subProcessCommon.py new file mode 100644 index 00000000..6ffc2166 --- /dev/null +++ b/modules/neutralizer/subProcessCommon.py @@ -0,0 +1,143 @@ +""" +Common processing utilities for data anonymization +Shared functions and data structures +""" + +import re +from typing import Dict, List, Any, Union, Optional +from dataclasses import dataclass + +@dataclass +class ProcessResult: + """Result of content processing""" + data: Any + mapping: Dict[str, str] + replaced_fields: List[str] + processed_info: Dict[str, Any] # Additional processing information + +class CommonUtils: + """Common utility functions for data processing""" + + @staticmethod + def normalize_whitespace(text: str) -> str: + """ + Normalize whitespace in text + + Args: + text: Text to normalize + + Returns: + str: Normalized text + """ + text = re.sub(r'\s+', ' ', text) + text = text.replace('\r\n', '\n').replace('\r', '\n') + return text.strip() + + @staticmethod + def is_table_line(line: str) -> bool: + """ + Check if a line represents a table row + + Args: + line: Line to check + + Returns: + bool: True if line is a table row + """ + return bool(re.match(r'^\s*[^:]+:\s*[^:]+$', line) or + re.match(r'^\s*[^\t]+\t[^\t]+$', line)) + + @staticmethod + def detect_content_type(content: str) -> str: + """ + Detect the type of content based on its structure + + Args: + content: Content to analyze + + Returns: + str: Content type ('csv', 'json', 'xml', 'text', 'binary') + """ + content = content.strip() + + # Check for JSON + if content.startswith('{') and content.endswith('}'): + return 'json' + if content.startswith('[') and content.endswith(']'): + return 'json' + + # Check for XML + if content.startswith('<') and content.endswith('>'): + return 'xml' + + # Check for CSV (has commas and newlines) + if ',' in content and '\n' in content: + lines = content.split('\n') + if len(lines) > 1 and all(',' in line for line in lines[:3]): + return 'csv' + + # Check for binary + if len(content) > 100 and '\x00' in content: + return 'binary' + + # Default to text + return 'text' + + @staticmethod + def merge_mappings(*mappings: Dict[str, str]) -> Dict[str, str]: + """ + Merge multiple mapping dictionaries + + Args: + *mappings: Mapping dictionaries to merge + + Returns: + Dict[str, str]: Merged mapping dictionary + """ + merged = {} + for mapping in mappings: + merged.update(mapping) + return merged + + @staticmethod + def create_placeholder(placeholder_type: str, placeholder_id: str) -> str: + """ + Create a placeholder string in the format [type.uuid] + + Args: + placeholder_type: Type of placeholder (email, phone, name, etc.) 
+ placeholder_id: Unique identifier for the placeholder + + Returns: + str: Formatted placeholder string + """ + return f"[{placeholder_type}.{placeholder_id}]" + + @staticmethod + def validate_placeholder(placeholder: str) -> bool: + """ + Validate if a string is a valid placeholder + + Args: + placeholder: String to validate + + Returns: + bool: True if valid placeholder + """ + return bool(re.match(r'^\[[a-z]+\.[a-f0-9-]+\]$', placeholder)) + + @staticmethod + def extract_placeholder_info(placeholder: str) -> Optional[tuple]: + """ + Extract type and ID from a placeholder + + Args: + placeholder: Placeholder string + + Returns: + Optional[tuple]: (type, id) or None if invalid + """ + match = re.match(r'^\[([a-z]+)\.([a-f0-9-]+)\]$', placeholder) + if match: + return match.group(1), match.group(2) + return None diff --git a/modules/neutralizer/subProcessList.py b/modules/neutralizer/subProcessList.py new file mode 100644 index 00000000..58981333 --- /dev/null +++ b/modules/neutralizer/subProcessList.py @@ -0,0 +1,279 @@ +""" +List processing module for data anonymization +Handles structured data with headers (CSV, JSON, XML) +""" + +import json +import pandas as pd +import xml.etree.ElementTree as ET +from typing import Dict, List, Any, Union +from dataclasses import dataclass +from io import StringIO +from modules.neutralizer.subParseString import StringParser +from modules.neutralizer.subPatterns import get_pattern_for_header, HeaderPatterns + +@dataclass +class TableData: + """Repräsentiert Tabellendaten""" + headers: List[str] + rows: List[List[str]] + source_type: str # 'csv', 'json', 'xml', 'text_table' + +class ListProcessor: + """Handles structured data processing with headers for anonymization""" + + def __init__(self, names_to_parse: List[str] = None): + """ + Initialize the list processor + + Args: + names_to_parse: List of names to parse and replace + """ + self.string_parser = StringParser(names_to_parse) + self.header_patterns = HeaderPatterns.patterns + + def anonymize_table(self, table: TableData) -> TableData: + """ + Anonymize table data based on headers + + Args: + table: TableData object to anonymize + + Returns: + TableData: Anonymized table + """ + anonymized_table = TableData( + headers=table.headers.copy(), + rows=[row.copy() for row in table.rows], + source_type=table.source_type + ) + + for i, header in enumerate(anonymized_table.headers): + pattern = get_pattern_for_header(header, self.header_patterns) + if pattern: + for row in anonymized_table.rows: + if row[i] is not None: + original = str(row[i]) + if original not in self.string_parser.mapping: + # Generate a UUID for the placeholder + import uuid + placeholder_id = str(uuid.uuid4()) + self.string_parser.mapping[original] = pattern.replacement_template.format(len(self.string_parser.mapping) + 1) + row[i] = self.string_parser.mapping[original] + + return anonymized_table + + def process_csv_content(self, content: str) -> tuple: + """ + Process CSV content + + Args: + content: CSV content to process + + Returns: + Tuple of (processed_data, mapping, replaced_fields, processed_info) + """ + df = pd.read_csv(StringIO(content), encoding='utf-8') + table = TableData( + headers=df.columns.tolist(), + rows=df.values.tolist(), + source_type='csv' + ) + + if not table.rows: + return None, self.string_parser.get_mapping(), [], {'type': 'table', 'headers': table.headers, 'row_count': 0} + + anonymized_table = self.anonymize_table(table) + + # Track replaced fields + replaced_fields = [] + for i, header in 
enumerate(anonymized_table.headers): + for orig_row, anon_row in zip(table.rows, anonymized_table.rows): + if anon_row[i] != orig_row[i]: + replaced_fields.append(header) + + # Convert back to DataFrame + result = pd.DataFrame(anonymized_table.rows, columns=anonymized_table.headers) + + processed_info = { + 'type': 'table', + 'headers': table.headers, + 'row_count': len(table.rows) + } + + return result, self.string_parser.get_mapping(), replaced_fields, processed_info + + def process_json_content(self, content: str) -> tuple: + """ + Process JSON content + + Args: + content: JSON content to process + + Returns: + Tuple of (processed_data, mapping, replaced_fields, processed_info) + """ + data = json.loads(content) + + # Process JSON recursively using string parser + result = self.string_parser.process_json_value(data) + + processed_info = {'type': 'json'} + + return result, self.string_parser.get_mapping(), [], processed_info + + def anonymize_xml_element(self, element: ET.Element, indent: str = '') -> str: + """ + Recursively process XML element and return formatted string + + Args: + element: XML element to process + indent: Current indentation level + + Returns: + Formatted XML string + """ + # Process attributes + processed_attrs = {} + for attr_name, attr_value in element.attrib.items(): + # Check if attribute name matches any header patterns + pattern = get_pattern_for_header(attr_name, self.header_patterns) + if pattern: + if attr_value not in self.string_parser.mapping: + # Generate a UUID for the placeholder + import uuid + placeholder_id = str(uuid.uuid4()) + # Create placeholder in format [type.uuid] + type_mapping = { + 'email': 'email', + 'phone': 'phone', + 'name': 'name', + 'address': 'address', + 'id': 'id' + } + placeholder_type = type_mapping.get(pattern.name, 'data') + self.string_parser.mapping[attr_value] = f"[{placeholder_type}.{placeholder_id}]" + processed_attrs[attr_name] = self.string_parser.mapping[attr_value] + else: + # Check if attribute value matches any data patterns + from modules.neutralizer.subPatterns import find_patterns_in_text, DataPatterns + matches = find_patterns_in_text(attr_value, DataPatterns.patterns) + if matches: + pattern_name = matches[0][0] + pattern = next((p for p in DataPatterns.patterns if p.name == pattern_name), None) + if pattern: + if attr_value not in self.string_parser.mapping: + # Generate a UUID for the placeholder + import uuid + placeholder_id = str(uuid.uuid4()) + # Create placeholder in format [type.uuid] + type_mapping = { + 'email': 'email', + 'phone': 'phone', + 'name': 'name', + 'address': 'address', + 'id': 'id' + } + placeholder_type = type_mapping.get(pattern_name, 'data') + self.string_parser.mapping[attr_value] = f"[{placeholder_type}.{placeholder_id}]" + processed_attrs[attr_name] = self.string_parser.mapping[attr_value] + else: + processed_attrs[attr_name] = attr_value + else: + processed_attrs[attr_name] = attr_value + + attrs = ' '.join(f'{k}="{v}"' for k, v in processed_attrs.items()) + attrs = f' {attrs}' if attrs else '' + + # Process text content + text = element.text.strip() if element.text and element.text.strip() else '' + if text: + # Skip if already a placeholder + if not self.string_parser.is_placeholder(text): + # Check if text matches any patterns + from modules.neutralizer.subPatterns import find_patterns_in_text, DataPatterns + pattern_matches = find_patterns_in_text(text, DataPatterns.patterns) + + if pattern_matches: + pattern_name = pattern_matches[0][0] + pattern = next((p for p in 
DataPatterns.patterns if p.name == pattern_name), None) + if pattern: + if text not in self.string_parser.mapping: + # Generate a UUID for the placeholder + import uuid + placeholder_id = str(uuid.uuid4()) + # Create placeholder in format [type.uuid] + type_mapping = { + 'email': 'email', + 'phone': 'phone', + 'name': 'name', + 'address': 'address', + 'id': 'id' + } + placeholder_type = type_mapping.get(pattern_name, 'data') + self.string_parser.mapping[text] = f"[{placeholder_type}.{placeholder_id}]" + text = self.string_parser.mapping[text] + else: + # Check if text matches any custom names from the user list + for name in self.string_parser.names_to_parse: + if not name.strip(): + continue + if text.lower().strip() == name.lower().strip(): + if text not in self.string_parser.mapping: + # Generate a UUID for the placeholder + import uuid + placeholder_id = str(uuid.uuid4()) + self.string_parser.mapping[text] = f"[name.{placeholder_id}]" + text = self.string_parser.mapping[text] + break + + # Process child elements + children = [] + for child in element: + child_str = self.anonymize_xml_element(child, indent + ' ') + children.append(child_str) + + # Build element string + if not children and not text: + return f"{indent}<{element.tag}{attrs}/>" + elif not children: + return f"{indent}<{element.tag}{attrs}>{text}" + else: + result = [f"{indent}<{element.tag}{attrs}>"] + if text: + result.append(f"{indent} {text}") + result.extend(children) + result.append(f"{indent}") + return '\n'.join(result) + + def process_xml_content(self, content: str) -> tuple: + """ + Process XML content + + Args: + content: XML content to process + + Returns: + Tuple of (processed_data, mapping, replaced_fields, processed_info) + """ + root = ET.fromstring(content) + + # Process XML recursively with proper formatting + result = self.anonymize_xml_element(root) + + processed_info = {'type': 'xml'} + + return result, self.string_parser.get_mapping(), [], processed_info + + def get_mapping(self) -> Dict[str, str]: + """ + Get the current mapping of original values to placeholders + + Returns: + Dict[str, str]: Mapping dictionary + """ + return self.string_parser.get_mapping() + + def clear_mapping(self): + """Clear the current mapping""" + self.string_parser.clear_mapping() diff --git a/modules/neutralizer/subProcessText.py b/modules/neutralizer/subProcessText.py new file mode 100644 index 00000000..c9ad872f --- /dev/null +++ b/modules/neutralizer/subProcessText.py @@ -0,0 +1,101 @@ +""" +Text processing module for data anonymization +Handles plain text processing without header information +""" + +from typing import Dict, List, Any +from dataclasses import dataclass +from modules.neutralizer.subParseString import StringParser + +@dataclass +class PlainText: + """Repräsentiert normalen Text""" + content: str + source_type: str # 'txt', 'docx', 'text_plain' + +class TextProcessor: + """Handles plain text processing for anonymization""" + + def __init__(self, names_to_parse: List[str] = None): + """ + Initialize the text processor + + Args: + names_to_parse: List of names to parse and replace + """ + self.string_parser = StringParser(names_to_parse) + + def extract_tables_from_text(self, content: str) -> tuple: + """ + Extract tables and plain text from content + + Args: + content: Content to process + + Returns: + Tuple of (list of tables, list of plain text sections) + """ + # For now, process the entire content as plain text + # This can be extended later to detect table-like structures + tables = [] + plain_texts = 
[PlainText(content=content, source_type='text_plain')]
+
+        return tables, plain_texts
+
+    def anonymize_plain_text(self, text: PlainText) -> PlainText:
+        """
+        Anonymize plain text content
+
+        Args:
+            text: PlainText object to anonymize
+
+        Returns:
+            PlainText: Anonymized text
+        """
+        # Use the string parser to process the content
+        anonymized_content = self.string_parser.process_string(text.content)
+
+        return PlainText(content=anonymized_content, source_type=text.source_type)
+
+    def process_text_content(self, content: str) -> tuple:
+        """
+        Process text content and return anonymized data
+
+        Args:
+            content: Text content to process
+
+        Returns:
+            Tuple of (anonymized_content, mapping, replaced_fields, processed_info)
+        """
+        # Extract tables and plain text sections
+        tables, plain_texts = self.extract_tables_from_text(content)
+
+        # Process plain text sections
+        anonymized_texts = [self.anonymize_plain_text(text) for text in plain_texts]
+
+        # Combine all processed content
+        result = content
+        for text, anonymized_text in zip(plain_texts, anonymized_texts):
+            if text.content != anonymized_text.content:
+                result = result.replace(text.content, anonymized_text.content)
+
+        # Get processing information; the comprehension is simply empty when no tables exist
+        processed_info = {
+            'type': 'text',
+            'tables': [{'headers': t.headers, 'row_count': len(t.rows)} for t in tables]
+        }
+
+        return result, self.string_parser.get_mapping(), [], processed_info
+
+    def get_mapping(self) -> Dict[str, str]:
+        """
+        Get the current mapping of original values to placeholders
+
+        Returns:
+            Dict[str, str]: Mapping dictionary
+        """
+        return self.string_parser.get_mapping()
+
+    def clear_mapping(self):
+        """Clear the current mapping"""
+        self.string_parser.clear_mapping()
diff --git a/modules/routes/routeDataFiles.py b/modules/routes/routeDataFiles.py
index 3243bc21..f0feef25 100644
--- a/modules/routes/routeDataFiles.py
+++ b/modules/routes/routeDataFiles.py
@@ -18,7 +18,7 @@ import modules.interfaces.interfaceComponentObjects as interfaceComponentObjects
 from modules.interfaces.interfaceComponentModel import FileItem, FilePreview
 from modules.shared.attributeUtils import getModelAttributeDefinitions, AttributeResponse, AttributeDefinition
 from modules.interfaces.interfaceAppModel import User, DataNeutraliserConfig, DataNeutralizerAttributes
-from modules.services.serviceNeutralization import NeutralizationService
+from modules.features.featureNeutralizePlayground import NeutralizationService

 # Configure logger
 logger = logging.getLogger(__name__)
diff --git a/modules/routes/routeSecurityGoogle.py b/modules/routes/routeSecurityGoogle.py
index 2967e1fc..9cca2b3a 100644
--- a/modules/routes/routeSecurityGoogle.py
+++ b/modules/routes/routeSecurityGoogle.py
@@ -590,6 +590,20 @@ async def logout(
     try:
         appInterface = getInterface(currentUser)
         appInterface.logout()
+
+        # Log successful logout
+        try:
+            from modules.shared.auditLogger import audit_logger
+            audit_logger.log_user_access(
+                user_id=str(currentUser.id),
+                mandate_id=str(currentUser.mandateId),
+                action="logout",
+                success_info="google_auth_logout"
+            )
+        except Exception:
+            # Don't fail if audit logging fails
+            pass
+
         return {"message": "Logged out successfully"}
     except Exception as e:
         logger.error(f"Error during logout: {str(e)}")
diff --git a/modules/routes/routeSecurityLocal.py b/modules/routes/routeSecurityLocal.py
index c0b176b0..15f998f9 100644
--- a/modules/routes/routeSecurityLocal.py
+++ b/modules/routes/routeSecurityLocal.py
@@ -124,6 +124,19 @@ async def
login( # Save access token userInterface.saveAccessToken(token) + # Log successful login + try: + from modules.shared.auditLogger import audit_logger + audit_logger.log_user_access( + user_id=str(user.id), + mandate_id=str(user.mandateId), + action="login", + success_info="local_auth_success" + ) + except Exception: + # Don't fail if audit logging fails + pass + # Create response data response_data = { "type": "local_auth_success", @@ -138,6 +151,20 @@ async def login( # Handle authentication errors error_msg = str(e) logger.warning(f"Authentication failed for user {formData.username}: {error_msg}") + + # Log failed login attempt + try: + from modules.shared.auditLogger import audit_logger + audit_logger.log_user_access( + user_id="unknown", + mandate_id="unknown", + action="login", + success_info=f"failed: {error_msg}" + ) + except Exception: + # Don't fail if audit logging fails + pass + raise HTTPException( status_code=status.HTTP_401_UNAUTHORIZED, detail=error_msg, @@ -253,6 +280,19 @@ async def logout(request: Request, currentUser: User = Depends(getCurrentUser)) appInterface.revokeTokenById(jti, revokedBy=currentUser.id, reason="logout") revoked = 1 + # Log successful logout + try: + from modules.shared.auditLogger import audit_logger + audit_logger.log_user_access( + user_id=str(currentUser.id), + mandate_id=str(currentUser.mandateId), + action="logout", + success_info=f"revoked_tokens: {revoked}" + ) + except Exception: + # Don't fail if audit logging fails + pass + return JSONResponse({ "message": "Successfully logged out", "revokedTokens": revoked diff --git a/modules/routes/routeSecurityMsft.py b/modules/routes/routeSecurityMsft.py index efde94a3..8c2d8856 100644 --- a/modules/routes/routeSecurityMsft.py +++ b/modules/routes/routeSecurityMsft.py @@ -463,6 +463,20 @@ async def logout( try: appInterface = getInterface(currentUser) appInterface.logout() + + # Log successful logout + try: + from modules.shared.auditLogger import audit_logger + audit_logger.log_user_access( + user_id=str(currentUser.id), + mandate_id=str(currentUser.mandateId), + action="logout", + success_info="microsoft_auth_logout" + ) + except Exception: + # Don't fail if audit logging fails + pass + return {"message": "Logged out successfully"} except Exception as e: logger.error(f"Error during logout: {str(e)}") diff --git a/modules/routes/routeVoiceGoogle.py b/modules/routes/routeVoiceGoogle.py index 048e483d..2b68299b 100644 --- a/modules/routes/routeVoiceGoogle.py +++ b/modules/routes/routeVoiceGoogle.py @@ -161,12 +161,12 @@ async def realtime_interpreter( logger.info(f"📊 Audio file size: {len(audio_content)} bytes") # Save audio file for debugging with correct extension - file_extension = "webm" if audio_file.filename.endswith('.webm') else "wav" - debug_filename = f"debug_audio/audio_google_{audio_file.filename.replace('.wav', '.webm')}" - os.makedirs("debug_audio", exist_ok=True) - with open(debug_filename, "wb") as f: - f.write(audio_content) - logger.info(f"💾 Saved audio file for debugging: {debug_filename}") + # file_extension = "webm" if audio_file.filename.endswith('.webm') else "wav" + # debug_filename = f"debug_audio/audio_google_{audio_file.filename.replace('.wav', '.webm')}" + # os.makedirs("debug_audio", exist_ok=True) + # with open(debug_filename, "wb") as f: + # f.write(audio_content) + # logger.info(f"💾 Saved audio file for debugging: {debug_filename}") # Validate audio format connector = get_google_speech_connector() diff --git a/modules/security/auth.py b/modules/security/auth.py index 
diff --git a/modules/security/auth.py b/modules/security/auth.py
index f314b065..4ada086c 100644
--- a/modules/security/auth.py
+++ b/modules/security/auth.py
@@ -19,7 +19,7 @@ from modules.interfaces.interfaceAppObjects import getRootInterface
 from modules.interfaces.interfaceAppModel import User, AuthAuthority, Token
 
 # Get Config Data
-SECRET_KEY = APP_CONFIG.get("APP_JWT_SECRET_SECRET")
+SECRET_KEY = APP_CONFIG.get("APP_JWT_KEY_SECRET")
 ALGORITHM = APP_CONFIG.get("Auth_ALGORITHM")
 ACCESS_TOKEN_EXPIRE_MINUTES = int(APP_CONFIG.get("APP_TOKEN_EXPIRY"))
 REFRESH_TOKEN_EXPIRE_DAYS = int(APP_CONFIG.get("APP_REFRESH_TOKEN_EXPIRY", "7"))
diff --git a/modules/shared/auditLogger.py b/modules/shared/auditLogger.py
new file mode 100644
index 00000000..dab32fa9
--- /dev/null
+++ b/modules/shared/auditLogger.py
@@ -0,0 +1,202 @@
+"""
+Audit Logging System for PowerOn Gateway
+
+This module provides centralized audit logging functionality for security events,
+user actions, and system access patterns.
+"""
+
+import logging
+import os
+from datetime import datetime
+from typing import Optional, Dict, Any
+from logging.handlers import RotatingFileHandler
+from modules.shared.configuration import APP_CONFIG
+
+
+class DailyRotatingFileHandler(RotatingFileHandler):
+    """
+    A rotating file handler that automatically switches to a new file when the date changes.
+    The log file name includes the current date and switches at midnight.
+    """
+
+    def __init__(self, log_dir, filename_prefix, max_bytes=10485760, backup_count=5, **kwargs):
+        self.log_dir = log_dir
+        self.filename_prefix = filename_prefix
+        self.current_date = None
+        self.current_file = None
+
+        # Initialize with today's file
+        self._update_file_if_needed()
+
+        # Call parent constructor with current file
+        super().__init__(self.current_file, maxBytes=max_bytes, backupCount=backup_count, **kwargs)
+
+    def _update_file_if_needed(self):
+        """Update the log file if the date has changed"""
+        today = datetime.now().strftime("%Y%m%d")
+
+        if self.current_date != today:
+            self.current_date = today
+            new_file = os.path.join(self.log_dir, f"{self.filename_prefix}_{today}.log")
+
+            if self.current_file != new_file:
+                self.current_file = new_file
+                return True
+        return False
+
+    def emit(self, record):
+        """Emit a log record, switching files if date has changed"""
+        # Check if we need to switch to a new file
+        if self._update_file_if_needed():
+            # Close current file and open new one
+            if self.stream:
+                self.stream.close()
+                self.stream = None
+
+            # Update the baseFilename for the parent class
+            self.baseFilename = self.current_file
+            # Reopen the stream
+            if not self.delay:
+                self.stream = self._open()
+
+        # Call parent emit method
+        super().emit(record)
+
+
+class AuditLogger:
+    """Centralized audit logging system"""
+
+    def __init__(self):
+        self.logger = None
+        self._setup_audit_logger()
+
+    def _setup_audit_logger(self):
+        """Set up the audit logger with daily file rotation"""
+        try:
+            # Get log directory from config
+            logDir = APP_CONFIG.get("APP_LOGGING_LOG_DIR", "./")
+            if not os.path.isabs(logDir):
+                # If relative path, make it relative to the gateway directory
+                gatewayDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+                logDir = os.path.join(gatewayDir, logDir)
+
+            # Ensure log directory exists
+            os.makedirs(logDir, exist_ok=True)
+
+            # Create audit logger
+            self.logger = logging.getLogger('audit')
+            self.logger.setLevel(logging.INFO)
+
+            # Remove any existing handlers to avoid duplicates
+            for handler in self.logger.handlers[:]:
+                self.logger.removeHandler(handler)
+
+            # Create daily rotating file handler for audit log
+            rotationSize = int(APP_CONFIG.get("APP_LOGGING_ROTATION_SIZE", 10485760))  # Default: 10MB
+            backupCount = int(APP_CONFIG.get("APP_LOGGING_BACKUP_COUNT", 5))
+
+            fileHandler = DailyRotatingFileHandler(
+                log_dir=logDir,
+                filename_prefix="log_audit",
+                max_bytes=rotationSize,
+                backup_count=backupCount
+            )
+
+            # Create formatter for audit log
+            auditFormatter = logging.Formatter(
+                fmt="%(asctime)s | %(message)s",
+                datefmt="%Y-%m-%d %H:%M:%S"
+            )
+            fileHandler.setFormatter(auditFormatter)
+
+            # Add handler to logger
+            self.logger.addHandler(fileHandler)
+
+            # Prevent propagation to root logger
+            self.logger.propagate = False
+
+        except Exception as e:
+            # Fallback to standard logger if audit setup fails
+            self.logger = logging.getLogger(__name__)
+            self.logger.error(f"Failed to setup audit logger: {str(e)}")
+
+    def log_event(self,
+                  user_id: str,
+                  mandate_id: str,
+                  category: str,
+                  action: str,
+                  details: str = "",
+                  timestamp: Optional[datetime] = None) -> None:
+        """
+        Log an audit event
+
+        Args:
+            user_id: User identifier
+            mandate_id: Mandate identifier (can be empty if not applicable)
+            category: Event category (e.g., 'key', 'access', 'data')
+            action: Specific action (e.g., 'decode', 'login', 'logout')
+            details: Additional details about the event
+            timestamp: Optional custom timestamp (defaults to current time)
+        """
+        try:
+            if not self.logger:
+                return
+
+            # Use provided timestamp or current time
+            if timestamp is None:
+                timestamp = datetime.now()
+
+            # Format the audit log entry
+            # Format: timestamp | userid | mandateid | category | action | details
+            audit_entry = f"{user_id} | {mandate_id} | {category} | {action} | {details}"
+
+            # Log the event
+            self.logger.info(audit_entry)
+
+        except Exception as e:
+            # Use standard logger as fallback
+            logging.getLogger(__name__).error(f"Failed to log audit event: {str(e)}")
+
+    def log_key_access(self, user_id: str, mandate_id: str, key_name: str, action: str) -> None:
+        """Log key access events (decode/encode)"""
+        self.log_event(
+            user_id=user_id,
+            mandate_id=mandate_id,
+            category="key",
+            action=action,
+            details=key_name
+        )
+
+    def log_user_access(self, user_id: str, mandate_id: str, action: str, success_info: str = "") -> None:
+        """Log user access events (login/logout)"""
+        self.log_event(
+            user_id=user_id,
+            mandate_id=mandate_id,
+            category="access",
+            action=action,
+            details=success_info
+        )
+
+    def log_data_access(self, user_id: str, mandate_id: str, action: str, details: str = "") -> None:
+        """Log data access events"""
+        self.log_event(
+            user_id=user_id,
+            mandate_id=mandate_id,
+            category="data",
+            action=action,
+            details=details
+        )
+
+    def log_security_event(self, user_id: str, mandate_id: str, action: str, details: str = "") -> None:
+        """Log security-related events"""
+        self.log_event(
+            user_id=user_id,
+            mandate_id=mandate_id,
+            category="security",
+            action=action,
+            details=details
+        )
+
+
+# Global audit logger instance
+audit_logger = AuditLogger()
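For reference, a short usage sketch of the new module as added above; the output lines follow the `%(asctime)s | %(message)s` formatter and the pipe-delimited entry built in log_event (the example ids are invented):

from modules.shared.auditLogger import audit_logger

# Writes to <logDir>/log_audit_<YYYYMMDD>.log, switching files at midnight.
audit_logger.log_user_access(user_id="42", mandate_id="7", action="login",
                             success_info="local_auth_success")
audit_logger.log_key_access(user_id="system", mandate_id="system",
                            key_name="APP_JWT_KEY_SECRET", action="decode")

# Resulting lines (timestamp | userid | mandateid | category | action | details):
# 2025-01-01 12:00:00 | 42 | 7 | access | login | local_auth_success
# 2025-01-01 12:00:00 | system | system | key | decode | APP_JWT_KEY_SECRET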
diff --git a/modules/shared/configuration.py b/modules/shared/configuration.py
index 9415b7f7..e906840d 100644
--- a/modules/shared/configuration.py
+++ b/modules/shared/configuration.py
@@ -7,8 +7,14 @@ config.ini files and environment variables stored in .env files, using a flat st
 import os
 import logging
+import json
+import base64
+import time
 from typing import Any, Dict, Optional
 from pathlib import Path
+from cryptography.fernet import Fernet
+from cryptography.hazmat.primitives import hashes
+from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC
 
 # Set up basic logging for configuration loading
 logging.basicConfig(
@@ -119,20 +125,43 @@ class Configuration:
         try:
             with open(envPath, 'r') as f:
-                for line in f:
-                    line = line.strip()
-                    # Skip empty lines and comments
-                    if not line or line.startswith('#'):
-                        continue
-
-                    # Parse key-value pairs
-                    if '=' in line:
-                        key, value = line.split('=', 1)
-                        key = key.strip()
-                        value = value.strip()
-
-                        # Add directly to data dictionary
-                        self._data[key] = value
+                lines = f.readlines()
+
+            i = 0
+            while i < len(lines):
+                line = lines[i].strip()
+
+                # Skip empty lines and comments
+                if not line or line.startswith('#'):
+                    i += 1
+                    continue
+
+                # Parse key-value pairs
+                if '=' in line:
+                    key, value = line.split('=', 1)
+                    key = key.strip()
+                    value = value.strip()
+
+                    # Check if value starts with { (JSON object)
+                    if value.startswith('{'):
+                        # Collect all lines until the braces balance out
+                        json_lines = [value]
+                        brace_count = value.count('{') - value.count('}')
+                        i += 1
+
+                        while i < len(lines) and brace_count > 0:
+                            json_lines.append(lines[i].rstrip('\n'))
+                            brace_count += lines[i].count('{') - lines[i].count('}')
+                            i += 1
+
+                        # Join all lines and store the full JSON value.
+                        # i already points past the JSON block here, so skip the
+                        # shared increment below to avoid swallowing the next line.
+                        self._data[key] = '\n'.join(json_lines)
+                        continue
+
+                    # Single line value
+                    self._data[key] = value
+
+                i += 1
 
             logger.info(f"Loaded environment variables from {envPath.absolute()}")
@@ -158,7 +187,7 @@ class Configuration:
                 logger.info("Environment file has changed, reloading...")
                 self._loadEnv()
 
-    def get(self, key: str, default: Any = None) -> Any:
+    def get(self, key: str, default: Any = None, user_id: str = "system") -> Any:
         """Get configuration value with optional default"""
         self.checkForUpdates()  # Check for file changes
@@ -166,10 +195,24 @@
             value = self._data[key]
             # Handle secrets (keys ending with _SECRET)
             if key.endswith("_SECRET"):
-                return handleSecret(value)
-            # Handle JSON secrets (keys ending with _API_KEY that contain JSON)
-            elif key.endswith("_API_KEY") and value.startswith("{"):
-                return handleJsonSecret(value)
+                # Log audit event for secret key access
+                try:
+                    from modules.shared.auditLogger import audit_logger
+                    audit_logger.log_key_access(
+                        user_id=user_id,
+                        mandate_id="system",
+                        key_name=key,
+                        action="decode"
+                    )
+                except Exception:
+                    # Don't fail if audit logging fails
+                    pass
+
+                if value.startswith("{") and value.endswith("}"):
+                    # Handle JSON secrets (_SECRET keys that contain JSON)
+                    return handleSecretJson(value, user_id, key)
+                else:
+                    return handleSecretText(value, user_id, key)
             return value
         return default
@@ -177,7 +220,7 @@
         """Enable attribute-style access to configuration"""
         self.checkForUpdates()  # Check for file changes
 
-        value = self.get(name)
+        value = self.get(name, user_id="system")
         if value is None:
             raise AttributeError(f"Configuration key '{name}' not found")
         return value
@@ -191,42 +234,306 @@
         """Set a configuration value (for testing/overrides)"""
         self._data[key] = value
 
-def handleSecret(value: str) -> str:
+def handleSecretText(value: str, user_id: str = "system", key_name: str = "unknown") -> str:
     """
-    Handle secret values. Currently just returns the plain text value,
-    but can be enhanced to provide actual decryption in the future.
+    Handle secret values with encryption/decryption support.
 
     Args:
-        value: The secret value to handle
+        value: The secret value to handle (may be encrypted)
+        user_id: The user ID making the request (default: "system")
+        key_name: The name of the key being decrypted (default: "unknown")
 
     Returns:
-        str: Processed secret value
+        str: Processed secret value (decrypted if encrypted)
     """
-    # For now, just return the value as-is
-    # In the future, this could be enhanced to decrypt values
+    if _is_encrypted_value(value):
+        return decrypt_value(value, user_id, key_name)
     return value
 
-def handleJsonSecret(value: str) -> str:
+def handleSecretJson(value: str, user_id: str = "system", key_name: str = "unknown") -> str:
     """
-    Handle JSON secret values (like Google service account keys).
-    Validates that the value is valid JSON.
+    Handle JSON secret values (like Google service account keys) with encryption/decryption support.
+    Validates that the value is valid JSON after decryption.
 
     Args:
-        value: The JSON secret value to handle
+        value: The JSON secret value to handle (may be encrypted)
+        user_id: The user ID making the request (default: "system")
+        key_name: The name of the key being decrypted (default: "unknown")
 
     Returns:
-        str: Processed JSON secret value
+        str: Processed JSON secret value (decrypted if encrypted)
 
     Raises:
-        ValueError: If the value is not valid JSON
+        ValueError: If the value is not valid JSON after decryption
     """
-    import json
+    # Decrypt if encrypted
+    if _is_encrypted_value(value):
+        decrypted_value = decrypt_value(value, user_id, key_name)
+    else:
+        decrypted_value = value
+
     try:
         # Validate that it's valid JSON
-        json.loads(value)
-        return value
+        json.loads(decrypted_value)
+        return decrypted_value
     except json.JSONDecodeError as e:
         raise ValueError(f"Invalid JSON in secret value: {e}")
 
+# Global rate limiting tracking
+# Structure: {user_id: {key_name: [timestamps]}}
+_decryption_attempts = {}
+
+def _get_master_key() -> bytes:
+    """
+    Get the master key for the current environment.
+
+    Returns:
+        bytes: The master key for encryption/decryption
+
+    Raises:
+        ValueError: If no master key is found
+    """
+    # Get the key location from config
+    key_location = APP_CONFIG.get('APP_KEY_SYSVAR')
+    env_type = APP_CONFIG.get('APP_ENV_TYPE', 'dev')
+
+    if not key_location:
+        raise ValueError("APP_KEY_SYSVAR not configured")
+
+    # First try to get from environment variable
+    master_key = os.environ.get(key_location)
+
+    if master_key:
+        # If found in environment, use it directly
+        return master_key.encode('utf-8')
+
+    # If not in environment, try to read from file
+    if os.path.exists(key_location):
+        try:
+            with open(key_location, 'r') as f:
+                content = f.read().strip()
+
+            # Parse the key file format: env = key
+            lines = content.split('\n')
+            for line in lines:
+                line = line.strip()
+                if not line or line.startswith('#'):
+                    continue
+
+                if '=' in line:
+                    key_env, key_value = line.split('=', 1)
+                    key_env = key_env.strip()
+                    key_value = key_value.strip()
+
+                    if key_env == env_type:
+                        return key_value.encode('utf-8')
+
+            raise ValueError(f"No key found for environment '{env_type}' in {key_location}")
+
+        except Exception as e:
+            raise ValueError(f"Error reading key file {key_location}: {e}")
+
+    raise ValueError(f"Master key not found. Checked environment variable '{key_location}' and file path")
+
+def _derive_encryption_key(master_key: bytes) -> bytes:
+    """
+    Derive a 32-byte encryption key from the master key using PBKDF2.
+
+    Args:
+        master_key: The master key bytes
+
+    Returns:
+        bytes: 32-byte derived key suitable for Fernet
+    """
+    # Use a fixed salt for consistency (in production, consider using a random salt stored separately)
+    salt = b'poweron_config_salt_2025'
+
+    kdf = PBKDF2HMAC(
+        algorithm=hashes.SHA256(),
+        length=32,
+        salt=salt,
+        iterations=100000,
+    )
+
+    return base64.urlsafe_b64encode(kdf.derive(master_key))
+
+def _is_encrypted_value(value: str) -> bool:
+    """
+    Check if a value is encrypted (starts with environment-specific prefix).
+
+    Args:
+        value: The value to check
+
+    Returns:
+        bool: True if encrypted, False otherwise
+    """
+    if not value or not isinstance(value, str):
+        return False
+
+    # Check for environment-specific encryption prefixes
+    env_type = APP_CONFIG.get('APP_ENV_TYPE', 'dev').upper()
+    expected_prefix = f"{env_type}_ENC:"
+    return value.startswith(expected_prefix)
+
+def _get_encryption_prefix(env_type: str) -> str:
+    """
+    Get the encryption prefix for the given environment type.
+
+    Args:
+        env_type: The environment type (dev, int, prod, etc.)
+
+    Returns:
+        str: The encryption prefix
+    """
+    return f"{env_type.upper()}_ENC:"
+
+def _check_decryption_rate_limit(user_id: str, key_name: str, max_per_second: int = 10) -> bool:
+    """
+    Check if decryption is allowed based on rate limiting (max 10 per second per user per key).
+
+    Args:
+        user_id: The user ID making the request
+        key_name: The name of the key being decrypted
+        max_per_second: Maximum decryptions per second (default: 10)
+
+    Returns:
+        bool: True if allowed, False if rate limited
+    """
+    current_time = time.time()
+
+    # Initialize tracking for this user if not exists
+    if user_id not in _decryption_attempts:
+        _decryption_attempts[user_id] = {}
+
+    # Initialize tracking for this key if not exists
+    if key_name not in _decryption_attempts[user_id]:
+        _decryption_attempts[user_id][key_name] = []
+
+    # Clean old attempts (older than 1 second)
+    _decryption_attempts[user_id][key_name] = [
+        timestamp for timestamp in _decryption_attempts[user_id][key_name]
+        if current_time - timestamp < 1.0
+    ]
+
+    # Check if we're within rate limit
+    if len(_decryption_attempts[user_id][key_name]) >= max_per_second:
+        logger.warning(f"Decryption rate limit exceeded for user '{user_id}' key '{key_name}' ({max_per_second}/sec)")
+        return False
+
+    # Record this attempt
+    _decryption_attempts[user_id][key_name].append(current_time)
+    return True
+
+def encrypt_value(value: str, env_type: str = None, user_id: str = "system", key_name: str = "unknown") -> str:
+    """
+    Encrypt a value using the master key for the specified environment.
+
+    Args:
+        value: The plain text value to encrypt
+        env_type: The environment type (dev, int, prod). If None, uses current environment.
+        user_id: The user ID making the request (default: "system")
+        key_name: The name of the key being encrypted (default: "unknown")
+
+    Returns:
+        str: The encrypted value with prefix
+
+    Raises:
+        ValueError: If encryption fails
+    """
+    if env_type is None:
+        env_type = APP_CONFIG.get('APP_ENV_TYPE', 'dev')
+
+    try:
+        master_key = _get_master_key()
+        derived_key = _derive_encryption_key(master_key)
+        fernet = Fernet(derived_key)
+
+        # Encrypt the value
+        encrypted_bytes = fernet.encrypt(value.encode('utf-8'))
+        encrypted_b64 = base64.urlsafe_b64encode(encrypted_bytes).decode('utf-8')
+
+        # Add environment prefix
+        prefix = _get_encryption_prefix(env_type)
+        encrypted_value = f"{prefix}{encrypted_b64}"
+
+        # Log audit event for encryption
+        try:
+            from modules.shared.auditLogger import audit_logger
+            audit_logger.log_key_access(
+                user_id=user_id,
+                mandate_id="system",
+                key_name=key_name,
+                action="encrypt"
+            )
+        except Exception:
+            # Don't fail if audit logging fails
+            pass
+
+        return encrypted_value
+
+    except Exception as e:
+        raise ValueError(f"Encryption failed: {e}")
+
+def decrypt_value(encrypted_value: str, user_id: str = "system", key_name: str = "unknown") -> str:
+    """
+    Decrypt a value using the master key for the current environment.
+
+    Args:
+        encrypted_value: The encrypted value with prefix
+        user_id: The user ID making the request (default: "system")
+        key_name: The name of the key being decrypted (default: "unknown")
+
+    Returns:
+        str: The decrypted plain text value
+
+    Raises:
+        ValueError: If decryption fails
+    """
+    if not _is_encrypted_value(encrypted_value):
+        return encrypted_value  # Return as-is if not encrypted
+
+    # Check rate limiting (10 per second per user per key)
+    if not _check_decryption_rate_limit(user_id, key_name, max_per_second=10):
+        raise ValueError(f"Decryption rate limit exceeded for user '{user_id}' key '{key_name}' (10/sec)")
+
+    try:
+        # Extract the encrypted part (remove prefix)
+        env_type = APP_CONFIG.get('APP_ENV_TYPE', 'dev')
+        env_type_upper = env_type.upper()
+        expected_prefix = f"{env_type_upper}_ENC:"
+
+        if not encrypted_value.startswith(expected_prefix):
+            raise ValueError(f"Invalid encryption prefix. Expected {expected_prefix}")
+
+        encrypted_part = encrypted_value[len(expected_prefix):]
+
+        # Get master key and derive encryption key
+        master_key = _get_master_key()
+        derived_key = _derive_encryption_key(master_key)
+        fernet = Fernet(derived_key)
+
+        # Decode and decrypt (the stored token is base64-wrapped a second time)
+        encrypted_bytes = base64.urlsafe_b64decode(encrypted_part.encode('utf-8'))
+        decrypted_bytes = fernet.decrypt(encrypted_bytes)
+        decrypted_value = decrypted_bytes.decode('utf-8')
+
+        # Log audit event for decryption
+        try:
+            from modules.shared.auditLogger import audit_logger
+            audit_logger.log_key_access(
+                user_id=user_id,
+                mandate_id="system",
+                key_name=key_name,
+                action="decrypt"
+            )
+        except Exception:
+            # Don't fail if audit logging fails
+            pass
+
+        return decrypted_value
+
+    except Exception as e:
+        raise ValueError(f"Decryption failed: {e}")
+
 # Create the global APP_CONFIG instance
 APP_CONFIG = Configuration()
\ No newline at end of file
diff --git a/notes/changelog.txt b/notes/changelog.txt
deleted file mode 100644
index e10e683a..00000000
--- a/notes/changelog.txt
+++ /dev/null
@@ -1,1206 +0,0 @@
-
-TODO
-
-# System
-- database
-- db initialization as separate function to create root mandate, then sysadmin with hashed passwords --> using the connector according to env configuration
-- settings: UI page for: db new (delete if exists and init), then to add mandate root and sysadmin, log download --> in the api to add connector settings with the according endpoints
-- access model as matrix, not as code --> to have view, add, update, delete with the rights on level table and attribute for all, my (created by me), my mandate (mandate I am in), none (no access)
-- document handling centralized
-- ai handling centralized
-- neutralizer to activate AND put back placeholders to the returned data
-
-# Tests
-- workflow continue after stop, to run normally
-- add a prompt --> then shall be visible in the workflow to select
-- MSFT connection with 2 different users
-- run the chat 3x with different mail recipients, test whether the round logic takes effect
-- manual task retry - triggered
-
-- check method outlook: everything
-- check method sharepoint: everything
-- check method webcrawler: everything
-- check method google: everything
-- check summarization of 10 documents >10 MB
-- test case: job application
-
-# Ida changes gateway:
-- Polling endpoint + documentation for it
-- files integrated in documents --> document endpoint for files
-- prompts in chat endpoint
-
-# DOCUMENTATION
-Design principles
-- UI: Module classes for data management (CRUD tables & forms --> formGeneric)
-- Basic: All timestamps to be timezone aware, error-handling routines
-- Backend: All external components to attach over connectorXxx --> interfaceXxx --> our codebase
-- all model definitions in interfaceXxxModel
-- action functions for ai: why to use documentList and not just document as input parameter? --> to have full flexibility to pass either a list of documents or a documentList
-
-********************
-
-
-INIT
-
-conda activate poweron
-cd gateway
-pip install -r requirements.txt
-python app.py
-
-
------------------------ OPEN
-
-
-Tools to transfer incl funds:
-- Google SERPAPI (shelly)
-- Anthropic Claude (valueon + shelly)
-- Cursor Pro
-- Mermaid
-- Github Pro
-
-
------------------------ DONE
-
-
-FRONTEND
-- the application initiation gets userdata with the token over apiCall.js:/api/local/me --> object:
-    username
-    fullName
-    email
-    language
-    list of connections with attributes:
-        id
-        authority
-        externalUsername
-
-
-Backend
-
-in the backend to handle the routes as follows:
-- routeSecurityLocal.py to handle all local endpoints, to include token generation from local authority in auth.py
-- routeSecurityMsft.py and routeSecurityGoogle.py to handle all their endpoints
-- all routeSecurity*.py to use the same interface to manage tokens and userdata: serviceUserClass.py. This class to have the following logic:
-  - all tokens are stored in one table, where each token has the attribute of the according authenticationAuthority
-  - login and logout endpoints for "local" use a function "getUseridFromToken" to identify the user context. If the user does not exist, error message
-  - login and logout endpoints for "msft" and "google" use a function "getUseridFromToken" to identify the user context. If the user does not exist for login, to register a new "local" user with the external user data and to attach the external connection. Within the identified user context and the connection in its list, to send back the user context as tokenLocal and the connection as tokenExt
-  - the important thing is that the login endpoint serves two different actions:
-    a) without user context (no tokenLocal), it performs login for a user by external authority and sets the user context
-    b) with user context (a tokenLocal provided), it does NOT set a new user context, but manipulates a connection in the connection list of a local user
-  - illustrative example of token data to send to the UI (attributes):
-    {
-      "token_type": "Bearer",
-      "expires_in": ,
-      "access_token": ,
-      "id_token": ,
-      "client_info": ,
-      "user_info": {
-        "name": "Patrick Motsch",
-        "email": "p.motsch@valueon.ch",
-        "id": "xxx"
-      },
-      "mandateId": "",
-      "userId": "",
-      "id": "tokenid",
-    }
-
-
-We have to correct the following wrong user access management.
-
-Issue: when a user logs in with a "local" managed account and then logs in to an MSFT account with "msft" authority, the userid is switched to the Microsoft instance in the workflow. This must not happen.
-Objective: The correct logic is that a user logs in with an account (managed by "local" or another authority). Once logged in, his login does not change, even if he connects to a Microsoft account afterwards.
-
-Problem: We have a mix between user-login (creating the currentUser profile) and user-connections (attaching a user to a service, like "msft" - and future other services in parallel).
-
-Concept: We need to separate user-login and user-connections:
-  1. the UI login and register modules produce a user-login, resulting in a currentUser profile in the backend to be used for workflow and other activities. The user gets a token (from "local" or "msft" or others). This token has to be checked when the user logs in. ALWAYS a check is required by the according registration authority.
-     Those use cases:
-     - if a user registers with a "local" profile, a new user is created, a local token is produced
-     - if a user logs in with a "local" profile for an existing user, a local token is produced
-     - if a user logs in with a "local" profile for a non-existing user, login is denied (no user)
-     - if a user logs in with a "msft" profile (or other foreign profile) for an existing user, a local token AND a token in the "msft" database (or other foreign system) is produced
-     - if a user logs in with a "msft" profile (or other foreign profile) for a non-existing user, a local profile is generated based on information from the foreign account, then a local token AND a token in the "msft" database (or other foreign system) is produced
-
-  2. the UI navigation buttons for "Login MSFT" or future other buttons to connect to services (like e.g. a google account or github account or microsoft "msft" account, etc.) do NOT generate a user-login, only a user-connection to a service.
-
-Solution:
-So there must be a mechanism which manages user-login and user-connection. Proposition: a user has a user profile to log in with and a list of profiles for user-connections.
-Examples:
-- user registers with "local" profile --> he gets a local profile with 0 user-connections
-- user registers with "msft" profile --> he gets a local profile with 1 user-connection to "msft". Then he connects to another "msft" profile. Now he has a local profile with 2 user-connections "msft"
-- user registers with "google" profile (future) --> he gets a local profile and 1 user-connection to "google". Then he connects to another "msft" profile. Now he has a local profile and 1 user-connection "msft" and 1 user-connection "google".
-
-Can you tell me how you would implement this adapted model into the pydantic model and into the code modules in a structured and maintainable way? (A sketch follows below.)
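A minimal pydantic sketch of the user-login / user-connection split asked for above; the Connection fields follow the attribute list in the FRONTEND note (id, authority, externalUsername), everything else is illustrative:

from typing import List
from pydantic import BaseModel, Field

class Connection(BaseModel):
    """One user-connection to an external service (illustrative)."""
    id: str
    authority: str              # e.g. "local", "msft", "google"
    externalUsername: str

class User(BaseModel):
    """Local login profile; connections never replace the login identity."""
    id: str
    mandateId: str
    username: str
    fullName: str = ""
    email: str = ""
    language: str = "en"
    connections: List[Connection] = Field(default_factory=list)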
-I want to refactor the user management in the backend throughout the user journey. Currently we have two problems: we always pass _userid and _mandateid from function to function, which blocks scaling. This is too complicated and illogical.
-
-To adapt the following:
-
-1. The attributes _mandateid and _userid to be removed from @connectorDbJson.py. The attribute _userid to rename to "userId". This is the id of the user who creates the record. This is the passed attribute instead of _userid and _mandateid, which is stored as userId. The default value to be "" (if None, then set to ""). All newly created records get an additional "_createdBy" and "modifiedBy" attribute = self.userId. A modified record gets an adapted "modifiedBy" attribute = "userId" when modified.
-
-2. @gatewayModel.py to adapt class User: add mandateId. This is set to the same mandateId as the mandateId of the user who creates the user.
-
-3. @lucydomModel.py to adapt classes Prompt, FileItem, ChatWorkflow: add mandateId. This is set to the same mandateId as the mandateId of the user who creates the record.
-Also to add "workflowId" to ChatStat, it is missing there.
-
-4. @gatewayInterface.py and @lucydomInterface.py to adapt according to the changes of points 1, 2, 3. Also to integrate their according "*Model.py" to use for record creation with correct attributes.
-
-Also to separate class initiation and the function call getInterface().
-
-Class initiation without the parameters userid and mandateid. Initialize database and records. Like this it is ensured that when the first function call happens to the class, it is initiated correctly. Initiate the module class automatically on module loading.
-
-The function getInterface(currentUser with default value = None) does this (see the sketch after this list):
-- if currentUser is None, then only the database is initialized (e.g. for refreshing folders and files) and an empty object is given back with logger info for the database refresh
-- if currentUser is provided, then it uses the id of the user as context key, creates a new instance of the class, gives self.user=currentUser to the class to have user context, initializes the AI service self.aiService=ChatService(), initializes access control: self.access = LucydomAccess(self.currentUser, self.db)
-- now to adapt the code in the *Interface.py modules to use currentUser attributes. Like this we have proper object usage
-- modules.interfaces.*Interface to import as module and not the functions. This ensures that the module is initiated when imported.
-
-5. @auth.py: getRootInterface to call getInterface(rootUser), where rootUser is the user with initialId in the database (use a function for this)
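A compact sketch of the getInterface() contract described in point 4; ChatService and LucydomAccess are the names used in these notes, while the interface class, caching and database details are assumptions:

# Sketch only - names other than getInterface/ChatService/LucydomAccess are assumptions.
import logging

logger = logging.getLogger(__name__)
_instances = {}

def getInterface(currentUser=None):
    if currentUser is None:
        # No user context: only refresh the database (e.g. folders and files)
        LucydomInterface.initDatabase()          # hypothetical class-level init
        logger.info("Database refreshed, no user context - returning empty interface")
        return None

    if currentUser.id not in _instances:
        interface = LucydomInterface()           # class init takes no userid/mandateid
        interface.currentUser = currentUser      # user context for all calls
        interface.aiService = ChatService()      # AI service per instance
        interface.access = LucydomAccess(currentUser, interface.db)
        _instances[currentUser.id] = interface
    return _instances[currentUser.id]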
-FRONTEND:
-- login page and register page without fallback. They mandatorily have to load their login.html or register.html pages to work (no HTML in the code).
-
-I want the formGeneric module to use api calls over the apiCalls.js module, not directly. So please adapt the formGeneric parameter "apiEndpoint" with the respective api-functions as objects, handed over by the modules:
-- apiEndpoint.get --> the api to get data
-- apiEndpoint.update --> the api to update data
-- apiEndpoint.delete --> the api to delete data
-Then to use those api-functions in the module formGeneric instead of direct api calls.
-The modules mandates, users, files, prompts to adapt accordingly.
-
-- all api calls from workflowUI.js and workflowData.js also to transfer to apiCall.js. There to integrate ALL route endpoints from all routes and to call over window.utils.api.....
-- handleFileUpload and uploadFile are in many places. To have the api functionality only in apiCall.js.
-
-Please refactor those topics:
-- all api calls from workflowUI.js and workflowData.js also to transfer to apiCall.js. There to integrate ALL route endpoints from all routes and to call over window.utils.api.....
-- Functions for handleFileUpload and uploadFile are in many places. To have the api functionality only in apiCall.js.
-No api-relevant code in other modules than apiCall.js.
-In apiCalls.js to remove the generic functions get, post, put, delete from the public set. Those not to expose. Only the specific endpoints from the routes to expose.
-If more than 3 changes in a module, give me the full module. Otherwise tell me the parts to change.
-
-Please enhance this:
-- config & env variables integration to have config variables in the globalState set in category "config" integrated.
-
-Cleanup utils.js:
-- remove all elements in the context of workflow and messages. Those elements have to be integrated within workflow... modules. Some functions within utils.js are not used anymore anyway, so to remove anyway.
-- extract all api-call functions to a separate submodule "apiCalls.js". There to implement one interface function for each api-call. All calls to put into one object "api" to be accessed.
-- at the end utils.js shall only include config & environment data management, show general toast and error, uiUtils, dataUtils
-- in workflow... modules there are some redundant functions like in utils.js (e.g. showToast, showError, etc.). Those to remove in workflow and to get from utils.js
-- utils data shall be accessible within those categories:
-  - window.utils.api --> the functions from apiCalls.js
-  - window.utils.ui --> what is in uiUtils currently, plus showError, showToast and similar
-  - window.utils.data --> what is in dataUtils currently plus handleFileUpload
-Adapt other modules accordingly. For workflowUi.js only give me the parts to adapt. If only 1-3 adaptions for a module, just give me the changes. Otherwise the revised module.
-
-Please adapt module workflowUi.js with this input (the other modules have already been adapted):
-- Error handling for file parsing failures to add
-- Clear indication of workflow completion status
-- The message object structure to fully match the documented model
-- Status field handling to be exactly and only the implementation according to the documentation (adjustment to recognize "first", "step", "last")
-- File preview to better handle the documented document structure
-- File actions to use the correct API paths
-- log progress indicator implementation to improve, e.g. the feature to collapse/expand details
-- Agent-specific log formatting to fully match the documented model
-Updates Required:
-- Update message rendering to handle status field correctly
-- Improve file preview to handle documented document structure
-- Update API paths for file operations
-- Add better indication of workflow completion status
-- Improve log progress indicator implementation
-Also remove unused functionality and objects.
-
-Can you adapt the following two modules? The modules workflowCoordination.js and workflowData.js have already been updated.
-Please remove unused functionality and objects.
-
-Please adapt module workflow.js with this input:
-Updates Required:
-- Implement explicit state machine transitions
-- Update API interaction to match documented endpoints
-- Improve error handling to match documented failure states
-- Align status handling with the documented state transitions
-- Ensure proper handling of the "last" message status
-
-Please adapt module workflow.js with this input:
-- adapt: Explicit handling of the workflow status transitions per state machine, clean separation of workflow states according to the documentation
-- The workflow state management to align with the documented state machine
-- Status transition handling to be more explicit
-- Verify API paths and request structures
-- Response handling to match the documented workflow object
-Updates Required:
-- Implement explicit state machine transitions
-- Update API interaction to match documented endpoints
-- Improve error handling to match documented failure states
-- Align status handling with the documented state transitions
-- Ensure proper handling of the "last" message status
-
-Please adapt module workflowCoordination.js with this input:
-- the workflow state object structure to be updated to match documentation
-- Status transitions to follow the documented state machine
-- message status handling to properly handle message status ("first", "step", "last")
-Updates Required:
-- Update workflowState object to match documented model
-- Implement proper status transitions (null → running → completed/failed/stopped)
-- Ensure message status field handling ("first", "step", "last")
-- Ensure correct polling mechanism with log/message IDs
-- Add missing getWorkflowStatus() function
-- Fix the updateWorkflowStatus() function to handle all status transitions
-
-Please adapt module workflowData.js with this input:
-- estimateJsonSize not to be in the frontend. Data stats are delivered in the workflow object with attribute "tokensUsed"
-- pollWorkflowStatus to implement
-- adapt object model discrepancies: workflow object structure to match the state machine docs, File object structure to follow the documented model
-- API endpoint paths to correct to be: /api/workflows/${workflowId}/logs?id=${workflowState.lastPolledLogId}; the same issue to adapt for the messages endpoint
-- Data handling: lastPolledLogId and lastPolledMessageId tracking variable paths to ensure correctness
-- uploadAndAddFile: no change, this happens in the backend
-- submitUserInput() and createWorkflow() to align response handling with the documented workflow object
-Updates Required:
-- Implement pollWorkflowStatus() function
-- Define estimateJsonSize() function
-- Fix API endpoint paths to match documentation (?logId= → ?id=)
-- Update object models to match documentation
-- Improve error handling according to the state machine
-- Fix file handling to match documented file object model
-
-Can you please refactor workflow_utils.js? The other documents we have.
-
-Attached: Frontend State Machine Documentation as ruleset for the refactoring, the current frontend
-
-To organize workflow... modules like this:
-1. Centralized State Management: Use a single state object that all modules reference.
-2. Event-Based Updates: Use a simple event system to trigger UI updates when state changes.
-3. Clear Separation of Concerns:
-   * Model: Manages workflow state and API communication
-   * View: Purely responsible for rendering the UI based on state
-   * Controller: Connects user actions to model updates
-
-Comments:
-- all variables and objects and functions and classes to name in camelCase, not in snake_case
-- Adapted routes to implement
-- I do not need backwards compatibility
-- please remove all unnecessary elements and provide smart, well structured code, which is maintainable
-
-New file names:
-- workflow.js - The main module as manager and coordinator
-- workflow_state.js - Centralized state management
-- workflow_api.js - API communication layer
-- workflow_ui.js - UI rendering layer
-
-Can you please refactor the backend with those inputs:
-
-Attached: Backend State Machine Documentation as ruleset for the refactoring
-
-Comments:
-- all variables and objects and functions and classes to name in camelCase, not in snake_case
-- Adapted routes to implement
-- I do not need backwards compatibility
-- please remove all unnecessary elements and provide smart, well structured code, which is maintainable
-
-If you need further documents, please tell me.
-
-I like your proposition. So do the refactoring according to your proposition to clean and structure with these documents:
-- workflow_presentation.js
-- workflow_presentation_core.js
-- workflow_presentation_components.js (here to group the functions accordingly for log, chat, files, ui)
-- workflow_presentation_utilities.js
-
-Can you also split the css files to:
-- styles_workflow.css --> here only to keep the basic formatting for the layout
-- styles_workflow_log.css
-- styles_workflow_chat.css
-- styles_workflow_files.css
-- styles_workflow_ui.css
-
-I would like to refactor the frontend to match the updated backend.
-
-Please do this:
-
-- General: Adapt to backend changes and simplify polling and frontend object statuses, remove unnecessary elements.
-- The Workflow object has only one attribute for the status of the workflow and for polling to know whether polling shall be active or not. This is "status" with value "completed" or "running". All other status objects for the workflow to remove.
-- polling start/finish and frontend element statuses only have to look at the "status" value of the workflow. Especially all the routines for the buttons "stop", "send" and animations rely only on this status.
-- based on this, create one centralized function which gets the workflow status and all other status changes in the frontend. Based on this, this function manages UI adaptations. So we have one maintainable place to control and debug all status changes.
-- for log entries to show in the console: always check the last log entry for a progress update. Logging is done such that progress information is also passed. Is this clear for you?
-
-What other simplifications or consolidations do you see to improve the code for clear debugging and maintainability?
-
-Please first give a review plan before writing code.
-
-Can you do the following adaptions for the workflow management for the frontend:
-- German comments in logs and prompts to translate to English. Where to adapt what?
-- AI calls to adapt for the user language if necessary (additional parameter in the lucydom AI call)
-
-- can you check all self.log_add(...) statements and rearrange them for the revised function call? They are for the progress of a workflow to show in the frontend. I want all messages to be in a standardized format and organized along the workflow, so the user understands the logical progress. Not too much information, but to show the relevant steps. Within loops to tell progress in percent by having a log_add in the loops (so to add a progress attribute to the function call)
-
-Please deliver adapted modules when more than 3 parts have to be adapted, otherwise the parts to adapt.
-
-Can you do the following adaptions?
-
-For the document class:
-- class Document to have a "data" attribute where the file data is stored in base64 format
-
-Based on this:
-- the task object for agents to enhance with this attribute
-
-For content in contents in documents, when adding a file to a document object:
-- to set "base64_encoded" if encoded. This should already be the case, to check
-
-When building a task for the agents:
-- ensure attribute "data" is integrated, containing the file data base64 encoded
-- in each content to deliver "data" as it is, optional "base64_encoded" attribute depending on the data format, to add attribute "data_extracted" and to store here the extracted data from the AI call
-
-Everywhere:
-- to remove base64 checks or tests. Only to use the base64_encoded attribute
-- to use the enhanced attributes for document ("data" containing file data in base64 format) and content ("data", "base64_encoded", "data_extracted")
-
-Please tell me where to adapt what in the code. I do not need fully new code.
-
-Please revise all chat_agents* modules:
-- all comments, logs and outputs in English language
-- all AI answers in the language of the user
-- no language-specific features like analysis of words. A prompt in Japanese would not work with this! I need it generic.
-- why are there still data extraction routines in the modules? - data is already delivered in the input_documents section.
-
-documentation agent:
-- why try to find out the document type, when the "label" of the files to deliver ALWAYS includes the extension (e.g. .docx, .csv, etc.)? Please revise, this can be very much shortened and simplified
-
-webcrawler_agent:
-- there is a try-except mapping problem in the code. Please also fix this
-
-Also attached chat.py and chat_content_extraction (centralized), so that you can see the structure of the passed parameters.
-
-Replace all explicit prompts.
-Can you compile for me where chat.py has explicit texts to the user in the messages? Imagine a Japanese user working with it, he would not understand them. The references of the code elements are enough.
-
-Clean up the agents registry incl. agents.
-
-Clean up the file upload & drag-and-drop so that the file is simply written to the db with the file in the file object.
-
-Function for the integration of a file into a message, based on a db-file-id or a document-part-from-agent; with this to fill all attributes incl. a summary per content --> one file per extractor type.
-
-Workflow:
-- NO FILES for the workflow!
-- All documents in message objects
-- Uploads only to store in the document object with the file inline and parsed into content[]
-
-Can you please rebuild your code proposal as class "ChatManager" in the module "chat.py" and deliver this class to me? Here is additional information and the documents.
-
-For the implementation of the functions please use the attached modules as a basis, but create all code anew, because today's code is far too long and has too many details on all levels. The implementation of the functions shall also be high-level, by moving all detail execution into basic functions.
-
-The attachments for this:
-- lucydom_model and lucydom_interface: data model and interface to the data model (we only work with the workflow object)
-- workflow.py: the router file which uses the functions of lucydom_interface over the gateway
-- agentservice_registry (old): registry of the agents, please create this anew and compactly as "chat_registry.py"
-- agentservice_base (old): template for agent definitions.
-
-Can you please do the following with the data model (it was adapted):
-
-1. Rework lucydom_interface.py so that it works correctly again with the adapted data model.
-
-2. Rework workflow.py so that the repeatedly identical functions of the routes are moved into helper functions, and rewrite all routines so that they do not call "agentservice_workflow_manager.py" but "chat.py". No implementations in the router function "workflow.py"; pass these into the chat.py function. Rewrite the route "submit_user_input" so that workflow_id can also be empty. Directly call the function "workflow_integrate_userinput".
-
-3. Implement the functions with these hints:
-
-workflow_integrate_userinput:
-- rename the parameter workflow to optional workflow_id. This can initially be None when a new workflow starts. Therefore first call the function workflow_init(workflow_id) to be implemented, which returns the workflow object.
-- in general, 2 communications are carried out:
-  a) "log_add" (renamed from "send_message_to_user") sends a log entry, implemented with the implementation in "lucydom_interface.create_workflow_log", and at the same time creates an "Info" entry in the logger
-  b) "message_add" stores a message in the workflow object. Implementation via lucydom_interface
-- Before step 1, store the message_user in the workflow as a new message
-- Instead of "# Send initial response", store the "user_response" as a message object in the workflow and also write the obj_answer and obj_workplan to the logger right away using a helper function "json2text()", which renders the json object as a readable, read-only structure object
-- send_message_to_user(step_info): write this as log_add
-- rename format_final_response to format_final_message and use it to create the final message object with the documents; then add it to the workflow with message_add
-- update_workflow(...) no longer needed, instead workflow_finish
-
-prompt_project_manager:
-- make only one type "doc_type" and give a definitive list of options for it, which comes from the function get_available_document_types()
-- the obj_workplan shall have a Dict per list element doc_input and doc_output with the elements "label", "doc_type". Here too, give the definitive list of possible values, which comes from the function get_available_document_types().
-
-workflow_init:
-- if the workflow_id is empty or does not exist, a new workflow is created, otherwise the existing workflow is loaded
-- the status values are set: status="running", started_at, last_activity=started_at
-
-workflow_finish:
-- the status values are set: status="stopped", last_activity
-
-message_add:
-- append the message to the workflow
-- the status values are set: last_activity, last_message_id
-
-get_available_agents:
-- call the function from the agents_registry
-
-get_available_document_types:
-- output a list of these doc types: text, csv, png, html
-
-summarize_workflow(workflow, prompt):
-- in the chronology of the messages from current to historical, fetch the summary per message with the function summarize_message(prompt). Output the summary with agent name, generated summary, and the list of documents each with their summary
-
-summarize_message(prompt):
-- generate the summary of the message with the prompt via an AI call. Output the summary with agent name, the generated summary of the content, and the list of documents each with their summary
-
-summarize_user_documents:
-- per document, summarize the content with the given prompt and output the list with [document.content: text]
-
-call_agent: not needed, AI calls can be made directly over the connector, which is included initially: "from connectors.connector_aichat_openai import ChatService"
-
-Can you create the python function for me to do the following? I want a compact function which contains no details except the prompt part up to and including the answer to the user. Please move all necessary data conversions and details into helper functions. These do not have to be implemented; only define their input and output.
-
-# Context
-
-The user delivers a request in the AI chat in a message object. This contains his prompt and a list of the supplied documents with their contents in the "message" object. Also available is the previous chat history in the "workflow" object.
-
-We are in the python function "workflow_integrate_userinput", where the user prompt arrives, i.e. these 2 parameters: "message_user" and "workflow".
-
-A list of agents is available. The agents are of this kind:
-- Loop: Executes repetitive tasks. It needs a list of documents and a prompt to apply to each document; it delivers a list of "content"
-- Coder: Executes python code. Needs as input a prompt, content and the specification of the result format.
-(more...)
-
-# Task
-
-Can you please compile the prompt for the project manager, which delivers the answer to the user?
-
-It shall do this:
-
-1. Deliver a list of results which the user needs for his answer as a json object "obj_answer". The answers of the project manager shall be delivered strictly in a given json format.
-
-2. Send the answer describing the approach to the user with the result documents as a list
-
-3. If inputs from agents (these are defined according to "obj_agents" with their properties) are needed for the answer or the results, specify these as a json list (I call it "obj_workplan"), stating which agent shall deliver which result
-
-Then the code shall do this:
-
-4. Let the agents execute according to obj_workplan and inform the user about each step. Collect the delivered documents as a list "obj_results". Serve each agent with the data objects according to its data structure.
-
-Then, based on the delivered documents, send the final answer to the user. Integrate documents of type "text" directly into the answer to the user. Reference the documents.
-
-Then in the code:
-
-5. Send the user the answer with the documents
-
-Each document shall be uniquely identifiable by its label. You have all document content labels in the workflow object.
-
-This object information for it:
-
-- data model for workflow including message:
-
-  workflow
-  - messages: list of message
-
-  message
-  - agent (who created the message)
-  - input (the input prompt)
-  - content (text)
-  - documents: list of document
-
-  document
-  - source
-  - contents: list of content
-
-  content
-  - label
-  - format: formatType
-  - data: the data of the content in the format according to formatType
-
-  formatType: [text, csv, jpg, gif, png]
-
-- obj_answer: json list with these attributes:
-  - label: document label (unique name in the documents list)
-  - doc_type_src: document type of the document to be delivered: [text, csv, png, html]
-  - doc_type_final: document type of the document to the user: [text, csv, jpg, gif, png, pdf, html, docx, xlsx]
-  - summary: summary of required document content
-
-- obj_workplan: json list with these attributes:
-  - agent: agent identifier based on the given agent list with the skills of the agents
-  - doc_output: List of label, doc_type_src (documents to deliver)
-  - prompt: Prompt to use for answer delivery and document-content-extraction
-  - doc_input: List of label, doc_type_src (documents to read with prompt)
-
-- obj_agents: per agent this information is available:
-  - name: its name, to call the according function
-  - skills: what this agent does
-  - input: data format in which the agent needs the information
-
-- obj_result: List of documents with label, format, data
-
-The content object shall be used consistently whenever content is passed.
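A minimal pydantic sketch of the data model listed above; the field names follow the notes, the types are assumptions:

from enum import Enum
from typing import List
from pydantic import BaseModel, Field

class FormatType(str, Enum):
    text = "text"
    csv = "csv"
    jpg = "jpg"
    gif = "gif"
    png = "png"

class Content(BaseModel):
    label: str
    format: FormatType
    data: str                      # payload in the format given by FormatType

class Document(BaseModel):
    source: str
    contents: List[Content] = Field(default_factory=list)

class Message(BaseModel):
    agent: str                     # who created the message
    input: str                     # the input prompt
    content: str = ""              # text
    documents: List[Document] = Field(default_factory=list)

class Workflow(BaseModel):
    messages: List[Message] = Field(default_factory=list)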
-backend: all object actions in interfaces generic for the objects in models for CRU-methods
-
-We have here an AI agents workflow.
-
-A big problem is document extraction. I uploaded a pdf file with a picture inside. In the database I see that the document has 1 content, "text" with an endline, marked as "is_extracted=True". It is missing the picture inside the pdf.
-
-I would like to have the following implementation for files in a workflow:
-
-How do documents arrive in the workflow?
-a) user input with upload or drag & drop: the file shall be stored in the database (files) and its content stored in the workflow message as a documents item with reference to the file_id in the database. All contents of the file will be stored as content items in the document item of the message object. According to the content type they will be extracted as text or as a base64 string (e.g. images). The document id will be a uuid and the document-source id the integer from the object in the database "files"
-b) produced documents delivered by the agents: exactly the same as a)
-
-The content provided to an agent will now be a document consisting of the content of all previous messages including the extracted content of the documents within the messages. The extracted content of the documents is produced for each content of the document:
-- for text: an AI call with the extraction prompt delivers the text to be integrated
-- for an image (it is available as base64 content): an AI call with the extraction prompt delivers the text to be integrated
-
-Like this we no longer have the problem that file content is not found by the agents.
-
-For the code implementation I see a big opportunity to massively reduce code. To build basic methods to be used everywhere (sketched below):
-1. function "document_store_upload(message_id, fileName, filepath...)" --> function to store an uploaded or drag & drop document from the user and return the document object. This function does the steps for a) respectively b) as described above and identifies the filetype
-2. function "document_store_agent(message_id, fileName, document_content, document_type...)" --> function to store the produced document from the agent and return the document object. This function does the steps as described in section a) above
-3. function "document_get_from_message()"
-
-Based on these 3 functions all operations can be done much more comfortably in the workflow, but also in connection with the UI (download file, copy file, preview file), because all references to the files are always ensured.
-
-Can you analyze this idea?
-What did I not yet consider that would be relevant for the current code to adapt?
-How big is the effort to have this logic implemented?
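A hedged signature sketch of the three basic methods proposed above; the names and rough parameters come from the notes, while all types and bodies are placeholders:

import uuid
from typing import List, Optional

def document_store_upload(message_id: str, fileName: str, filepath: str) -> dict:
    """Store an uploaded / drag-and-drop file in the files table, extract its
    contents (text directly, images as base64), and attach the resulting
    document (id=uuid4, source=file_id) to the message. Placeholder body."""
    raise NotImplementedError

def document_store_agent(message_id: str, fileName: str,
                         document_content: str, document_type: str) -> dict:
    """Store a document produced by an agent, following the same steps as an
    upload, and return the document object. Placeholder body."""
    raise NotImplementedError

def document_get_from_message(message_id: str,
                              label: Optional[str] = None) -> List[dict]:
    """Return the document objects attached to a message, optionally filtered
    by label. Placeholder body."""
    raise NotImplementedError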
Bei einfachen Berichten eher eine Zusammenfassung, bei komplexen Berichten mit Kapiteln arbeiten. - - -PowerOn Message: Kannst Du einbauen, dass bei einem User Prompt, welcher in irgend einer Sprache fragt, "was PowerOn ist", dass dann die Rückmeldung is der Sprache der Anfrage etwas in dieser Art ist (bitte schön formulieren): *Ich bin glücklich, Teil der PowerOn Familie zu sein, welche sich dafür einsetzt, dass wir einander unterstüzzen und Gutes tun". - - - -DOKUS -Doku des Systems für Investoren (Hi-level Struktur, Integrationsfähigkeit und Skalierbarkeit) -Doku des Systems für Code Integration -Release Notes (was kann das Teil) -Log der Anpassungen -Systemarchitektur (Grundsätze der Architektur, Komponenten und deren Aufbau) - - - - -# WORKFLOW EXECUTION - -Die workflow execution soll so angepasst werden: -1. Der Workflow startet wie bisher bis und mit message initialisierung -2. Dann wird über den AI Call der Arbeitsplan erstellt, welcher als Resultat eine Liste der Aktivitäten liefert, die auszuführen sind. Pro Schritt ist strukturiert erfasst: - - Was ist im Schritt zu tun? Dies als AI Prompt, um anschliessend die Agenten für den Schritt zu definieren - - Welche Daten sind dazu nötig? Dies formuliert als AI Prompt an den Dateien-Manager - - Welches Resultat soll geliefert werden? - Strukturierte Angabe von Formatvorgaben (z.B. "Liste von Dateien","Text","JSON", "Tabelle", etc.) -3. Nun wird die Liste der Aktivitäten abgearbeitet. Pro Aktivität erfolgt dies: - - Agenten mit ihren Eigenschaften und dem Resultatformat zusammenstellen - - Mit AI Call festlegen, welche Agenten in welcher Reihenfolge nötig sind. - - Nun die Agenten schrittweise ausführen lassen. dazu diese schritte pro agent: - -- message object mit prompt und der angabe des letzten message objectes im workflow vorbereiten - -- Mit dem Hilfsmodul "agentservice_dataextraction.py" die nötigen Daten aus dem Workflow extrahierenund dem message object des agenten zufügen. Im Hilfsmodul noch das Objekt messages definieren. - -- agent liefert das resultat, welches als message object im workflow ergänzt wird. -4. Nun die Zusammenfassung der durch die agenten erstellten resultate für den User erstellen und ebenfalls als message im workflow speichern. - - -# CODE STRUKTUR - -Aktuell hat es in jedem Modul und auch im Hauptmodul von agentservice* detaillierten Code drin. Kannst Du im gleichen Zug den Code aufräumen, dass "agentservice_workflow_manager" als master-modul nur funktionen aufruft und nicht noch details bearbeitet. so kann der workflow besser geführt werden. - -Die Meldungen im "_add_log()" sowie die Logger-Mledungen sind unübersichtlich und helfen kaum zur Analyse. Bitte diese Meldungen anhand des Workflows strukturieren und auch die Moderator-Anweisungen (zusammengefasst im _add_log und mit den parametern (lange texte gekürzt) im logger) ausgeben, damit eine Fehlersuche einfacher ist. - -Bitte Hilfsfunktionen, welche überall immer wieder verwendet werden, in ein utility modul auslagern. Als Idee Dinge wie -- Class mit Methoden zum lesen, schreiben, extrahieren von messages im workflow inklusive Typenkonversion von Dict in str. Dass ich z.B. schreiben kann (nur als idee, gibt eventuell schlauere funktionen): workflow(id).documents.extract_by_prompt(prompt).to_str() -- Bitte analysiere den code, was an Funktionen Sinn macht - -Allenfalls noch andere Themen, die helfen, den Code zu vereinfachen. 
Das Ziel soll es sein, dass der Workflow und die Agentencodes nicht jedes Detail immer codiert haben müssen mit immer wieder fehlerabfangroutinen, sondern dass wie auf vordefinierte module zugreifen können und diese durchgängig nutzen. damit soll der code massiv verkürzt werden. - - - - - -# DATEIEN EINLESEN - -Wenn eine Datei/File (in der datenbank ein Dokument) als Text lesbar ist (txt, csv, html, Text in Pdf etc.), dann wird der text des dokumentes direkt ausgelesen und als DocumentContent in der DB erfasst --> is_extracted=True. Wenn ein Dokument nicht als Text lesbar ist (Bilder, Videos, Bilder in PDF etc.), dann wird der text des entsprechenden DocumentContent nicht extrahiert, also is_extracted=False. (hinweis: Die extraktion findet dann erst im workflow mit einem prompt statt.) - - -# AGENTEN - -In jedem Agenten-Profil ein Attribut ergänzen, welches spezifisch angibt, in welchem Format der Agent das Resultat zurückliefert (z.b. "DocumentID" oder "Text" oder "List of ..." etc.). - - -# HILFSFUNKTIONEN - -1. data_extraction(prompt) --> messages: ai call durchführen mit einer liste aller dateien mit ihren metadaten und aller messages im workflow. mit dem prompt prüfen, welche inhalte von welchem datenobjekt erforderlich sind. das resultat soll eine liste sein, welche pro datenobjekt den prompt enthält, um die nötigen daten zu extrahieren. diese liste abarbeiten (falls ein dokument den inhalt nicht extrahiert hat, diesen nun mit der entsprechenden funktion extrahieren; bild-extraktion ist bereits als funktion verfügbar) und die extrahierten daten mit ihren kontext-informationen als strukturiertes text-object zurückgeben (metadaten mit extrahierten inhalten) - - -# ZUSAETZLOCHE AGENTEN - -NEU: Der Filecreator kann dies tun, welche relevant für seine Fähigkeiten sind: -Datei erstellen --> Document object in der Datenbank mit dem mitgelieferten inhalt und datentyp erzeugen und die id zum Datenobjekt zurückliefern - - -Implementieren: Coder -Dieser soll python code generieren und als Parameter die verfügbaren Funktionen im Umsystem (z.b. für Files laden und speichern) (als Erweiterung im Beispiel soll pro Funktion angegeben werden, welche Parameter und welches Resultat-format, hier ein geeigneter vorschlag von dir bitte). Den Code anschliessend ausführen, so wie im Code Beispiel "_code_exec_temp.py". Dann das Resultat zurückgeben. - - - - -Workflow module refactored - -Summary of Changes -I've refactored the workflow module into separate modules with clear responsibilities: -1. workflow.js - Main Coordinator - -Acts as the central controller for the workflow functionality -Coordinates interactions between all other modules -Manages the workflow lifecycle (starting, stopping, resetting) -Contains minimal direct DOM manipulation -Maintains the core workflow state - -2. workflow_ui.js - UI Rendering and Layout - -Handles all DOM rendering functionality -Manages layout changes (resize, expand/collapse) -Sets up UI-related event listeners -Updates visual status (buttons, statistics) -Completely separates UI concerns from data and business logic - -3. workflow_data.js - Data Management - -Handles all API communication (via utils.js) -Centralizes state management for workflow data -Processes data from API responses -Manages file references and retrieval -Handles data statistics tracking - -4. workflow_features.js - Feature Modules - -Manages chat functionality -Handles file upload/processing -Controls log management -Processes user input -Encapsulates drag-and-drop functionality - -5. 
Workflow module refactored

Summary of Changes
I've refactored the workflow module into separate modules with clear responsibilities:

1. workflow.js - Main Coordinator
- Acts as the central controller for the workflow functionality
- Coordinates interactions between all other modules
- Manages the workflow lifecycle (starting, stopping, resetting)
- Contains minimal direct DOM manipulation
- Maintains the core workflow state

2. workflow_ui.js - UI Rendering and Layout
- Handles all DOM rendering functionality
- Manages layout changes (resize, expand/collapse)
- Sets up UI-related event listeners
- Updates visual status (buttons, statistics)
- Completely separates UI concerns from data and business logic

3. workflow_data.js - Data Management
- Handles all API communication (via utils.js)
- Centralizes state management for workflow data
- Processes data from API responses
- Manages file references and retrieval
- Handles data statistics tracking

4. workflow_features.js - Feature Modules
- Manages chat functionality
- Handles file upload/processing
- Controls log management
- Processes user input
- Encapsulates drag-and-drop functionality

5. workflow_utils.js - Helper Functions
- Contains shared utility functions
- Text formatting helpers
- File-related utility functions
- Error/dialog management
- Data validation and conversion

* Refactoring assignment: workflow system overhaul *

## Overview
This assignment covers a complete overhaul of the workflow flow, both in the frontend and in the backend. The goal is a simplification of the user interface, better modularization of the code, optimization of the data management, and the addition of a delete function for individual messages to limit the data volume.

## Frontend requirements

### 1. Remove UI elements
- Complete removal of the "Enter prompt" section
- Removal of all buttons in the "Execution & Results" area, except for the "Stop workflow" button, which should only be visible during an active workflow
- The "Stop workflow" button should be hidden automatically as soon as a user input is requested
- Removal of the display of the selected workspace in index.html

### 2. Extract the user-input module
- Extract all user-input functions from "workflow.js" into a new, separate module "workflow_userinput.js"
- The new module should be used both for the initial prompt and for all further user answers in the workflow

### 3. Functionality of the user-input module
The new "workflow_userinput.js" module should contain the following functions:
- Detection of when a user input is required (initially and whenever the user agent is called)
- Allowing the selection of predefined prompts
- File upload and drag & drop functionality
- Sending the prompt to the backend together with the workflowid, if present
- Implementation of a delete function ("x") for every message and attached file in the chat log

### 4. Message delete function
- Every message in the multi-agent chat log gets an "x" button for deletion
- The delete function should also be implemented for files within a message
- Seamless API integration with the new DELETE endpoint for messages

## Backend requirements

### 1. Route "workflow.py"
- Reduction to minimal routing functionality
- Move all implementation logic into the "agentservice_workflow_manager"
- Add a new endpoint: `DELETE /api/workflows/{workflow_id}/messages/{message_id}` (see the sketch after this section)
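A sketch of the new DELETE endpoint named above, using FastAPI as in the rest of the gateway. The in-memory store and the handler body are placeholders for the real agentservice_workflow_manager / lucydom_interface calls; only the route path and the 404 behavior come from the requirements.

```python
from fastapi import APIRouter, HTTPException

router = APIRouter()

# In-memory stand-in for the workflow store.
workflows: dict[int, dict] = {1: {"messages": {10: {"text": "hello"}}}}

@router.delete("/api/workflows/{workflow_id}/messages/{message_id}")
async def delete_message(workflow_id: int, message_id: int):
    workflow = workflows.get(workflow_id)
    if workflow is None or message_id not in workflow["messages"]:
        # 404 when the message to delete cannot be found, as required below.
        raise HTTPException(status_code=404, detail="Message not found")
    # Remove the message data completely, also from the backend storage.
    del workflow["messages"][message_id]
    return {"status": "deleted", "message_id": message_id}
```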
### 2. Workflow manager logic
Rework of the "agentservice_workflow_manager" with the following flow logic:
1. Workflow initialization:
   - For a new workflow: initialization with an empty messages object
   - For an existing workflow: take over the existing messages object

2. Message object management:
   - Start a new message object for every interaction
   - Full use of the data model from "lucydom_model.py"
   - Correct storage in the database

3. File preparation:
   - Creation of file contexts and integration into the newest message object
   - Extraction and storage of file contents
   - Formatting of the data for processing by the agents

4. Agent workflow:
   - Initialization of the available agents, including the user agent
   - Implementation of the moderator decision logic (already implemented) -> either a list of agents is processed (WITHOUT the user agent!), or the user agent is called
   - 4a) Execution of the agents in the defined order
   - 4b) Conclusion with the user agent and a prompt request (note: pass the workflow id along, so that after the user's answer is sent in the frontend, the workflow continues at point 1. In the backend, however, it ends here.)

5. Message delete function:
   - Implementation of the delete logic for individual messages
   - Complete removal of the message data, also from the backend storage

## Affected files

### Frontend files:
1. `workflow.js` - Extensive rework and removal of the user-input functionality
2. `workflow_userinput.js` - New file for the extracted user-input functionality
3. `index.html` - Removal of the UI elements that are no longer needed and integration of the new module
4. `main.js` - Adjustments for the changed modularity
5. `globalState.js` - Adjustments for the changed workflow structure, if needed
6. `utils.js` - Extension with the new DELETE function for messages

### Backend files:
1. `workflows.py` - Simplification and addition of the new DELETE endpoint
2. `agentservice_workflow_manager.py` - Extensive rework of the workflow logic
3. `lucydom_interface.py` - Extension with methods for deleting messages
4. `agentservice_agent_user.py` - Adjustments for the new user-input handling

## Error handling
- Frontend:
  - Implement consistent error handling for all API calls
  - Show user-friendly error messages for failed operations
  - Status indicators during running operations (e.g. deleting messages)

- Backend:
  - Return HTTP status code 404 when a message to be deleted is not found
  - Ensure that all workflow operations offer transaction safety
  - Extensive logging functionality for error diagnosis

## Code quality guidelines
- Follow clean code principles (DRY, SOLID)
- Consistent naming and documentation
- Removal of unused functions and code parts
- Sufficient commenting of complex logic

## Additional notes
- Data may be deleted without concern
- No migration strategy required, the system starts fresh
- No confirmation dialogs required for deleting messages
- No special permission requirements for deleting messages

*WORKFLOW*

I have completely reworked the backend with the workflow and the data model. The most important changes are the data model for workflow and messages. Now the frontend has to be adapted accordingly.

For your information, here is the flow of the workflow in the backend:

1. The user can (A) start a new workflow or (B) provide a user input for an existing workflow. The endpoints are attached.
   - Variant (A): The user sends a prompt with files for a new workflow. This creates a new, empty workflow.
   - Variant (B): A user input, possibly with files, arrives for an existing workflow. A messages object is supplied as input. The workflow status is set to "running".
2. Message initialization: The last message object is closed (if one exists) and a new message object is created. This is now completed.
3. File preparation: File contexts are created and filled into the newest message object. File contents are read, extracted, and filled into the message object. Data is formatted for processing by the agents.
4. Agent initialization: The available agents are loaded, including the user agent.
5. Moderator decision.
6. Agent execution, until at the end the user is called to provide an input.
7. Now the "user agent" takes its turn. The user input always has, at the top, the question that is asked of the user. The user input carries the workflow id.
Here the workflow is finished. When the user sends their answer, it continues at point 1, variant (B).

Additionally, adapt the following:

- The initial prompt with file upload is at the same time also the prompt that is offered to the user when an input from them is needed in the chat. This input field should be moved to the place where the user dialog is currently displayed at "wait for user". That way the user always enters data in the same place.

- Two methods should be possible for the file upload.

- a: An upload button directly at the bottom of the prompt. Every loaded file is then added as a small icon with the file name below the prompt, together with an "x" so that it can be deleted again if it is not needed. If text could be extracted from the file, the field with the file name is green, otherwise red. Is it clear to you how you can query this information? When the user submits the prompt, it is afterwards delivered into the results via the backend. You do not have to do this in the frontend, otherwise we have it twice.

- b: Drag & drop: A file can be dragged into the prompt area; then it is uploaded as well.

- The selection window for predefined prompts should be directly above the input field for the user.

- The buttons for controlling the workflow should be moved to the top of the "Execution & Results" area.

- The area "1. Select files" is therefore dropped, since it is integrated into the user prompt.

- The area "2. Enter or select prompt" is dropped as well.

- The area "3. Select agents" is dropped as well.

- Result entry: Every entry in the results log has, at the top, icons with the files that the agents return, then the corresponding text. Every file icon has the buttons "Download", "Copy" (to clipboard), and "Preview".

- With this, the "Workflow configuration" area is dropped completely. The two areas should be kept, though, just with different content. The current "Workflow configuration" area should now contain the "Execution & Results" area. The current "Execution & Results" area should NOW contain the "File preview" area. There, a file delivered by the agents (see the point "Result entry" above) can be viewed as a "Preview". At the top right there are two icons, "Download" and "Copy" (to clipboard).

* FURTHER CHANGES *
- The objects "agents" and "workspaces" are eliminated and are to be removed. The corresponding navigation entries and all functions related to workspaces are therefore dropped as well. There are no workspaces anymore.

Before the implementation, can you check whether you have all the necessary files and information, and put together for me what you are going to do?

I would like to change the agent chat workflow. Can you please, as a first step, adapt the backend for this?

1. The data objects "workspaces" and "agents" are no longer needed and can be removed. The user works with individual workflows. Agents are fixed, defined on the system side.

2. All workflow router endpoints remain as they are.

3. New object structure for the workflow flow:

4. The steps in a workflow (new) - please revise the code and remove all unnecessary parts.

   4.1 The user can (A) start a new workflow (endpoint api/workflows/run) or (B) provide a user input for an existing workflow (endpoint /api/workflows/{workflow_id}/user-input). Both variants should start at execute_workflow().
   - Variant (A): The user sends a prompt with files for a new workflow. This creates a new workflow via execute_workflow(), but without a message object yet. A messages object is supplied as input. The initial workflow status is set to "running".
   - Variant (B): A user input, possibly with files, arrives for an existing workflow. A messages object is supplied as input. The workflow status is set to "running".

   4.2 Message initialization: The last message object is closed (if one exists) and a new message object is created. This is now completed.

   4.3 File preparation: File contexts are created with prepare_file_contexts() and filled into the newest message object. File contents are read with read_file_contents(), extracted, and filled into the message object. Data is formatted for processing by the agents.

   4.4 Agent initialization: The available agents are loaded with initialize_agents() from the module "agentservice_part_agents", including the user agent.

   4.5 Moderator decision: There is no "Moderator" agent. Based on the newest message object and the profiles of the available agents, an OpenAI call determines how the request should be solved. As the result format, a JSON object should be prescribed stating which agent should execute which job (= the prompt for it), with which answers and which data objects. No further subfunctions are needed for this. The answer format should be prescribed such that only available agents are returned per job. The agent set should always be either only the user or only system agents. The answer format is therefore a list of agents and their jobs (see the sketch after this list).

   4.6 Agent execution: If a list of agents is returned (and not the user), the agents are called in the given sequence to deliver their contribution. Agent answers are integrated into the message object with create_agent_result(), with the various files separated out. Next, a summary of the answers delivered by the agents (only this part, not the earlier messages) is created via the OpenAI call and handed as input text to the user agent, which is now selected as the next agent. The run is stopped when the workflow is stopped manually with stop_workflow() (status set to "stopped"), or when an error occurs (status set to "failed").

   4.7 Now the "user agent" takes its turn. The workflow status is set to "waiting_for_user". The whole part with _process_user_input() etc. is dropped. After the user input, the workflow is not continued with _continue_workflow_after_user_input(), but regularly again at point 4.1 via branch (B).

   4.8 Logging: Every step is logged with _add_log(). Logs are stored in the workflow object.

   4.9 Here the workflow ends regularly, until the user makes a new request. That means no more moderator checks and no maximum number of rounds are needed. The workflow is saved with save_workflow_results().

5. Continuous polling: The client can query the workflow status with get_workflow_status(). Logs can be fetched with get_workflow_logs(). Results can be fetched with get_workflow_results().

6. agents: The available agents are loaded with initialize_agents() from the module "agentservice_part_agents". The agent data is stored in separate files to keep it maintainable. These agent modules are prepared:
   - agentservice_agent_user
   - agentservice_agent_coder
   - agentservice_agent_analyst
   - agentservice_agent_webcrawler
   - agentservice_agent_sharepoint
   - agentservice_agent_documentation
   For each agent these attributes are defined:
   - name
   - description
   - capabilities
   Each agent then has its own required functions integrated in its file.

7. Connector cleanup:
   - Move all connectors into a subfolder "connectors", i.e. all files named "connector_...".
   - Rewrite the two "connector_aichat..." connectors so that they take data as input in the format of the messages object according to point 3, and return it in that format as well.

8. Changed storage of workflows: Please adapt the code so that workflows are stored as database objects, analogous to prompts.
   That means: extend the routes for "workflows" with "GET /api/workflows", "PUT /api/workflows/{workflow_id}", "DELETE /api/workflows/{workflow_id}".
   Rename the route "POST /api/workflows/run" to "POST /api/workflows".
   Rename the route "/api/workflows/{workflow_id}/results" to "GET /api/workflows/{workflow_id}".
   Adapt all modules accordingly and prepare the database classes.
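The sketch referenced in 4.5: a hypothetical shape for the moderator's JSON answer, plus a check that enforces its contract (only available agents; either only the user or only system agents). Field names are assumptions.

```python
moderator_decision = {
    "assignments": [
        {"agent": "analyst", "job": "Compare the two uploaded reports.", "result_format": "Text"},
        {"agent": "coder", "job": "Generate a CSV with the key figures.", "result_format": "DocumentID"},
    ]
}

def validate_decision(decision: dict, available_agents: set) -> list:
    """Enforce the contract from 4.5 (sketch, names assumed)."""
    assignments = decision["assignments"]
    agents = {a["agent"] for a in assignments}
    unknown = agents - available_agents
    if unknown:
        raise ValueError(f"Moderator returned unavailable agents: {unknown}")
    if "user" in agents and len(agents) > 1:
        raise ValueError("Agent set must be either only the user or only system agents")
    return assignments

print(validate_decision(moderator_decision, {"user", "analyst", "coder", "webcrawler"}))
```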
The buttons "Start workflow" and "Reset" have no border. Is the style class wrong here, or is it not applied?

Adjustment of the "Execution & Results" visual:
- Leave the execution log as it is. Add a button to the right of the other two buttons (show all / collapse details) for the execution log window: a toggle function to collapse and restore.
- The areas "Multi-Agent Chat" and "Results" make no sense like this. Please merge these two areas into one large area named "Multi-Agent Chat Area". There, continuously log the messages of the agents in an HTML view of the messages: the name of the agent in the title, and its message below it. The last message should be expanded, all earlier ones collapsed, but the user should be able to toggle each message to see the details.

Can you optimize the flow of the agent chat as follows:
- Add a "user agent" with the name of the logged-in user to every chat. If something in the chat is unclear, or additional information is needed, the moderator asks the user agent. Also, before it ends the chat, it asks the user agent whether they agree.
- When the user agent receives a request, they can enter their text in a multi-line text field directly below the chat history in the "Multi-Agent Chat" area. They can also upload additional files. When they press "Enter", the additional data with the added files is appended to the message, the input window disappears again, and the moderator continues the chat. After every user input, the counter restarts at round 1.
Add statistics: Right-aligned next to the title of the "Execution log", can you please continuously update the statistics of how many kBytes (kB) of data were sent over the connector to the AI model (this is the data size of the message object) and how many kB of messages were returned? This figure is per workflow run, i.e. whenever a new workflow starts, the counter is reset to 0. In this format: "^ 250k v 1'250k", with v and ^ replaced by arrows (a formatting sketch follows at the end of this block).

In the frontend settings it should be possible to change the language of the active user according to the list options in the "...model.py" files. The language then also applies to the attribute names in a form field in "generic-entity.js". A language change therefore triggers an update of the user via the API, by changing the language in the database.

Can you adapt the execution logging? The log should continuously show which assistant produced which result and which assistant is currently working. No percentages are needed; they make no sense. Keep the polling as it is, but when no new data is available, simply keep appending "." to the last timestamp until the next message is output. Do you have all the data to adapt this in the frontend and in the backend?

In the execution log, show only the title per entry; include the details in the log, but hide them. The user can then expand the collapsed texts in the log to see the desired details specifically.

In the frontend, in the workflow module, please make the execution log window dynamically resizable, in width and in height. The same for the results window. Also design the view so that the window parts "Workflow configuration" and "Execution & Results" can be shown and hidden, so that either part can use the complete working area, because there will be a lot of text there. This is better for the user.

Now to this central module. I would like the data to be displayed and editable as tables. For view, add, modify, delete, one icon per record on the far left, and at the very top in the header a "new item" symbol or text; make a proposal.

Is it possible to have a checkbox per record to select multiple elements, and icons at the top of the table for multi-delete?

The table should be filterable and sortable by all fields.

Can you please adapt the code so that main.js dynamically loads the page modules in the attachment only when the corresponding page is called up in the navigation?

Then please modularize main.js so that only function calls to sub-modules are executed there. Move the navigation menu out into "navigation.js". Keep the setup and operation of the current workspace inside main.js.

The current main area with the selection of the workspace, the associated agents, etc. is now an object that can be displayed in the "mainView". Other objects can be displayed in the mainView as well, and each has its specific parameters, as explained below.
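The formatting sketch referenced in the statistics request above. The function name and the kB rounding are assumptions; the output format (arrows, apostrophe as thousands separator, reset per workflow run) follows the spec.

```python
def format_transfer_stats(sent_bytes: int, received_bytes: int) -> str:
    """Render the per-workflow AI transfer counters, e.g. "↑ 250k ↓ 1'250k"."""
    def to_kb(n: int) -> str:
        kb = round(n / 1024)
        # Swiss-style thousands separator, as in the requested "1'250k".
        return f"{kb:,}".replace(",", "'") + "k"
    return f"↑ {to_kb(sent_bytes)} ↓ {to_kb(received_bytes)}"

# Counters reset to 0 at the start of every new workflow run:
print(format_transfer_stats(256_000, 1_280_000))  # -> "↑ 250k ↓ 1'250k"
```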
In main.js a global object of all elements is created that should be contained in the navigation and that forms the basis for all function calls. With that, there are no more navigation details in index.html.

The global object has these attributes:

globalState
.objects
.user
.mainView

Here is the specification of the objects.

.objects[...]: holds a list of objects that can be loaded in the mainScreen. Please take over these attributes per object from today's js files in the attachment, by analogy:
- label: list of the label names in the different languages (default, en, fr, ...)
- modulName: string; this is used for the object class "js/modules/{modulname}.js", for the corresponding html component "modules/part-{modulname}.html", and for the calls to the backend /api/{modulname}/...
- icon: icon in front of the menu item
- isVisible (here, e.g., users and mandates are only shown if the permission for them exists)
- isActive: whether the menu item is selected
- navigationContext: these options for where an object is placed in the menu:
  -- "nav_left" for agents, data, prompts, users, mandates, workspaces
  -- "nav_top" for language selection, logout
- navigationActionType: what happens when the menu is clicked. These options:
  -- "module": standard menu button. A module is loaded into the mainView. The module is only loaded and initialized with its data when the menu item is selected.
  -- "group_open": group header; start of a new group; all subsequent objects in the list belong to this group. The group can be expanded and collapsed in the menu. Initially the group is open, all menu items visible.
  -- "group_collapsed": group header; start of a new group; all subsequent objects in the list belong to this group. The group can be expanded and collapsed in the menu. Initially the group is collapsed.

.user: attributes of the active user
- mandate_id
- user_id
- username
- full_name
- language (default, en, fr, ...)
- isAdmin
- isSysAdmin
- lastWorkspaceId: id of the most recently used workspace - currently "null"
- session: currently null and not used

.mainView: always contains the current attributes that the page in the mainView can use
- currentWorkspace: object of the currently selected workspace
- availableFiles[]: list of objects
- availableAgents[]: list of objects
- availablePrompts[]: list of objects
- currentWorkflowId: id

Can you please update part-workflow.html and workflow.js with the dynamic multi-agent chat that was adapted in the backend, and in the execution log add the details of a running chat with expandable texts? Make the execution log window dynamically resizable.

Clean up and consolidate the CSS for shared classes, in parallel across all html and js.

Admin page with CRUD for user management and mandate management, generic.

In the frontend, in the generic form "generic-entity.js", the ID for a new object should be either hidden or read-only. The ID is not needed; it is only created when saving to the database. That means after saving to the database, the data of the corresponding table is reloaded.
Can you please adapt the code structure and logic of 'agentservice_interface.py' and improve the code structure for better maintenance and further development:

1. Move the integration of the AI models, with the corresponding config data and the function calls, into separate files ("connector_ai_openai", "connector_ai_webscraping"). In 'agentservice_interface.py', import and prepare the connector modules during initialization.

2. Do not run the agent chat 'execute_workflow' in the order of the agents, but as a round table of the agents. That means an AI moderator moderates the agents autonomously and, based on the produced answers and the properties of the agents, calls the next suitable agent based on its 'capabilities' after an agent has delivered its answer.
The initial prompt with the associated files and the chat history in the 'LogEntry' with the n latest records (n is read from the config file from the variable Application.MAX_HISTORY) is transformed into a 'message' object as a dictionary that looks like this (a complete builder sketch follows at the end of this block):

    message = {
        "role": "user",  # --> static, always like this
        "content": [  # --> list of the files
            {
                "type": "text",
                "text": prompt_text
            },
            {
                "type": content_type,  # --> we will integrate this function later
                "source": {
                    "type": "base64",
                    "media_type": mime_type,
                    "data": base64_file  # --> here the base64-encoded content of the respective file
                }
            },
            {
                "type": "text",
                "text": LogEntries  # --> here the log entries as a text package
            }
        ]
    }

When the AI moderator considers the task fulfilled, it ends the workflow.

3. Initialization set: answers requests directly with the configured AI model when no specialized agents are needed. This covers the generation of text, code, and structures, the analysis of files, creating graphics, etc.
(Agent) Organizer: analyzes the user prompt and structures the jobs to execute as well as the required results to deliver.
(Agent) Developer: develops Python code on behalf of the other agents and then executes it.
(Agent) Webscrape: an agent that performs web scraping. It uses the function '_scrape_url' to scan a web page and return its content. It can also commission the developer to generate code that runs the function _scrape_url with some logic (e.g. iteratively or batch-wise).
(Prompt): Can you prepare a few initial prompts for the following question areas, which can then be selected:
- Web research
- Analysis
- Minutes
- Design

4. Can you please add the missing CRUD methods in the modules "workspaces" and "prompts"? I believe POST and DELETE are missing.

5. Improve database management: In the two modules "gateway_interface.py" and "lucydom_interface", no manipulations or references with IDs take place. The IDs for a new record are assigned only in "connector_....py" modules. Every record has a unique id. In the "...interface.py" modules no IDs are generated. The query for id=1 is replaced with the function 'get_initial_id', which is explained further below.
For this, please adapt the modules and add a system table in the "connector...py" modules that remembers which ID the first record of each table has, because that is the respective system record. Then create a function 'get_initial_id' that can be called in the modules "gateway_interface.py" and "lucydom_interface" to query the id of the initial record per table.

The gateway does not fully work yet.
Can you please check the modules for me and structure them better?
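The builder sketch referenced in point 2: it assembles the message dictionary shown above. The mimetypes-based detection and the fixed "image" content type are placeholders; as noted in the original, the real content_type mapping is to be integrated later.

```python
import base64
import mimetypes

def build_message(prompt_text: str, file_paths: list, log_entries: str) -> dict:
    content = [{"type": "text", "text": prompt_text}]
    for path in file_paths:
        mime_type, _ = mimetypes.guess_type(path)
        with open(path, "rb") as f:
            base64_file = base64.b64encode(f.read()).decode("utf-8")
        content.append({
            "type": "image",  # placeholder until the content_type function exists
            "source": {
                "type": "base64",
                "media_type": mime_type or "application/octet-stream",
                "data": base64_file,
            },
        })
    content.append({"type": "text", "text": log_entries})
    return {"role": "user", "content": content}

# e.g. build_message("Summarize the attachments", [], "last 5 LogEntry records ...")
```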
These requirements and the file setup:

models.py: rename the file to "model_lucydom.py"
- Remove the classes "User", "UserInDB", "Token" from the file and move them into a separate file "model_gateway.py".
- All data type definitions live here, exhaustively and independent of the database system.
- All IDs are long numbers, not texts.
- For every class and every attribute of a class, add a label stating the name of the attribute or class when it is requested in a form. The label should have a default value and be settable per language.
- Make all objects multi-tenant capable, i.e. add the attributes "mandate_id" and "user_id" to every object.

model_gateway.py:
- All data type definitions live here, exhaustively and independent of the database system.
- All IDs are long numbers, not texts.
- For every class and every attribute of a class, add a label stating the name of the attribute or class when it is requested in a form. The label should have a default value and be settable per language.
- Add the class "Mandate" with the attributes (id, name, language).
- In the class "User", add "id", "mandate_id", and "language".
- Make all objects multi-tenant capable, i.e. add the attributes "mandate_id" and "user_id" to every object.

Split database.py into 2 files, "connector_db_json.py" and "interface_lucydom.py".

connector_db_json.py: a first connector, with further connectors to come
1. Parameters that are passed:
   - DB_Folder, DB_USER, and DB_APIKEY
   - Context parameters for "mandate_id" and "user_id", which must not be null.
   - Mount the current JSON database in the folder DB_Folder and take it over as it is. If the folder is missing, create it.
2. The connector "db" is made available as an object.
3. These generic methods are made available in the "db" object (a sketch follows below). Every query automatically filters the records on the context parameters "mandate_id" and "user_id", as long as these parameters are not null or "" in a record.
   - get_tables(optional filter criteria): list of all tables
   - get_fields(table, optional filter criteria): list of all attributes of a table
   - get_schema(table, language, optional filter criteria): object of all attributes of a table with their data type and the label in the corresponding language. Without a language, the default value is used as the label.
   - get_recordset(table, optional filter criteria for fields, optional filter criteria for records): returns the corresponding data object with the records
   - record_create(table, json with attributes): adds a record in the "mandate_id" context; all attributes that are not in the "json with attributes" are set to the default values according to models.py
   - record_delete: deletes a record, but only if it is in the "mandate_id" context, otherwise refusal "Not your mandate"
   - record_modify: changes a record, but only if it is in the "mandate_id" context, otherwise refusal "Not your mandate"
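A compact, runnable sketch of the mandate filtering described in point 3 of connector_db_json.py. The in-memory table stands in for the JSON files; the method names follow the list above, everything else is assumed.

```python
class JsonDbConnector:
    def __init__(self, db_folder: str, db_user: str, db_apikey: str,
                 mandate_id: int, user_id: int):
        if mandate_id is None or user_id is None:
            raise ValueError("mandate_id and user_id must not be null")
        self.mandate_id = mandate_id
        self.tables: dict[str, list[dict]] = {}  # stand-in for the JSON files in db_folder

    def _in_context(self, record: dict) -> bool:
        # Records whose mandate_id is null or "" are not filtered;
        # all others must match the context parameter.
        mid = record.get("mandate_id")
        return mid in (None, "") or mid == self.mandate_id

    def get_recordset(self, table: str, record_filter=None) -> list[dict]:
        rows = [r for r in self.tables.get(table, []) if self._in_context(r)]
        if record_filter:
            rows = [r for r in rows if record_filter(r)]
        return rows

    def record_delete(self, table: str, record: dict) -> None:
        if not self._in_context(record):
            raise PermissionError("Not your mandate")
        self.tables[table].remove(record)

db = JsonDbConnector("/data_lucydom", "user", "key", mandate_id=1, user_id=7)
db.tables["workspaces"] = [{"id": 1, "mandate_id": 1}, {"id": 2, "mandate_id": 2}]
print(db.get_recordset("workspaces"))  # only the mandate-1 record
```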
interface_lucydom.py: an interface to the gateway; further interfaces will follow. The interface does this:
1. Mount the database with these parameters:
   - Connector "connector_db_json.py"
   - Database "/data_lucydom"
   - Data model "model_lucydom.py"
2. The object "db" can now be used.
3. Initialization of the database if it does not exist, but only the minimally required objects: the "Default Workspace" in "workspaces".

interface_gateway.py: an interface to the gateway; further interfaces will follow. The interface does this:
1. Mount the database with these parameters:
   - Connector "connector_db_json.py"
   - Database "/data_gateway"
   - Data model "model_gateway.py"
2. The object "db" can now be used.
3. Initialization of the database if it does not exist, but only the minimally required objects: user "Admin", mandate "Root".

app.py: structure the initialization clearly and adapt the endpoints to the new structure.
1. Part one: mount the interfaces.
2. All necessary initializations: these should live in the respective interfaces, except for the generic parts.
3. Move all access & security functions out into "auth.py".
4. Keep all token endpoints completely generic and simplified:
   - Do not put attribute definitions or field lists in there. When a model is adapted, no changes should be necessary here.
   - Simplify the queries and exceptions with helper functions, so that modeling the endpoints is very simple, clear, and tidy for the programmer (see the helper sketch below).
   - Record tasks as comments describing what has to be rebuilt in the frontend because of all these endpoint changes.

agent_service.py: rename to "interface_agentservice.py".
- For all workflow endpoints that draw logic from only one interface, integrate the logic in the interface and simplify the code at the endpoint.
- Only for endpoints that need logic combined from several interfaces, integrate the logic at the endpoint.
- The goal is that the endpoint code structure is maximally lean and clear, including the structuring and grouping of the endpoints.
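The helper sketch referenced in app.py point 4: one possible way to keep the endpoint code one-liner simple. The name get_or_404 and its shape are an assumption, not existing code.

```python
from fastapi import HTTPException

def get_or_404(fetch, *args, detail: str = "Not found"):
    """Run a lookup and convert a missing result into a clean HTTP error."""
    result = fetch(*args)
    if result is None:
        raise HTTPException(status_code=404, detail=detail)
    return result

# Inside an endpoint this reduces to a single readable line, e.g.:
#   workflow = get_or_404(db.get_workflow, workflow_id, detail="Workflow not found")
```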
diff --git a/notes/produce_diagrams.md b/notes/produce_diagrams.md
deleted file mode 100644
index f4f02abc..00000000
--- a/notes/produce_diagrams.md
+++ /dev/null
@@ -1,48 +0,0 @@
-MERMAID DIAGRAM:
-
-can you make the chart "wiki/diagramm_komponenten.mermaid": produce a component diagram based on the current code in poweron/*.
-if the document exists, add missing components and remove obsolete components.
-
-in box texts, use &#10; instead of \n.
-
-for all subgraphs, add the path on a separate line so the module can be found in the code.
-
-read all code modules carefully to identify all components and their relations.
-
-connectors without texts, only lines.
-
-add a connector between frontend and backend (apiCalls.js -> app.py).
-
-connect app.py (main application module) with the route*.py.
-
-put all items of the frontend into subgraph "Frontend".
-put all items of the gateway into subgraph "Gateway".
-
-put the following boxes into a dedicated subgraph within their existing subgraph:
-- workflowManager.py, workflowAgentsRegistry.py, documentProcessor.py --> "Workflow"
-- mimeUtils.py, defAttributes.py, configuration.py, autho.py --> "Shared"
-- agent*.py --> "Agents"
-- workflow*.js --> "Workflow"
-- all *.js in js/modules/ not starting with workflow* --> "Administration"
-- formGeneric.js does not go into subgraph "Shared", but into a separate subgraph "Shared"
-
-connect main.js (the main app in the frontend) to navigation.js, globalState.js, login.js, register.js, msftCall.js, config.js.
-
-connect navigation.js to moduleLoader.js.
-
-connect moduleLoader.js to workflow.js and to all *.js in js/modules/ not starting with workflow*.
-
-connect all *.js in js/modules/ not starting with workflow* --> formGeneric.js.
-
-connect formGeneric.js --> apiCalls.js.
-
-use underscores (e.g. Backend_Python, Workflow_Modules, etc.) for all subgraph titles.
-
-if adding a legend, give the references to the legend the same colors.
diff --git a/notes/readme.md b/notes/readme.md
deleted file mode 100644
index 894a3910..00000000
--- a/notes/readme.md
+++ /dev/null
@@ -1,39 +0,0 @@
-### Launch APP
-
-cd .\frontend_agents\
-cls; python ./server.py
-
-conda activate C:\Users\pmots\anaconda3\envs\poweron
-cd .\gateway\
-cls; uvicorn app:app --host 0.0.0.0 --port 8000
-
-
-### git permanent login with VS Code
-git remote set-url origin https://valueon@github.com/valueonag/gateway
-git remote set-url origin https://valueon@github.com/valueonag/frontend_agents
-git remote set-url origin https://valueon@github.com/valueonag/wiki
-git remote set-url origin https://valueon@github.com/valueonag/customer-svbe
-git remote set-url origin https://valueon@github.com/valueonag/customer-althaus
-
-### git delete workflow runs (cleanup)
-
-gh auth login
-
-Navigate to your repository folder (if not already there):
-bash: cd /path/to/your/repository
-
-List workflow runs:
-bash: gh run list
-
-Delete a specific workflow run:
-bash: gh run delete [RUN_ID]
-
-Delete all completed workflow runs (to clear up space):
-bash: gh run list --status completed --json databaseId -q '.[].databaseId' | xargs -I{} gh run delete {}
-powershell:
-
-$runs = gh run list --status completed --json databaseId -q ".[].databaseId" | ConvertFrom-Json
-foreach ($run in $runs) {
-    Write-Host "Deleting run $run"
-    echo "y" | gh run delete $run
-}
diff --git a/query b/query
deleted file mode 100644
index a02a1cc7..00000000
--- a/query
+++ /dev/null
@@ -1 +0,0 @@
-postgresql
diff --git a/requirements.txt b/requirements.txt
index fd9d119f..f5a1a2dc 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -10,6 +10,7 @@ slowapi==0.1.8  # For rate limiting
 
 ## Authentication & Security
 python-jose[cryptography]==3.3.0  # For JWT tokens
+cryptography>=41.0.0  # For encryption/decryption of configuration values
 passlib==1.7.4
 argon2-cffi>=21.3.0  # For password hashing in gateway_interface.py
 google-auth-oauthlib==1.2.0  # For Google OAuth
diff --git a/test_excel_fix.py b/test_excel_fix.py
deleted file mode 100644
index 17a57070..00000000
--- a/test_excel_fix.py
+++ /dev/null
@@ -1,77 +0,0 @@
-#!/usr/bin/env python3
-"""
-Test script to verify the Excel header parsing fix
-"""
-
-import sys
-import os
-import pandas as pd
-from io import BytesIO
-
-# Add the gateway modules to the path
-sys.path.append(os.path.join(os.path.dirname(__file__), 'modules'))
-
-from modules.interfaces.interfaceTicketObjects import TicketSharepointSyncInterface
-
-def test_excel_header_parsing():
-    """Test the Excel header parsing fix"""
-    print("=== Testing Excel Header Parsing Fix ===\n")
-
-    # Create a mock interface instance
-    interface = TicketSharepointSyncInterface(
-        connector_ticket=None,
-        connector_sharepoint=None,
-        task_sync_definition={
-            "ID": ["get", ["id"]],
-            "Summary": ["get", ["fields", "summary"]],
-            "Status": ["get", ["fields", "status", "name"]],
-            "Assignee": ["put", ["fields", "assignee", "displayName"]]
-        },
-        sync_folder="test",
-        sync_file="test.xlsx",
-        backup_folder="backup",
-        audit_folder="audit",
-        site_id="test"
-    )
-
-    # Test data
-    test_data = [
-        {"ID": "TEST-1", "Summary": "Test Issue 1", "Status": "Open", "Assignee": "John Doe"},
-        {"ID": "TEST-2", "Summary": "Test Issue 2", "Status": "Closed", "Assignee": "Jane Smith"},
-    ]
-
-    # Create Excel content
-    print("1. Creating Excel content...")
-    excel_content = interface._create_excel_content(test_data)
-    print(f"   ✓ Created Excel content: {len(excel_content)} bytes")
-
-    # Parse it back
-    print("2. Parsing Excel content...")
-    try:
-        parsed_data, parsed_headers = interface._parse_excel_content(excel_content)
-        print(f"   ✓ Parsed Excel content: {len(parsed_data)} records")
-        print(f"   ✓ Headers type: header1={type(parsed_headers['header1'])}, header2={type(parsed_headers['header2'])}")
-        print(f"   ✓ Headers content: header1='{parsed_headers['header1']}', header2='{parsed_headers['header2']}'")
-
-        # Test creating content with the parsed headers
-        print("3. Testing round-trip with parsed headers...")
-        new_excel_content = interface._create_excel_content(test_data, parsed_headers)
-        print(f"   ✓ Created new Excel content: {len(new_excel_content)} bytes")
-
-        # Parse the new content
-        final_data, final_headers = interface._parse_excel_content(new_excel_content)
-        print(f"   ✓ Final parse successful: {len(final_data)} records")
-        print(f"   ✓ Final headers: header1='{final_headers['header1']}', header2='{final_headers['header2']}'")
-
-        print("\n✅ All tests passed! The header parsing fix works correctly.")
-        return True
-
-    except Exception as e:
-        print(f"   ✗ Error during parsing: {e}")
-        import traceback
-        traceback.print_exc()
-        return False
-
-if __name__ == "__main__":
-    success = test_excel_header_parsing()
-    exit(0 if success else 1)
diff --git a/tool_security_encrypt_config_value.py b/tool_security_encrypt_config_value.py
new file mode 100644
index 00000000..7abb6356
--- /dev/null
+++ b/tool_security_encrypt_config_value.py
@@ -0,0 +1,375 @@
+#!/usr/bin/env python3
+"""
+Tool for encrypting configuration values.
+
+This tool allows developers to encrypt secret values for use in configuration files.
+It supports both text and JSON values and automatically determines the environment.
+It can also encrypt all *_SECRET keys in an environment file at once.
+
+Usage:
+    # Encrypt a single value
+    python tool_security_encrypt_config_value.py --value "my_secret_value" --env dev
+    python tool_security_encrypt_config_value.py --file "path/to/file.json" --env prod
+
+    # Encrypt all secrets in a file
+    python tool_security_encrypt_config_value.py --encrypt-all env_dev.env --env dev
+    python tool_security_encrypt_config_value.py --encrypt-all env_prod.env --env prod --dry-run
+
+    # Decrypt a value (for testing)
+    python tool_security_encrypt_config_value.py --decrypt "DEV_ENC:encrypted_value"
+"""
+
+import sys
+import os
+import json
+import argparse
+import shutil
+from pathlib import Path
+from datetime import datetime
+
+# Add the modules directory to the Python path
+sys.path.insert(0, str(Path(__file__).parent / 'modules'))
+
+from shared.configuration import encrypt_value, decrypt_value, _is_encrypted_value
+
+def find_secret_keys_in_file(file_path: Path) -> list:
+    """
+    Find all *_SECRET keys in an environment file that are not encrypted.
+
+    Args:
+        file_path: Path to the environment file
+
+    Returns:
+        list: List of tuples (line_number, key, value, full_line)
+    """
+    secret_keys = []
+
+    if not file_path.exists():
+        return secret_keys
+
+    try:
+        with open(file_path, 'r', encoding='utf-8') as f:
+            lines = f.readlines()
+
+        i = 0
+        while i < len(lines):
+            line = lines[i].strip()
+
+            # Skip empty lines and comments
+            if not line or line.startswith('#'):
+                i += 1
+                continue
+
+            # Check if line contains a key-value pair
+            if '=' in line:
+                key, value = line.split('=', 1)
+                key = key.strip()
+                value = value.strip()
+
+                # Check if it's a secret key and not already encrypted
+                if key.endswith('_SECRET') and value and not _is_encrypted_value(value):
+                    # Check if value starts with { (JSON object)
+                    if value.startswith('{'):
+                        # Collect all lines until we find the closing }
+                        json_lines = [value]
+                        start_line = i + 1
+                        i += 1
+                        brace_count = value.count('{') - value.count('}')
+
+                        while i < len(lines) and brace_count > 0:
+                            json_lines.append(lines[i].rstrip('\n'))
+                            brace_count += lines[i].count('{') - lines[i].count('}')
+                            i += 1
+
+                        # Join all lines and create the full JSON value
+                        full_json_value = '\n'.join(json_lines)
+                        secret_keys.append((start_line, key, full_json_value, line))
+                        i -= 1  # Adjust for the loop increment
+                    else:
+                        # Single line value
+                        secret_keys.append((i + 1, key, value, line))
+                # Check if it's a secret key with multiline JSON (value is just "{")
+                elif key.endswith('_SECRET') and value == '{' and not _is_encrypted_value(value):
+                    # Collect all lines until we find the closing }
+                    json_lines = [value]
+                    start_line = i + 1
+                    i += 1
+                    brace_count = 1  # We already have one opening brace
+
+                    while i < len(lines) and brace_count > 0:
+                        json_lines.append(lines[i].rstrip('\n'))
+                        brace_count += lines[i].count('{') - lines[i].count('}')
+                        i += 1
+
+                    # Join all lines and create the full JSON value
+                    full_json_value = '\n'.join(json_lines)
+                    secret_keys.append((start_line, key, full_json_value, line))
+                    i -= 1  # Adjust for the loop increment
+
+            i += 1
+
+    except Exception as e:
+        print(f"Error reading {file_path}: {e}")
+
+    return secret_keys
+
+def backup_file(file_path: Path) -> Path:
+    """
+    Create a backup of the file before modification.
+
+    Args:
+        file_path: Path to the file to backup
+
+    Returns:
+        Path: Path to the backup file
+    """
+    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+    backup_path = file_path.with_suffix(f'.{timestamp}.backup')
+    shutil.copy2(file_path, backup_path)
+    return backup_path
+
+def encrypt_all_secrets_in_file(file_path: Path, env_type: str, dry_run: bool = False, create_backup: bool = True) -> dict:
+    """
+    Encrypt all non-encrypted secrets in a file.
+
+    Args:
+        file_path: Path to the environment file
+        env_type: The environment type
+        dry_run: If True, only show what would be changed
+        create_backup: If True, create a backup before modifying
+
+    Returns:
+        dict: Results of the encryption process
+    """
+    results = {
+        'file': str(file_path),
+        'env_type': env_type,
+        'secrets_found': 0,
+        'secrets_encrypted': 0,
+        'errors': [],
+        'backup_created': None
+    }
+
+    # Find all secret keys
+    secret_keys = find_secret_keys_in_file(file_path)
+    results['secrets_found'] = len(secret_keys)
+
+    if not secret_keys:
+        return results
+
+    print(f"\n📁 Processing {file_path.name} ({env_type}):")
+    print(f"   Found {len(secret_keys)} non-encrypted secrets")
+
+    if dry_run:
+        print("   [DRY RUN] Would encrypt the following secrets:")
+        for line_num, key, value, full_line in secret_keys:
+            print(f"   Line {line_num}: {key} = {value[:50]}{'...' if len(value) > 50 else ''}")
+        return results
+
+    # Create backup if requested
+    if create_backup:
+        try:
+            backup_path = backup_file(file_path)
+            results['backup_created'] = str(backup_path)
+            print(f"   📋 Backup created: {backup_path.name}")
+        except Exception as e:
+            results['errors'].append(f"Failed to create backup: {e}")
+            print(f"   ⚠️ Warning: Could not create backup: {e}")
+
+    # Read the file content
+    try:
+        with open(file_path, 'r', encoding='utf-8') as f:
+            lines = f.readlines()
+    except Exception as e:
+        results['errors'].append(f"Failed to read file: {e}")
+        return results
+
+    # Process each secret key
+    for line_num, key, value, full_line in secret_keys:
+        try:
+            print(f"   🔐 Encrypting {key}...")
+
+            # Encrypt the value using the existing function
+            encrypted_value = encrypt_value(value, env_type)
+
+            # Replace the line in the file content
+            new_line = f"{key} = {encrypted_value}\n"
+            lines[line_num - 1] = new_line
+
+            # If this was a multiline JSON, we need to remove the remaining lines
+            if value.startswith('{') and '\n' in value:
+                # Count how many lines the original JSON spanned
+                json_lines = value.split('\n')
+                lines_to_remove = len(json_lines) - 1  # -1 because we already replaced the first line
+
+                # Remove the remaining lines
+                for i in range(line_num, line_num + lines_to_remove):
+                    if i < len(lines):
+                        lines[i] = ""
+
+            results['secrets_encrypted'] += 1
+            print(f"   ✓ Encrypted successfully")
+
+        except Exception as e:
+            error_msg = f"Failed to encrypt {key}: {e}"
+            results['errors'].append(error_msg)
+            print(f"   ✗ {error_msg}")
+
+    # Write the modified content back to the file
+    if results['secrets_encrypted'] > 0:
+        try:
+            with open(file_path, 'w', encoding='utf-8') as f:
+                f.writelines(lines)
+            print(f"   💾 File updated successfully")
+        except Exception as e:
+            results['errors'].append(f"Failed to write file: {e}")
+            print(f"   ✗ Failed to write file: {e}")
+
+    return results
+
+def main():
+    parser = argparse.ArgumentParser(description='Encrypt configuration values')
+    parser.add_argument('--value', '-v', help='Plain text value to encrypt')
+    parser.add_argument('--file', '-f', help='File containing the value to encrypt')
+    parser.add_argument('--env', '-e', choices=['dev', 'int', 'prod'],
+                        help='Environment type (default: current environment)')
+    parser.add_argument('--decrypt', '-d', help='Decrypt an encrypted value (for testing)')
+    parser.add_argument('--interactive', '-i', action='store_true',
+                        help='Interactive mode - prompt for value')
+    parser.add_argument('--encrypt-all', '-a', help='Encrypt all *_SECRET keys in the specified file')
+    parser.add_argument('--dry-run', action='store_true',
+                        help='Show what would be changed without making changes (for --encrypt-all)')
+    parser.add_argument('--no-backup', action='store_true',
+                        help='Skip creating backup files (for --encrypt-all)')
+
+    args = parser.parse_args()
+
+    try:
+        # Handle encrypt-all functionality
+        if args.encrypt_all:
+            file_path = Path(args.encrypt_all)
+            if not file_path.exists():
+                print(f"Error: File not found: {file_path}")
+                return 1
+
+            if not args.env:
+                print("Error: --env is required when using --encrypt-all")
+                return 1
+
+            print("🔐 PowerOn Secret Encryption Tool")
+            print("=" * 50)
+
+            if args.dry_run:
+                print("🔍 DRY RUN MODE - No changes will be made")
+                print()
+
+            results = encrypt_all_secrets_in_file(
+                file_path,
+                args.env,
+                dry_run=args.dry_run,
+                create_backup=not args.no_backup
+            )
+
+            # Summary
+            print("\n" + "=" * 50)
+            print("📊 SUMMARY")
+            print("=" * 50)
+            print(f"File processed: {file_path.name}")
+            print(f"Secrets found: {results['secrets_found']}")
+
+            if not args.dry_run:
+                print(f"Secrets encrypted: {results['secrets_encrypted']}")
+                print(f"Errors: {len(results['errors'])}")
+
+                if len(results['errors']) == 0 and results['secrets_encrypted'] > 0:
+                    print("\n🎉 All secrets encrypted successfully!")
+                elif len(results['errors']) > 0:
+                    print(f"\n⚠️ Completed with {len(results['errors'])} errors")
+                else:
+                    print("\n✅ No secrets needed encryption")
+            else:
+                print(f"Secrets that would be encrypted: {results['secrets_found']}")
+
+            # Show backup information
+            if results['backup_created']:
+                print(f"\n📋 Backup created: {Path(results['backup_created']).name}")
+
+            # Show errors if any
+            if results['errors']:
+                print(f"\n❌ Errors encountered:")
+                for error in results['errors']:
+                    print(f"  - {error}")
+
+            return 0 if len(results['errors']) == 0 else 1
+
+        # Handle decryption
+        if args.decrypt:
+            if _is_encrypted_value(args.decrypt):
+                decrypted = decrypt_value(args.decrypt)
+                print(f"Decrypted value: {decrypted}")
+            else:
+                print("Error: Value does not appear to be encrypted (missing ENV_ENC: prefix)")
+            return
+
+        # Determine the value to encrypt
+        value_to_encrypt = None
+
+        if args.value:
+            value_to_encrypt = args.value
+        elif args.file:
+            if not os.path.exists(args.file):
+                print(f"Error: File not found: {args.file}")
+                return
+
+            with open(args.file, 'r', encoding='utf-8') as f:
+                value_to_encrypt = f.read().strip()
+        elif args.interactive:
+            print("Enter the value to encrypt (press Ctrl+D when done):")
+            try:
+                value_to_encrypt = sys.stdin.read().strip()
+            except EOFError:
+                print("Error: No input provided")
+                return
+        else:
+            # Interactive mode by default
+            print("Enter the value to encrypt (press Ctrl+D when done):")
+            try:
+                value_to_encrypt = sys.stdin.read().strip()
+            except EOFError:
+                print("Error: No input provided")
+                return
+
+        if not value_to_encrypt:
+            print("Error: No value provided to encrypt")
+            return
+
+        # Validate JSON if it looks like JSON
+        if value_to_encrypt.strip().startswith('{'):
+            try:
+                json.loads(value_to_encrypt)
+                print("✓ Valid JSON detected")
+            except json.JSONDecodeError as e:
+                print(f"Warning: Value looks like JSON but is invalid: {e}")
+                response = input("Continue anyway? (y/N): ")
+                if response.lower() != 'y':
+                    return
+
+        # Encrypt the value
+        encrypted_value = encrypt_value(value_to_encrypt, args.env)
+
+        print(f"\n✓ Encryption successful!")
+        print(f"Environment: {args.env or 'current'}")
+        print(f"Encrypted value:")
+        print(f"{encrypted_value}")
+        print(f"\nCopy the above value to your configuration file.")
+
+        # Show usage example
+        print(f"\nUsage in config file:")
+        print(f"MY_SECRET_KEY = {encrypted_value}")
+
+    except Exception as e:
+        print(f"Error: {e}")
+        sys.exit(1)
+
+if __name__ == '__main__':
+    # Propagate the exit code returned by the --encrypt-all branch.
+    sys.exit(main())
diff --git a/tool_durations_from_log.py b/tool_stats_durations_from_log.py
similarity index 100%
rename from tool_durations_from_log.py
rename to tool_stats_durations_from_log.py
diff --git a/tool_getStats.py b/tool_stats_get_codelines.py
similarity index 100%
rename from tool_getStats.py
rename to tool_stats_get_codelines.py
diff --git a/tool_showUnusedFunctions.py b/tool_stats_showUnusedFunctions.py
similarity index 100%
rename from tool_showUnusedFunctions.py
rename to tool_stats_showUnusedFunctions.py
diff --git a/tools_security_generate_master_keys.py b/tools_security_generate_master_keys.py
new file mode 100644
index 00000000..6ca35884
--- /dev/null
+++ b/tools_security_generate_master_keys.py
@@ -0,0 +1,89 @@
+#!/usr/bin/env python3
+"""
+Generate secure master keys for all environments.
+
+This tool generates cryptographically secure 256-bit master keys for all environments
+and updates the key.txt file with the new keys.
+
+Usage:
+    python tools_security_generate_master_keys.py
+    python tools_security_generate_master_keys.py --output "path/to/key.txt"
+"""
+
+import sys
+import secrets
+import base64
+import argparse
+from datetime import datetime
+from pathlib import Path
+
+def generate_master_key():
+    """Generate a secure 256-bit master key."""
+    # Generate 32 random bytes (256 bits)
+    key_bytes = secrets.token_bytes(32)
+    # Encode as base64 for easy storage
+    return base64.urlsafe_b64encode(key_bytes).decode('utf-8')
+
+def main():
+    parser = argparse.ArgumentParser(description='Generate secure master keys for all environments')
+    parser.add_argument('--output', '-o',
+                        default='../local/key.txt',
+                        help='Output file path (default: ../local/key.txt)')
+    parser.add_argument('--force', '-f', action='store_true',
+                        help='Overwrite existing key file without confirmation')
+
+    args = parser.parse_args()
+
+    # Convert to absolute path
+    output_path = Path(args.output).resolve()
+
+    # Check if file exists and get confirmation
+    if output_path.exists() and not args.force:
+        response = input(f"File {output_path} already exists. Overwrite? (y/N): ")
+        if response.lower() != 'y':
+            print("Operation cancelled.")
+            return
+
+    try:
+        # Generate keys for all environments
+        keys = {
+            'prod': generate_master_key(),
+            'int': generate_master_key(),
+            'dev': generate_master_key()
+        }
+
+        # Create output content
+        content = []
+        content.append("# PowerOn Master Keys")
+        # Record the actual generation time (not the script file's mtime)
+        content.append("# Generated on: " + datetime.now().isoformat())
+        content.append("# WARNING: Keep this file secure and never commit to version control!")
+        content.append("")
+
+        for env, key in keys.items():
+            content.append(f"{env} = {key}")
+
+        # Ensure output directory exists
+        output_path.parent.mkdir(parents=True, exist_ok=True)
+
+        # Write to file
+        with open(output_path, 'w', encoding='utf-8') as f:
+            f.write('\n'.join(content))
+
+        print("✓ Master keys generated successfully!")
+        print(f"Output file: {output_path}")
+        print("\nGenerated keys:")
+        for env, key in keys.items():
+            print(f"  {env}: {key[:20]}...")
+
+        print(f"\n⚠️ IMPORTANT SECURITY NOTES:")
+        print(f"  - Keep this file secure and never commit to version control")
+        print(f"  - Store production keys in Azure environment variables")
+        print(f"  - Share development keys securely with team members")
+        print(f"  - Consider rotating keys regularly")
+
+    except Exception as e:
+        print(f"Error generating keys: {e}")
+        sys.exit(1)
+
+if __name__ == '__main__':
+    main()