diff --git a/.gitignore b/.gitignore index eb6d2935..df4b0c6c 100644 --- a/.gitignore +++ b/.gitignore @@ -167,4 +167,5 @@ cython_debug/ # local data gwserver/_database* gwserver/results/* -*.log.* \ No newline at end of file +*.log.* +test-chat \ No newline at end of file diff --git a/app.py b/app.py index ad932e9a..a167503c 100644 --- a/app.py +++ b/app.py @@ -1,19 +1,75 @@ import os +import sys +from urllib.parse import quote_plus + os.environ["NUMEXPR_MAX_THREADS"] = "12" -from fastapi import FastAPI, HTTPException, Depends, Body, status, Response +from fastapi import FastAPI from fastapi.middleware.cors import CORSMiddleware +from fastapi.security import HTTPBearer from contextlib import asynccontextmanager -from zoneinfo import ZoneInfo import logging from logging.handlers import RotatingFileHandler -from datetime import timedelta -import pathlib +from datetime import datetime from modules.shared.configuration import APP_CONFIG -from apscheduler.schedulers.asyncio import AsyncIOScheduler -from apscheduler.triggers.cron import CronTrigger +from modules.shared.eventManagement import eventManager +from modules.features import featuresLifecycle as featuresLifecycle + +class DailyRotatingFileHandler(RotatingFileHandler): + """ + A rotating file handler that automatically switches to a new file when the date changes. + The log file name includes the current date and switches at midnight. 
+ """ + + def __init__( + self, logDir, filenamePrefix, maxBytes=10485760, backupCount=5, **kwargs + ): + self.logDir = logDir + self.filenamePrefix = filenamePrefix + self.currentDate = None + self.currentFile = None + + # Initialize with today's file + self._updateFileIfNeeded() + + # Call parent constructor with current file + super().__init__( + self.currentFile, maxBytes=maxBytes, backupCount=backupCount, **kwargs + ) + + def _updateFileIfNeeded(self): + """Update the log file if the date has changed""" + today = datetime.now().strftime("%Y%m%d") + + if self.currentDate != today: + self.currentDate = today + newFile = os.path.join(self.logDir, f"{self.filenamePrefix}_{today}.log") + + if self.currentFile != newFile: + self.currentFile = newFile + return True + return False + + def emit(self, record): + """Emit a log record, switching files if date has changed""" + # Check if we need to switch to a new file + if self._updateFileIfNeeded(): + # Close current file and open new one + if self.stream: + self.stream.close() + self.stream = None + + # Update the baseFilename for the parent class + self.baseFilename = self.currentFile + # Reopen the stream + if not self.delay: + self.stream = self._open() + + # Call parent emit method + super().emit(record) + def initLogging(): """Initialize logging with configuration from APP_CONFIG""" @@ -21,29 +77,45 @@ def initLogging(): logLevelName = APP_CONFIG.get("APP_LOGGING_LOG_LEVEL", "WARNING") logLevel = getattr(logging, logLevelName) + # Get log directory from config + logDir = APP_CONFIG.get("APP_LOGGING_LOG_DIR", "./") + if not os.path.isabs(logDir): + # If relative path, make it relative to the gateway directory + gatewayDir = os.path.dirname(os.path.abspath(__file__)) + logDir = os.path.join(gatewayDir, logDir) + + # Ensure log directory exists + os.makedirs(logDir, exist_ok=True) + # Create formatters - using single line format consoleFormatter = logging.Formatter( fmt="%(asctime)s - %(levelname)s - %(name)s - 
%(message)s", - datefmt=APP_CONFIG.get("APP_LOGGING_DATE_FORMAT", "%Y-%m-%d %H:%M:%S") + datefmt=APP_CONFIG.get("APP_LOGGING_DATE_FORMAT", "%Y-%m-%d %H:%M:%S"), ) - + # File formatter with more detailed error information but still single line fileFormatter = logging.Formatter( fmt="%(asctime)s - %(levelname)s - %(name)s - %(message)s - %(pathname)s:%(lineno)d - %(funcName)s", - datefmt=APP_CONFIG.get("APP_LOGGING_DATE_FORMAT", "%Y-%m-%d %H:%M:%S") + datefmt=APP_CONFIG.get("APP_LOGGING_DATE_FORMAT", "%Y-%m-%d %H:%M:%S"), ) # Add filter to exclude Chrome DevTools requests class ChromeDevToolsFilter(logging.Filter): def filter(self, record): - return not (isinstance(record.msg, str) and - ('.well-known/appspecific/com.chrome.devtools.json' in record.msg or - 'Request: /index.html' in record.msg)) + return not ( + isinstance(record.msg, str) + and ( + ".well-known/appspecific/com.chrome.devtools.json" in record.msg + or "Request: /index.html" in record.msg + ) + ) # Add filter to exclude all httpcore loggers (including sub-loggers) class HttpcoreStarFilter(logging.Filter): def filter(self, record): - return not (record.name == 'httpcore' or record.name.startswith('httpcore.')) + return not ( + record.name == "httpcore" or record.name.startswith("httpcore.") + ) # Add filter to exclude HTTP debug messages class HTTPDebugFilter(logging.Filter): @@ -51,14 +123,14 @@ def initLogging(): if isinstance(record.msg, str): # Filter out HTTP debug messages http_debug_patterns = [ - 'receive_response_body.started', - 'receive_response_body.complete', - 'response_closed.started', - '_send_single_request', - 'httpcore.http11', - 'httpx._client', - 'HTTP Request', - 'multipart.multipart' + "receive_response_body.started", + "receive_response_body.complete", + "response_closed.started", + "_send_single_request", + "httpcore.http11", + "httpx._client", + "HTTP Request", + "multipart.multipart", ] return not any(pattern in record.msg for pattern in http_debug_patterns) return True @@ 
-70,8 +142,39 @@ def initLogging(): # Remove only emojis, preserve other Unicode characters like quotes import re import unicodedata + # Remove emoji characters specifically - record.msg = ''.join(char for char in record.msg if unicodedata.category(char) != 'So' or not (0x1F600 <= ord(char) <= 0x1F64F or 0x1F300 <= ord(char) <= 0x1F5FF or 0x1F680 <= ord(char) <= 0x1F6FF or 0x1F1E0 <= ord(char) <= 0x1F1FF or 0x2600 <= ord(char) <= 0x26FF or 0x2700 <= ord(char) <= 0x27BF)) + record.msg = "".join( + char + for char in record.msg + if unicodedata.category(char) != "So" + or not ( + 0x1F600 <= ord(char) <= 0x1F64F + or 0x1F300 <= ord(char) <= 0x1F5FF + or 0x1F680 <= ord(char) <= 0x1F6FF + or 0x1F1E0 <= ord(char) <= 0x1F1FF + or 0x2600 <= ord(char) <= 0x26FF + or 0x2700 <= ord(char) <= 0x27BF + ) + ) + return True + + # Add filter to normalize problematic unicode (e.g., arrows) to ASCII for terminals like cp1252 + class UnicodeArrowFilter(logging.Filter): + def filter(self, record): + if isinstance(record.msg, str): + translation_map = { + "\u2192": "->", # rightwards arrow + "\u2190": "<-", # leftwards arrow + "\u2194": "<->", # left right arrow + "\u21D2": "=>", # rightwards double arrow + "\u21D0": "<=", # leftwards double arrow + "\u21D4": "<=>", # left right double arrow + "\u00AB": "<<", # left-pointing double angle quotation mark + "\u00BB": ">>", # right-pointing double angle quotation mark + } + for u, ascii_eq in translation_map.items(): + record.msg = record.msg.replace(u, ascii_eq) return True # Configure handlers based on config @@ -85,35 +188,30 @@ def initLogging(): consoleHandler.addFilter(HttpcoreStarFilter()) consoleHandler.addFilter(HTTPDebugFilter()) consoleHandler.addFilter(EmojiFilter()) + consoleHandler.addFilter(UnicodeArrowFilter()) handlers.append(consoleHandler) # Add file handler if enabled if APP_CONFIG.get("APP_LOGGING_FILE_ENABLED", True): - # Get log file path and ensure it's absolute - logFile = APP_CONFIG.get("APP_LOGGING_LOG_FILE", 
"app.log") - if not os.path.isabs(logFile): - # If relative path, make it relative to the gateway directory - gatewayDir = os.path.dirname(os.path.abspath(__file__)) - logFile = os.path.join(gatewayDir, logFile) - - # Ensure log directory exists - logDir = os.path.dirname(logFile) - if logDir: - os.makedirs(logDir, exist_ok=True) - - rotationSize = int(APP_CONFIG.get("APP_LOGGING_ROTATION_SIZE", 10485760)) # Default: 10MB + # Create daily application log file with automatic date switching + rotationSize = int( + APP_CONFIG.get("APP_LOGGING_ROTATION_SIZE", 10485760) + ) # Default: 10MB backupCount = int(APP_CONFIG.get("APP_LOGGING_BACKUP_COUNT", 5)) - - fileHandler = RotatingFileHandler( - logFile, - maxBytes=rotationSize, - backupCount=backupCount + + fileHandler = DailyRotatingFileHandler( + logDir=logDir, + filenamePrefix="log_app", + maxBytes=rotationSize, + backupCount=backupCount, + encoding="utf-8", ) fileHandler.setFormatter(fileFormatter) fileHandler.addFilter(ChromeDevToolsFilter()) fileHandler.addFilter(HttpcoreStarFilter()) fileHandler.addFilter(HTTPDebugFilter()) fileHandler.addFilter(EmojiFilter()) + fileHandler.addFilter(UnicodeArrowFilter()) handlers.append(fileHandler) # Configure the root logger @@ -122,101 +220,162 @@ def initLogging(): format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", datefmt=APP_CONFIG.get("APP_LOGGING_DATE_FORMAT", "%Y-%m-%d %H:%M:%S"), handlers=handlers, - force=True # Force reconfiguration of the root logger + force=True, # Force reconfiguration of the root logger ) # Silence noisy third-party libraries - use the same level as the root logger - noisyLoggers = ["httpx", "httpcore", "urllib3", "asyncio", "fastapi.security.oauth2", "msal"] + noisyLoggers = [ + "httpx", + "httpcore", + "urllib3", + "asyncio", + "fastapi.security.oauth2", + "msal", + ] for loggerName in noisyLoggers: logging.getLogger(loggerName).setLevel(logging.WARNING) # Log the current logging configuration logger = logging.getLogger(__name__) 
logger.info(f"Logging initialized with level {logLevelName}") - logger.info(f"Log file: {logFile if APP_CONFIG.get('APP_LOGGING_FILE_ENABLED', True) else 'disabled'}") - logger.info(f"Console logging: {'enabled' if APP_CONFIG.get('APP_LOGGING_CONSOLE_ENABLED', True) else 'disabled'}") + logger.info(f"Log directory: {logDir}") + + if APP_CONFIG.get("APP_LOGGING_FILE_ENABLED", True): + today = datetime.now().strftime("%Y%m%d") + appLogFile = os.path.join(logDir, f"log_app_{today}.log") + logger.info(f"Application log file: {appLogFile} (auto-switches daily)") + else: + logger.info("Application log file: disabled") + + logger.info( + f"Console logging: {'enabled' if APP_CONFIG.get('APP_LOGGING_CONSOLE_ENABLED', True) else 'disabled'}" + ) + + +def makeSqlalchemyDbUrl() -> str: + host = APP_CONFIG.get("SQLALCHEMY_DB_HOST", "localhost") + port = APP_CONFIG.get("SQLALCHEMY_DB_PORT", "5432") + db = APP_CONFIG.get("SQLALCHEMY_DB_DATABASE", "project_gateway") + user = APP_CONFIG.get("SQLALCHEMY_DB_USER", "postgres") + pwd = quote_plus(APP_CONFIG.get("SQLALCHEMY_DB_PASSWORD_SECRET", "")) + # On Windows, prefer asyncpg to avoid psycopg + ProactorEventLoop incompatibility + if sys.platform == "win32": + return f"postgresql+asyncpg://{user}:{pwd}@{host}:{port}/{db}" + return f"postgresql+psycopg://{user}:{pwd}@{host}:{port}/{db}" + # Initialize logging initLogging() logger = logging.getLogger(__name__) instanceLabel = APP_CONFIG.get("APP_ENV_LABEL") + # Define lifespan context manager for application startup/shutdown events @asynccontextmanager async def lifespan(app: FastAPI): - # Startup logic logger.info("Application is starting up") - - # Initialize root interface to ensure database is properly set up - from modules.interfaces.interfaceAppObjects import getRootInterface - getRootInterface() - - # Setup APScheduler for JIRA sync - scheduler = AsyncIOScheduler(timezone=ZoneInfo("Europe/Zurich")) - try: - from modules.services.serviceDeltaSync import 
perform_sync_jira_delta_group - # Schedule sync every 20 minutes (at minutes 00, 20, 40) - scheduler.add_job( - perform_sync_jira_delta_group, - CronTrigger(minute="0,20,40"), - id="jira_delta_group_sync", - replace_existing=True, - coalesce=True, - max_instances=1, - misfire_grace_time=1800, - ) - scheduler.start() - logger.info("APScheduler started (jira_delta_group_sync every 20 minutes at 00, 20, 40)") - - # Run initial sync on startup (non-blocking failure) - try: - logger.info("Running initial JIRA sync on app startup...") - await perform_sync_jira_delta_group() - logger.info("Initial JIRA sync completed successfully") - except Exception as e: - logger.error(f"Initial JIRA sync failed: {str(e)}") - except Exception as e: - logger.error(f"Failed to initialize scheduler or JIRA sync: {str(e)}") - + + # --- Init Managers --- + await featuresLifecycle.start() + eventManager.start() + yield - - # Shutdown logic + + # --- Stop Managers --- + eventManager.stop() + await featuresLifecycle.stop() logger.info("Application has been shut down") - try: - if 'scheduler' in locals() and scheduler.running: - scheduler.shutdown(wait=False) - logger.info("APScheduler stopped") - except Exception as e: - logger.error(f"Error shutting down scheduler: {str(e)}") + # START APP app = FastAPI( - title="PowerOn | Data Platform API", + title="PowerOn | Data Platform API", description=f"Backend API for the Multi-Agent Platform by ValueOn AG ({instanceLabel})", - lifespan=lifespan + lifespan=lifespan, + swagger_ui_init_oauth={ + "usePkceWithAuthorizationCodeGrant": True, + }, ) +# Configure OpenAPI security scheme for Swagger UI +# This adds the "Authorize" button to the /docs page +securityScheme = HTTPBearer() +app.openapi_schema = None # Reset schema to regenerate with security + + +def customOpenapi(): + if app.openapi_schema: + return app.openapi_schema + + from fastapi.openapi.utils import get_openapi + + openapiSchema = get_openapi( + title=app.title, + version="1.0.0", + 
description=app.description, + routes=app.routes, + ) + + # Add security scheme definition + openapiSchema["components"]["securitySchemes"] = { + "BearerAuth": { + "type": "http", + "scheme": "bearer", + "bearerFormat": "JWT", + "description": "Enter your JWT token (obtained from login endpoint or browser cookies)", + } + } + + # Apply security globally to all endpoints + # Individual endpoints can override this if needed + openapiSchema["security"] = [{"BearerAuth": []}] + + app.openapi_schema = openapiSchema + return app.openapi_schema + + +app.openapi = customOpenapi + # Parse CORS origins from environment variable -def get_allowed_origins(): - origins_str = APP_CONFIG.get("APP_ALLOWED_ORIGINS", "http://localhost:8080") +def getAllowedOrigins(): + originsStr = APP_CONFIG.get("APP_ALLOWED_ORIGINS", "http://localhost:8080") # Split by comma and strip whitespace - origins = [origin.strip() for origin in origins_str.split(",")] + origins = [origin.strip() for origin in originsStr.split(",")] logger.info(f"CORS allowed origins: {origins}") return origins + # CORS configuration using environment variables app.add_middleware( CORSMiddleware, - allow_origins= get_allowed_origins(), + allow_origins=getAllowedOrigins(), allow_credentials=True, allow_methods=["GET", "POST", "PUT", "DELETE", "OPTIONS"], allow_headers=["*"], expose_headers=["*"], - max_age=86400 # Increased caching for preflight requests + max_age=86400, # Increased caching for preflight requests +) + +# CSRF protection middleware +from modules.security.csrf import CSRFMiddleware +from modules.security.tokenRefreshMiddleware import ( + TokenRefreshMiddleware, + ProactiveTokenRefreshMiddleware, +) + +app.add_middleware(CSRFMiddleware) + +# Token refresh middleware (silent refresh for expired OAuth tokens) +app.add_middleware(TokenRefreshMiddleware, enabled=True) + +# Proactive token refresh middleware (refresh tokens before they expire) +app.add_middleware( + ProactiveTokenRefreshMiddleware, enabled=True, 
check_interval_minutes=5 ) # Include all routers + from modules.routes.routeAdmin import router as generalRouter app.include_router(generalRouter) @@ -232,6 +391,9 @@ app.include_router(userRouter) from modules.routes.routeDataFiles import router as fileRouter app.include_router(fileRouter) +from modules.routes.routeDataNeutralization import router as neutralizationRouter +app.include_router(neutralizationRouter) + from modules.routes.routeDataPrompts import router as promptRouter app.include_router(promptRouter) @@ -241,6 +403,9 @@ app.include_router(connectionsRouter) from modules.routes.routeWorkflows import router as workflowRouter app.include_router(workflowRouter) +from modules.routes.routeChatPlayground import router as chatPlaygroundRouter +app.include_router(chatPlaygroundRouter) + from modules.routes.routeSecurityLocal import router as localRouter app.include_router(localRouter) @@ -253,9 +418,15 @@ app.include_router(googleRouter) from modules.routes.routeVoiceGoogle import router as voiceGoogleRouter app.include_router(voiceGoogleRouter) -from modules.routes.routeVoiceStreaming import router as voiceStreamingRouter -app.include_router(voiceStreamingRouter) - -# Admin security routes (token listing and revocation, logs, db tools) from modules.routes.routeSecurityAdmin import router as adminSecurityRouter -app.include_router(adminSecurityRouter) \ No newline at end of file +app.include_router(adminSecurityRouter) + +from modules.routes.routeSharepoint import router as sharepointRouter +app.include_router(sharepointRouter) + +from modules.routes.routeDataAutomation import router as automationRouter +app.include_router(automationRouter) + +from modules.routes.routeAdminAutomationEvents import router as adminAutomationEventsRouter +app.include_router(adminAutomationEventsRouter) + diff --git a/config.ini b/config.ini index bc8aeb7f..ab0b6712 100644 --- a/config.ini +++ b/config.ini @@ -5,21 +5,6 @@ Auth_ALGORITHM = HS256 Auth_TOKEN_TYPE = bearer -# OpenAI 
configuration -Connector_AiOpenai_API_URL = https://api.openai.com/v1/chat/completions -Connector_AiOpenai_API_SECRET = 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 -Connector_AiOpenai_MODEL_NAME = gpt-4o -Connector_AiOpenai_TEMPERATURE = 0.2 -Connector_AiOpenai_MAX_TOKENS = 2000 - -# Anthropic configuration -Connector_AiAnthropic_API_URL = https://api.anthropic.com/v1/messages -Connector_AiAnthropic_API_SECRET_OLD = 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 -Connector_AiAnthropic_API_SECRET = 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 -Connector_AiAnthropic_MODEL_NAME = claude-3-5-sonnet-20241022 -Connector_AiAnthropic_TEMPERATURE = 0.2 -Connector_AiAnthropic_MAX_TOKENS = 2000 - # File management configuration File_Management_MAX_UPLOAD_SIZE_MB = 50 File_Management_CLEANUP_INTERVAL = 240 @@ -36,33 +21,6 @@ Security_LOCK_DURATION_MINUTES = 30 # Content Neutralization configuration Content_Neutralization_ENABLED = False -# Agent Mail configuration -Service_MSFT_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c -Service_MSFT_CLIENT_SECRET = Kxf8Q~2lJIteZ~JaI32kMf1lfaWKATqxXiNiFbzV -Service_MSFT_TENANT_ID = common - -# Google Service configuration -Service_GOOGLE_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com -Service_GOOGLE_CLIENT_SECRET = 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 - -# Tavily Web Search configuration -Connector_WebTavily_API_KEY = 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 - -# Google Cloud Speech Services configuration -Connector_GoogleSpeech_API_KEY = { - "type": "service_account", - "project_id": "poweronid", - "private_key_id": "88db66e4248326e9baeac4231bc196fd46a9a441", - "private_key": "-----BEGIN PRIVATE 
KEY-----\nMIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQDTnJuxA+xBL3LA\nPgFILYCsGuppkkdO6d153Q36f2jTj6zpH3OhKMVsaaTBknG2o2+D0Whlk6Yh5rOw\nkWzpMC3y81leRLm5kucERMkBUgd2GL4v16k6m+QGuC3BFlt/XeyuckJNW0V6v/Dy\n3+bSYM7/5o1ftPNWJeAIEWoE/V4wKCYde8RE4Vp1LO5YwhgcM4rRuPmF2OhekpA+\npteYwkY/8/gTTRpZIc8OTsBYRbaMwsjoDj5riuL3boVtkwZwKRb+ZLvupXeU7Ds7\n1305odTcZUwnImHiHfuq83ZJViQiLRNhUAFnQIXPrYLwEpCmzRBGzYHaRlb69ga/\nzqUbKnclAgMBAAECggEAH6W9qHehubioPMAJM7Y6bC2KU/JLNS4csBZd+idb52gG\nwBwIEFjR+H4ZjymhAA4+pe7c4h7MKyh0RI/l7eoFX98Cb+rEq/r1udm1BhGH3s2h\n2UiI8qRQh1YRjF2/nrN5VjhDBOFa6W9opaopZy/l8AzsT8f21zIgPen8z8o6GpFg\n64fJFcbqCGk2ykN2+x2pIOT04tmCszrfbXZP8LEs4xrUB/XwlHL1vT/M3EWIKbnj\njDaIMjw7q/KRgNUvmKS6SU9b3fnOLcQCz9f5cKdiWACKIU/UvuiWhWJ9ou6BWLWU\nva1A6Fi4XJjhW7s3po58/ioQfl0A9p/L92lGg4ST8QKBgQDx8LIM1g0dh9Ql6LmH\nBUGCOewNNXTs+y3ZznUfvVMoyyZK5w/pzeUvkmOwzbRGnZJ9WyCghq8aezyEpo2D\nPL7Odf988IeHmvhyZIM4PLJYgDvSwGXyf/gh6gJkf/4wpx+tx/yQYNBm3Rht7sA0\npSaLehK0E0kW1uyBzHGKgyQOhwKBgQDf6LiZ7hSQqh54vIU1XMDRth0UOo/s/HGi\nDoij29KjmHjLkm8vOlCo83e79X0WhcnyB5kM7nWFegwcM1PJ0Dl8gidUuTlOVDtM\n5u2AaxDoyXAUL457U5dGFAIW+R653ZDkzMfCglacP8HixXEyIpL1cTLqiCAgzszS\nLcSWwoAr8wKBgQC4CGm3X97sFpTmHSd6sCHLaDnJNl9xoAKZifUHpqCqCBVhpm8x\nXp+11vmj1GULzfJPDlE8Khbp4tH+6R39tOhC7fjgVaoSGWxgv1odHfZfYXOf9R/X\nHUZmrbUSM1XsNkPfkZ7pR+teQ1HA1Xo40WMHd1zgw0a2a9fNR/EZ9nUn4wKBgGaK\nUEgGNRrPHadTRnnaoV8o1IZYD2OLdIqvtzm7SOqsv90SkaKCRUAqR5InaYKwAHy7\nqAa5Cc73xqX/h4arujff7x0ouiq5/nJIa0ndPmAtKAvGf6zQ6j0ompBkxAKAioON\nmInmYL2roSI2I5G/LagDkDrB3lzH+Brk5NvZ9RKrAoGAGox462GGGb/NbGdDkahN\ndifzYYvq4FPiWFFo0ynKAulxCBWLXO/N45XNuAyen433d8eREcAYz1Dzax44+MdQ\nHo9dU7YcZvFyt6iZsYeQF8dluHui3vzMpUe0KbqpZC5KMOSw53ZdNIwzo8NTAK59\n+uv3dHGj7sS8fhDo3yCifzc=\n-----END PRIVATE KEY-----\n", - "client_email": "poweron-voice-services@poweronid.iam.gserviceaccount.com", - "client_id": "116641749406798186404", - "auth_uri": "https://accounts.google.com/o/oauth2/auth", - "token_uri": "https://oauth2.googleapis.com/token", - "auth_provider_x509_cert_url": 
"https://www.googleapis.com/oauth2/v1/certs", - "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/poweron-voice-services%40poweronid.iam.gserviceaccount.com", - "universe_domain": "googleapis.com" -} - # Web Search configuration Web_Search_MAX_QUERY_LENGTH = 400 Web_Search_MAX_RESULTS = 20 @@ -71,4 +29,9 @@ Web_Search_MIN_RESULTS = 1 # Web Crawl configuration Web_Crawl_TIMEOUT = 30 Web_Crawl_MAX_RETRIES = 3 -Web_Crawl_RETRY_DELAY = 2 \ No newline at end of file +Web_Crawl_RETRY_DELAY = 2 + +# Web Research configuration +Web_Research_MAX_DEPTH = 2 +Web_Research_MAX_LINKS_PER_DOMAIN = 4 +Web_Research_CRAWL_TIMEOUT_MINUTES = 10 \ No newline at end of file diff --git a/debug_audio/audio_google_interpreter_recording.webm b/debug_audio/audio_google_interpreter_recording.webm deleted file mode 100644 index 862174f4..00000000 Binary files a/debug_audio/audio_google_interpreter_recording.webm and /dev/null differ diff --git a/env_dev.env b/env_dev.env index 24a15187..e1dd2756 100644 --- a/env_dev.env +++ b/env_dev.env @@ -4,51 +4,33 @@ APP_ENV_TYPE = dev APP_ENV_LABEL = Development Instance Patrick APP_API_URL = http://localhost:8000 - -# Database Configuration for Application -# JSON File Storage (current) -# DB_APP_HOST=D:/Temp/_powerondb -# DB_APP_DATABASE=app -# DB_APP_USER=dev_user -# DB_APP_PASSWORD_SECRET=dev_password +APP_KEY_SYSVAR = D:/Athi/Local/Web/poweron/local/key.txt +APP_INIT_PASS_ADMIN_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEeFFtRGtQeVUtcjlrU3dab1ZxUm9WSks0MlJVYUtERFlqUElHemZrOGNENk1tcmJNX3Vxc01UMDhlNU40VzZZRVBpUGNmT3podzZrOGhOeEJIUEt4eVlSWG5UYXA3d09DVXlLT21Kb1JYSUU9 +APP_INIT_PASS_EVENT_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpERzZjNm56WGVBdjJTeG5Udjd6OGQwUVotYXUzQjJ1YVNyVXVBa3NZVml3ODU0MVNkZjhWWmJwNUFkc19BcHlHMTU1Q3BRcHU0cDBoZkFlR2l6UEZQU3d2U3MtMDh5UDZteGFoQ0EyMUE1ckE9 # PostgreSQL Storage (new) DB_APP_HOST=localhost -DB_APP_DATABASE=poweron_app_dev +DB_APP_DATABASE=poweron_app DB_APP_USER=poweron_dev -DB_APP_PASSWORD_SECRET=dev_password 
+DB_APP_PASSWORD_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEcUIxNEFfQ2xnS0RrSC1KNnUxTlVvTGZoMHgzaEI4Z3NlVzVROTVLak5Ubi1vaEZubFZaMTFKMGd6MXAxekN2d2NvMy1hRjg2UVhybktlcFA5anZ1WjFlQmZhcXdwaGhWdzRDc3ExeUhzWTg9 DB_APP_PORT=5432 -# Database Configuration Chat -# JSON File Storage (current) -# DB_CHAT_HOST=D:/Temp/_powerondb -# DB_CHAT_DATABASE=chat -# DB_CHAT_USER=dev_user -# DB_CHAT_PASSWORD_SECRET=dev_password - # PostgreSQL Storage (new) DB_CHAT_HOST=localhost -DB_CHAT_DATABASE=poweron_chat_dev +DB_CHAT_DATABASE=poweron_chat DB_CHAT_USER=poweron_dev -DB_CHAT_PASSWORD_SECRET=dev_password +DB_CHAT_PASSWORD_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpERFNzNVhoalpCR0QxYXAwdEpXWXVVOTdZdWtqWW5FNXFGcFl2amNYLWYwYl9STXltRlFxLWNzVWlMVnNYdXk0RklnRExFT0FaQjg2aGswNnhhSGhCN29KN2VEb2FlUV9NTlV3b0tLelplSVU9 DB_CHAT_PORT=5432 -# Database Configuration Management -# JSON File Storage (current) -# DB_MANAGEMENT_HOST=D:/Temp/_powerondb -# DB_MANAGEMENT_DATABASE=management -# DB_MANAGEMENT_USER=dev_user -# DB_MANAGEMENT_PASSWORD_SECRET=dev_password - # PostgreSQL Storage (new) DB_MANAGEMENT_HOST=localhost -DB_MANAGEMENT_DATABASE=poweron_management_dev +DB_MANAGEMENT_DATABASE=poweron_management DB_MANAGEMENT_USER=poweron_dev -DB_MANAGEMENT_PASSWORD_SECRET=dev_password +DB_MANAGEMENT_PASSWORD_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEUldqSTVpUnFqdGhITDYzT3RScGlMYVdTMmZhOXdudDRCc3dhdllOd3l6MS1vWHY2MjVsTUF1Sk9saEJOSk9ONUlBZjQwb2c2T1gtWWJhcXFzVVVXd01xc0U0b0lJX0JyVDRxaDhNS01JcWs9 DB_MANAGEMENT_PORT=5432 # Security Configuration -APP_JWT_SECRET_SECRET=rotated_jwt_secret_2025_09_17_f8a3b6c2-7d4e-45b6-9a1f-3c0b9a1d2e7f +APP_JWT_KEY_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpERjlrSktmZHVuQnJ1VVJDdndLaUcxZGJsT2ZlUFRlcFdOZ001RnlzM2FhLWhRV2tjWWFhaWQwQ3hkcUFvbThMcndxSjFpYTdfRV9OZGhTcksxbXFTZWg5MDZvOHpCVXBHcDJYaHlJM0tyNWRZckZsVHpQcmxTZHJoZUs1M3lfU2ljRnJaTmNSQ0w0X085OXI0QW80M2xfQnJqZmZ6VEh3TUltX0xzeE42SGtZPQ== APP_TOKEN_EXPIRY=300 # CORS Configuration @@ -56,7 +38,7 @@ APP_ALLOWED_ORIGINS=http://localhost:8080,https://playground.poweron-center.net 
# Logging configuration APP_LOGGING_LOG_LEVEL = DEBUG -APP_LOGGING_LOG_FILE = poweron.log +APP_LOGGING_LOG_DIR = D:/Athi/Local/Web/poweron/local/logs APP_LOGGING_FORMAT = %(asctime)s - %(levelname)s - %(name)s - %(message)s APP_LOGGING_DATE_FORMAT = %Y-%m-%d %H:%M:%S APP_LOGGING_CONSOLE_ENABLED = True @@ -66,4 +48,29 @@ APP_LOGGING_BACKUP_COUNT = 5 # Service Redirects Service_MSFT_REDIRECT_URI = http://localhost:8000/api/msft/auth/callback -Service_GOOGLE_REDIRECT_URI = http://localhost:8000/api/google/auth/callback \ No newline at end of file +Service_GOOGLE_REDIRECT_URI = http://localhost:8000/api/google/auth/callback + +# AI configuration +Connector_AiOpenai_API_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEajBuZmtYTVdqLTBpQm9KZ2pCXzRCV3VhZzlYTEhKb1FqWXNrV3lyb25uZUN1WVVQUEY3dGYtejludV9MNGlKeVREanZGOGloV09mY2ttQ3k5SjBFOGFac2ZQTkNKNUZWVnRINVQyeWhsR2wyYnVrRDNzV2NqSHB0ajQ4UWtGeGZtbmR0Q3VvS0hDZlphVmpSc2Z6RG5nPT0= +Connector_AiAnthropic_API_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpENmFBWG16STFQUVZxNzZZRzRLYTA4X3lRanF1VkF4cU45OExNMzlsQmdISGFxTUxud1dXODBKcFhMVG9KNjdWVnlTTFFROVc3NDlsdlNHLUJXeG41NDBHaXhHR0VHVWl5UW9RNkVWbmlhakRKVW5pM0R4VHk0LUw0TV9LdkljNHdBLXJua21NQkl2b3l4UkVkMGN1YjBrMmJEeWtMay1jbmxrYWJNbUV0aktCXzU1djR2d2RSQXZORTNwcG92ZUVvVGMtQzQzTTVncEZTRGRtZUFIZWQ0dz09 +Connector_AiPerplexity_API_SECRET = DEV_ENC:Z0FBQUFBQm82Mzk2Q1MwZ0dNcUVBcUtuRDJIcTZkMXVvYnpjM3JEMzJiT1NKSHljX282ZDIyZTJYc09VSTdVNXAtOWU2UXp5S193NTk5dHJsWlFjRjhWektFOG1DVGY4ZUhHTXMzS0RPN1lNcF9nSlVWbW5BZ1hkZDVTejl6bVZNRFVvX29xamJidWRFMmtjQmkyRUQ2RUh6UTN1aWNPSUJBPT0= +Connector_AiTavily_API_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEQTdnUHMwd2pIaXNtMmtCTFREd0pyQXRKb1F5eGtHSnkyOGZiUnlBOFc0b3Vzcndrc3ViRm1nMDJIOEZKYWxqdWNkZGh5N0Z4R0JlQmxXSG5pVnJUR2VYckZhMWNMZ1FNeXJ3enJLVlpiblhOZTNleUg3ZzZyUzRZanFSeDlVMkI= + +# Agent Mail configuration +Service_MSFT_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c +Service_MSFT_CLIENT_SECRET = 
DEV_ENC:Z0FBQUFBQm83T29rV1pQelMtc1p1MXR4NTFpa19CTEhHQ0xfNmdPUmZqcWp5UHBMS0hYTGl4c1pPdmhTNTJVWUl5WnlnUUZhV0VTRzVCb0d5YjR1NnZPZk5CZ0dGazNGdUJVbjkxeVdrYlNiVjJUYzF2aVFtQnVxTHFqTTJqZlF0RTFGNmE1OGN1TEk= +Service_MSFT_TENANT_ID = common + +# Google Service configuration +Service_GOOGLE_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com +Service_GOOGLE_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpETDJhbGVQMHlFQzNPVFI1ZzBMa3pNMGlQUHhaQm10eVl1bFlSeTBybzlTOWE2MURXQ0hkRlo0NlNGbHQxWEl1OVkxQnVKYlhhOXR1cUF4T3k0WDdscktkY1oyYllRTmdDTWpfbUdwWGtSd1JvNlYxeTBJdEtaaS1vYnItcW0yaFM= + +# Google Cloud Speech Services configuration +Connector_GoogleSpeech_API_KEY_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpETk5FWWM3Q0JKMzhIYTlyMkhuNjA4NlF4dk82U2NScHhTVGY3UG83NkhfX3RrcWVtWWcyLXRjU1dTT21zWEl6YWRMMUFndXpsUnJOeHh3QThsNDZKRXROTzdXRUdsT0JZajZJNVlfb0gtMXkwWm9DOERPVnpjU0pyUEZfOGJsUnprT3ltMVVhalUyUm9hMUFtZEtHUnJqOGZ4dEZjZm5SWVVTckVCWnY1UkdVSHVmUlgwbnAyc0xDQW84R3ViSko5OHVCVWZRUVNiaG1pVFB6X3EwS0FPd2dUYjhiSmRjcXh2WEZiXzI4SFZqT21tbDduUWRyVWdFZXpmcVM5ZDR0VWtzZnF5UER6cGwwS2JlLV9CSTZ0Z0IyQ1h0YW9TcmhRTXZEckp4bWhmTkt6UTNYMk4zVkpnbUJmaDIxZnoyR2dWTEYwTUFEV0w2eUdUUGpoZk9XRkt4RVF1Z1NPdUpBeTcyWV9PY1Ffd2s0ZEdVekxGekhoeEl4TmNqaXYtbUJuSVdycFducERWdWtZajZnX011Q2w4eE9VMTBqQ1ZxRmdScWhXY1E3WWhzX1JZcHhxam9FbDVPN3Q1MWtrMUZuTUg3LVFQVHp1T1hpQWNDMzEzekVJWk9ybl91YUVjSkFob1VaMi1ONEtuMnRSOEg1S3QybUMwbVZDejItajBLTjM2Zy1hNzZQMW5LLVVDVGdFWm5BZUxNeEFnUkZzU3dxV0lCUlc0LWo4b05GczVpOGZSV2ZxbFBwUml6OU5tYjdnTks3Y3hrVEZVTHlmc1NPdFh4WE5pWldEZklOQUxBbjBpMTlkX3FFQVJ6c2NSZGdzTThycE92VW82enZKamhiRGFnU25aZGlHZHhZd2lUUmhuTVptNjhoWVlJQkxIOEkzbzJNMjZCZFJyM25tdXBnQ2ZWaHV3b2p6UWJpdk9xUEhBc1dyTlNmeF9wbm5yYUhHV01UZnVXWDFlNzBkdXlWUWhvcmJpSmljbmE3LUpUZEg4VzRwZ2JVSjdYUm1sODViQXVxUzdGTmZFbVpiN2V1YW5XV3U4b2VRWmxldGVGVHZsSldoekhVLU9wZ2V0cGZIYkNqM2pXVGctQVAyUm4xTHhpd1VVLXFhcnVEV21Rby1hbTlqTl84TjVveHdYTExUVkhHQ0ltaTB2WXJnY1NQVE5PbWg3ejgySElYc1JSTlQ3NDlFUWR6STZVUjVqaXFRN200NF9LY1ljQ0R2UldlWUtKY1NQVnJ4QXRyYTBGSWVuenhyM0Z0cWtndTd1eG8xRzY5a2dNZ1hkQ
m5MV3BHVzA2N1QwUkd6WlRGYTZQOUhnVWQ2S0Y5U0s1dXFNVXh5Q2pLWVUxSUQ2MlR1ak52NmRIZ2hlYTk1SGZGWS1RV3hWVU9rR3d1Rk9MLS11REZXbzhqMHpsSm1HYW1jMUNLT29YOHZsRWNaLTVvOFpmT3l3MHVwaERTT0dNLWFjcGRYZ25qT2szTkVFUnRFR3JWYS1aNXFIRnMyalozTlQzNFF2NXJLVHVPVF9zdTF6ZjlkbzJ4RFc2ZENmNFFxZDZzTzhfMUl0bW96V0lPZkh1dXFYZlEteFBlSG84Si1FNS1TTi1OMkFnX2pOYW8xY3MxMVJnVC02MDUyaXZfMEVHWDQtVlRpcENmV0h3V0dCWEFRS2prQXdNRlQ5dnRFVHU0Q1dNTmh0SlBCaU55bFMydWM1TTFFLW96ODBnV3dNZHFZTWZhRURYSHlrdzF3RlRuWDBoQUhSOUJWemtRM3pxcDJFbGJoaTJ3ZktRTlJxbXltaHBoZXVJVDlxS3cxNWo2c0ZBV0NzaUstRWdsMW1xLXFkanZGYUFiU0tSLXFQa0tkcDFoMV9kak41ZjQ0R214UmtOR1ZBanRuemY3Mmw1SkZ5aDZodGIzT3N2aV85MW9kcld6c0g0ZDgtTWo3b3Y3VjJCRnR2U2tMVm9rUXNVRnVHbzZXVTZ6RmI2RkNmajBfMWVnODVFbnpkT0oyci15czJHU0p1cUowTGZJMzVnd3hIRjQyTVhKOGRkcFRKdVpyQ3Yzd01Jb1lSajFmV0paeEV0cjk1SmpmdWpDVFJMUmMtUFctOGhaTmlKQXNRVlVUNlhJemxudHZCR056SVlBb3NOTEYxRTRLaFlVd2d3TWtxVlB6ZEtQLTkxOGMyY3N0a2pYRFUweDBNaGhja2xSSklPOUZla1dKTWRNbG8tUGdSNEV5cW90OWlOZFlIUExBd3U2b2hyS1owbXVMM3p0Qm41cUtzWUxYNzB1N3JpUTNBSGdsT0NuamNTb1lIbXR4MG1sakNPVkxBUXRLVE1xX0YxWDhOcERIY1lTQVFqS01CaXZKNllFaXlIR0JsM1pKMmV1OUo3TGI1WkRaVnYxUTl1LTM0SU1qN1V1b0RCT0x0VHNLTmNLZnk1S0MxYnBBcm03WnVua0xqaEhGUzhOU253ZkppRzdudXBSVlMxeFVOSWxtZ1o2RVBSQUhEUEFuQ1hxSVZMME4yWUtaU3VyRGo3RkUyRUNjT0pNcE1BdE1ZRzdXVl8ydUtXZjdMdHdEVW4teHUtTi1HSGliLUxud21TX0NtcGVkRFBHNkZ1WTlNczR4OUJfUVluc1BoV09oWS1scUdsNnB5d1U5M1huX3k4QzAyNldtb2hybktYN2xKZ1NTNWFsaWwzV3pCRVhkaGR5eTNlV1d6ZzFfaFZTT0E4UjRpQ3pKdEZxUlJ6UFZXM3laUndyWEk2NlBXLUpoajVhZzVwQXpWVzUtVjVNZFBwdWdQa3AxZC1KdGdqNnhibjN4dmFYb2cxcEVwc1g5R09zRUdINUZtOE5QRjVUU0dpZy1QVl9odnFtVDNuWFZLSURtMXlSMlhRNTBWSVFJbEdOOWpfVWV0SmdRWDdlUXZZWE8xRUxDN1I0aEN6MHYwNzM1cmpJS0ZpMnBYWkxfb3FsbEV1VnlqWGxqdVJ6SHlwSjAzRlMycTBaQ295NXNnZERpUnJQcjhrUUd3bkI4bDVzRmxQblhkaFJPTTdISnVUQmhET3BOMTM4bjVvUEc2VmZhb2lrR1FyTUl2RWNEeGg0U0dsNnV6eU5zOUxiNDY5SXBxR0hBS00wOTgyWTFnWkQyaEtLVUloT3ZxZGh0RWVGRmJzenFsaUtfZENQM0JzdkVVeTdXR3hUSmJST1NBMUI1NkVFWncwNW5JZVVLX1p1RXdqVnFfQWpvQ08yQjZhN1NkTkpTSnUxOVRXZXE0WFEtZWxhZW1NNXYtQ2sya0VGLURmS01lMkctNVY3c2ZhN0ZGR
FgwWHlabTFkeS1hcUZ1dDZ3cnpPQ3hha2IzVE11M0pqbklmU0diczBqTFBNZC1QZGp6VzNTSnJVSjJoWkJUQjVORG4tYUJmMEJtSUNUdVpEaGt6OTM3TjFOdVhXUHItZjRtZ25nU3NhZC1sVTVXNTRDTmxZbnlfeHNsdkpuMXhUYnE1MnpVQ0ZOclRWM1M4eHdXTzRXbFRZZVQtTS1iRVdXVWZMSGotcWg3MUxUYTFnSEEtanBCRHlZRUNIdGdpUFhsYjdYUndCZnRITzhMZVJ1dHFoVlVNb0duVjlxd0U4OGRuQVV3MG90R0hiYW5MWkxWVklzbWFRNzBfSUNrdzc5bVdtTXg0dExEYnRCaDI3c1I4TWFwLXZKR0wxSjRZYjZIV3ZqZjNqTWhFT0RGSDVMc1A1UzY2bDBiMGFSUy1fNVRQRzRJWDVydUpqb1ZfSHNVbldVeUN2YlAxSW5WVDdxVzJ1WHpLeUdmb0xWMDNHN05oQzY3YnhvUUdhS2xaOHNidkVvbTZtSHFlblhOYmwyR3NQdVJDRUdxREhWdF9ZcXhwUWxHc2hyLW5vUGhIUVhJNUNhY0hFU0ptVnI0TFVhZDE1TFBBUEstSkRoZWJ5MHJhUmZrR1ZrRlFtRGpxS1pOMmFMQjBsdjluY3FiYUU4eGJVVXlZVEpuNWdHVVhJMGtwaTdZR2NDbXd2eHpOQ09SeTV6N1BaVUpsR1pQVDBZcElJUUt6VnVpQmxSYnE4Y1BCWV9IRWdVV0p3enBGVHItdnBGN3NyNWFBWmkySnByWThsbDliSlExQmp3LVlBaDIyZXp6UnR6cU9rTzJmTDBlSVpON0tiWllMdm1oME1zTFl2S2ZYYllhQlY2VHNZRGtHUDY4U1lIVExLZTU4VzZxSTZrZHl1ZTBDc0g4SjI4WGYyZHV1bm9wQ3R2Z09ld1ZmUkN5alJGeHZKSHl1bWhQVXpNMzdjblpLcUhfSm02Qlh5S1FVN3lIcHl0NnlRPT0= + +# Feature SyncDelta JIRA configuration +Feature_SyncDelta_JIRA_DELTA_TOKEN_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEbm0yRUJ6VUJKbUwyRW5kMnRaNW4wM2YxMkJUTXVXZUdmdVRCaUZIVHU2TTV2RWZLRmUtZkcwZE4yRUNlNDQ0aUJWYjNfdVg5YjV5c2JwMHhoUUYxZWdkeS11bXR0eGxRLWRVaVU3cUVQZWJlNDRtY1lWUDdqeDVFSlpXS0VFX21WajlRS3lHQjc0bS11akkybWV3QUFlR2hNWUNYLUdiRjZuN2dQODdDSExXWG1Dd2ZGclI2aUhlSWhETVZuY3hYdnhkb2c2LU1JTFBvWFpTNmZtMkNVOTZTejJwbDI2eGE0OS1xUlIwQnlCSmFxRFNCeVJNVzlOMDhTR1VUamx4RDRyV3p6Tk9qVHBrWWdySUM3TVRaYjd3N0JHMFhpdzFhZTNDLTFkRVQ2RVE4U19COXRhRWtNc0NVOHRqUS1CRDFpZ19xQmtFLU9YSDU3TXBZQXpVcld3PT0= + +# Debug Configuration +APP_DEBUG_CHAT_WORKFLOW_ENABLED = True +APP_DEBUG_CHAT_WORKFLOW_DIR = ./test-chat \ No newline at end of file diff --git a/env_int.env b/env_int.env index 416a5b9e..27210a43 100644 --- a/env_int.env +++ b/env_int.env @@ -4,30 +4,33 @@ APP_ENV_TYPE = int APP_ENV_LABEL = Integration Instance APP_API_URL = https://gateway-int.poweron-center.net +APP_KEY_SYSVAR = CONFIG_KEY 
+APP_INIT_PASS_ADMIN_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjWm41MWZ4TUZGaVlrX3pWZWNwakJsY3Facm0wLVZDd1VKeTFoZEVZQnItcEdUUnVJS1NXeDBpM2xKbGRsYmxOSmRhc29PZjJSU2txQjdLbUVrTTE1NEJjUXBHbV9NOVJWZUR3QlJkQnJvTEU9 +APP_INIT_PASS_EVENT_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjdmtrakgxa0djekZVNGtTZV8wM2I5UUpCZllveVBMWXROYk5yS3BiV3JEelJSM09VYTRONHpnY3VtMGxDRk5JTEZSRFhtcDZ0RVRmZ1RicTFhb3c5dVZRQ1o4SmlkLVpPTW5MMTU2eTQ0Vkk9 # PostgreSQL Storage (new) DB_APP_HOST=gateway-int-server.postgres.database.azure.com DB_APP_DATABASE=poweron_app DB_APP_USER=heeshkdlby -DB_APP_PASSWORD_SECRET=VkAjgECESbEVQ$Tu +DB_APP_PASSWORD_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjb2dka2pnN0tUbW1EU0w1Rk1jNERKQ0Z1U3JkVDhuZWZDM0g5M0kwVDE5VHdubkZna3gtZVAxTnl4MDdrR1c1ZXJ3ejJHYkZvcGUwbHJaajBGOWJob0EzRXVHc0JnZkJyNGhHZTZHOXBxd2c9 DB_APP_PORT=5432 # PostgreSQL Storage (new) DB_CHAT_HOST=gateway-int-server.postgres.database.azure.com DB_CHAT_DATABASE=poweron_chat DB_CHAT_USER=heeshkdlby -DB_CHAT_PASSWORD_SECRET=VkAjgECESbEVQ$Tu +DB_CHAT_PASSWORD_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjczYzOUtTa21MMGJVTUQ5UmFfdWc3YlhCbWZOeXFaNEE1QzdJV3BLVjhnalBkLVVCMm5BZzdxdlFXQXc2RHYzLWtPSFZkZE1iWG9rQ1NkVWlpRnF5TURVbnl1cm9iYXlSMGYxd1BGYVc0VDA9 DB_CHAT_PORT=5432 # PostgreSQL Storage (new) DB_MANAGEMENT_HOST=gateway-int-server.postgres.database.azure.com DB_MANAGEMENT_DATABASE=poweron_management DB_MANAGEMENT_USER=heeshkdlby -DB_MANAGEMENT_PASSWORD_SECRET=VkAjgECESbEVQ$Tu +DB_MANAGEMENT_PASSWORD_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjTnJKNlJMNmEwQ0Y5dVNrR3pkZk9SQXVvLTRTNW9lQ1g3TTE5cFhBNTd5UENqWW9qdWd3NWNseWhnUHJveDJyd1Z3X1czS3VuZnAwZHBXYVNQWlZsRy12ME42NndEVlR5X3ZPdFBNNmhLYm89 DB_MANAGEMENT_PORT=5432 # Security Configuration -APP_JWT_SECRET_SECRET=rotated_jwt_secret_2025_09_17_2c5f8e7a-1b3d-49c7-ae5d-9f0a2c3d4b5e +APP_JWT_KEY_SECRET = 
INT_ENC:Z0FBQUFBQm8xSVRjNUctb2RwU25iR3ZnanBOdHZhWUtIajZ1RnZzTEp4aDR0MktWRjNoeVBrY1Npd1R0VE9YVHp3M2w1cXRzbUxNaU82QUJvaDNFeVQyN05KblRWblBvbWtoT0VXbkNBbDQ5OHhwSUFnaDZGRG10Vmgtdm1YUkRsYUhFMzRVZURmSFlDTFIzVWg4MXNueDZyMGc5aVpFdWRxY3dkTExGM093ZTVUZVl5LUhGWnlRPQ== APP_TOKEN_EXPIRY=300 # CORS Configuration @@ -35,7 +38,7 @@ APP_ALLOWED_ORIGINS=http://localhost:8080,https://playground.poweron-center.net, # Logging configuration APP_LOGGING_LOG_LEVEL = DEBUG -APP_LOGGING_LOG_FILE = /home/site/wwwroot/poweron.log +APP_LOGGING_LOG_DIR = /home/site/wwwroot/ APP_LOGGING_FORMAT = %(asctime)s - %(levelname)s - %(name)s - %(message)s APP_LOGGING_DATE_FORMAT = %Y-%m-%d %H:%M:%S APP_LOGGING_CONSOLE_ENABLED = True @@ -46,3 +49,28 @@ APP_LOGGING_BACKUP_COUNT = 5 # Service Redirects Service_MSFT_REDIRECT_URI = https://gateway-int.poweron-center.net/api/msft/auth/callback Service_GOOGLE_REDIRECT_URI = https://gateway-int.poweron-center.net/api/google/auth/callback + +# AI configuration +Connector_AiOpenai_API_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjSDBNYkptSkQxTUotYVVpZVNZc0dxNGNwSEtkOEE0T3RZWjROTEhSRlRXdlZmQUxxZ0w3Y0xOV2JNV19LNF9yTUZiU1pUNG15U2VDUDdSVlI4VlpnR3JXVFFtcXBaTEZiaUtSclVFd0lCZG1rWVhra1dfWTVQOTBEYUU0MjByYVNEMTFmeXNOcmpUT216MmJKdlVPeW5nPT0= +Connector_AiAnthropic_API_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjT1ZlRWVJdVZMT3ljSFJDcFdxRFBRVkZhS204NnN5RDBlQ0tpenhTM0FFVktuWW9mWHNwRWx2dHB0eDBSZ0JFQnZKWlp6c01pVGREWHd1eGpERnU0Q2xhaks1clQ1ZXVsdnd2ZzhpNXNQS1BhY3FjSkdkVEhHalNaRGR4emhpakZncnpDQUVxOHVXQzVUWmtQc0FsYmFwTF9TSG5FOUFtWk5Ick1NcHFvY2s1T1c2WXlRUFFJZnh6TWhuaVpMYmppcDR0QUx0a0R6RXlwbGRYb1R4dzJkUT09 +Connector_AiPerplexity_API_SECRET = INT_ENC:Z0FBQUFBQm82Mzk2UWZJdUFhSW8yc3RKc0tKRXphd0xWMkZOVlFpSGZ4SGhFWnk0cTF5VjlKQVZjdS1QSWdkS0pUSWw4OFU5MjUxdTVQel9aeWVIZTZ5TXRuVmFkZG0zWEdTOGdHMHpsTzI0TGlWYURKU1Q0VVpKTlhxUk5FTmN6SUJScDZ3ZldIaUJZcWpaQVRiSEpyQm9tRTNDWk9KTnZBPT0= +Connector_AiTavily_API_SECRET = 
INT_ENC:Z0FBQUFBQm8xSVRkdkJMTDY0akhXNzZDWHVYSEt1cDZoOWEzSktneHZEV2JndTNmWlNSMV9KbFNIZmQzeVlrNE5qUEIwcUlBSGM1a0hOZ3J6djIyOVhnZzI3M1dIUkdicl9FVXF3RGktMmlEYmhnaHJfWTdGUkktSXVUSGdQMC1vSEV6VE8zR2F1SVk= + +# Agent Mail configuration +Service_MSFT_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c +Service_MSFT_CLIENT_SECRET = INT_ENC:Z0FBQUFBQm83T29rMDZvcV9qTG5xb1FzUkdqS1llbzRxSEJXbmpONFFtcUtfZXdtZjQybmJSMjBjMEpnRVhiOGRuczZvVFBFdVVTQV80SG9PSnRQTEpLdVViNm5wc2E5aGRLWjZ4TGF1QjVkNmdRSzBpNWNkYXVublFYclVEdEM5TVBBZWVVMW5RVWk= +Service_MSFT_TENANT_ID = common + +# Google Service configuration +Service_GOOGLE_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com +Service_GOOGLE_CLIENT_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjNThGeVRNd3hacThtRnE0bzlDa0JPUWQyaEd6QjlFckdsMGZjRlRfUks2bXV3aDdVRTF3LVRlZVY5WjVzSXV4ZGNnX002RDl3dkNYdGFzZkxVUW01My1wTHRCanVCLUozZEx4TlduQlB5MnpvNTR2SGlvbFl1YkhzTEtsSi1SOEo= + +# Google Cloud Speech Services configuration +Connector_GoogleSpeech_API_KEY_SECRET = 
INT_ENC:Z0FBQUFBQm8xSVRkNmVXZ1pWcHcydTF2MXF0ZGJoWHBydF85bTczTktiaEJ3Wk1vMW1mZVhDSG1yd0ZxR2ZuSGJTX0N3MWptWXFJTkNTWjh1SUVVTXI4UDVzcGdLMkU5SHJ2TUpkRlRoRWdnSldtYjNTQkh4UDJHY2xmdTdZQ1ZiMTZZcGZxS3RzaHdjV3dtVkZUcEpJcWx0b2xuQVR6ZmpoVFZPY1hNMTV2SnhDaC1IZEh4UUpLTy1ILXA4RG1zamJTbUJ4X0t2M2NkdzJPbEJxSmFpRzV3WC0wZThoVzlxcmpHZ3ZkLVlVY3REZk1vV19WQ05BOWN6cnJ4MWNYYnNiQ0FQSUVnUlpfM3BhMnlsVlZUOG5wM3pzM1lSN1UzWlZKUXRLczlHbjI1LTFvSUJ4SlVXMy1BNk43bE5Hb0RfTTVlWk9oZnFIaVg0SW5pbm9EcXRTTzU1RFlYY3dTcnpKWWNyNjN5T1BGZ0FmX253cEFncmhvZVRuM05KYzhkOEhFMFJsc2NBSEwzZVZ1R0JMOGxsekVwUE55alZaRXFrdzNWWVNGWXNmbnhKeWhQSFo2VXBTUlRPeHdvdVdncEFuOWgydEtsSUFneUN6cGVaTnBSdjNCdVJseGJFdmlMc203UFhLVlYyTENkaGg2dVN6Z2xwT1ZmTmN5bVZGUkM3ZWcyVkt2ckFUVVd3WFFwYnJjNVRobEh2SkVJbXRwUUpEOFJKQ1NUc0Q4NHNqUFhPSDh5cTV6MEcwSDEwRUJCQ2JiTTJlOE5nd3pMMkJaQ1dVYjMwZVVWWnlETmp2dkZ3aXEtQ29WNkxZTFkzYUkxdTlQUU1OTnhWWU12YU9MVnJQa1d2ZjRtUlhneTNubEMxTmp1eUNPOThSMlB3Y1F0T2tCdFNsNFlKalZPV25yR2QycVBUb096RmZ1V0FTaGsxLV9FWDBmenBIOXpMdGpLcUc0TWRoY2hlMFhYTzlET1ZRekw0ZHNwUVBQdVJBX2h6Q2ZzWVZJWTNybTJiekp3WmhmWF9SUFBXQzlqUjctcVlHWWVMZWVQallzR0JGTVF0WmtnWlg1aTM1bFprNVExZXY5dnNvWF93UjhwbkJ3RzNXaVJ2d2RRU3JJVlBvaVh4eTlBRUtqWkJia3dJQVVBV2Nqdm9FUTRUVW1TaHp2ZUwxT0N2ZndxQ2Nka1RYWXF0LWxIWFE0dTFQcVhncFFPM0hFdUUtYlFnemx3WkF4bjA1aDFULUdrZlVZbEJtRGRCdjJyVkdJSXozd0I0dF9zbWhOeHFqRDA4T1NVaWR5cjBwSVgwbllPU294NjZGTnM1bFhIdGpNQUxFOENWd3FCbGpSRFRmRXotQnU0N2lCVEU5RGF6Qi10S2U2NGdadDlrRjZtVE5oZkw5ZWFjXzhCTmxXQzNFTFgxRXVYY3J3YkxnbnlBSm9PY3h4MlM1NVFQbVNDRW5Ld1dvNWMxSmdoTXJuaE1pT2VFeXYwWXBHZ29MZDVlN2lwUUNIeGNCVVdQVi1rRXdJMWFncUlPTXR0MmZVQ1l0d09mZTdzWGFBWUJMUFd3b0RSOU8zeER2UWpNdzAxS0ZJWnB5S3FJdU9wUDJnTTNwMWw3VFVqVXQ3ZGZnU1RkUktkc0NhUHJ0SGFxZ0lVWDEzYjNtU2JfMGNWM1Y0dHlCTzNESEdENC1jUWF5MVppRzR1QlBNSUJySjFfRi1ENHEwcmJ4S3hQUFpXVHA0TG9DZWdoUlo5WnNSM1lCZm1KbEs2ak1yUUU4Wk9JcVJGUkJwc0NvUkMyTjhoTWxtZmVQeDREZVRKZkhYN2duLVNTeGZzdFdBVnhEandJSXB5QjM0azF0ckI3Tk1wSzFhNGVOUVRrNjU0cG9JQ29pN09xOFkwR1lMTlktaGp4TktxdTVtTnNEcldsV2pEZm5nQWpJc2hxY0hjQnVSWUR5VVdaUXBHWUloTzFZUC1oNzJ4UjZ1dnpLcDJxWEZtQlNIMWkz
Z0hXWXdKeC1iLXdZWVJhcU04VFlpMU5pd2ZIdTdCdkVWVFVBdmJuRk16bEFFQTh4alBrcTV2RzliT2hGdTVPOXlRMjFuZktiRTZIamQ1VFVqS0hRTXhxcU1mdkgyQ1NjQmZfcjl4c3NJd0RIeDVMZUFBbHJqdEJxWWl3aWdGUEQxR3ZnMkNGdVB4RUxkZi1xOVlFQXh1NjRfbkFEaEJ5TVZlUGFrWVhSTVRPeGxqNlJDTHNsRWRrei1pYjhnUmZrb3BvWkQ2QXBzYjFHNXZoWU1LSExhLWtlYlJTZlJmYUM5Y1Rhb1pkMVYyWTByM3NTS0VXMG1ybm1BTVN2QXRYaXZqX2dKSkZrajZSS2cyVlNOQnd5Y29zMlVyaWlNbTJEb3FuUFFtbWNTNVpZTktUenFZSl91cVFXZjRkQUZyYmtPczU2S1RKQ19ONGFOTHlwX2hOOEE1UHZEVjhnT0xxRjMxTEE4SHhRbmlmTkZwVXJBdlJDbU5oZS05SzI4QVhEWDZaN2ZiSlFwUGRXSnB5TE9MZV9ia3pYcmZVa1dicG5FMHRXUFZXMWJQVDAwOEdDQzJmZEl0ZDhUOEFpZXZWWXl5Q2xwSmFienNCMldlb2NKb2ZRYV9KbUdHRzNUcjU1VUFhMzk1a2J6dDVuNTl6NTdpM0hGa3k0UWVtbF9pdDVsQVp2cndDLUU5dnNYOF9CLS0ySXhBSFdCSnpqV010bllBb3U0cEZZYVF5R2tSNFM5NlRhdS1fb1NqbDBKMkw0V2N0VEZhNExtQlR3ckZ3cVlCeHVXdXJ6X0s4cEtsaG5rVUxCN2RRbHQxTmcyVFBqYUxyOHJzeFBXVUJaRHpXbUoxdHZzMFBzQk1UTUFvX1pGNFNMNDFvZWdTdEUtMUNKMXNIeVlvQk1CeEdpZVdmN0tsSDVZZHJXSGt5c2o2MHdwSTZIMVBhRzM1eU43Q2FtcVNidExxczNJeUx5U2RuUG5EeHpCTlg2SV9WNk1ET3BRNXFuc0pNWlVvZUYtY21oRGtJSmwxQ09QbHBUV3BuS3B5NE9RVkhfellqZjJUQ0diSV94QlhQWmdaaC1TRWxsMUVWSXB0aE1McFZDZDNwQUVKZ2t5cXRTXzlRZVJwN0pZSnJSV21XMlh0TzFRVEl0c2I4QjBxOGRCYkNxek04a011X1lrb2poQ3h2LUhKTGJiUlhneHp5QWFBcE5nMElkNTVzM3JGOWtUQ19wNVBTaVVHUHFDNFJnNXJaWDNBSkMwbi1WbTdtSnFySkhNQl9ZQjZrR2xDcXhTRExhMmNHcGlyWjR3ZU9SSjRZd1l4ZjVPeHNiYk53SW5SYnZPTzNkd1lnZmFseV9tQ3BxM3lNYVBHT0J0elJnMTByZ3VHemxta0tVQzZZRllmQ2VLZ1ZCNDhUUTc3LWNCZXBMekFwWW1fQkQ1NktzNGFMYUdYTU0xbXprY1FONUNlUHNMY3h2NFJMMmhNa3VNdzF4TVFWQk9odnJUMjFJMVd3Z2N6Sms5aEM2SWlWZFViZ0JWTEpUWWM5NmIzOS1oQmRqdkt1NUUycFlVcUxERUZGbnZqTUxIYnJmMDBHZDEzbnJsWEEzSUo3UmNPUDg1dnRUU1FzcWtjTWZwUG9zM0JTY3RqMDdST2UxcXFTM0d0bGkwdFhnMk5LaUlxNWx3V1pLaVlLUFJXZzBzVl9Ia1V1OHdYUEFWOU50UndycGtCdzM0Q0NQamp2VTNqbFBLaGhsbUk5dUI5MjU5OHVySk1oY0drUWtXUloyVVRvOWJmbUVYRzFVeWNQczh2NXJCeVppRlZiWDNJaDhOSmRmX2lURTNVS3NXQXFZT1QtUmdvMWJoVWYxU3lqUUJhbzEyX3I3TXhwbm9wc1FoQ1ZUTlNBRjMyQTBTY2tzbHZ3RFUtTjVxQ0o1QXRTVks2WENwMGZCRGstNU1jN3FhUFJCQThyaFhhMVRsbnlSRXNGRmt3Yk01X21ldmV3bTItWm1JaGpZ
QWZROEFtT1d1UUtPQlhYVVFqT2NxLUxQenJHX3JfMEdscDRiMXcyZ1ZmU3NFMzVoelZJaDlvT0ZoRGQ2bmtlM0M5ZHlCd2ZMbnRZRkZUWHVBUEx4czNfTmtMckh5eXZrZFBzOEItOGRYOEhsMzBhZ0xlOWFjZzgteVBsdnpPT1pYdUxnbFNXYnhKaVB6QUxVdUJCOFpvU2x2c1FHZV94MDBOVWJhYkxISkswc0U5UmdPWFJLXzZNYklHTjN1QzRKaldKdEVHb0pOU284N3c2LXZGMGVleEZ5NGZ6OGV1dm1tM0J0aTQ3VFlNOEJrdEh3PT0= + +# Feature SyncDelta JIRA configuration +Feature_SyncDelta_JIRA_DELTA_TOKEN_SECRET = INT_ENC:Z0FBQUFBQm8xSVRkTUNsWm4wX0p6eXFDZmJ4dFdHNEs1MV9MUzdrb3RzeC1jVWVYZ0REWHRyZkFiaGZLcUQtTXFBZzZkNzRmQ0gxbEhGbUNlVVFfR1JEQTc0aldkZkgyWnBOcjdlUlZxR0tDTEdKRExULXAyUEtsVmNTMkRKU1BJNnFiM0hlMXo4YndMcHlRMExtZDQ3Zm9vNFhMcEZCcHpBPT0= + +# Debug Configuration +APP_DEBUG_CHAT_WORKFLOW_ENABLED = FALSE +APP_DEBUG_CHAT_WORKFLOW_DIR = ./test-chat \ No newline at end of file diff --git a/env_prod.env b/env_prod.env index 8056a702..201bfd54 100644 --- a/env_prod.env +++ b/env_prod.env @@ -3,31 +3,34 @@ # System Configuration APP_ENV_TYPE = prod APP_ENV_LABEL = Production Instance +APP_KEY_SYSVAR = CONFIG_KEY +APP_INIT_PASS_ADMIN_SECRET = PROD_ENC:Z0FBQUFBQm8xSU5pSXoyVEVwNDZ6cmthQTROUkxGUjh1UWF2UU5zaWRuX3p2aHJCVFo2NEstR0RqdnQ5clZmeVliRlhHZGFHTlhZV2dzMmRPZFVEemVlSHd5VHR3cmpNUXRaRlhZSFZ6d1dsX2Y5Zl9lOXdYdEU9 +APP_INIT_PASS_EVENT_SECRET = PROD_ENC:Z0FBQUFBQm8xSU5peGNMWExjWGZxQ2VndXVOSUVGcWhQTWd0N3d0blU3bGJvNjgzNVVNNktCQnZlTEtVckV5RUtQMjMwRTBkdmxEMlZwX0k1M1hlOFFNY3hjaWsyd2JmRGl2UWxfSXEwenVnQ3NmaTlxckp2VXM9 APP_API_URL = https://gateway-prod.poweron-center.net # PostgreSQL Storage (new) DB_APP_HOST=gateway-prod-server.postgres.database.azure.com DB_APP_DATABASE=poweron_app DB_APP_USER=gzxxmcrdhn -DB_APP_PASSWORD_SECRET=prod_password_very_secure.2025 +DB_APP_PASSWORD_SECRET = PROD_ENC:Z0FBQUFBQm8xSU5pVmtwYWZQakdWZnJPamVlRWJPa0tnc3daSVVHejVrQ0x1VFZZbHhVSkk0S2tFWl92T2NwWURBMU9UbFROMHZ2TkNKZFlEWjhJZDZ0bnFndC1oYjhNRW1VLWpEYnlDNEJwcGVKckpUVlp6YTg9 DB_APP_PORT=5432 # PostgreSQL Storage (new) DB_CHAT_HOST=gateway-prod-server.postgres.database.azure.com DB_CHAT_DATABASE=poweron_chat 
DB_CHAT_USER=gzxxmcrdhn -DB_CHAT_PASSWORD_SECRET=prod_password_very_secure.2025 +DB_CHAT_PASSWORD_SECRET = PROD_ENC:Z0FBQUFBQm8xSU5pZVZnTzBPTDY1Q3c2U1pDV0lxbXhoWnlYSXRDWVhIeGJwSkdNMzMxR2h5a1FRN00xcWtYUE4ySGpqRllSaGM5SmRZZk9Bd2trVDJNZDdWcEFIbTJtel91MHpsazlTQnRsV2docGdBc0RVeEU9 DB_CHAT_PORT=5432 # PostgreSQL Storage (new) DB_MANAGEMENT_HOST=gateway-prod-server.postgres.database.azure.com DB_MANAGEMENT_DATABASE=poweron_management DB_MANAGEMENT_USER=gzxxmcrdhn -DB_MANAGEMENT_PASSWORD_SECRET=prod_password_very_secure.2025 +DB_MANAGEMENT_PASSWORD_SECRET = PROD_ENC:Z0FBQUFBQm8xSU5pQXdaRnVEQUx2MmU5ck9XZzNfaGVoRXlYMlVjSVM5dWNTekhmR2VYNkd6WVhELUlkLWdFWWRWQ1JJLWZ4WUNwclZVRlg3ZHBCS0xwM1laNklTaEs1czFDRTMxYlV2TWNueEJlTHFyNEt4aVk9 DB_MANAGEMENT_PORT=5432 # Security Configuration -APP_JWT_SECRET_SECRET=rotated_jwt_secret_2025_09_17_prod_e1a9c4d7-6b8f-4f2e-9c1a-7e3d2a1b9c5f +APP_JWT_KEY_SECRET = PROD_ENC:Z0FBQUFBQm8xSU5pY3JfX1R3cEJhTjAzZGx2amtRSE4yVzZhMmY3a3FHam9BdzBxVWd5R0FRSW1KbmNGS3JDMktKTWptZm4wYmZZZTVDQkh3NVlxSW1MZEdiVWdORng4dm0xV08wZDh0YlBNQTdEbmlnVWduMzNWY1RPX1BqaGtnOTc2ZWNBTnNnd1AtaTNRUExpRThVdzNmdVFHM2hkTjFjcW0ya2szMWNaT3VDeDhXMlJ1NDM4PQ== APP_TOKEN_EXPIRY=300 # CORS Configuration @@ -35,7 +38,7 @@ APP_ALLOWED_ORIGINS=http://localhost:8080,https://playground.poweron-center.net, # Logging configuration APP_LOGGING_LOG_LEVEL = DEBUG -APP_LOGGING_LOG_FILE = /home/site/wwwroot/poweron.log +APP_LOGGING_LOG_DIR = /home/site/wwwroot/ APP_LOGGING_FORMAT = %(asctime)s - %(levelname)s - %(name)s - %(message)s APP_LOGGING_DATE_FORMAT = %Y-%m-%d %H:%M:%S APP_LOGGING_CONSOLE_ENABLED = True @@ -46,3 +49,28 @@ APP_LOGGING_BACKUP_COUNT = 5 # Service Redirects Service_MSFT_REDIRECT_URI = https://gateway-prod.poweron-center.net/api/msft/auth/callback Service_GOOGLE_REDIRECT_URI = https://gateway-prod.poweron-center.net/api/google/auth/callback + +# AI configuration +Connector_AiOpenai_API_SECRET = 
PROD_ENC:Z0FBQUFBQm8xSU5pU05XM2hMaExPMnpYeFpwRVhyYl9JZmRITmlmRDlWOUJSSWE4NTFLZUptSkJhNlEycHBLZmh3WFA2ZmU5VmxHZks1UUNVOUZnckZNdXZ2MTY2dFg1Nl8yWDRrcTRlT0tHYkhyRGZINTEzU25iYVFRMzJGeUZIdlc4LU9GbmpQYmtmU3lJT2VVZ1UzLVd3R25ZQ092SUVnPT0= +Connector_AiAnthropic_API_SECRET = PROD_ENC:Z0FBQUFBQm8xSU5pNTA1RkZ3UllCOXVsNVZzbkw2Rkl1TWxCZ0wwWEVXUm9ReUhBcVl1cGFUdW9FRVh4elVxR0x3NVRxZkc4SkxHVFdzSU1YNG5Rb0FqSHJhdElwWm1iLWdubTVDcUl3UkVjVHNoU0xLa0ZTSFlfTlJUVXg4cVVwUWdlVDBTSFU5SnBzS0ZnVjlQcmtiNzV2UTNMck1IakZ0OWlubUtlWDZnMk4yX2JsZ1U4Wm1yT29fM2d2NVBNOWNBbWtTRWNyQ2tZNjhwSVF6bG5SU3dTenR2MzA3Z19NUT09 +Connector_AiPerplexity_API_SECRET = PROD_ENC:Z0FBQUFBQm82Mzk2Q1FGRkJEUkI4LXlQbHYzT2RkdVJEcmM4WGdZTWpJTEhoeUF1NW5LUVpJdDBYN3k1WFN4a2FQSWJSQmd0U0xJbzZDTmFFN05FcXl0Z3V1OEpsZjYydV94TXVjVjVXRTRYSWdLMkd5XzZIbFV6emRCZHpuOUpQeThadE5xcDNDVGV1RHJrUEN0c1BBYXctZFNWcFRuVXhRPT0= +Connector_AiTavily_API_SECRET = PROD_ENC:Z0FBQUFBQm8xSU5pMjhJNS1CZFJubUlkN3ZrTUoxR0Y1QzJFWEJSMk0wQkI0UndqOW1UelVieWhGaTVBcHoxRXo1VjRzVVRROHFIeHMyS3Q5cDZCeUlEMzE1ZlhVTmNveFk5VmFQMm80NTRyVW1TZHVsR3dUN0RtMnd4LW1VWlpqOXJPeXZBTmg4OEM= + +# Agent Mail configuration +Service_MSFT_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c +Service_MSFT_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQm83T29rSzdYLTRydXN5V3lQLXhmRjMyQ1FOaGpuek45QllaX1REN2s5aWNIUl81NGlrYlJTeFV0RlRZd0xPcm5uMDM4QlpibHJQbm5XZTlWeWxfcWNVdFpCUHI2amh0MVBnZ21IN2ptSkhWLTVfaHEwNmI5SEtiS05pQmt5eV8yMnhLMEc= +Service_MSFT_TENANT_ID = common + +# Google Service configuration +Service_GOOGLE_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com +Service_GOOGLE_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQm8xSU5pV2JEV0lNUXhwa1VTUGh2RWcyYnJHSFQyTmdBOEhwRkJWc3MwOFZlcHJGUmlGOVVFbG1XalNyUXVuaExESy1xeFNIQlRiSFVIWTB6Rm1fNFg0OHZZSkF4ZlBIcFZDMjZHcFRERXJ0WlVFclhHa29Za1BqWGxsM05NZGFRc1BLZnE= + +# Google Cloud Speech Services configuration +Connector_GoogleSpeech_API_KEY_SECRET = 
PROD_ENC:Z0FBQUFBQm8xSU5pNjlJdmFMeERXUUQzR0duRUY4cGRZRzdwQlpnVFAzSzQ5cHZNRnVUZ0xWd3dQMHR3QjVsdF92NmdUQlJGRk1RcG1RYWZzcE9RbEhjQmR5Yk5Ud3ZKTW5jbmpEVGJ2ZkxVeVJpcUxaT2lNREFXaks5WHg5aVlHcXlUZldMdnZGYklHWjlJOWJ6Wm5RSkNmdm5feENjS1E0QUVXTTE5SW5sNFBEeTJ1RjRmVm9SQUNIYmF2U1U2dklsbTVlWFpCcHMwTFF1SUg5NmNfcWhQRFlpeWt0U19HMXNuUHd2RFdrVl9XdUFaY0hWdVBPYWlybU1CdGlCN1A0RzZBbi1IUVJ1TWMxTE9Ea09sTURhcDFZb1JIUW1zUFJybW15MDcxOUtfVXA2N0xwMnFrczA1YTJaN05pRHhOYWNzMjVmUHdhbVdlemF3TEIzN0pJaVo3bGJBMXJnZmNYTXVJVDdmYkRXWTlBT2F2NmN4eTlteUI1SlJTOXc2WWFWUTBCZTJBVHRLVDhEVjBFeHE0Nmk1YkxYd3N3RXgtVUdGdlZFSmk4dHM0QjFmbktsQTctbmJMT0MtMDlKS1pUR0pELXBxckhULUUycjlBZmVJQjFrM0xEUm50U2ZabExtVjZ1WWZ1WnlobUZIOVlndjNydUZfczJUWVVRZURTd1lYazllaER4VU10cXUyVS1ZNG9Ha2hnbTAzOEpGMklFSWpWeVV5eFB2UlVWYmJJakZnOVM2R2lJSXRSM3VzVEZZNUVpNmVjRzdXRUJsT2hzcjhZWERFeGV5c1dFQVM3dkhGY2Q3ckNBRDZCcVdhZnZkdzM3QVNpODZYWE81TEIyZGUycldkSVRvbm5hR3Jib2UzOEtXdUpHQ2FyWDQtMDdQbC1ycEdfUzdXd0U2dHFIVjhoRDJ0YkNsWUpva1dzOGNPdXRpZjVwUldtT3FVN3RrZUhTN3JfX1M3LU9PaXZELWkzRmtMbjgxZGZ6ZjVJNW9RZW1nM2hqUXo4Z2I5Z2tSVTVMdUNLblRxOGQ1Y3F4SGZIbWo4YkFBV3FIbjB6LUxGNHdsQWgxQUM4bzVrblBObFFfVWNaQ3QwejQ1eGFlSXVIcXlyVEZEdzVKNV9pd2o4RW1UVjlqb3VMWnF0V1JTcWF1R0RjdUNjM2lLUHRqZDl2WWtXUnhmbVdxeHA3REFHTkdkMjM4LTllajBWQnd3RHlFSVdiUThfQnduOVFJdmR6OUVGN1lOYjBqclhadHozX21kRzlUT2EtWVBkYWFRSjRGdW80dmlEUTVrVjhWbjJYNGtCeGNtNzRHQXJsRlZyWjBYdHltVDM2MV9IT0RFT2dLLTVBREtsS09HdUxrODRLcEQ1TmRoVDh6WmgybGc5MzgtbmJSYThQd3FFaUcxbmg3eE95RkJVX2hHM20wT1k2c21qd24wSkFWNGROaklQeHZrc21PdTVsdHVxR0pxd3Ztb1NQVHEtd25URHRNa1pqa3BLdVdkTnNFeDNManJST0dOb1RWM2hqekxFTlFSZkd6TlZBY1VQT1NFOVlDQzlPQWVlVXQ4MW0wdGkzd0Myam1lSWE2aEtVVTVNc3N3dENpa1BWRl9ZQ3daYllONWRmRUF0THpleFRmdWRqTFM2aldmLUFuZzFGdkFQNHR6d21SdzRGQ0Q4cU8yV0xGUTVUY01TZlYxSzZ4cmtfUGZvVDhmYmNBX1pibTVTcl9lenJoME9KSnBucUxPRU1PRXBmLWFENEgwRWZOU0RvRDlvQk9ueVp0dXJrUVgtQUk5VldVbV9MS19PYmlua3liWl80Z2hMcFRnTXBkZDA3enIxRWFzaU56TEZKa0hPQUtNY0dCY1pnQ2V3Zml6ZFczWFBESUlLd3BSVEs5ZXlGLUpINDRsd1NBVjBkR1dvbE8wLWZBeEhFQ0hvY3E5UGJsTDdteGdSRjBIZTRobXpsd29PMmhKQkxXY3Znd2FMdWtZU1V
kQlVRZXlSZ3FaVnNqcXpwR3N3SktOTDA3aUZIcE9TR1VDcXdaTDhQX2E5VDlwckoyX0xlNmFQcnoydEkwc0s1S08yaVlsM0pwYktUVWl3LU5hQzF2UVZNSm9ZR3QyQWdrUXB2a25QNzhkVEFOYmZ0b1BmTXRCMmVQZTAtYzdOeUlBYlNINlZNZW1nUTFfSV92UlJiWGt6Qms1c1hBc3kzZkVRMzEwNVJDOS1JeVg4YWtVeUJyOTZPQ0FnSUs1Z25sMlY0S1V1c0dIWEpuX2pMQmZ4Z29SY1U0bVZscXNWcjJwRy1UZEFYSXBzQURGblRTelBybU5BeDF6N3hZLXZwSHBkMmlzbHZWN2JkU3hRcE0zQ0hna3QwYWlJX3hBdGcxUHdGRE55cndUNHRvbXU5VTRMRmZDRjhvXzIwajI1Y0RCcmR2OV94cS1XYkNwalNHS2lObHlkNGZBbklycnZMSlJYVnlfakRXb1ZfWUo2MGxzYUNIektYeENGTkUzMUJXRE9WRHRrY2o5UFJHckZza2RQbjNPUkstbG9GZG4yNmxKeEdtbHo4WDZFc0lvT01wZkxuN29ycXl3X1hTN1prRGdvWG9hRFYwNzBwVVpuMW0wQlZYbGZxZjFQUHp2XzBQT3Fqa3lzejVKZmJDMG0wRzhqWV9HY1dxaXB2VFNQUzV2LUJSOXRFRUllak83cUI3RGUtYVBJakF1YUVOV0otT1BxUHJqS0NLdFVHc0tsT2RGcWd6UTU4Yi1kc0JZS1VPT1NXSlc3TDM5ZDVEZlRDOURZU1hMT0YxZ25ndVBUaG1VcGsxWFZSS1RxT1ZZTU1vclZjVU5iYmZMd0VBTXlvdTE0YjdoclZ6ZnNKMmE2Yy1ORmNCMnJNX3dwcVJSN2RSd2d6aENLRXQyTjhkcDlLTFVZMHBydFowNTJoZm1mVHNRVHI1YjhTNnl1Vll4dFZhenZfa0dybk9KYVh6LUluSUo0djUzRFNEdzBoVGt5UU9tMlg5UnBLbk9WaEhoU2txY2tUSXJmemlmNEExb3Q1blI5bE9adHluWVI3NXZQNUtXdmpra05aNy15dTBXdlVqcXhteFVqSXFxNnlQR2FGeVNONkx3NVpQUk1FNk5yTUY4T1hQV1FCdm9PYzdFTGl4QXZkODltSlprbGJ6cWREcEM1VlNwN3V5aWdWYXNkekk4X3U0cjJjZ1k2X190cmNnMlpMQVlLdExxM3pFNkZudVFKci1CalE1U3kzdmotQ01LV0ZzWnp0VUxRblhkdlN6VG1MWHNQdGlrNmF4RnFtd0c3UXNqZFVRZTRFMGl1NFU5T2k3VEpjZXA1U052VkJtdUhDWEpTaDRGQnM0SDQwY2IxdDVNbUtELTQ0R0s0OHpfTHdFOHZ0VmRMTC1FUVpPSkJ4QXRWNnl5MURUdjVyUk53emRwbDBxUnloUmlheXhKY3RBUG1mX3JxM2w0VlZvcE40b2ROeG15NS01RFlvUHdoYllLNVhCZUNEd0dwQnFCLVdZU0RhVEFzR2gxTVpub3FGRnl4VDNiSVZrTnpMQUlxeGJGQzh5WlNZR2NKbklHRVRTaVJ2REduN0hXaGo5MHFGb1FOa0U5TUFwQ09zOXVWMnRRNVlJWmZpaTUxLWFIeWR0UEFtaVNDX1k5Q1p3Y2V4ckVXQVBRYzV1eGwwMWd0SE15WUxiYzUyLTUzTGlyTUhZUDFlRTFjcFpieWQwU0pxRWJXSE53Nkd5aHp5T28wZVd6Z1phLTQ4TmgxU3hvNHpySzExUk5WZlFFS3VpOXNHMDdZU0gzSGxYUlU4WmgwNUlPdlhQcUI0cGtITmQ4SlByczN0THUxNHc0a21vUEp6S1hLNnFRNmFfdlpmUWpJQ1VNYXVEOW1abzlsd2RoRG5pVXRVbjBKV2RFTGFEa3ZYTHByOTJjalc1b3hTWkFmS2RPdVlTUTVkRkpSTnZsMWtnYWZEUm1SR3lBemdON2xiN3pkZlNfX2N
SYU5wWHNybHh4V0lnNHJjQ2NON1hiRHMycUdmNC1kay13bUE0OTBPN0xmNDA1NlQxVmRySEJvM1VUN2Y2Sl9KX2pZVHRPWEdfR2RYNUoxY01Va3pXb2VBd3lZb3BSXzU5NVJfWlhEYXFSVDJrUnFHWG42RVZJUVQ2RlJWUEkyQnRnREI3eHNiRERiQ3FUczJsRTBDZ3pUUGZPcjExZUFKc21QUWxVYVBmV2hPZXRGd3lJX3ZTczhCVG1jWFVwanhIZHlyTTdiR2c5cTBVSXBRV1U4ZExtWWdub1pTSHU0cU5aYWJVWmExbXI0MjE3WUVnPT0= + +# Feature SyncDelta JIRA configuration +Feature_SyncDelta_JIRA_DELTA_TOKEN_SECRET = PROD_ENC:Z0FBQUFBQm8xSU5pTDhnTVNzRUhScU8wYnZsZk52bHFkSWxLc18xQmtCeC1HbnNwTzVBbXRNTmQzRjZYaGE2MVlCNGtnWDk1T2I5VXVKNHpKU1VRbXEyN2tRWUJnU2ltZE5qZ3lmNEF6Z1hMTTEwZkk2NUNBYjhmVTJEcWpRUW9HNEVpSGFWdjBWQXQ3eUtHUTFJS3U5QWpaeno0RFNhMUxnPT0= + +# Debug Configuration +APP_DEBUG_CHAT_WORKFLOW_ENABLED = FALSE +APP_DEBUG_CHAT_WORKFLOW_DIR = ./test-chat \ No newline at end of file diff --git a/modules/aicore/aicoreBase.py b/modules/aicore/aicoreBase.py new file mode 100644 index 00000000..848590fc --- /dev/null +++ b/modules/aicore/aicoreBase.py @@ -0,0 +1,102 @@ +""" +Base connector interface for AI connectors. +All AI connectors should inherit from this class. + +IMPORTANT: Model Registration Requirements +- Each model must have a unique displayName across all connectors +- The displayName is used as the unique identifier in the model registry +- The name field is used for API calls (can be duplicated across different model instances) +- If duplicate displayNames are detected during registration, an error will be raised +""" + +from abc import ABC, abstractmethod +from typing import List, Dict, Any, Optional +from modules.datamodels.datamodelAi import AiModel + + +class BaseConnectorAi(ABC): + """ + Base class for all AI connectors. + + IMPORTANT: Models returned by getModels() must have unique displayName values. + The displayName serves as the unique identifier in the model registry. + Duplicate displayNames will cause registration to fail with an error. 
+ """ + + def __init__(self): + self._models_cache: Optional[List[AiModel]] = None + self._last_cache_update: Optional[float] = None + self._cache_ttl: float = 300.0 # 5 minutes cache TTL + + @abstractmethod + def getModels(self) -> List[AiModel]: + """ + Get all available models for this connector. + Should be implemented by each connector. + + IMPORTANT: Each model's displayName must be unique across all connectors. + If multiple models share the same API name (e.g., "gpt-4o"), they must have + different displayNames (e.g., "OpenAI GPT-4o" vs "OpenAI GPT-4o Instance Vision"). + """ + pass + + @abstractmethod + def getConnectorType(self) -> str: + """ + Get the connector type identifier. + Should return one of: openai, anthropic, perplexity, tavily + """ + pass + + def getCachedModels(self) -> List[AiModel]: + """ + Get cached models with TTL check. + Returns cached models if still valid, otherwise refreshes cache. + """ + import time + + current_time = time.time() + + # Check if cache is valid + if (self._models_cache is not None and + self._last_cache_update is not None and + current_time - self._last_cache_update < self._cache_ttl): + return self._models_cache + + # Refresh cache + self._models_cache = self.getModels() + self._last_cache_update = current_time + + return self._models_cache + + def clearCache(self): + """Clear the models cache.""" + self._models_cache = None + self._last_cache_update = None + + def getModelByDisplayName(self, displayName: str) -> Optional[AiModel]: + """Get a specific model by displayName (displayName must be unique).""" + models = self.getCachedModels() + for model in models: + if model.displayName == displayName: + return model + return None + + def getModelByName(self, name: str) -> Optional[AiModel]: + """Get a specific model by name (API name). 
Note: name can be duplicated, returns first match.""" + models = self.getCachedModels() + for model in models: + if model.name == name: + return model + return None + + + def getModelsByPriority(self, priority: str) -> List[AiModel]: + """Get models that have a specific priority.""" + models = self.getCachedModels() + return [model for model in models if model.priority == priority] + + def getAvailableModels(self) -> List[AiModel]: + """Get only available models.""" + models = self.getCachedModels() + return [model for model in models if model.isAvailable] diff --git a/modules/aicore/aicoreModelRegistry.py b/modules/aicore/aicoreModelRegistry.py new file mode 100644 index 00000000..54027a26 --- /dev/null +++ b/modules/aicore/aicoreModelRegistry.py @@ -0,0 +1,202 @@ +""" +Dynamic model registry that collects models from all AI connectors. +Implements plugin-like architecture for connector discovery. +""" + +import logging +import importlib +import os +from typing import Dict, List, Optional, Any +from modules.datamodels.datamodelAi import AiModel +from modules.aicore.aicoreBase import BaseConnectorAi + +logger = logging.getLogger(__name__) + + +class ModelRegistry: + """Dynamic registry for AI models from all connectors.""" + + def __init__(self): + self._models: Dict[str, AiModel] = {} + self._connectors: Dict[str, BaseConnectorAi] = {} + self._lastRefresh: Optional[float] = None + self._refreshInterval: float = 300.0 # 5 minutes + + def registerConnector(self, connector: BaseConnectorAi): + """Register a connector and collect its models.""" + connectorType = connector.getConnectorType() + + # If connector already registered, skip re-registration to avoid duplicate models + if connectorType in self._connectors: + logger.debug(f"Connector {connectorType} already registered, skipping re-registration") + return + + self._connectors[connectorType] = connector + + # Collect models from this connector + try: + models = connector.getCachedModels() + for model in models: + 
# Validate displayName uniqueness + if model.displayName in self._models: + existingModel = self._models[model.displayName] + errorMsg = f"Duplicate displayName '{model.displayName}' detected! Existing model: displayName='{existingModel.displayName}', name='{existingModel.name}' (connector: {existingModel.connectorType}), New model: displayName='{model.displayName}', name='{model.name}' (connector: {connectorType}). displayName must be unique." + logger.error(errorMsg) + raise ValueError(errorMsg) + + # Use displayName as the key (must be unique) + self._models[model.displayName] = model + logger.debug(f"Registered model: {model.displayName} (name: {model.name}) from {connectorType}") + except Exception as e: + logger.error(f"Failed to register models from {connectorType}: {e}") + raise + + def discoverConnectors(self) -> List[BaseConnectorAi]: + """Auto-discover connectors by scanning aicorePlugin*.py files.""" + connectors = [] + connectorDir = os.path.dirname(__file__) + + # Scan for connector files + for filename in os.listdir(connectorDir): + if filename.startswith('aicorePlugin') and filename.endswith('.py'): + moduleName = filename[:-3] # Remove .py extension + + try: + # Import the module + module = importlib.import_module(f'modules.aicore.{moduleName}') + + # Find connector classes (classes that inherit from BaseConnectorAi) + for attrName in dir(module): + attr = getattr(module, attrName) + if (isinstance(attr, type) and + issubclass(attr, BaseConnectorAi) and + attr != BaseConnectorAi): + + # Instantiate the connector + connector = attr() + connectors.append(connector) + logger.info(f"Discovered connector: {connector.getConnectorType()}") + + except Exception as e: + logger.warning(f"Failed to discover connector from {filename}: {e}") + + return connectors + + def refreshModels(self, force: bool = False): + """Refresh models from all registered connectors.""" + import time + + currentTime = time.time() + + # Check if refresh is needed + if (not force and 
+ self._lastRefresh is not None and + currentTime - self._lastRefresh < self._refreshInterval): + return + + logger.info("Refreshing model registry...") + + # Clear existing models + self._models.clear() + + # Re-register all connectors + for connector in self._connectors.values(): + try: + connector.clearCache() # Clear connector cache + models = connector.getCachedModels() + for model in models: + # Validate displayName uniqueness + if model.displayName in self._models: + existingModel = self._models[model.displayName] + errorMsg = f"Duplicate displayName '{model.displayName}' detected! Existing model: displayName='{existingModel.displayName}', name='{existingModel.name}' (connector: {existingModel.connectorType}), New model: displayName='{model.displayName}', name='{model.name}' (connector: {connector.getConnectorType()}). displayName must be unique." + logger.error(errorMsg) + raise ValueError(errorMsg) + + # Use displayName as the key (must be unique) + self._models[model.displayName] = model + except Exception as e: + logger.error(f"Failed to refresh models from {connector.getConnectorType()}: {e}") + raise + + self._lastRefresh = currentTime + logger.info(f"Model registry refreshed: {len(self._models)} models available") + + def getModel(self, displayName: str) -> Optional[AiModel]: + """Get a specific model by displayName (displayName must be unique).""" + self.refreshModels() + return self._models.get(displayName) + + def getModels(self) -> List[AiModel]: + """Get all available models.""" + self.refreshModels() + return list(self._models.values()) + + def getModelsByConnector(self, connectorType: str) -> List[AiModel]: + """Get models from a specific connector.""" + self.refreshModels() + return [model for model in self._models.values() if model.connectorType == connectorType] + + + def getModelsByPriority(self, priority: str) -> List[AiModel]: + """Get models that have a specific priority.""" + self.refreshModels() + return [model for model in 
self._models.values() if model.priority == priority] + + def getAvailableModels(self) -> List[AiModel]: + """Get only available models.""" + self.refreshModels() + allModels = list(self._models.values()) + availableModels = [model for model in allModels if model.isAvailable] + unavailableCount = len(allModels) - len(availableModels) + if unavailableCount > 0: + unavailableModels = [m.name for m in allModels if not m.isAvailable] + logger.debug(f"getAvailableModels: {len(availableModels)} available, {unavailableCount} unavailable. Unavailable: {unavailableModels}") + logger.debug(f"getAvailableModels: Returning {len(availableModels)} models: {[m.name for m in availableModels]}") + return availableModels + + def getConnectorForModel(self, displayName: str) -> Optional[BaseConnectorAi]: + """Get the connector instance for a specific model by displayName.""" + model = self.getModel(displayName) + if model: + return self._connectors.get(model.connectorType) + return None + + def getModelStats(self) -> Dict[str, Any]: + """Get statistics about the model registry.""" + self.refreshModels() + + stats = { + "totalModels": len(self._models), + "availableModels": len([m for m in self._models.values() if m.isAvailable]), + "connectors": len(self._connectors), + "byConnector": {}, + "byCapability": {}, + "byPriority": {} + } + + # Count by connector + for model in self._models.values(): + connector = model.connectorType + if connector not in stats["byConnector"]: + stats["byConnector"][connector] = 0 + stats["byConnector"][connector] += 1 + + # Count by capability + for model in self._models.values(): + for capability in model.capabilities: + if capability not in stats["byCapability"]: + stats["byCapability"][capability] = 0 + stats["byCapability"][capability] += 1 + + # Count by priority + for model in self._models.values(): + priority = model.priority + if priority not in stats["byPriority"]: + stats["byPriority"][priority] = 0 + stats["byPriority"][priority] += 1 + + return 
class ModelSelector:
    """Select AI models by hard filtering followed by score-based ranking.

    Candidates must support the requested operation type and the prompt must
    fit into 80% of the model's context window. Surviving models are ranked by
    a score whose dominant term is the operation-type rating; size fit,
    processing mode and priority act as tie-breakers.
    """

    # Share of a model's context window a request is allowed to occupy.
    _CONTEXT_USAGE_LIMIT = 0.8
    # Approximation used to convert UTF-8 byte counts to tokens (1 token ≈ 4 bytes).
    _BYTES_PER_TOKEN = 4

    def __init__(self):
        logger.info("ModelSelector initialized with simplified approach")

    def selectModel(self,
                    prompt: str,
                    context: str,
                    options: AiCallOptions,
                    availableModels: List[AiModel]) -> Optional[AiModel]:
        """
        Select the best model using simple filtering and priority-based sorting.

        Args:
            prompt: User prompt
            context: Context data
            options: AI call options
            availableModels: List of available models

        Returns:
            Best model for the request, or None if no suitable model found
        """
        try:
            # The failover list is already filtered and sorted; its head is the best match.
            rankedModels = self.getFailoverModelList(prompt, context, options, availableModels)
            if not rankedModels:
                logger.warning("No suitable models found for the request")
                return None

            selectedModel = rankedModels[0]
            logger.info(f"Selected model: {selectedModel.name} (quality: {selectedModel.qualityRating}, cost: ${selectedModel.costPer1kTokensInput:.4f})")
            return selectedModel

        except Exception as e:
            logger.error(f"Error selecting model: {str(e)}")
            return None

    def getFailoverModelList(self,
                             prompt: str,
                             context: str,
                             options: AiCallOptions,
                             availableModels: List[AiModel]) -> List[AiModel]:
        """
        Get prioritized list of models using scoring-based ranking.

        Args:
            prompt: User prompt (None is treated as empty)
            context: Context data (None is treated as empty)
            options: AI call options
            availableModels: List of available models

        Returns:
            List of models sorted by score (descending); empty on any error
        """
        try:
            # A missing prompt/context is an empty one, not a reason to reject every model.
            promptSize = len((prompt or "").encode("utf-8"))
            contextSize = len((context or "").encode("utf-8"))
            totalSize = promptSize + contextSize
            # Convert bytes to approximate tokens (1 token ≈ 4 bytes)
            promptTokens = promptSize / self._BYTES_PER_TOKEN
            contextTokens = contextSize / self._BYTES_PER_TOKEN
            totalTokens = totalSize / self._BYTES_PER_TOKEN

            logger.debug(f"Request sizes - Prompt: {promptTokens:.0f} tokens ({promptSize} bytes), Context: {contextTokens:.0f} tokens ({contextSize} bytes), Total: {totalTokens:.0f} tokens ({totalSize} bytes)")

            # Step 1: keep only models that declare the requested operation type.
            operationFiltered = [
                model for model in (availableModels or [])
                if any(ot.operationType == options.operationType for ot in model.operationTypes)
            ]
            logger.debug(f"After operation type filtering: {len(operationFiltered)} models")
            if operationFiltered:
                logger.debug(f"Models with {options.operationType.value}: {[m.name for m in operationFiltered]}")

            # Step 2: the prompt must fit into 80% of the context window.
            # contextLength is in tokens, so tokens are compared with tokens.
            promptFiltered = []
            for model in operationFiltered:
                if model.contextLength == 0:
                    # No context length limit - always pass
                    promptFiltered.append(model)
                    continue
                maxAllowedTokens = model.contextLength * self._CONTEXT_USAGE_LIMIT
                if promptTokens <= maxAllowedTokens:
                    promptFiltered.append(model)
                else:
                    logger.debug(f"Model {model.name} filtered out: promptSize={promptTokens:.0f} tokens > maxAllowed={maxAllowedTokens:.0f} tokens (80% of {model.contextLength} tokens)")

            logger.debug(f"After prompt size filtering: {len(promptFiltered)} models")

            if not promptFiltered and operationFiltered:
                logger.warning(f"All {len(operationFiltered)} models with {options.operationType.value} were filtered out due to prompt size. Prompt: {promptTokens:.0f} tokens. Available models:")
                for model in operationFiltered:
                    # Report the same limit the filter above applied (tokens, no extra /4).
                    if model.contextLength > 0:
                        maxAllowed = f"{model.contextLength * self._CONTEXT_USAGE_LIMIT:.0f}"
                    else:
                        maxAllowed = "unlimited"
                    logger.warning(f"  - {model.name}: contextLength={model.contextLength} tokens, maxAllowed={maxAllowed} tokens")

            # Step 3: score every remaining model.
            scoredModels = []
            for model in promptFiltered:
                score = self._calculateModelScore(model, promptSize, contextSize, totalSize, options)
                logger.debug(f"Model {model.name}: score={score:.3f}")
                scoredModels.append((score, model))

            # Step 4: best score first; the sort is stable, so ties keep input order.
            scoredModels.sort(key=lambda pair: pair[0], reverse=True)
            sortedModels = [model for _, model in scoredModels]

            logger.debug(f"Final sorted models: {len(sortedModels)} models")
            return sortedModels

        except Exception as e:
            logger.error(f"Error getting failover models: {str(e)}")
            return []

    def _calculateModelScore(self, model: AiModel, promptSize: int, contextSize: int, totalSize: int, options: AiCallOptions) -> float:
        """
        Calculate a score for a model based on how well it fulfills the criteria.
        Operation type rating is the PRIMARY sorting criteria (multiplied by 1000).

        Args:
            model: The model to score
            promptSize: Size of the prompt in bytes
            contextSize: Size of the context in bytes
            totalSize: Total size (prompt + context) in bytes
            options: AI call options

        Returns:
            Score for the model (higher is better)
        """
        # 1. PRIMARY: operation type rating, scaled so it always dominates the other terms.
        score = self._getOperationTypeRating(model, options.operationType) * 1000.0

        # 2. Size fit. contextLength is expressed in tokens, so the byte count is
        #    converted first; comparing raw bytes would overstate the request 4x.
        score += self._getSizeRating(model, totalSize / self._BYTES_PER_TOKEN)

        # 3. Processing mode (neutral 1.0 when the caller has no preference).
        requestedMode = getattr(options, 'processingMode', None)
        if requestedMode:
            score += self._getProcessingModeRating(model.processingMode, requestedMode)
        else:
            score += 1.0

        # 4. Priority (neutral 1.0 when the caller has no preference).
        requestedPriority = getattr(options, 'priority', None)
        if requestedPriority:
            score += self._getPriorityRating(model, requestedPriority)
        else:
            score += 1.0

        return score

    def _getOperationTypeRating(self, model: AiModel, operationType: OperationTypeEnum) -> float:
        """
        Get the operation type rating for a model.

        Args:
            model: The model to check
            operationType: The operation type to get rating for

        Returns:
            Rating as float, or 0.0 if model doesn't support this operation type
        """
        for entry in model.operationTypes:
            if entry.operationType == operationType:
                return float(entry.rating)
        return 0.0

    def _getProcessingModeRating(self, modelMode: ProcessingModeEnum, requestedMode: ProcessingModeEnum) -> float:
        """Get processing mode rating based on compatibility.

        Returns 1.0 for an exact match, the value from the compatibility table
        for a known mismatch, and 0.0 for any combination not in the table.
        """
        if modelMode == requestedMode:
            return 1.0

        # (requested mode, model mode, rating)
        compatibility = (
            (ProcessingModeEnum.BASIC, ProcessingModeEnum.ADVANCED, 0.5),
            (ProcessingModeEnum.BASIC, ProcessingModeEnum.DETAILED, 0.2),
            (ProcessingModeEnum.ADVANCED, ProcessingModeEnum.BASIC, 0.2),
            (ProcessingModeEnum.ADVANCED, ProcessingModeEnum.DETAILED, 0.5),
            (ProcessingModeEnum.DETAILED, ProcessingModeEnum.BASIC, 0.2),
            (ProcessingModeEnum.DETAILED, ProcessingModeEnum.ADVANCED, 0.5),
        )
        for requested, offered, rating in compatibility:
            if requestedMode == requested and modelMode == offered:
                return rating
        return 0.0  # No compatibility

    def _getPriorityRating(self, model: AiModel, requestedPriority: PriorityEnum) -> float:
        """Get priority rating based on model capabilities.

        SPEED and QUALITY map the model's 0-10 rating to 0-1. COST combines an
        inverted cost term (weight 1) with speed (0.5) and quality (0.2), so it
        can reach 1.7. BALANCED and unknown priorities are neutral (1.0).
        """
        if requestedPriority == PriorityEnum.BALANCED:
            return 1.0

        if requestedPriority == PriorityEnum.SPEED:
            return model.speedRating / 10.0

        if requestedPriority == PriorityEnum.QUALITY:
            return model.qualityRating / 10.0

        if requestedPriority == PriorityEnum.COST:
            # Lower cost is better: invert against a 0.1 USD / 1k tokens ceiling.
            costRating = max(0, 1.0 - (model.costPer1kTokensInput / 0.1))
            speedRating = model.speedRating / 10.0 * 0.5
            qualityRating = model.qualityRating / 10.0 * 0.2
            return costRating + speedRating + qualityRating

        return 1.0  # Default

    def _getSizeRating(self, model: AiModel, totalSize: float) -> float:
        """Get size rating for a model based on total input size.

        Args:
            model: The model to rate
            totalSize: Input size in the same unit as model.contextLength

        Returns:
            Fill ratio of the usable window when the input fits (tighter fit
            scores higher), the inverse ratio when it does not, and 1.0 for
            models without a context limit.
        """
        if model.contextLength <= 0:
            # No context length limit
            return 1.0

        modelMaxSize = model.contextLength * self._CONTEXT_USAGE_LIMIT
        if totalSize <= modelMaxSize:
            # Within limits: rating = size / (80% of model size)
            return totalSize / modelMaxSize
        # Exceeds limits: rating = (80% of model size) / size (favours fewer chunks)
        return modelMaxSize / totalSize

    def _logModelDetails(self, model: AiModel):
        """Log detailed information about a model."""
        logger.info(f"Model: {model.name}")
        logger.info(f"  Display Name: {model.displayName}")
        logger.info(f"  Connector: {model.connectorType}")
        logger.info(f"  Context Length: {model.contextLength}")
        logger.info(f"  Max Tokens: {model.maxTokens}")
        logger.info(f"  Quality Rating: {model.qualityRating}/10")
        logger.info(f"  Speed Rating: {model.speedRating}/10")
        logger.info(f"  Cost: ${model.costPer1kTokensInput:.4f}/1k tokens")
        logger.info(f"  Priority: {model.priority}")
        logger.info(f"  Processing Mode: {model.processingMode}")
        operationTypesStr = ', '.join([f"{ot.operationType.value}({ot.rating})" for ot in model.operationTypes])
        logger.info(f"  Operation Types: {operationTypesStr}")
class AiAnthropic(BaseConnectorAi):
    """Connector for communication with the Anthropic API.

    Exposes a text model entry (callAiBasic) and a vision model entry
    (callAiImage). Incoming messages use the OpenAI-style list of
    {"role", "content"} dicts and are converted to the Anthropic Messages
    format, where system instructions are a top-level field.
    """

    # Fallback endpoint for models that do not carry their own apiUrl.
    _MESSAGES_URL = "https://api.anthropic.com/v1/messages"

    def __init__(self):
        super().__init__()
        # Load configuration
        self.config = loadConfigData()
        self.apiKey = self.config["apiKey"]

        # HttpClient for API calls
        self.httpClient = httpx.AsyncClient(
            timeout=120.0,  # Longer timeout for complex requests
            headers={
                "x-api-key": self.apiKey,
                "anthropic-version": "2023-06-01",  # Anthropic API Version
                "Content-Type": "application/json"
            }
        )

        logger.info("Anthropic Connector initialized")

    def getConnectorType(self) -> str:
        """Get the connector type identifier."""
        return "anthropic"

    def getModels(self) -> List[AiModel]:
        """Get all available Anthropic models.

        Returns one text entry and one vision entry. Both point at the same
        underlying model and differ only in display name, handler and
        supported operation types.
        """
        # NOTE(review): both entries share the same `name`; confirm the model
        # registry does not key on it, otherwise one entry shadows the other.
        modelId = "claude-sonnet-4-5-20250929"

        def priceUsd(processingTime, bytesSent, bytesReceived):
            # Bytes are converted to approximate tokens (4 bytes per token).
            return (bytesSent / 4 / 1000) * 0.015 + (bytesReceived / 4 / 1000) * 0.075

        shared: Dict[str, Any] = {
            "name": modelId,
            "connectorType": "anthropic",
            "apiUrl": "https://api.anthropic.com/v1/messages",
            "temperature": 0.2,
            "maxTokens": 8192,
            "contextLength": 200000,
            "costPer1kTokensInput": 0.015,
            "costPer1kTokensOutput": 0.075,
            "speedRating": 6,  # Slower due to high-quality processing
            "qualityRating": 10,  # Best quality available
            "priority": PriorityEnum.QUALITY,
            "processingMode": ProcessingModeEnum.DETAILED,
            "version": modelId,
            "calculatePriceUsd": priceUsd,
        }

        return [
            AiModel(
                displayName="Anthropic Claude Sonnet 4.5",
                functionCall=self.callAiBasic,
                operationTypes=createOperationTypeRatings(
                    (OperationTypeEnum.PLAN, 9),
                    (OperationTypeEnum.DATA_ANALYSE, 10),
                    (OperationTypeEnum.DATA_GENERATE, 9),
                    (OperationTypeEnum.DATA_EXTRACT, 8)
                ),
                **shared
            ),
            AiModel(
                displayName="Anthropic Claude Sonnet 4.5 Vision",
                functionCall=self.callAiImage,
                operationTypes=createOperationTypeRatings(
                    (OperationTypeEnum.IMAGE_ANALYSE, 10)
                ),
                **shared
            ),
        ]

    @staticmethod
    def _collapseToText(content: Any) -> str:
        """Flatten message content (a string or a list of blocks) into plain text."""
        if content is None:
            return ""
        if not isinstance(content, list):
            return str(content)
        texts: List[str] = []
        for part in content:
            text = part.get("text") if isinstance(part, dict) else part
            # Blocks without text (e.g. images) cannot be represented here; skip them
            # instead of failing the whole join.
            if text is not None:
                texts.append(str(text))
        return "\n\n".join(texts)

    def _splitSystemPrompt(self, messages: List[Dict[str, Any]]) -> tuple:
        """Separate system instructions from the conversation.

        Returns:
            (systemPrompt or None, list of non-system messages with block
            content collapsed to text)
        """
        systemParts: List[str] = []
        conversation: List[Dict[str, Any]] = []
        for message in messages or []:
            role = message.get("role")
            content = message.get("content", "")
            if role == "system":
                # Anthropic expects system content as a top-level field.
                text = self._collapseToText(content)
                if text:
                    systemParts.append(text)
                continue
            if isinstance(content, list):
                content = self._collapseToText(content)
            conversation.append({"role": role, "content": content})
        return ("\n\n".join(systemParts) or None), conversation

    @staticmethod
    def _extractResponseText(responseJson: Dict[str, Any]) -> str:
        """Concatenate all text blocks of an Anthropic response body."""
        content = responseJson.get("content")
        if isinstance(content, list):
            return "".join(
                (part.get("text") or "")
                for part in content
                if isinstance(part, dict) and part.get("type") == "text"
            )
        # Older API versions returned the content directly as a string.
        return content if isinstance(content, str) else ""

    @staticmethod
    def _describeApiError(statusCode: int, body: str) -> str:
        """Map an Anthropic HTTP status code to a user-facing error message."""
        if statusCode == 529:
            return "Anthropic API is currently overloaded. Please try again in a few minutes."
        if statusCode == 429:
            return "Rate limit exceeded. Please wait before making another request."
        if statusCode == 401:
            return "Invalid API key. Please check your Anthropic API configuration."
        if statusCode == 400:
            return f"Invalid request to Anthropic API: {body}"
        return f"Anthropic API error ({statusCode}): {body}"

    async def callAiBasic(self, modelCall: AiModelCall) -> AiModelResponse:
        """
        Calls the Anthropic API with the given messages using standardized pattern.

        Args:
            modelCall: AiModelCall with messages and options

        Returns:
            AiModelResponse with content and metadata

        Raises:
            HTTPException: For errors in API communication (always status 500)
        """
        try:
            model = modelCall.model
            temperature = getattr(modelCall.options, "temperature", None)
            if temperature is None:
                temperature = model.temperature

            # Anthropic requires max_tokens on every request.
            maxTokens = model.maxTokens
            if maxTokens is None:
                raise ValueError("maxTokens must be provided for Anthropic API calls")

            systemPrompt, conversation = self._splitSystemPrompt(modelCall.messages)

            payload: Dict[str, Any] = {
                "model": model.name,
                "messages": conversation,
                "temperature": temperature,
                "max_tokens": maxTokens,
            }
            if systemPrompt:
                payload["system"] = systemPrompt

            response = await self.httpClient.post(model.apiUrl, json=payload)

            if response.status_code != 200:
                logger.error(f"Anthropic API error: {response.status_code} - {response.text}")
                raise HTTPException(
                    status_code=500,
                    detail=self._describeApiError(response.status_code, response.text)
                )

            anthropicResponse = response.json()
            content = self._extractResponseText(anthropicResponse)

            # Debug logging for empty responses
            if not content.strip():
                logger.warning(f"Anthropic API returned empty content. Full response: {anthropicResponse}")
                content = "[Anthropic API returned empty response]"

            return AiModelResponse(
                content=content,
                success=True,
                modelId=model.name,
                metadata={"response_id": anthropicResponse.get("id", "")}
            )

        except HTTPException:
            # Already logged and carrying a user-facing message; do not wrap it again.
            raise
        except Exception as e:
            error_msg = str(e) if str(e) else f"{type(e).__name__}"
            error_detail = f"Error calling Anthropic API: {error_msg}"
            if hasattr(e, 'detail') and e.detail:
                error_detail += f" | Detail: {e.detail}"
            if hasattr(e, 'status_code'):
                error_detail += f" | Status: {e.status_code}"
            logger.error(error_detail, exc_info=True)
            raise HTTPException(status_code=500, detail=error_detail) from e

    async def callAiImage(self, modelCall: AiModelCall) -> AiModelResponse:
        """
        Analyzes an image using Anthropic's vision capabilities using standardized pattern.

        The first non-system message must have list content holding text parts
        and one image_url part with a base64 data URL, e.g.
        [{"type": "text", "text": "..."}, {"type": "image_url", "image_url": {"url": "data:..."}}].
        If several image parts are present, the last one is used.

        Args:
            modelCall: AiModelCall with messages and image data in options

        Returns:
            AiModelResponse with analysis content; failures are reported via
            success=False and error instead of raising
        """
        # Local import: `time` is not imported at module level in this file.
        import time

        try:
            messages = modelCall.messages
            model = modelCall.model

            # A leading system message must not be mistaken for the user message.
            userMessage = next(
                (m for m in (messages or []) if m.get("role") != "system"),
                None
            )
            if not userMessage or not userMessage.get("content"):
                raise ValueError("No messages provided for image analysis")

            logger.info(f"callAiImage called with {len(messages)} message(s)...")

            userContent = userMessage["content"]
            if not isinstance(userContent, list):
                raise ValueError("Expected content to be a list for vision")

            textParts: List[str] = []
            imageUrl = None
            for contentItem in userContent:
                if not isinstance(contentItem, dict):
                    continue
                itemType = contentItem.get("type")
                if itemType == "text":
                    if contentItem.get("text"):
                        textParts.append(str(contentItem["text"]))
                elif itemType == "image_url":
                    imageUrlDict = contentItem.get("image_url")
                    imageUrl = imageUrlDict.get("url") if isinstance(imageUrlDict, dict) else None

            if not isinstance(imageUrl, str) or not imageUrl.startswith("data:"):
                raise ValueError("No image data found in messages")

            # Data URL format: data:image/jpeg;base64,/9j/4AAQSkZ...
            parts = imageUrl.split(";base64,")
            if len(parts) != 2 or not parts[1]:
                raise ValueError("Invalid image data URL format")
            mimeType = parts[0][len("data:"):]
            base64Data = parts[1]

            # Convert to Anthropic's vision format. An empty text block is omitted
            # because the API does not accept it.
            contentBlocks: List[Dict[str, Any]] = []
            textPrompt = "\n\n".join(textParts)
            if textPrompt:
                contentBlocks.append({"type": "text", "text": textPrompt})
            contentBlocks.append({
                "type": "image",
                "source": {
                    "type": "base64",
                    "media_type": mimeType,
                    "data": base64Data
                }
            })

            systemPrompt, _ = self._splitSystemPrompt(messages)

            # Get parameters from model (consistent with callAiBasic)
            payload: Dict[str, Any] = {
                "model": model.name,
                "max_tokens": getattr(model, "maxTokens", 8192),
                "temperature": getattr(model, "temperature", 0.2),
                "messages": [{"role": "user", "content": contentBlocks}],
            }
            if systemPrompt:
                payload["system"] = systemPrompt

            apiUrl = getattr(model, "apiUrl", None) or self._MESSAGES_URL

            startTime = time.perf_counter()
            response = await self.httpClient.post(apiUrl, json=payload)
            processingTime = time.perf_counter() - startTime

            if response.status_code != 200:
                errorMessage = f"Anthropic API error: {response.status_code} - {response.text}"
                logger.error(errorMessage)
                return AiModelResponse(
                    content="",
                    success=False,
                    error=f"Error during image analysis: {errorMessage}"
                )

            return AiModelResponse(
                content=self._extractResponseText(response.json()),
                success=True,
                modelId=model.name,
                processingTime=processingTime
            )

        except Exception as e:
            logger.error(f"Error during image analysis: {str(e)}", exc_info=True)
            return AiModelResponse(
                content="",
                success=False,
                error=f"Error during image analysis: {str(e)}"
            )
qualityRating=8, # Good quality + # capabilities removed (not used in business logic) + functionCall=self.extractDocument, + priority=PriorityEnum.COST, + processingMode=ProcessingModeEnum.BASIC, + operationTypes=createOperationTypeRatings(), + version="internal-extractor-v1", + calculatePriceUsd=lambda processingTime, bytesSent, bytesReceived: 0.001 + (bytesSent + bytesReceived) / (1024 * 1024) * 0.01 + ), + AiModel( + name="internal-generator", + displayName="Internal Document Generator", + connectorType="internal", + apiUrl="internal://generate", + temperature=0.0, # Not applicable for generation + maxTokens=0, # Not token-based + contextLength=0, + costPer1kTokensInput=0.0, + costPer1kTokensOutput=0.0, + speedRating=8, # Fast for generation + qualityRating=8, # Good quality + # capabilities removed (not used in business logic) + functionCall=self.generateDocument, + priority=PriorityEnum.COST, + processingMode=ProcessingModeEnum.BASIC, + operationTypes=createOperationTypeRatings(), + version="internal-generator-v1", + calculatePriceUsd=lambda processingTime, bytesSent, bytesReceived: 0.002 + (bytesReceived / (1024 * 1024)) * 0.005 + ), + AiModel( + name="internal-renderer", + displayName="Internal Document Renderer", + connectorType="internal", + apiUrl="internal://render", + temperature=0.0, # Not applicable for rendering + maxTokens=0, # Not token-based + contextLength=0, + costPer1kTokensInput=0.0, + costPer1kTokensOutput=0.0, + speedRating=7, # Good for rendering + qualityRating=9, # High quality rendering + # capabilities removed (not used in business logic) + functionCall=self.renderDocument, + priority=PriorityEnum.QUALITY, + processingMode=ProcessingModeEnum.DETAILED, + operationTypes=createOperationTypeRatings(), + version="internal-renderer-v1", + calculatePriceUsd=lambda processingTime, bytesSent, bytesReceived: 0.003 + (bytesReceived / (1024 * 1024)) * 0.008 + ) + ] + + async def extractDocument(self, modelCall: AiModelCall) -> AiModelResponse: + 
""" + NOP - we only need the model for price calculations + """ + logger.error(f"Document extraction not to call here") + return AiModelResponse( + content="", + success=False, + error="Internal connector should not be called directly" + ) + + async def generateDocument(self, modelCall: AiModelCall) -> AiModelResponse: + """ + NOP - we only need the model for price calculations + """ + logger.error(f"Document generation not to call here") + return AiModelResponse( + content="", + success=False, + error="Internal connector should not be called directly" + ) + + async def renderDocument(self, modelCall: AiModelCall) -> AiModelResponse: + """ + NOP - we only need the model for price calculations + """ + logger.error(f"Document rendering not to call here") + return AiModelResponse( + content="", + success=False, + error="Internal connector should not be called directly" + ) + diff --git a/modules/aicore/aicorePluginOpenai.py b/modules/aicore/aicorePluginOpenai.py new file mode 100644 index 00000000..7f7e3c70 --- /dev/null +++ b/modules/aicore/aicorePluginOpenai.py @@ -0,0 +1,388 @@ +import logging +import httpx +from typing import List +from fastapi import HTTPException +from modules.shared.configuration import APP_CONFIG +from modules.aicore.aicoreBase import BaseConnectorAi +from modules.datamodels.datamodelAi import AiModel, PriorityEnum, ProcessingModeEnum, OperationTypeEnum, AiModelCall, AiModelResponse, createOperationTypeRatings + +# Configure logger +logger = logging.getLogger(__name__) + +class ContextLengthExceededException(Exception): + """Exception raised when the context length exceeds the model's limit""" + pass + +def loadConfigData(): + """Load configuration data for OpenAI connector""" + return { + "apiKey": APP_CONFIG.get('Connector_AiOpenai_API_SECRET'), + } + +class AiOpenai(BaseConnectorAi): + """Connector for communication with the OpenAI API.""" + + def __init__(self): + super().__init__() + # Load configuration + self.config = loadConfigData() + 
self.apiKey = self.config["apiKey"] + + # HttpClient for API calls + self.httpClient = httpx.AsyncClient( + timeout=120.0, # Longer timeout for complex requests + headers={ + "Authorization": f"Bearer {self.apiKey}", + "Content-Type": "application/json" + } + ) + logger.info("OpenAI Connector initialized") + + def getConnectorType(self) -> str: + """Get the connector type identifier.""" + return "openai" + + def getModels(self) -> List[AiModel]: + """Get all available OpenAI models.""" + return [ + AiModel( + name="gpt-4o", + displayName="OpenAI GPT-4o", + connectorType="openai", + apiUrl="https://api.openai.com/v1/chat/completions", + temperature=0.2, + maxTokens=16384, + contextLength=128000, + costPer1kTokensInput=0.03, + costPer1kTokensOutput=0.06, + speedRating=7, # Good speed for complex tasks + qualityRating=9, # High quality + # capabilities removed (not used in business logic) + functionCall=self.callAiBasic, + priority=PriorityEnum.BALANCED, + processingMode=ProcessingModeEnum.ADVANCED, + operationTypes=createOperationTypeRatings( + (OperationTypeEnum.PLAN, 8), + (OperationTypeEnum.DATA_ANALYSE, 9), + (OperationTypeEnum.DATA_GENERATE, 9), + (OperationTypeEnum.DATA_EXTRACT, 7) + ), + version="gpt-4o", + calculatePriceUsd=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.03 + (bytesReceived / 4 / 1000) * 0.06 + ), + AiModel( + name="gpt-3.5-turbo", + displayName="OpenAI GPT-3.5 Turbo", + connectorType="openai", + apiUrl="https://api.openai.com/v1/chat/completions", + temperature=0.2, + maxTokens=4096, + contextLength=16000, + costPer1kTokensInput=0.0015, + costPer1kTokensOutput=0.002, + speedRating=9, # Very fast + qualityRating=7, # Good but not premium + # capabilities removed (not used in business logic) + functionCall=self.callAiBasic, + priority=PriorityEnum.SPEED, + processingMode=ProcessingModeEnum.BASIC, + operationTypes=createOperationTypeRatings( + (OperationTypeEnum.PLAN, 7), + (OperationTypeEnum.DATA_ANALYSE, 8), + 
(OperationTypeEnum.DATA_GENERATE, 8) + # Note: GPT-3.5-turbo does NOT support vision/image operations + ), + version="gpt-3.5-turbo", + calculatePriceUsd=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.0015 + (bytesReceived / 4 / 1000) * 0.002 + ), + AiModel( + name="gpt-4o", + displayName="OpenAI GPT-4o Instance Vision", + connectorType="openai", + apiUrl="https://api.openai.com/v1/chat/completions", + temperature=0.2, + maxTokens=16384, + contextLength=128000, + costPer1kTokensInput=0.03, + costPer1kTokensOutput=0.06, + speedRating=6, # Slower for vision tasks + qualityRating=9, # High quality vision + functionCall=self.callAiImage, + priority=PriorityEnum.QUALITY, + processingMode=ProcessingModeEnum.DETAILED, + operationTypes=createOperationTypeRatings( + (OperationTypeEnum.IMAGE_ANALYSE, 9) + ), + version="gpt-4o", + calculatePriceUsd=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.03 + (bytesReceived / 4 / 1000) * 0.06 + ), + AiModel( + name="dall-e-3", + displayName="OpenAI DALL-E 3", + connectorType="openai", + apiUrl="https://api.openai.com/v1/images/generations", + temperature=0.0, # Image generation doesn't use temperature + maxTokens=0, # Image generation doesn't use tokens + contextLength=0, + costPer1kTokensInput=0.04, + costPer1kTokensOutput=0.0, + speedRating=5, # Slow for image generation + qualityRating=9, # High quality art generation + # capabilities removed (not used in business logic) + functionCall=self.generateImage, + priority=PriorityEnum.QUALITY, + processingMode=ProcessingModeEnum.DETAILED, + operationTypes=createOperationTypeRatings( + (OperationTypeEnum.IMAGE_GENERATE, 10) + ), + version="dall-e-3", + calculatePriceUsd=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.04 + ) + ] + + async def callAiBasic(self, modelCall: AiModelCall) -> AiModelResponse: + """ + Calls the OpenAI API with the given messages using standardized pattern. 
+ + Args: + modelCall: AiModelCall with messages and options + + Returns: + AiModelResponse with content and metadata + + Raises: + HTTPException: For errors in API communication + """ + try: + # Extract parameters from modelCall + messages = modelCall.messages + model = modelCall.model + options = modelCall.options + temperature = getattr(options, "temperature", None) + if temperature is None: + temperature = model.temperature + maxTokens = model.maxTokens + + payload = { + "model": model.name, + "messages": messages, + "temperature": temperature, + "max_tokens": maxTokens + } + + response = await self.httpClient.post( + model.apiUrl, + json=payload + ) + + if response.status_code != 200: + error_message = f"OpenAI API error: {response.status_code} - {response.text}" + logger.error(error_message) + + # Check for context length exceeded error + if response.status_code == 400: + try: + error_data = response.json() + if (error_data.get("error", {}).get("code") == "context_length_exceeded" or + "context length" in error_data.get("error", {}).get("message", "").lower()): + # Raise a specific exception for context length issues + raise ContextLengthExceededException( + f"Context length exceeded: {error_data.get('error', {}).get('message', 'Unknown error')}" + ) + except (ValueError, KeyError): + pass # If we can't parse the error, fall through to generic error + + # Include the actual error details in the exception + raise HTTPException(status_code=500, detail=error_message) + + responseJson = response.json() + content = responseJson["choices"][0]["message"]["content"] + + return AiModelResponse( + content=content, + success=True, + modelId=model.name, + metadata={"response_id": responseJson.get("id", "")} + ) + + except ContextLengthExceededException: + # Re-raise context length exceptions without wrapping + raise + except Exception as e: + logger.error(f"Error calling OpenAI API: {str(e)}") + raise HTTPException(status_code=500, detail=f"Error calling OpenAI API: 
async def generateImage(self, modelCall: AiModelCall) -> AiModelResponse:
    """
    Generate an image with DALL-E 3 using the standardized call pattern.

    The prompt is read from the first message. If it is a JSON object it is
    parsed into an AiCallPromptImage; otherwise the text is used verbatim as
    the prompt and size/quality/style are taken from the call options
    (falling back to "1024x1024" / "standard" / "vivid").

    Args:
        modelCall: AiModelCall with messages and generation options

    Returns:
        AiModelResponse with the base64-encoded image in ``content`` on
        success. Every failure path (API error, empty result, unexpected
        exception) returns an AiModelResponse with ``success=False`` and an
        ``error`` text; this method does not raise.
    """
    try:
        # Extract parameters from modelCall
        messages = modelCall.messages
        options = modelCall.options

        # Get prompt from messages
        promptContent = messages[0]["content"] if messages else ""

        # Parse prompt using AiCallPromptImage model
        from modules.datamodels.datamodelAi import AiCallPromptImage
        import json

        try:
            # Try to parse as JSON
            promptData = json.loads(promptContent)
            promptModel = AiCallPromptImage(**promptData)
        except Exception:
            # Not JSON (or not a valid prompt object): use plain text prompt.
            # "except Exception" instead of a bare except so that
            # KeyboardInterrupt / SystemExit / task cancellation still propagate.
            promptModel = AiCallPromptImage(
                prompt=promptContent,
                size=options.size if options and hasattr(options, 'size') else "1024x1024",
                quality=options.quality if options and hasattr(options, 'quality') else "standard",
                style=options.style if options and hasattr(options, 'style') else "vivid"
            )

        # Extract parameters from Pydantic model
        prompt = promptModel.prompt
        size = promptModel.size or "1024x1024"
        quality = promptModel.quality or "standard"
        style = promptModel.style or "vivid"

        logger.debug(f"Starting image generation with prompt: '{prompt[:100]}...'")

        # DALL-E 3 API endpoint
        dalleUrl = "https://api.openai.com/v1/images/generations"

        payload = {
            "model": "dall-e-3",
            "prompt": prompt,
            "size": size,
            "quality": quality,
            "style": style,
            "n": 1,
            "response_format": "b64_json"  # Get base64 data directly instead of URLs
        }

        # Separate client for DALL-E API calls. The context manager guarantees
        # the connection pool is closed even when post() raises.
        async with httpx.AsyncClient(
            timeout=120.0,
            headers={
                "Authorization": f"Bearer {self.apiKey}",
                "Content-Type": "application/json"
            }
        ) as dalleClient:
            response = await dalleClient.post(dalleUrl, json=payload)

        if response.status_code != 200:
            errorMessage = f"DALL-E API error: {response.status_code} - {response.text}"
            logger.error(errorMessage)
            # Same response type as every other path (previously a bare dict)
            return AiModelResponse(
                content="",
                success=False,
                error=errorMessage
            )

        responseJson = response.json()

        if "data" in responseJson and len(responseJson["data"]) > 0:
            imageData = responseJson["data"][0]["b64_json"]

            logger.info(f"Successfully generated image: {len(imageData)} characters")
            return AiModelResponse(
                content=imageData,
                success=True,
                modelId="dall-e-3",
                metadata={
                    "size": size,
                    "quality": quality,
                    "style": style,
                    "response_id": responseJson.get("id", "")
                }
            )

        logger.error("No image data in DALL-E response")
        return AiModelResponse(
            content="",
            success=False,
            error="No image data in DALL-E response"
        )

    except Exception as e:
        logger.error(f"Error during image generation: {str(e)}", exc_info=True)
        return AiModelResponse(
            content="",
            success=False,
            error=f"Error during image generation: {str(e)}"
        )
self.config = loadConfigData() + self.apiKey = self.config["apiKey"] + + # HttpClient for API calls + self.httpClient = httpx.AsyncClient( + timeout=120.0, # Longer timeout for complex requests + headers={ + "Authorization": f"Bearer {self.apiKey}", + "Content-Type": "application/json", + "Accept": "application/json" + } + ) + + logger.info("Perplexity Connector initialized") + + def getConnectorType(self) -> str: + """Get the connector type identifier.""" + return "perplexity" + + def _convertIsoCodeToCountryName(self, isoCode: str) -> str: + """ + Convert ISO-2 country code to Perplexity country name. + Uses centralized CountryCodes mapping. + """ + return CountryCodes.getForPerplexity(isoCode) + + def getModels(self) -> List[AiModel]: + """Get all available Perplexity models.""" + return [ + AiModel( + name="sonar", + displayName="Perplexity Sonar", + connectorType="perplexity", + apiUrl="https://api.perplexity.ai/chat/completions", + temperature=0.2, + maxTokens=24000, # Increased for detailed web crawl responses (Perplexity supports up to 25k) + contextLength=32000, + costPer1kTokensInput=0.005, + costPer1kTokensOutput=0.005, + speedRating=8, + qualityRating=8, + # capabilities removed (not used in business logic) + functionCall=self._routeWebOperation, + priority=PriorityEnum.BALANCED, + processingMode=ProcessingModeEnum.ADVANCED, + operationTypes=createOperationTypeRatings( + (OperationTypeEnum.WEB_SEARCH, 9), + (OperationTypeEnum.WEB_CRAWL, 7) + ), + version="sonar", + calculatePriceUsd=lambda processingTime, bytesSent, bytesReceived: (bytesSent / 4 / 1000) * 0.005 + (bytesReceived / 4 / 1000) * 0.005 + ), + AiModel( + name="sonar-pro", + displayName="Perplexity Sonar Pro", + connectorType="perplexity", + apiUrl="https://api.perplexity.ai/chat/completions", + temperature=0.2, + maxTokens=24000, # Increased for detailed web crawl responses (Perplexity supports up to 25k) + contextLength=32000, + costPer1kTokensInput=0.01, + costPer1kTokensOutput=0.01, + 
async def callAiBasic(self, modelCall: AiModelCall) -> AiModelResponse:
    """
    Calls the Perplexity API with the given messages using standardized pattern.

    The temperature comes from the call options when set (including 0.0),
    otherwise from the model; max_tokens always comes from the model.

    Args:
        modelCall: AiModelCall with messages and options

    Returns:
        AiModelResponse with content and metadata

    Raises:
        HTTPException: Status 500 for any error in API communication. For
            non-200 API answers the detail carries a status-specific message
            (rate limit, invalid key, invalid request).
    """
    try:
        # Extract parameters from modelCall
        messages = modelCall.messages
        model = modelCall.model
        options = modelCall.options
        temperature = getattr(options, "temperature", None)
        if temperature is None:
            temperature = model.temperature
        maxTokens = model.maxTokens

        payload = {
            "model": model.name,
            "messages": messages,
            "temperature": temperature,
            "max_tokens": maxTokens
        }

        response = await self.httpClient.post(
            model.apiUrl,
            json=payload
        )

        if response.status_code != 200:
            errorDetail = f"Perplexity API error: {response.status_code} - {response.text}"
            logger.error(errorDetail)

            # Provide more specific error messages based on status code
            if response.status_code == 429:
                errorMessage = "Rate limit exceeded. Please wait before making another request."
            elif response.status_code == 401:
                errorMessage = "Invalid API key. Please check your Perplexity API configuration."
            elif response.status_code == 400:
                errorMessage = f"Invalid request to Perplexity API: {response.text}"
            else:
                errorMessage = f"Perplexity API error ({response.status_code}): {response.text}"

            raise HTTPException(status_code=500, detail=errorMessage)

        apiResponse = response.json()
        content = apiResponse["choices"][0]["message"]["content"]

        return AiModelResponse(
            content=content,
            success=True,
            modelId=model.name,
            metadata={"response_id": apiResponse.get("id", "")}
        )

    except HTTPException:
        # Already logged above and already carries the status-specific
        # message; re-wrapping it would bury that message and log it twice.
        raise
    except Exception as e:
        logger.error(f"Error calling Perplexity API: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error calling Perplexity API: {str(e)}") from e
+ + Args: + modelCall: AiModelCall with messages and options + + Returns: + AiModelResponse based on operation type + """ + operationType = modelCall.options.operationType + + if operationType == OperationTypeEnum.WEB_SEARCH: + return await self.webSearch(modelCall) + elif operationType == OperationTypeEnum.WEB_CRAWL: + return await self.webCrawl(modelCall) + else: + # Fallback to basic call + return await self.callAiBasic(modelCall) + + def _getDepthInstructions(self, maxDepth: int) -> str: + """ + Map maxDepth (numeric) to instructional text for LLM. + + Args: + maxDepth: 1 (fast/overview), 2 (general/standard), 3 (deep/comprehensive) + + Returns: + Instructional text for the LLM + """ + depthMap = { + 1: "Basic overview - extract main content from the main page only", + 2: "Standard crawl - extract content from main page and linked pages (2 levels deep)", + 3: "Deep crawl - comprehensively extract content from main page and all accessible linked pages (3+ levels deep)" + } + return depthMap.get(maxDepth, depthMap[2]) + + def _getWidthInstructions(self, maxWidth: int) -> str: + """ + Map maxWidth (numeric) to instructional text for LLM. + + Args: + maxWidth: Number of pages to crawl at each level (default: 10) + + Returns: + Instructional text for the LLM + """ + if maxWidth <= 5: + return f"Focused crawl - limit to {maxWidth} most relevant pages per level" + elif maxWidth <= 15: + return f"Standard breadth - crawl up to {maxWidth} pages per level" + elif maxWidth <= 30: + return f"Wide crawl - crawl up to {maxWidth} pages per level, prioritize quality" + else: + return f"Extensive crawl - crawl up to {maxWidth} pages per level, comprehensive coverage" + + async def webSearch(self, modelCall: AiModelCall) -> AiModelResponse: + """ + WEB_SEARCH operation - returns list of URLs based on search query. 
+ + Args: + modelCall: AiModelCall with AiCallPromptWebSearch as prompt + + Returns: + AiModelResponse with JSON list of URLs + """ + try: + # Extract parameters + messages = modelCall.messages + model = modelCall.model + options = modelCall.options + temperature = getattr(options, "temperature", None) or model.temperature + maxTokens = model.maxTokens + + # Parse prompt JSON - find user message (not system message) + promptContent = "" + if messages: + for msg in messages: + if msg.get("role") == "user": + promptContent = msg.get("content", "") + break + # Fallback to first message if no user message found + if not promptContent and len(messages) > 0: + promptContent = messages[0].get("content", "") + + import json + promptData = json.loads(promptContent) + + # Create Pydantic model + webSearchPrompt = AiCallPromptWebSearch(**promptData) + + # Convert ISO country code to country name + countryName = webSearchPrompt.country + if countryName: + countryName = self._convertIsoCodeToCountryName(countryName) + + # Build search request for Perplexity + searchPrompt = f"""Search the web for: {webSearchPrompt.instruction} + +Return a JSON array of {webSearchPrompt.maxNumberPages} most relevant URLs. 
async def webCrawl(self, modelCall: AiModelCall) -> AiModelResponse:
    """
    WEB_CRAWL operation - crawls ONE URL and returns content.

    Perplexity API Parameters Used:
    - messages: The prompt containing URL and instruction
    - max_tokens: Maximum response length
    - max_results: Number of search results (1-20, default: 10)
    - temperature: Response randomness (not web search specific)

    Pagination: Perplexity does NOT return paginated responses.
    A single response contains all results within max_tokens limit.

    Args:
        modelCall: AiModelCall with AiCallPromptWebCrawl as prompt. The prompt
            JSON is read from the first message with role "user", falling
            back to the first message.

    Returns:
        AiModelResponse with crawl results. When the API returned citations
        or search results, content is a JSON object with the keys "content",
        "citations" and "search_results"; otherwise it is the plain answer text.

    Raises:
        HTTPException: Status 500 for API errors, empty or invalid responses,
            and any other failure (e.g. a prompt that is not valid JSON).
    """
    import json

    try:
        # Extract parameters
        messages = modelCall.messages
        model = modelCall.model
        options = modelCall.options
        # "is None" rather than "or": an explicit temperature of 0.0 is valid
        # and must not be replaced by the model default.
        temperature = getattr(options, "temperature", None)
        if temperature is None:
            temperature = model.temperature
        maxTokens = model.maxTokens

        # Parse prompt JSON - find user message (not system message)
        promptContent = ""
        if messages:
            for msg in messages:
                if msg.get("role") == "user":
                    promptContent = msg.get("content", "")
                    break
            # Fallback to first message if no user message found
            if not promptContent:
                promptContent = messages[0].get("content", "")

        promptData = json.loads(promptContent)

        # Create Pydantic model
        webCrawlPrompt = AiCallPromptWebCrawl(**promptData)

        # Build crawl request for Perplexity - ONE URL
        # Match playground prompt style: just URL + question
        # This allows Perplexity to return detailed multi-source results
        crawlPrompt = f"{webCrawlPrompt.url}: {webCrawlPrompt.instruction}"

        # Build payload with optional Perplexity parameters
        # Note: max_tokens_per_page may not be supported by chat/completions endpoint
        # The playground Python SDK might use a different internal API
        # Keep max_results inside the documented 1-20 range
        maxResults = max(1, min(webCrawlPrompt.maxWidth or 10, 20))

        payload = {
            "model": model.name,
            "messages": [{"role": "user", "content": crawlPrompt}],
            "temperature": temperature,
            "max_tokens": maxTokens,  # Use model's configured maxTokens (24000)
            "max_results": maxResults,
            "return_citations": True  # Request citations explicitly
        }

        logger.info(f"Perplexity crawl payload: model={model.name}, prompt_length={len(crawlPrompt)}, max_tokens={maxTokens}, max_results={maxResults}")

        response = await self.httpClient.post(model.apiUrl, json=payload)

        if response.status_code != 200:
            raise HTTPException(status_code=500, detail=f"Perplexity Web Crawl API error: {response.text}")

        # Check if response body is empty or invalid
        responseText = response.text
        if not responseText or not responseText.strip():
            raise HTTPException(status_code=500, detail="Perplexity Web Crawl API returned empty response")

        try:
            apiResponse = response.json()
        except Exception as jsonError:
            logger.error(f"Failed to parse Perplexity response as JSON. Status: {response.status_code}, Response: {responseText[:500]}")
            raise HTTPException(status_code=500, detail=f"Perplexity Web Crawl API returned invalid JSON: {str(jsonError)}") from jsonError

        if "choices" not in apiResponse or not apiResponse["choices"]:
            raise HTTPException(status_code=500, detail="Perplexity Web Crawl API response missing 'choices' field")

        # Extract the main content
        content = apiResponse["choices"][0]["message"]["content"]

        # Check for citations or search results in the response
        citations = apiResponse.get("citations", [])
        searchResults = apiResponse.get("search_results", [])

        # Log what we found
        if citations:
            logger.info(f"Found {len(citations)} citations in response")
        if searchResults:
            logger.info(f"Found {len(searchResults)} search results in response")
        logger.debug(f"API response keys: {list(apiResponse.keys())}")

        # Build comprehensive response with citations if available
        responseData = {
            "content": content,
            "citations": citations if citations else [],
            "search_results": searchResults if searchResults else []
        }

        # Return comprehensive response
        return AiModelResponse(
            content=json.dumps(responseData, indent=2) if (citations or searchResults) else content,
            success=True,
            modelId=model.name,
            metadata={
                "response_id": apiResponse.get("id", ""),
                "operation": "WEB_CRAWL",
                "url": webCrawlPrompt.url,
                "actualPromptSent": crawlPrompt,
                "has_citations": len(citations) > 0,
                "has_search_results": len(searchResults) > 0
            }
        )

    except HTTPException as httpError:
        # Raised deliberately above with a precise detail; log it once and
        # pass it through instead of wrapping it in a second HTTPException.
        logger.error(f"Error in Perplexity web crawl: {httpError.detail}")
        raise
    except Exception as e:
        logger.error(f"Error in Perplexity web crawl: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error in Perplexity web crawl: {str(e)}") from e
self._initializeClient() + + + def getModels(self) -> List[AiModel]: + """Get all available Tavily models.""" + return [ + AiModel( + name="tavily-search", + displayName="Tavily Search & Research", + connectorType="tavily", + apiUrl="https://api.tavily.com", + temperature=0.0, # Web search doesn't use temperature + maxTokens=0, # Web search doesn't use tokens + contextLength=0, + costPer1kTokensInput=0.0, + costPer1kTokensOutput=0.0, + speedRating=8, # Good speed for search and extract + qualityRating=9, # Excellent quality for web research + # capabilities removed (not used in business logic) + functionCall=self._routeWebOperation, + priority=PriorityEnum.BALANCED, + processingMode=ProcessingModeEnum.BASIC, + operationTypes=createOperationTypeRatings( + (OperationTypeEnum.WEB_SEARCH, 9), + (OperationTypeEnum.WEB_CRAWL, 10) + ), + version="tavily-search", + calculatePriceUsd=lambda processingTime, bytesSent, bytesReceived: 0.008 # Simple flat rate + ) + ] + + def _initializeClient(self): + """Initialize the Tavily client if API key is available.""" + try: + apiKey = APP_CONFIG.get("Connector_AiTavily_API_SECRET") + if apiKey: + self.client = AsyncTavilyClient(api_key=apiKey) + logger.info("Tavily client initialized successfully") + else: + logger.warning("Tavily API key not found, client not initialized") + except Exception as e: + logger.error(f"Failed to initialize Tavily client: {str(e)}") + + def getConnectorType(self) -> str: + """Get the connector type identifier.""" + return "tavily" + + def _convertIsoCodeToCountryName(self, isoCode: str) -> str: + """ + Convert ISO-2 country code to Tavily country name. + Uses centralized CountryCodes mapping. 
+ """ + return CountryCodes.getForTavily(isoCode) + + def _extractUrlsFromPrompt(self, prompt: str) -> List[str]: + """Extract URLs from a text prompt using regex.""" + if not prompt: + return [] + + # URL regex pattern - matches http/https URLs + urlPattern = r'https?://(?:[-\w.])+(?:[:\d]+)?(?:/(?:[\w/_.])*(?:\?(?:[\w&=%.])*)?(?:#(?:[\w.])*)?)?' + urls = re.findall(urlPattern, prompt) + + # Remove duplicates while preserving order + seen = set() + uniqueUrls = [] + for url in urls: + if url not in seen: + seen.add(url) + uniqueUrls.append(url) + + return uniqueUrls + + def _normalizeUrl(self, url: str) -> str: + """ + Normalize URL for better deduplication. + Removes common variations that represent the same content. + """ + if not url: + return url + + # Remove trailing slashes + url = url.rstrip('/') + + # Remove common query parameters that don't affect content + import urllib.parse + parsed = urllib.parse.urlparse(url) + + # Remove common tracking parameters + queryParams = urllib.parse.parse_qs(parsed.query) + filteredParams = {} + + for key, values in queryParams.items(): + # Keep important parameters, remove tracking ones + if key.lower() not in ['utm_source', 'utm_medium', 'utm_campaign', 'utm_term', 'utm_content', + 'fbclid', 'gclid', 'ref', 'source', 'campaign']: + filteredParams[key] = values + + # Rebuild query string + filteredQuery = urllib.parse.urlencode(filteredParams, doseq=True) + + # Reconstruct URL + normalized = urllib.parse.urlunparse(( + parsed.scheme, + parsed.netloc, + parsed.path, + parsed.params, + filteredQuery, + parsed.fragment + )) + + return normalized + + def _calculateRelevanceScore(self, result: WebSearchResult, queryWords: set) -> float: + """ + Calculate relevance score for a search result. + Higher score means more relevant to the query. 
+ """ + score = 0.0 + + # Title relevance (most important) + titleWords = set(result.title.lower().split()) + titleMatches = len(queryWords.intersection(titleWords)) + score += titleMatches * 3.0 # Weight title matches heavily + + # URL relevance + urlWords = set(result.url.lower().split('/')) + urlMatches = len(queryWords.intersection(urlWords)) + score += urlMatches * 1.5 + + # Content relevance (if available) + if hasattr(result, 'rawContent') and result.rawContent: + contentWords = set(result.rawContent.lower().split()) + contentMatches = len(queryWords.intersection(contentWords)) + score += contentMatches * 0.1 # Lower weight for content matches + + # Domain authority bonus (simple heuristic) + domain = result.url.split('/')[2] if '/' in result.url else result.url + if any(authDomain in domain.lower() for authDomain in + ['wikipedia.org', 'github.com', 'stackoverflow.com', 'reddit.com', 'medium.com']): + score += 1.0 + + # Penalty for very long URLs (often less relevant) + if len(result.url) > 100: + score -= 0.5 + + return score + + def _intelligentUrlFiltering(self, searchResults: List[WebSearchResult], query: str, maxResults: int) -> List[WebSearchResult]: + """ + Intelligent URL filtering with de-duplication and relevance scoring. 
@classmethod
async def create(cls):
    """
    Alternate constructor that builds a connector from APP_CONFIG.

    Unlike the plain constructor, this fails fast when no API key is
    configured and loads the crawl / search limits from configuration
    instead of using the built-in defaults.

    Returns:
        A connector instance with client, crawl settings and web search
        limits set from configuration.

    Raises:
        ValueError: If Connector_AiTavily_API_SECRET is not configured, or
            if one of the numeric settings cannot be converted to int.
    """
    apiKey = APP_CONFIG.get("Connector_AiTavily_API_SECRET")
    if not apiKey:
        raise ValueError("Tavily API key not configured. Please set Connector_AiTavily_API_SECRET in config.ini")

    # __init__ accepts no arguments, so the settings cannot be passed as
    # keyword arguments (that raised TypeError on every call). Build a
    # default instance and override its cached attributes instead.
    instance = cls()
    instance.client = AsyncTavilyClient(api_key=apiKey)

    # Load and cache web crawl related configuration
    instance.crawlTimeout = int(APP_CONFIG.get("Web_Crawl_TIMEOUT", "30"))
    instance.crawlMaxRetries = int(APP_CONFIG.get("Web_Crawl_MAX_RETRIES", "3"))
    instance.crawlRetryDelay = int(APP_CONFIG.get("Web_Crawl_RETRY_DELAY", "2"))

    # Load and cache web search constraints
    instance.webSearchMinResults = int(APP_CONFIG.get("Web_Search_MIN_RESULTS", "1"))
    instance.webSearchMaxResults = int(APP_CONFIG.get("Web_Search_MAX_RESULTS", "20"))

    return instance
kwargs["time_range"] = timeRange + if topic is not None: + kwargs["topic"] = topic + if includeDomains is not None and len(includeDomains) > 0: + kwargs["include_domains"] = includeDomains + if excludeDomains is not None: + kwargs["exclude_domains"] = excludeDomains + if country is not None: + kwargs["country"] = country + if includeAnswer is not None: + kwargs["include_answer"] = includeAnswer + if includeRawContent is not None: + kwargs["include_raw_content"] = includeRawContent + + # Log the final API call parameters for comparison + logger.info(f"Tavily API call parameters: {kwargs}") + + # Ensure client is initialized + if self.client is None: + self._initializeClient() + if self.client is None: + raise ValueError("Tavily client not initialized. Please check API key configuration.") + + response = await self.client.search(**kwargs) + + # Return all results without score filtering + # Tavily's scoring is already applied by the API + logger.info(f"Tavily returned {len(response.get('results', []))} results") + + return [ + WebSearchResult( + title=result["title"], + url=self._cleanUrl(result["url"]), + rawContent=result.get("raw_content") + ) + for result in response["results"] + ] + + async def _crawl( + self, + url: str, + instructions: str | None = None, + limit: int = 20, + maxDepth: int = 2, + maxBreadth: int = 40, + ) -> list[WebCrawlResult]: + """Calls the Tavily API to crawl ONE URL with link following and retry logic.""" + maxRetries = self.crawlMaxRetries + retryDelay = self.crawlRetryDelay + timeout = self.crawlTimeout + + logger.debug(f"Starting crawl of URL: {url}") + logger.debug(f"Crawl settings: instructions={instructions}, limit={limit}, maxDepth={maxDepth}, maxBreadth={maxBreadth}, timeout={timeout}s") + + for attempt in range(maxRetries + 1): + try: + logger.debug(f"Crawl attempt {attempt + 1}/{maxRetries + 1}") + + # Ensure client is initialized + if self.client is None: + self._initializeClient() + if self.client is None: + raise 
ValueError("Tavily client not initialized. Please check API key configuration.") + + logger.debug(f"Crawling URL: {url}") + + # Build kwargs for crawl + kwargsCrawl: dict = {"url": url} + if instructions: + kwargsCrawl["instructions"] = instructions + if limit: + kwargsCrawl["limit"] = limit + if maxDepth: + kwargsCrawl["max_depth"] = maxDepth + if maxBreadth: + kwargsCrawl["max_breadth"] = maxBreadth + + logger.debug(f"Sending request to Tavily with kwargs: {kwargsCrawl}") + + response = await asyncio.wait_for( + self.client.crawl(**kwargsCrawl), + timeout=timeout + ) + + logger.debug(f"Tavily response received: {type(response)}") + + # Parse response - could be dict with results or list + if isinstance(response, dict) and "results" in response: + pageResults = response["results"] + elif isinstance(response, list): + pageResults = response + else: + logger.warning(f"Unexpected response format: {type(response)}") + pageResults = [] + + logger.debug(f"Got {len(pageResults)} pages from crawl") + + # Convert to WebCrawlResult format + results = [] + for result in pageResults: + results.append(WebCrawlResult( + url=result.get("url", url), + content=result.get("raw_content", result.get("content", "")), + title=result.get("title", "") + )) + + logger.debug(f"Crawl successful: extracted {len(results)} pages from URL") + return results + + except asyncio.TimeoutError: + logger.warning(f"Crawl attempt {attempt + 1} timed out after {timeout} seconds for URL: {url}") + if attempt < maxRetries: + logger.info(f"Retrying in {retryDelay} seconds...") + await asyncio.sleep(retryDelay) + else: + raise Exception(f"Crawl failed after {maxRetries + 1} attempts due to timeout") + + except Exception as e: + logger.warning(f"Crawl attempt {attempt + 1} failed for URL {url}: {str(e)}") + logger.debug(f"Full error details: {type(e).__name__}: {str(e)}") + + # Check if it's a validation error and log more details + if "validation" in str(e).lower(): + logger.debug(f"URL validation failed. 
async def _routeWebOperation(self, modelCall: AiModelCall) -> "AiModelResponse":
    """
    Dispatch a web operation to the handler matching its operation type.

    Args:
        modelCall: AiModelCall whose ``options.operationType`` selects the handler

    Returns:
        The handler's AiModelResponse, or an unsuccessful AiModelResponse
        when the operation type is neither WEB_SEARCH nor WEB_CRAWL
    """
    operationType = modelCall.options.operationType

    # Guard clauses: each supported type returns straight from its handler
    if operationType == OperationTypeEnum.WEB_SEARCH:
        return await self.webSearch(modelCall)

    if operationType == OperationTypeEnum.WEB_CRAWL:
        return await self.webCrawl(modelCall)

    # Anything else is reported as an unsupported operation
    unsupportedMessage = f"Unsupported operation type: {operationType}"
    return AiModelResponse(content="", success=False, error=unsupportedMessage)
async def webSearch(self, modelCall: AiModelCall) -> "AiModelResponse":
    """
    WEB_SEARCH operation - runs a Tavily search and returns the result URLs.

    Args:
        modelCall: AiModelCall whose user message holds the JSON form of
            an AiCallPromptWebSearch

    Returns:
        AiModelResponse whose content is a JSON array of URLs; on any error
        the content is "[]" and success is False
    """
    try:
        import json

        # Prefer the first user message; fall back to the first message of any role
        messages = modelCall.messages
        promptContent = ""
        if messages:
            promptContent = next(
                (message.get("content", "") for message in messages if message.get("role") == "user"),
                "",
            )
            if not promptContent and len(messages) > 0:
                promptContent = messages[0].get("content", "")

        if not promptContent or not promptContent.strip():
            raise ValueError("Empty prompt content received for web search")

        try:
            promptData = json.loads(promptContent)
        except json.JSONDecodeError as parseError:
            logger.error(f"Failed to parse prompt content as JSON: {promptContent[:200]}")
            raise ValueError(f"Invalid JSON in prompt content: {str(parseError)}")

        # Validate the payload through the Pydantic model
        webSearchPrompt = AiCallPromptWebSearch(**promptData)

        # Tavily is given a country name, so a provided ISO code is converted first
        countryName = webSearchPrompt.country
        if countryName:
            countryName = self._convertIsoCodeToCountryName(countryName)

        # NOTE: timeRange parameter causes generic results, so it is always None
        searchResults = await self._search(
            query=webSearchPrompt.instruction,
            maxResults=webSearchPrompt.maxNumberPages,
            timeRange=None,
            country=countryName,
            includeAnswer="basic",
            includeRawContent="text"
        )

        # Only the URLs are handed back to the caller
        urls = []
        for searchResult in searchResults:
            urls.append(searchResult.url)

        responseMetadata = {"total_urls": len(urls), "operation": "WEB_SEARCH"}
        return AiModelResponse(
            content=json.dumps(urls, indent=2),
            success=True,
            metadata=responseMetadata
        )

    except Exception as searchError:
        logger.error(f"Error in Tavily web search: {str(searchError)}")
        return AiModelResponse(content="[]", success=False, error=str(searchError))
async def webCrawl(self, modelCall: AiModelCall) -> "AiModelResponse":
    """
    WEB_CRAWL operation - crawls one URL using Tavily with link following.

    Args:
        modelCall: AiModelCall whose user message holds the JSON form of
            an AiCallPromptWebCrawl

    Returns:
        AiModelResponse with the crawl result as JSON. The "content" field
        concatenates all crawled pages, each introduced by a "PAGE n: url"
        header. On error, success is False and the JSON carries "error"
        and "url".
    """
    import json

    # Bound before the try so the error path can tell whether parsing got this far
    webCrawlPrompt = None
    try:
        # Extract parameters - find user message (not system message)
        promptContent = ""
        if modelCall.messages:
            for msg in modelCall.messages:
                if msg.get("role") == "user":
                    promptContent = msg.get("content", "")
                    break
            # Fallback to first message if no user message found
            if not promptContent and len(modelCall.messages) > 0:
                promptContent = modelCall.messages[0].get("content", "")

        if not promptContent or not promptContent.strip():
            raise ValueError("Empty prompt content received for web crawl")

        try:
            promptData = json.loads(promptContent)
        except json.JSONDecodeError as e:
            logger.error(f"Failed to parse prompt content as JSON: {promptContent[:200]}")
            raise ValueError(f"Invalid JSON in prompt content: {str(e)}") from e

        # Create Pydantic model
        webCrawlPrompt = AiCallPromptWebCrawl(**promptData)

        # Perform crawl for ONE URL with link following.
        # maxWidth drives both the page limit and the breadth; only the
        # fallback defaults differ (20 pages vs. breadth 40).
        crawlResults = await self._crawl(
            url=webCrawlPrompt.url,
            instructions=webCrawlPrompt.instruction,
            limit=webCrawlPrompt.maxWidth or 20,
            maxDepth=webCrawlPrompt.maxDepth or 2,
            maxBreadth=webCrawlPrompt.maxWidth or 40
        )

        if crawlResults:
            # Every page gets its header so page boundaries stay visible even
            # when a page has no title; the title line itself is optional.
            pageSections = []
            for i, result in enumerate(crawlResults, 1):
                section = f"\n{'='*60}\nPAGE {i}: {result.url}\n{'='*60}\n"
                if result.title:
                    section += f"Title: {result.title}\n\n"
                section += f"{result.content}\n"
                pageSections.append(section)
            allContent = "".join(pageSections)

            resultData = {
                "url": webCrawlPrompt.url,
                # The first page's title represents the whole crawl
                "title": crawlResults[0].title if crawlResults[0].title else "Content",
                "content": allContent,
                "pagesCrawled": len(crawlResults),
                "pageUrls": [result.url for result in crawlResults]
            }
        else:
            resultData = {"url": webCrawlPrompt.url, "title": "", "content": "", "error": "No content extracted", "pagesCrawled": 0}

        # Return as JSON - same format as Perplexity but with multiple pages content
        return AiModelResponse(
            content=json.dumps(resultData, indent=2),
            success=True,
            metadata={"operation": "WEB_CRAWL", "url": webCrawlPrompt.url, "pagesCrawled": len(crawlResults) if crawlResults else 0}
        )

    except Exception as e:
        logger.error(f"Error in Tavily web crawl: {str(e)}")
        errorResult = {"error": str(e), "url": webCrawlPrompt.url if webCrawlPrompt is not None else ""}
        return AiModelResponse(
            content=json.dumps(errorResult, indent=2),
            success=False,
            error=str(e)
        )
def __init__(self, serviceCenter=None):
    """Set up the neutralizer, the MIME-type dispatch table and the chunk sizes."""
    # The neutralizer is only created when the configuration flag is set
    if APP_CONFIG.get("ENABLE_CONTENT_NEUTRALIZATION", False):
        self._neutralizer = DataAnonymizer()
    else:
        self._neutralizer = None
    self._serviceCenter = serviceCenter

    # MIME types that are handled as plain text, grouped by kind.
    programmingLanguageTypes = (
        'application/javascript', 'application/typescript', 'text/jsx', 'text/tsx',
        'text/x-python', 'text/x-java-source', 'text/x-c', 'text/x-c++src',
        'text/x-c++hdr', 'text/x-csharp', 'application/x-httpd-php', 'text/x-ruby',
        'text/x-go', 'text/x-rust', 'text/x-swift', 'text/x-kotlin', 'text/x-scala',
        'text/x-r', 'text/x-matlab', 'text/x-perl', 'application/x-sh',
        'application/x-powershell', 'application/x-msdos-program', 'text/vbscript',
        'text/x-lua', 'application/sql', 'application/dart', 'text/x-elm',
        'text/x-clojure', 'text/x-haskell', 'text/x-fsharp', 'text/x-ocaml',
    )
    webTechnologyTypes = (
        'text/css', 'text/x-scss', 'text/x-sass', 'text/x-less',
        'text/x-vue', 'text/x-svelte', 'text/x-astro',
    )
    configurationTypes = (
        'application/x-yaml', 'application/toml', 'text/x-dockerfile',
        'text/x-makefile', 'text/x-cmake', 'text/x-gradle', 'text/x-maven',
    )
    markupTypes = (
        'text/markdown', 'text/x-rst', 'application/x-tex',
        'text/x-bibtex', 'text/asciidoc', 'text/x-wiki',
    )
    imageTypes = (
        'image/jpeg', 'image/png', 'image/gif', 'image/webp',
        'image/bmp', 'image/tiff', 'image/x-icon',
    )
    openDocumentTypes = (
        'application/vnd.oasis.opendocument.text',
        'application/vnd.oasis.opendocument.spreadsheet',
        'application/vnd.oasis.opendocument.presentation',
    )

    # Built step by step so the key order matches the original literal.
    handlers: Dict[str, Callable[[bytes, str, str], Awaitable[List[ContentItem]]]] = {
        # Text and data files
        'text/plain': self._processText,
        'text/csv': self._processCsv,
        'application/json': self._processJson,
        'application/xml': self._processXml,
        'text/html': self._processHtml,
        'image/svg+xml': self._processSvg,
    }
    plainTextTypes = programmingLanguageTypes + webTechnologyTypes + configurationTypes + markupTypes
    handlers.update(dict.fromkeys(plainTextTypes, self._processText))
    handlers.update(dict.fromkeys(imageTypes, self._processImage))
    handlers.update({
        # Documents
        'application/pdf': self._processPdf,
        'application/vnd.openxmlformats-officedocument.wordprocessingml.document': self._processDocx,
        'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': self._processXlsx,
        'application/vnd.openxmlformats-officedocument.presentationml.presentation': self._processPptx,
    })
    # OpenDocument files are routed to the plain-text handler
    handlers.update(dict.fromkeys(openDocumentTypes, self._processText))
    handlers.update({
        # Legacy Office formats
        'application/msword': self._processLegacyDoc,
        'application/vnd.ms-excel': self._processLegacyXls,
        'application/vnd.ms-powerpoint': self._processLegacyPpt,
    })
    self.supportedTypes = handlers

    # Chunk sizes per content kind (units are not shown here; the original
    # comments describe the large values as 1MB / 5MB).
    textChunk = 40000
    codeChunk = 80000       # larger, for better preservation of code
    oneMegabyte = 1024 * 1024
    self.chunkSizes = {
        "text": textChunk,
        "plain": textChunk,
        "csv": textChunk,
        "json": textChunk,
        "xml": textChunk,
        "html": textChunk,
        "markdown": textChunk,
        "code": codeChunk,
        "script": codeChunk,
        "javascript": codeChunk,
        "typescript": codeChunk,
        "config": textChunk,
        "image": oneMegabyte,
        "video": 5 * oneMegabyte,
        "binary": oneMegabyte,
        "pdf": textChunk,
        "docx": textChunk,
        "xlsx": textChunk,
        "svg": textChunk,
    }
self._processLegacyXls, - 'application/vnd.ms-powerpoint': self._processLegacyPpt - } - - self.chunkSizes = { - "text": 40000, # General text content - "plain": 40000, # Plain text - "csv": 40000, # CSV data - "json": 40000, # JSON data - "xml": 40000, # XML data - "html": 40000, # HTML content - "markdown": 40000, # Markdown content - "code": 80000, # Programming code (increased for better preservation) - "script": 80000, # Script files (increased for better preservation) - "javascript": 80000, # JavaScript files specifically - "typescript": 80000, # TypeScript files specifically - "config": 40000, # Configuration files - "image": 1024 * 1024, # 1MB for images - "video": 5 * 1024 * 1024, # 5MB for video chunks - "binary": 1024 * 1024, # 1MB for binary data - "pdf": 40000, # PDF text content - "docx": 40000, # Word document text - "xlsx": 40000, # Excel data - "svg": 40000 # SVG content - } - - def _robustTextDecode(self, fileData: bytes, fileName: str = "unknown") -> str: - """ - Robustly decode text data with multiple encoding fallbacks. 
- - Args: - fileData: Raw bytes to decode - fileName: fileName for logging purposes - - Returns: - Decoded text string - - Raises: - FileProcessingError: If all decoding attempts fail - """ - # Try multiple encoding options in order of likelihood - encodings_to_try = ['utf-8', 'windows-1252', 'iso-8859-1', 'latin-1', 'cp1252'] - content = None - - # First try UTF-8 (most common) - try: - content = fileData.decode('utf-8') - - return content - except UnicodeDecodeError: - pass - - # Try other encodings - for encoding in encodings_to_try[1:]: - try: - content = fileData.decode(encoding) - - return content - except UnicodeDecodeError: - continue - - # If all encodings fail, try with error handling - try: - # Try with chardet for automatic detection - import chardet - detected = chardet.detect(fileData) - if detected['confidence'] > 0.7: - detected_encoding = detected['encoding'] - content = fileData.decode(detected_encoding, errors='replace') - - return content - else: - # Last resort: decode with replacement characters - content = fileData.decode('utf-8', errors='replace') - logger.warning(f"{fileName}: decoded with UTF-8 and replacement characters due to low encoding confidence") - return content - except ImportError: - # chardet not available, use replacement characters - content = fileData.decode('utf-8', errors='replace') - logger.warning(f"{fileName}: decoded with UTF-8 and replacement characters (chardet not available)") - return content - - # This should never be reached, but just in case - raise FileProcessingError(f"Failed to decode {fileName} with any encoding") - - def initialize(self) -> None: - """Initialize the document processor.""" - pass - - def _loadPdfExtractor(self): - """Loads PDF extraction libraries when needed""" - global pdfExtractorLoaded - if not pdfExtractorLoaded: - try: - global PyPDF2, fitz - import PyPDF2 - import fitz # PyMuPDF for more extensive PDF processing - pdfExtractorLoaded = True - logger.debug("PDF extraction libraries 
def _loadPdfExtractor(self):
    """Import the PDF libraries on first use and record success in a module flag."""
    global pdfExtractorLoaded
    global PyPDF2, fitz
    if pdfExtractorLoaded:
        return  # already imported earlier
    try:
        import PyPDF2
        import fitz  # PyMuPDF for more extensive PDF processing
    except ImportError as importError:
        logger.warning(f"PDF extraction libraries could not be loaded: {importError}")
    else:
        pdfExtractorLoaded = True
        logger.debug("PDF extraction libraries successfully loaded")

def _loadOfficeExtractor(self):
    """Import the Office libraries on first use and record success in a module flag."""
    global officeExtractorLoaded
    global docx, openpyxl
    if officeExtractorLoaded:
        return  # already imported earlier
    try:
        import docx  # python-docx for Word documents
        import openpyxl  # for Excel files
    except ImportError as importError:
        logger.warning(f"Office extraction libraries could not be loaded: {importError}")
    else:
        officeExtractorLoaded = True
        logger.debug("Office extraction libraries successfully loaded")

def _loadImageProcessor(self):
    """Import the image library on first use and record success in a module flag."""
    global imageProcessorLoaded
    global PIL, Image
    if imageProcessorLoaded:
        return  # already imported earlier
    try:
        from PIL import Image
    except ImportError as importError:
        logger.warning(f"Image processing libraries could not be loaded: {importError}")
    else:
        imageProcessorLoaded = True
        logger.debug("Image processing libraries successfully loaded")
async def processFileData(
    self,
    fileData: bytes,
    fileName: str,
    mimeType: str,
    base64Encoded: bool = False,
    prompt: Optional[str] = None,
    documentId: Optional[str] = None,
    enableAI: bool = True,
) -> ExtractedContent:
    """
    Process file data directly and extract its contents with optional AI processing.

    Args:
        fileData: Raw file data as bytes
        fileName: Name of the file
        mimeType: MIME type of the file; "application/octet-stream" triggers
            detection from the data itself
        base64Encoded: Whether the data is base64 encoded
        prompt: Prompt for AI content extraction
        documentId: Optional document ID; a random UUID is used when omitted
        enableAI: Whether to enable AI processing (default: True)

    Returns:
        ExtractedContent containing the processed content

    Raises:
        FileProcessingError: If document processing fails; the original
            error is attached as ``__cause__``
    """
    try:
        # Decode base64 if needed
        if base64Encoded:
            fileData = base64.b64decode(fileData)

        # Use documentUtility for mime type detection
        if mimeType == "application/octet-stream":
            mimeType = detectMimeTypeFromData(fileData, fileName, self._serviceCenter)

        # Unknown MIME types fall back to the binary processor
        processor = self.supportedTypes.get(mimeType, self._processBinary)
        contentItems = await processor(fileData, fileName, mimeType)

        # Process with AI if prompt provided and AI is enabled
        if enableAI and prompt and contentItems:
            try:
                contentItems = await self._aiDataExtraction(contentItems, prompt)
            except Exception as e:
                # Best effort: on AI failure the raw extracted content is returned
                logger.error(f"Error processing content with AI: {str(e)}")
        elif not enableAI:
            logger.debug(f"AI processing disabled for {fileName}, returning raw extracted content")

        return ExtractedContent(
            id=documentId if documentId else str(uuid.uuid4()),
            contents=contentItems
        )

    except Exception as e:
        logger.error(f"Error processing file data: {str(e)}")
        raise FileProcessingError(f"Failed to process file data: {str(e)}") from e
async def _processText(self, fileData: bytes, fileName: str, mimeType: str) -> List[ContentItem]:
    """Decode a text file and return it as a single content item.

    Content that is empty or whitespace-only yields one placeholder item
    labelled "empty". The MIME type of a real item is derived from the file
    extension, not from the ``mimeType`` argument.
    """
    try:
        decodedText = self._robustTextDecode(fileData, fileName)

        # Nothing readable: return a placeholder item instead of empty data
        if not decodedText or not decodedText.strip():
            logger.warning(f"Empty content extracted from {fileName}")
            placeholderMetadata = ContentMetadata(
                size=0,
                pages=1,
                mimeType="text/plain",
                base64Encoded=False
            )
            return [ContentItem(
                label="empty",
                data="[Empty file or no readable content]",
                metadata=placeholderMetadata
            )]

        byteLength = len(decodedText.encode('utf-8'))
        extensionMimeType = getMimeTypeFromExtension(getFileExtension(fileName))
        textMetadata = ContentMetadata(
            size=byteLength,
            pages=1,
            mimeType=extensionMimeType,
            base64Encoded=False
        )
        return [ContentItem(label="main", data=decodedText, metadata=textMetadata)]
    except Exception as textError:
        logger.error(f"Error processing text document: {str(textError)}")
        raise FileProcessingError(f"Failed to process text document: {str(textError)}")

async def _processCsv(self, fileData: bytes, fileName: str, mimeType: str) -> List[ContentItem]:
    """Decode a CSV file and return its text as a single "main" content item."""
    try:
        decodedText = self._robustTextDecode(fileData, fileName)
        extensionMimeType = getMimeTypeFromExtension(getFileExtension(fileName))
        csvMetadata = ContentMetadata(
            size=len(decodedText.encode('utf-8')),
            pages=1,
            mimeType=extensionMimeType,
            base64Encoded=False
        )
        return [ContentItem(label="main", data=decodedText, metadata=csvMetadata)]
    except Exception as csvError:
        logger.error(f"Error processing CSV document: {str(csvError)}")
        raise FileProcessingError(f"Failed to process CSV document: {str(csvError)}")
async def _processJson(self, fileData: bytes, fileName: str, mimeType: str) -> List[ContentItem]:
    """Decode a JSON file, verify that it parses, and return the raw text as one item."""
    try:
        decodedText = self._robustTextDecode(fileData, fileName)
        # Parsed only for validation: invalid JSON raises and is reported below
        json.loads(decodedText)
        extensionMimeType = getMimeTypeFromExtension(getFileExtension(fileName))
        jsonMetadata = ContentMetadata(
            size=len(decodedText.encode('utf-8')),
            pages=1,
            mimeType=extensionMimeType,
            base64Encoded=False
        )
        return [ContentItem(label="main", data=decodedText, metadata=jsonMetadata)]
    except Exception as jsonError:
        logger.error(f"Error processing JSON document: {str(jsonError)}")
        raise FileProcessingError(f"Failed to process JSON document: {str(jsonError)}")

async def _processXml(self, fileData: bytes, fileName: str, mimeType: str) -> List[ContentItem]:
    """Decode an XML file and return its text as a single "main" content item."""
    try:
        decodedText = self._robustTextDecode(fileData, fileName)
        extensionMimeType = getMimeTypeFromExtension(getFileExtension(fileName))
        xmlMetadata = ContentMetadata(
            size=len(decodedText.encode('utf-8')),
            pages=1,
            mimeType=extensionMimeType,
            base64Encoded=False
        )
        return [ContentItem(label="main", data=decodedText, metadata=xmlMetadata)]
    except Exception as xmlError:
        logger.error(f"Error processing XML document: {str(xmlError)}")
        raise FileProcessingError(f"Failed to process XML document: {str(xmlError)}")

async def _processHtml(self, fileData: bytes, fileName: str, mimeType: str) -> List[ContentItem]:
    """Decode an HTML file and return its text as a single "main" content item."""
    try:
        decodedText = self._robustTextDecode(fileData, fileName)
        extensionMimeType = getMimeTypeFromExtension(getFileExtension(fileName))
        htmlMetadata = ContentMetadata(
            size=len(decodedText.encode('utf-8')),
            pages=1,
            mimeType=extensionMimeType,
            base64Encoded=False
        )
        return [ContentItem(label="main", data=decodedText, metadata=htmlMetadata)]
    except Exception as htmlError:
        logger.error(f"Error processing HTML document: {str(htmlError)}")
        raise FileProcessingError(f"Failed to process HTML document: {str(htmlError)}")
async def _processImage(self, fileData: bytes, fileName: str, mimeType: str) -> List[ContentItem]:
    """Process an image file.

    GIF files are returned as a plain-text description (dimensions, frame
    count, colour mode, frame duration). If that description cannot be
    built, and for every other image type, the image is returned
    base64-encoded together with its dimensions and colour mode.

    Raises:
        FileProcessingError: If the image library is unavailable or the
            image cannot be processed.
    """
    try:
        self._loadImageProcessor()
        if not imageProcessorLoaded:
            raise FileProcessingError("Image processing libraries not available")

        with io.BytesIO(fileData) as imgStream:
            img = Image.open(imgStream)

            # For GIF files, provide descriptive information instead of AI processing
            if mimeType == "image/gif":
                try:
                    frame_count = getattr(img, 'n_frames', 1)
                    # Pillow exposes the GIF frame duration through the `info`
                    # dict, not as an attribute of the image object.
                    duration = img.info.get('duration', 0) or 0

                    # Create a descriptive text about the GIF
                    gif_description = "GIF Image Analysis:\n"
                    gif_description += f"- Dimensions: {img.width} x {img.height} pixels\n"
                    gif_description += f"- Frame count: {frame_count}\n"
                    gif_description += f"- Color mode: {img.mode}\n"
                    if duration > 0:
                        gif_description += f"- Duration: {duration}ms\n"
                    gif_description += f"- File size: {len(fileData)} bytes\n"
                    gif_description += f"- Format: {img.format}\n\n"
                    gif_description += f"Note: This is an animated GIF image. The AI cannot directly analyze image content, but the file contains {frame_count} frame(s) of animation."

                    return [ContentItem(
                        label="gif_analysis",
                        data=gif_description,
                        metadata=ContentMetadata(
                            size=len(gif_description.encode('utf-8')),
                            width=img.width,
                            height=img.height,
                            colorMode=img.mode,
                            mimeType="text/plain",
                            base64Encoded=False
                        )
                    )]
                except Exception as gifError:
                    # Deliberate best effort: fall through to the generic
                    # base64 handling below.
                    logger.warning(f"GIF processing failed: {str(gifError)}")

            metadata = ContentMetadata(
                size=len(fileData),
                width=img.width,
                height=img.height,
                colorMode=img.mode,
                mimeType=mimeType,
                base64Encoded=True
            )

            # Convert image to base64 for storage
            imgStream.seek(0)
            imgData = base64.b64encode(imgStream.read()).decode('utf-8')

            return [ContentItem(
                label="image",
                data=imgData,
                metadata=metadata
            )]
    except Exception as e:
        logger.error(f"Error processing image document: {str(e)}")
        raise FileProcessingError(f"Failed to process image document: {str(e)}") from e
async def _processPdf(self, fileData: bytes, fileName: str, mimeType: str) -> List[ContentItem]:
    """Process a PDF document.

    Produces one text item per page that has extractable text (labelled
    ``page_<n>``, via PyPDF2) followed by one base64 item per embedded image
    (labelled ``image_<page>_<index>``, via PyMuPDF).

    Raises:
        FileProcessingError: If the PDF libraries are unavailable or the
            document cannot be processed.
    """
    try:
        self._loadPdfExtractor()
        if not pdfExtractorLoaded:
            raise FileProcessingError("PDF extraction libraries not available")

        contentItems = []

        with io.BytesIO(fileData) as pdfStream:
            # Extract text with PyPDF2, page by page
            pdfReader = PyPDF2.PdfReader(pdfStream)
            for pageNum, page in enumerate(pdfReader.pages, start=1):
                pageText = page.extract_text()
                if pageText:
                    contentItems.append(ContentItem(
                        label=f"page_{pageNum}",
                        data=pageText,
                        metadata=ContentMetadata(
                            size=len(pageText.encode('utf-8')),
                            pages=1,
                            mimeType="text/plain",
                            base64Encoded=False
                        )
                    ))

            # Extract images with PyMuPDF
            pdfStream.seek(0)
            doc = fitz.open(stream=pdfStream, filetype="pdf")
            try:
                for pageNum in range(len(doc)):
                    page = doc[pageNum]
                    for imgIndex, imgInfo in enumerate(page.get_images(full=True)):
                        try:
                            xref = imgInfo[0]
                            baseImage = doc.extract_image(xref)
                            if baseImage:
                                imageBytes = baseImage.get("image", b"")
                                imageExt = baseImage.get("ext", "png")

                                if imageBytes:
                                    contentItems.append(ContentItem(
                                        label=f"image_{pageNum + 1}_{imgIndex}",
                                        data=base64.b64encode(imageBytes).decode('utf-8'),
                                        metadata=ContentMetadata(
                                            size=len(imageBytes),
                                            pages=1,
                                            mimeType=f"image/{imageExt}",
                                            base64Encoded=True
                                        )
                                    ))
                        except Exception as imgE:
                            # A single broken image must not abort the document
                            logger.warning(f"Error extracting image {imgIndex} on page {pageNum + 1}: {str(imgE)}")
            finally:
                # Close the document even when page iteration fails
                doc.close()

        return contentItems
    except Exception as e:
        logger.error(f"Error processing PDF document: {str(e)}")
        raise FileProcessingError(f"Failed to process PDF document: {str(e)}") from e
async def _processDocx(self, fileData: bytes, fileName: str, mimeType: str) -> List[ContentItem]:
    """Process a Word document into markdown-like text plus auxiliary items.

    Returns, in order:
      * "main": properties, headers, paragraphs (headings as ``#`` lines,
        bold/italic/underline runs marked up), tables as pipe tables,
        footers and comments;
      * "tables": all tables again in CSV form (only when tables exist);
      * "structure": paragraph/table/section counts and a count per
        paragraph style.

    Raises:
        FileProcessingError: If the Office libraries are unavailable or the
            document cannot be processed.
    """
    try:
        self._loadOfficeExtractor()
        if not officeExtractorLoaded:
            raise FileProcessingError("Office extraction libraries not available")

        contentItems = []

        with io.BytesIO(fileData) as docxStream:
            doc = docx.Document(docxStream)

            # Extract document properties
            doc_properties = []
            if doc.core_properties.title:
                doc_properties.append(f"Title: {doc.core_properties.title}")
            if doc.core_properties.author:
                doc_properties.append(f"Author: {doc.core_properties.author}")
            if doc.core_properties.subject:
                doc_properties.append(f"Subject: {doc.core_properties.subject}")
            if doc.core_properties.keywords:
                doc_properties.append(f"Keywords: {doc.core_properties.keywords}")
            if doc.core_properties.comments:
                doc_properties.append(f"Comments: {doc.core_properties.comments}")

            # Extract main content with formatting
            main_content = []

            for para in doc.paragraphs:
                if not para.text.strip():
                    continue

                style_name = para.style.name if para.style else "Normal"

                if style_name.startswith('Heading'):
                    # "Heading 2" -> level 2. Style names without a plain
                    # numeric suffix (e.g. "Heading") become level 1
                    # instead of raising and aborting the whole extraction.
                    suffix = style_name[len('Heading'):].strip()
                    level = max(int(suffix), 1) if suffix.isdecimal() else 1
                    main_content.append(f"\n{'#' * level} {para.text}")
                    continue

                # Mark up bold, italic and underline on a per-run basis
                formatted_text = para.text
                if para.runs:
                    run_texts = []
                    for run in para.runs:
                        run_text = run.text
                        if run.bold:
                            run_text = f"**{run_text}**"
                        if run.italic:
                            run_text = f"*{run_text}*"
                        if run.underline:
                            run_text = f"__{run_text}__"
                        run_texts.append(run_text)
                    formatted_text = ''.join(run_texts)

                main_content.append(formatted_text)

            # Extract tables as pipe tables; the first row is treated as header
            table_count = 0
            for table in doc.tables:
                table_count += 1
                main_content.append(f"\n\n--- Table {table_count} ---")

                if table.rows:
                    header_row = table.rows[0]
                    headers = [cell.text.strip() for cell in header_row.cells]
                    main_content.append("| " + " | ".join(headers) + " |")
                    main_content.append("|" + "|".join(["---"] * len(headers)) + "|")

                    for row in table.rows[1:]:
                        row_data = [cell.text.strip() for cell in row.cells]
                        main_content.append("| " + " | ".join(row_data) + " |")

                main_content.append("--- End Table ---\n")

            # Extract headers and footers if available (best effort)
            try:
                for section in doc.sections:
                    if section.header:
                        header_text = [
                            f"[Header] {para.text}"
                            for para in section.header.paragraphs
                            if para.text.strip()
                        ]
                        if header_text:
                            # Headers go to the front of the content
                            main_content.insert(0, "\n".join(header_text) + "\n")

                    if section.footer:
                        footer_text = [
                            f"[Footer] {para.text}"
                            for para in section.footer.paragraphs
                            if para.text.strip()
                        ]
                        if footer_text:
                            main_content.append("\n" + "\n".join(footer_text))
            except Exception as header_footer_error:
                logger.debug(f"Could not extract headers/footers: {header_footer_error}")

            # Extract comments if available (best effort)
            try:
                comments = []
                for comment in doc.part.comments_part.comments if doc.part.comments_part else []:
                    comment_text = comment.text.strip()
                    if comment_text:
                        comments.append(f"[Comment] {comment_text}")

                if comments:
                    main_content.append("\n\n--- Comments ---")
                    main_content.extend(comments)
                    main_content.append("--- End Comments ---")
            except Exception as comment_error:
                logger.debug(f"Could not extract comments: {comment_error}")

            # Properties are inserted last so they end up at the very top
            if doc_properties:
                main_content.insert(0, "--- Document Properties ---\n" + "\n".join(doc_properties) + "\n--- End Properties ---\n")

            final_content = "\n".join(main_content)

            contentItems.append(ContentItem(
                label="main",
                data=final_content,
                metadata=ContentMetadata(
                    size=len(final_content.encode('utf-8')),
                    # NOTE(review): this is the paragraph count, stored in the
                    # "pages" field - confirm consumers expect that.
                    pages=len(doc.paragraphs),
                    mimeType="text/markdown",  # Use markdown for better formatting
                    base64Encoded=False
                )
            ))

            # Separate content item with the tables in CSV form (if any exist)
            if table_count > 0:
                table_content = []
                for i, table in enumerate(doc.tables):
                    table_content.append(f"Table {i+1}:")
                    if table.rows:
                        for row in table.rows:
                            row_data = [f'"{cell.text.strip()}"' for cell in row.cells]
                            table_content.append(",".join(row_data))
                    table_content.append("")  # Empty line between tables

                table_text = "\n".join(table_content)
                contentItems.append(ContentItem(
                    label="tables",
                    data=table_text,
                    metadata=ContentMetadata(
                        size=len(table_text.encode('utf-8')),
                        pages=1,
                        mimeType="text/csv",
                        base64Encoded=False
                    )
                ))

            # Separate content item describing the document structure
            structure_info = [
                "Document Structure:",
                f"- Paragraphs: {len(doc.paragraphs)}",
                f"- Tables: {table_count}",
                f"- Sections: {len(doc.sections)}",
            ]

            # Count different paragraph styles
            style_counts = {}
            for para in doc.paragraphs:
                style_name = para.style.name if para.style else "Normal"
                style_counts[style_name] = style_counts.get(style_name, 0) + 1

            for style, count in style_counts.items():
                structure_info.append(f"- {style}: {count}")

            structure_text = "\n".join(structure_info)
            contentItems.append(ContentItem(
                label="structure",
                data=structure_text,
                metadata=ContentMetadata(
                    size=len(structure_text.encode('utf-8')),
                    pages=1,
                    mimeType="text/plain",
                    base64Encoded=False
                )
            ))

        return contentItems

    except Exception as e:
        logger.error(f"Error processing Word document: {str(e)}")
        raise FileProcessingError(f"Failed to process Word document: {str(e)}") from e
Exception as load_error: - logger.error(f"Failed to load Excel workbook {fileName}: {str(load_error)}") - raise FileProcessingError(f"Failed to load Excel workbook: {str(load_error)}") - - # Extract workbook properties safely - workbook_props = [] - try: - if hasattr(workbook, 'properties'): - props = workbook.properties - - - # Log all available attributes for debugging - for attr in dir(props): - if not attr.startswith('_'): # Skip private attributes - try: - value = getattr(props, attr) - if value is not None: - pass - except Exception as attr_error: - logger.debug(f"Could not read property {attr}: {str(attr_error)}") - - # Check each property safely before accessing - if hasattr(props, 'title') and props.title: - workbook_props.append(f"Title: {props.title}") - if hasattr(props, 'creator') and props.creator: # 'creator' is the correct attribute - workbook_props.append(f"Author: {props.creator}") - if hasattr(props, 'subject') and props.subject: - workbook_props.append(f"Subject: {props.subject}") - if hasattr(props, 'keywords') and props.keywords: - workbook_props.append(f"Keywords: {props.keywords}") - if hasattr(props, 'comments') and props.comments: - workbook_props.append(f"Comments: {props.comments}") - if hasattr(props, 'category') and props.category: - workbook_props.append(f"Category: {props.category}") - if hasattr(props, 'description') and props.description: - workbook_props.append(f"Description: {props.description}") - if hasattr(props, 'lastModifiedBy') and props.lastModifiedBy: - workbook_props.append(f"Last Modified By: {props.lastModifiedBy}") - if hasattr(props, 'created') and props.created: - workbook_props.append(f"Created: {props.created}") - if hasattr(props, 'modified') and props.modified: - workbook_props.append(f"Modified: {props.modified}") - - # Try alternative property names that might exist - if hasattr(props, 'author') and props.author: # Some versions use 'author' - workbook_props.append(f"Author (alt): {props.author}") - if 
hasattr(props, 'manager') and props.manager: - workbook_props.append(f"Manager: {props.manager}") - if hasattr(props, 'company') and props.company: - workbook_props.append(f"Company: {props.company}") - if hasattr(props, 'status') and props.status: - workbook_props.append(f"Status: {props.status}") - if hasattr(props, 'revision') and props.revision: - workbook_props.append(f"Revision: {props.revision}") - - else: - # Try to find properties in other locations - for attr in dir(workbook): - if not attr.startswith('_') and 'prop' in attr.lower(): - pass - except Exception as props_error: - logger.warning(f"Could not extract workbook properties: {str(props_error)}") - workbook_props = [] - - # Create workbook overview content item - overview_content = [] - overview_content.append("Excel Workbook Overview") - overview_content.append("=" * 30) - overview_content.append(f"Total Sheets: {len(workbook.sheetnames)}") - overview_content.append(f"Sheet Names: {', '.join(workbook.sheetnames)}") - - if workbook_props: - overview_content.append("\nWorkbook Properties:") - overview_content.extend(workbook_props) - - overview_text = "\n".join(overview_content) - contentItems.append(ContentItem( - label="overview", - data=overview_text, - metadata=ContentMetadata( - size=len(overview_text.encode('utf-8')), - pages=1, - mimeType="text/plain", - base64Encoded=False - ) - )) - - # Process each sheet - for sheetIndex, sheetName in enumerate(workbook.sheetnames): - try: - sheet = workbook[sheetName] - logger.debug(f"Processing sheet {sheetIndex + 1}: {sheetName}") - - # Get sheet metadata - sheet_metadata = [] - sheet_metadata.append(f"Sheet: {sheetName}") - - try: - sheet_metadata.append(f"Dimensions: {sheet.dimensions}") - sheet_metadata.append(f"Max Row: {sheet.max_row}") - sheet_metadata.append(f"Max Column: {sheet.max_column}") - except Exception as dim_error: - logger.warning(f"Could not get sheet dimensions for {sheetName}: {str(dim_error)}") - sheet_metadata.append("Dimensions: 
Unable to determine") - sheet_metadata.append("Max Row: Unknown") - sheet_metadata.append("Max Column: Unknown") - - # Check for sheet properties safely - try: - if hasattr(sheet, 'sheet_properties'): - sheet_props = sheet.sheet_properties - if hasattr(sheet_props, 'tabColor') and sheet_props.tabColor: - sheet_metadata.append(f"Tab Color: {sheet_props.tabColor}") - if hasattr(sheet_props, 'hidden') and sheet_props.hidden: - sheet_metadata.append("Hidden: Yes") - if hasattr(sheet_props, 'name') and sheet_props.name: - sheet_metadata.append(f"Internal Name: {sheet_props.name}") - except Exception as sheet_props_error: - logger.debug(f"Could not extract sheet properties for {sheetName}: {str(sheet_props_error)}") - - # Extract data from sheet - sheet_data = [] - - try: - # Find the actual data range (skip empty rows/columns) - min_row = sheet.min_row - max_row = sheet.max_row - min_col = sheet.min_column - max_col = sheet.max_column - - # Adjust for empty sheets - if max_row == 0 or max_col == 0: - sheet_metadata.append("Content: Empty sheet") - sheet_data.append("(Empty sheet)") - else: - # Extract all data with proper CSV formatting - for row_num in range(min_row, max_row + 1): - row_data = [] - for col_num in range(min_col, max_col + 1): - try: - cell = sheet.cell(row=row_num, column=col_num) - cell_value = cell.value - - # Handle different data types - if cell_value is None: - row_data.append("") - elif isinstance(cell_value, (int, float)): - row_data.append(str(cell_value)) - elif isinstance(cell_value, datetime): - row_data.append(cell_value.strftime("%Y-%m-%d %H:%M:%S")) - else: - # Escape quotes and wrap in quotes for CSV - cell_str = str(cell_value).replace('"', '""') - row_data.append(f'"{cell_str}"') - except Exception as cell_error: - logger.debug(f"Error processing cell at row {row_num}, col {col_num}: {str(cell_error)}") - row_data.append("(Error reading cell)") - - sheet_data.append(",".join(row_data)) - - sheet_metadata.append(f"Data Rows: 
{len(sheet_data)}") - sheet_metadata.append(f"Data Columns: {max_col - min_col + 1}") - except Exception as data_error: - logger.warning(f"Could not extract data from sheet {sheetName}: {str(data_error)}") - sheet_metadata.append("Content: Error extracting data") - sheet_data.append(f"(Error: {str(data_error)})") - - # Create sheet content item - sheet_content = "\n".join(sheet_metadata) + "\n\n" + "\n".join(sheet_data) - contentItems.append(ContentItem( - label=f"sheet_{sheetIndex + 1}_{sheetName}", - data=sheet_content, - metadata=ContentMetadata( - size=len(sheet_content.encode('utf-8')), - pages=1, - mimeType="text/csv", - base64Encoded=False - ) - )) - - # Create separate CSV file for each sheet (clean format) - if sheet_data and sheet_data[0].strip() and not sheet_data[0].startswith("(Error"): - # Create clean CSV without metadata - csv_content = "\n".join(sheet_data) - contentItems.append(ContentItem( - label=f"csv_{sheetIndex + 1}_{sheetName}", - data=csv_content, - metadata=ContentMetadata( - size=len(csv_content.encode('utf-8')), - pages=1, - mimeType="text/csv", - base64Encoded=False - ) - )) - - except Exception as sheet_error: - logger.error(f"Error processing sheet {sheetName}: {str(sheet_error)}") - # Create error content item for this sheet - error_content = f"Error processing sheet: {sheetName}\nError: {str(sheet_error)}" - contentItems.append(ContentItem( - label=f"error_sheet_{sheetIndex + 1}_{sheetName}", - data=error_content, - metadata=ContentMetadata( - size=len(error_content.encode('utf-8')), - pages=1, - mimeType="text/plain", - base64Encoded=False - ) - )) - - # Create summary content item - try: - summary_content = [] - summary_content.append("Excel Processing Summary") - summary_content.append("=" * 30) - summary_content.append(f"Total Sheets Processed: {len(workbook.sheetnames)}") - - total_rows = 0 - total_cells = 0 - for sheetName in workbook.sheetnames: - try: - sheet = workbook[sheetName] - if hasattr(sheet, 'max_row') and 
hasattr(sheet, 'max_column'): - if sheet.max_row > 0 and sheet.max_column > 0: - sheet_rows = sheet.max_row - sheet_cells = sheet.max_row * sheet.max_column - total_rows += sheet_rows - total_cells += sheet_cells - summary_content.append(f"- {sheetName}: {sheet_rows} rows, {sheet_cells} cells") - except Exception as summary_error: - logger.debug(f"Could not get summary for sheet {sheetName}: {str(summary_error)}") - summary_content.append(f"- {sheetName}: Error getting summary") - - summary_content.append(f"\nTotal Rows: {total_rows}") - summary_content.append(f"Total Cells: {total_cells}") - - summary_text = "\n".join(summary_content) - contentItems.append(ContentItem( - label="summary", - data=summary_text, - metadata=ContentMetadata( - size=len(summary_text.encode('utf-8')), - pages=1, - mimeType="text/plain", - base64Encoded=False - ) - )) - except Exception as summary_error: - logger.warning(f"Could not create summary: {str(summary_error)}") - - return contentItems - - except Exception as e: - logger.error(f"Error processing Excel document: {str(e)}") - raise FileProcessingError(f"Failed to process Excel document: {str(e)}") - - async def _processLegacyDoc(self, fileData: bytes, fileName: str, mimeType: str) -> List[ContentItem]: - """Process legacy Word .doc document""" - try: - # Try to use antiword or similar tools for .doc files - # For now, we'll provide a basic binary extraction with metadata - contentItems = [] - - # Create a basic content item explaining the limitation - info_content = f"""Legacy Word Document (.doc) - {fileName} - -Note: This is a legacy .doc format file. For better content extraction, -consider converting to .docx format. 
- -File size: {len(fileData)} bytes -Format: Microsoft Word 97-2003 Document - -Content extraction from .doc files requires specialized tools like: -- antiword (Linux/Unix) -- catdoc (Linux/Unix) -- Microsoft Word (for conversion) - -The raw binary content is available but not human-readable.""" - - contentItems.append(ContentItem( - label="info", - data=info_content, - metadata=ContentMetadata( - size=len(info_content.encode('utf-8')), - pages=1, - mimeType="text/plain", - base64Encoded=False - ) - )) - - # Also provide the binary content for potential processing - contentItems.append(ContentItem( - label="binary", - data=base64.b64encode(fileData).decode('utf-8'), - metadata=ContentMetadata( - size=len(fileData), - mimeType=mimeType, - base64Encoded=True - ) - )) - - return contentItems - - except Exception as e: - logger.error(f"Error processing legacy Word document: {str(e)}") - raise FileProcessingError(f"Failed to process legacy Word document: {str(e)}") - - async def _processLegacyXls(self, fileData: bytes, fileName: str, mimeType: str) -> List[ContentItem]: - """Process legacy Excel .xls document""" - try: - # Try to use xlrd or similar tools for .xls files - # For now, we'll provide a basic binary extraction with metadata - contentItems = [] - - # Create a basic content item explaining the limitation - info_content = f"""Legacy Excel Document (.xls) - {fileName} - -Note: This is a legacy .xls format file. For better content extraction, -consider converting to .xlsx format. 
- -File size: {len(fileData)} bytes -Format: Microsoft Excel 97-2003 Workbook - -Content extraction from .xls files requires specialized tools like: -- xlrd (Python library) -- Microsoft Excel (for conversion) -- LibreOffice (for conversion) - -The raw binary content is available but not human-readable.""" - - contentItems.append(ContentItem( - label="info", - data=info_content, - metadata=ContentMetadata( - size=len(info_content.encode('utf-8')), - pages=1, - mimeType="text/plain", - base64Encoded=False - ) - )) - - # Also provide the binary content for potential processing - contentItems.append(ContentItem( - label="binary", - data=base64.b64encode(fileData).decode('utf-8'), - metadata=ContentMetadata( - size=len(fileData), - mimeType=mimeType, - base64Encoded=True - ) - )) - - return contentItems - - except Exception as e: - logger.error(f"Error processing legacy Excel document: {str(e)}") - raise FileProcessingError(f"Failed to process legacy Excel document: {str(e)}") - - async def _processLegacyPpt(self, fileData: bytes, fileName: str, mimeType: str) -> List[ContentItem]: - """Process legacy PowerPoint .ppt document""" - try: - # Try to use python-pptx or similar tools for .ppt files - # For now, we'll provide a basic binary extraction with metadata - contentItems = [] - - # Create a basic content item explaining the limitation - info_content = f"""Legacy PowerPoint Document (.ppt) - {fileName} - -Note: This is a legacy .ppt format file. For better content extraction, -consider converting to .pptx format. 
- -File size: {len(fileData)} bytes -Format: Microsoft PowerPoint 97-2003 Presentation - -Content extraction from .ppt files requires specialized tools like: -- python-pptx (limited support for .ppt) -- Microsoft PowerPoint (for conversion) -- LibreOffice (for conversion) - -The raw binary content is available but not human-readable.""" - - contentItems.append(ContentItem( - label="info", - data=info_content, - metadata=ContentMetadata( - size=len(info_content.encode('utf-8')), - pages=1, - mimeType="text/plain", - base64Encoded=False - ) - )) - - # Also provide the binary content for potential processing - contentItems.append(ContentItem( - label="binary", - data=base64.b64encode(fileData).decode('utf-8'), - metadata=ContentMetadata( - size=len(fileData), - mimeType=mimeType, - base64Encoded=True - ) - )) - - return contentItems - - except Exception as e: - logger.error(f"Error processing legacy PowerPoint document: {str(e)}") - raise FileProcessingError(f"Failed to process legacy PowerPoint document: {str(e)}") - - async def _processPptx(self, fileData: bytes, fileName: str, mimeType: str) -> List[ContentItem]: - """Process PowerPoint document""" - try: - self._loadOfficeExtractor() - if not officeExtractorLoaded: - raise FileProcessingError("Office extraction libraries not available") - - contentItems = [] - - try: - # Try to use python-pptx for PowerPoint processing - from pptx import Presentation - - with io.BytesIO(fileData) as pptxStream: - prs = Presentation(pptxStream) - - for slideNum, slide in enumerate(prs.slides): - slideText = [] - - # Extract text from shapes - for shape in slide.shapes: - if hasattr(shape, "text") and shape.text: - slideText.append(shape.text) - - # Extract text from text boxes - for shape in slide.shapes: - if shape.has_text_frame: - for paragraph in shape.text_frame.paragraphs: - if paragraph.text: - slideText.append(paragraph.text) - - if slideText: - content = "\n".join(slideText) - contentItems.append(ContentItem( - 
label=f"slide_{slideNum + 1}", - data=content, - metadata=ContentMetadata( - size=len(content.encode('utf-8')), - pages=1, - mimeType="text/plain", - base64Encoded=False - ) - )) - - if not contentItems: - # Fallback: treat as binary if no text extracted - contentItems.append(ContentItem( - label="presentation", - data=base64.b64encode(fileData).decode('utf-8'), - metadata=ContentMetadata( - size=len(fileData), - pages=len(prs.slides) if hasattr(prs, 'slides') else 1, - mimeType="application/vnd.openxmlformats-officedocument.presentationml.presentation", - base64Encoded=True - ) - )) - - except ImportError: - # python-pptx not available, treat as binary - contentItems.append(ContentItem( - label="presentation", - data=base64.b64encode(fileData).decode('utf-8'), - metadata=ContentMetadata( - size=len(fileData), - pages=1, - mimeType="application/vnd.openxmlformats-officedocument.presentationml.presentation", - base64Encoded=True - ) - )) - - return contentItems - - except Exception as e: - logger.error(f"Error processing PowerPoint document: {str(e)}") - raise FileProcessingError(f"Failed to process PowerPoint document: {str(e)}") - - async def _processBinary(self, fileData: bytes, fileName: str, mimeType: str) -> List[ContentItem]: - """Process binary document""" - try: - return [ContentItem( - label="binary", - data=base64.b64encode(fileData).decode('utf-8'), - metadata=ContentMetadata( - size=len(fileData), - mimeType=mimeType, - base64Encoded=True, - error="Unsupported file type" - ) - )] - except Exception as e: - logger.error(f"Error processing binary document: {str(e)}") - raise FileProcessingError(f"Failed to process binary document: {str(e)}") - - async def _aiDataExtraction(self, contentItems: List[ContentItem], prompt: str) -> List[ContentItem]: - """ - Process content items with AI, handling chunking based on content type. 
- - Args: - contentItems: List of content items to process - prompt: Prompt for AI content extraction - - Returns: - List of processed content items - """ - processedItems = [] - - for item in contentItems: - try: - # Get content type from metadata - mimeType = item.metadata.mimeType if hasattr(item.metadata, 'mimeType') else "text/plain" - - - # Chunk content based on type - if mimeType.startswith('text/'): - chunks = self._chunkText(item.data, mimeType) - elif mimeType == "image/svg+xml": - # SVG files are XML, treat as text - chunks = self._chunkXml(item.data) - elif mimeType.startswith('image/'): - # Images should not be chunked - process as single unit - chunks = [item.data] - elif mimeType == "application/pdf": - chunks = self._chunkPdf(item.data) - elif mimeType == "application/vnd.openxmlformats-officedocument.wordprocessingml.document": - chunks = self._chunkDocx(item.data) - elif mimeType == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": - chunks = self._chunkXlsx(item.data) - elif mimeType.startswith('application/vnd.openxmlformats-officedocument.presentationml.presentation'): - chunks = self._chunkPptx(item.data) - elif mimeType.startswith('text/x-') or mimeType.startswith('application/') and any(keyword in mimeType for keyword in ['script', 'code', 'source', 'yaml', 'toml', 'dockerfile', 'makefile', 'cmake', 'gradle', 'maven']): - # Programming languages, configuration files, and build files - chunks = self._chunkCode(item.data) - else: - # Binary data - no chunking - chunks = [item.data] - - # Process each chunk - chunkResults = [] - for chunk in chunks: - # Process with AI based on content type - try: - if mimeType.startswith('image/') and mimeType != "image/svg+xml": - # For images (excluding SVG), extract meaningful content as text - # Use AI to analyze the image and extract relevant information - - - # Create a specific prompt for image content extraction - imagePrompt = f""" - Analyze this image and extract the actual 
content and information from it. - Focus on extracting text, data, charts, diagrams, or any meaningful content. - If there's text in the image, extract it. If there are charts or diagrams, describe the data. - Return the extracted content in a clear, structured text format. - - Original prompt: {prompt} - """ - - processedContent = await self._serviceCenter.callAiImageBasic(imagePrompt, chunk, mimeType) - else: - # For text content (including SVG), use text AI service - # Neutralize content if neutralizer is enabled (only for text) - contentToProcess = chunk - if self._neutralizer and contentToProcess: - contentToProcess = self._neutralizer.neutralize(contentToProcess) - - # Create AI prompt for text content - aiPrompt = f""" - Extract relevant information from this content based on the following prompt: - - PROMPT: {prompt} - - CONTENT: - {contentToProcess} - - Return ONLY the extracted information in a clear, concise format. - """ - - # Special handling for JavaScript and other code files - preserve complete content - if mimeType == "application/javascript" or mimeType == "application/typescript" or mimeType.startswith("text/x-") or any(keyword in mimeType for keyword in ['script', 'code', 'source']): - # For code files, preserve the complete content without AI processing - processedContent = contentToProcess - else: - processedContent = await self._serviceCenter.callAiTextBasic(aiPrompt, contentToProcess) - - chunkResults.append(processedContent) - except Exception as aiError: - logger.error(f"AI processing failed for chunk: {str(aiError)}") - # For non-text content, don't fallback to binary data - if mimeType.startswith('image/') or mimeType.startswith('video/') or mimeType.startswith('audio/'): - logger.warning(f"Skipping binary content fallback for {mimeType}") - continue # Skip this chunk entirely - else: - # Only fallback to original content for text-based formats - chunkResults.append(chunk) - - # Combine chunk results - if chunkResults: - # For text 
content, combine all chunks - if (mimeType.startswith('text/') or - mimeType in ["application/pdf", "application/vnd.openxmlformats-officedocument.wordprocessingml.document", - "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", - "application/vnd.openxmlformats-officedocument.presentationml.presentation"] or - mimeType.startswith('text/x-') or - mimeType.startswith('application/') and any(keyword in mimeType for keyword in ['script', 'code', 'source', 'yaml', 'toml', 'dockerfile', 'makefile', 'cmake', 'gradle', 'maven', 'javascript', 'typescript', 'sql', 'dart'])): - combinedResult = "\n".join(chunkResults) - else: - # For binary content, use the first result - combinedResult = chunkResults[0] - else: - # No chunks processed, use original content - combinedResult = item.data - - # Only add processed item if we have results - if combinedResult and combinedResult.strip(): - processedItems.append(ContentItem( - label=item.label, - data=combinedResult, - metadata=ContentMetadata( - size=len(combinedResult.encode('utf-8')), - pages=item.metadata.pages if hasattr(item.metadata, 'pages') else 1, - mimeType=item.metadata.mimeType if hasattr(item.metadata, 'mimeType') else "text/plain", - base64Encoded=item.metadata.base64Encoded if hasattr(item.metadata, 'base64Encoded') else False - ) - )) - else: - logger.warning(f"No processed content available for {item.label}, skipping item") - - except Exception as e: - logger.error(f"Error processing content chunk: {str(e)}") - # Add original content if processing fails - processedItems.append(item) - - return processedItems - - - - def _chunkText(self, content: str, mimeType: str) -> List[str]: - """Chunk text content based on mime type""" - if mimeType == "text/plain": - return self._chunkPlainText(content) - elif mimeType == "text/csv": - return self._chunkCsv(content) - elif mimeType == "application/json": - return self._chunkJson(content) - elif mimeType == "application/xml": - return self._chunkXml(content) - 
elif mimeType == "text/html": - return self._chunkHtml(content) - elif mimeType == "text/markdown" or mimeType == "text/x-rst" or mimeType == "text/x-wiki": - return self._chunkMarkdown(content) - elif mimeType == "application/javascript" or mimeType == "application/typescript": - # JavaScript and TypeScript files get special handling - return self._chunkJavaScript(content) - elif mimeType.startswith("text/x-") or mimeType.startswith("application/") and any(keyword in mimeType for keyword in ['script', 'code', 'source', 'yaml', 'toml', 'dockerfile', 'makefile', 'cmake', 'gradle', 'maven']): - # Programming languages, configuration files, and build files - return self._chunkCode(content) - elif mimeType == "application/vnd.openxmlformats-officedocument.wordprocessingml.document": - # Word documents with markdown formatting - return self._chunkWordDocument(content) - elif mimeType == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": - # Excel documents with structured data - return self._chunkExcelDocument(content) - else: - return self._chunkPlainText(content) - - def _chunkPlainText(self, content: str) -> List[str]: - """Chunk plain text content""" - chunks = [] - currentChunk = [] - currentSize = 0 - - for line in content.split('\n'): - lineSize = len(line.encode('utf-8')) - if currentSize + lineSize > self.chunkSizes["plain"]: - if currentChunk: - chunks.append('\n'.join(currentChunk)) - currentChunk = [line] - currentSize = lineSize - else: - currentChunk.append(line) - currentSize += lineSize - - if currentChunk: - chunks.append('\n'.join(currentChunk)) - - return chunks - - def _chunkCsv(self, content: str) -> List[str]: - """Chunk CSV content""" - chunks = [] - currentChunk = [] - currentSize = 0 - - for line in content.split('\n'): - lineSize = len(line.encode('utf-8')) - if currentSize + lineSize > self.chunkSizes["csv"]: - if currentChunk: - chunks.append('\n'.join(currentChunk)) - currentChunk = [line] - currentSize = lineSize - else: - 
currentChunk.append(line) - currentSize += lineSize - - if currentChunk: - chunks.append('\n'.join(currentChunk)) - - return chunks - - def _chunkJson(self, content: str) -> List[str]: - """Chunk JSON content""" - try: - data = json.loads(content) - chunks = [] - currentChunk = [] - currentSize = 0 - - def processValue(value, path=""): - nonlocal currentChunk, currentSize - valueStr = json.dumps({path: value}) if path else json.dumps(value) - valueSize = len(valueStr.encode('utf-8')) - - if currentSize + valueSize > self.chunkSizes["json"]: - if currentChunk: - chunks.append(json.dumps(currentChunk)) - currentChunk = [value] - currentSize = valueSize - else: - currentChunk.append(value) - currentSize += valueSize - - if isinstance(data, list): - for i, item in enumerate(data): - processValue(item, str(i)) - elif isinstance(data, dict): - for key, value in data.items(): - processValue(value, key) - else: - processValue(data) - - if currentChunk: - chunks.append(json.dumps(currentChunk)) - - return chunks - except json.JSONDecodeError: - return [content] - - def _chunkXml(self, content: str) -> List[str]: - """Chunk XML content""" - try: - root = ET.fromstring(content) - chunks = [] - currentChunk = [] - currentSize = 0 - - def processElement(element, path=""): - nonlocal currentChunk, currentSize - elementStr = ET.tostring(element, encoding='unicode') - elementSize = len(elementStr.encode('utf-8')) - - if currentSize + elementSize > self.chunkSizes["xml"]: - if currentChunk: - chunks.append(''.join(currentChunk)) - currentChunk = [elementStr] - currentSize = elementSize - else: - currentChunk.append(elementStr) - currentSize += elementSize - - for child in root: - processElement(child) - - if currentChunk: - chunks.append(''.join(currentChunk)) - - return chunks - except ET.ParseError: - return [content] - - def _chunkHtml(self, content: str) -> List[str]: - """Chunk HTML content with improved semantic chunking""" - try: - soup = BeautifulSoup(content, 
'html.parser') - chunks = [] - currentChunk = [] - currentSize = 0 - - # Use smaller chunk size for HTML to avoid token limits - html_chunk_size = min(self.chunkSizes["html"], 15000) # Max 15KB per chunk - - def processElement(element): - nonlocal currentChunk, currentSize - elementStr = str(element) - elementSize = len(elementStr.encode('utf-8')) - - # If element is too large, split it - if elementSize > html_chunk_size: - # Split large elements by their content - if hasattr(element, 'get_text'): - text_content = element.get_text(separator='\n', strip=True) - if text_content: - # Split text content into smaller chunks - text_chunks = self._chunkTextBySize(text_content, html_chunk_size) - for text_chunk in text_chunks: - if currentChunk: - chunks.append(''.join(currentChunk)) - currentChunk = [f"<{element.name}>{text_chunk}"] - currentSize = len(currentChunk[0].encode('utf-8')) - else: - # For elements without text, just add them - if currentChunk: - chunks.append(''.join(currentChunk)) - currentChunk = [elementStr] - currentSize = elementSize - elif currentSize + elementSize > html_chunk_size: - if currentChunk: - chunks.append(''.join(currentChunk)) - currentChunk = [elementStr] - currentSize = elementSize - else: - currentChunk.append(elementStr) - currentSize += elementSize - - # Process elements in order of importance - for element in soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6']): - processElement(element) - - for element in soup.find_all(['p', 'div', 'section', 'article']): - processElement(element) - - for element in soup.find_all(['ul', 'ol', 'table']): - processElement(element) - - # Process remaining elements - for element in soup.find_all(): - if element.name not in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'div', 'section', 'article', 'ul', 'ol', 'table']: - processElement(element) - - if currentChunk: - chunks.append(''.join(currentChunk)) - - return chunks - except Exception: - return [content] - - def _chunkTextBySize(self, text: str, max_size: 
int) -> List[str]: - """Helper method to chunk text by size""" - chunks = [] - current_chunk = "" - - for line in text.split('\n'): - line_size = len(line.encode('utf-8')) - if len(current_chunk.encode('utf-8')) + line_size > max_size: - if current_chunk: - chunks.append(current_chunk.strip()) - current_chunk = line - else: - current_chunk += "\n" + line if current_chunk else line - - if current_chunk: - chunks.append(current_chunk.strip()) - - return chunks - - def _chunkMarkdown(self, content: str) -> List[str]: - """Chunk Markdown content""" - chunks = [] - currentChunk = [] - currentSize = 0 - - # Split by headers, lists, and code blocks - # This is a simplified approach; a more robust solution would involve a proper Markdown parser - lines = content.split('\n') - for line in lines: - lineSize = len(line.encode('utf-8')) - if currentSize + lineSize > self.chunkSizes["text"]: # Use "text" chunk size for Markdown - if currentChunk: - chunks.append('\n'.join(currentChunk)) - currentChunk = [line] - currentSize = lineSize - else: - currentChunk.append(line) - currentSize += lineSize - - if currentChunk: - chunks.append('\n'.join(currentChunk)) - - return chunks - - def _chunkCode(self, content: str) -> List[str]: - """Chunk code content with optimized chunking for programming languages""" - chunks = [] - currentChunk = [] - currentSize = 0 - - # Use larger chunk size for code to minimize unnecessary splitting - # Code files often have long lines and complex structures - code_chunk_size = min(self.chunkSizes["code"], 80000) # Max 80KB per chunk for code - - # Split by lines to preserve code structure - lines = content.split('\n') - for line in lines: - lineSize = len(line.encode('utf-8')) - if currentSize + lineSize > code_chunk_size: - if currentChunk: - chunks.append('\n'.join(currentChunk)) - currentChunk = [line] - currentSize = lineSize - else: - currentChunk.append(line) - currentSize += lineSize - - if currentChunk: - chunks.append('\n'.join(currentChunk)) - 
- return chunks - - def _chunkJavaScript(self, content: str) -> List[str]: - """Chunk JavaScript content with optimized chunking for JavaScript files""" - chunks = [] - currentChunk = [] - currentSize = 0 - - # Use larger chunk size for JavaScript to minimize unnecessary splitting - # JavaScript files often have long lines and complex structures - js_chunk_size = min(self.chunkSizes["javascript"], 80000) # Max 80KB per chunk for JavaScript - - # Split by lines to preserve code structure - lines = content.split('\n') - for line in lines: - lineSize = len(line.encode('utf-8')) - if currentSize + lineSize > js_chunk_size: - if currentChunk: - chunks.append('\n'.join(currentChunk)) - currentChunk = [line] - currentSize = lineSize - else: - currentChunk.append(line) - currentSize += lineSize - - if currentChunk: - chunks.append('\n'.join(currentChunk)) - - return chunks - - def _chunkBinary(self, content: str) -> List[str]: - """Chunk binary content""" - try: - # Check if content is base64 encoded or plain text - try: - # Try to decode as base64 - binaryData = base64.b64decode(content) - # If successful, it's base64 - chunk the binary data - chunks = [] - chunkSize = self.chunkSizes["binary"] - - for i in range(0, len(binaryData), chunkSize): - chunk = binaryData[i:i + chunkSize] - chunks.append(base64.b64encode(chunk).decode('utf-8')) - - return chunks - except Exception: - # If base64 decoding fails, treat as text and chunk by lines - lines = content.split('\n') - chunks = [] - currentChunk = [] - currentSize = 0 - - for line in lines: - lineSize = len(line.encode('utf-8')) - if currentSize + lineSize > self.chunkSizes["binary"]: - if currentChunk: - chunks.append('\n'.join(currentChunk)) - currentChunk = [line] - currentSize = lineSize - else: - currentChunk.append(line) - currentSize += lineSize - - if currentChunk: - chunks.append('\n'.join(currentChunk)) - - return chunks - except Exception: - return [content] - - async def _chunkPdf(self, content: str) -> 
List[str]: - """Chunk PDF content""" - try: - # Content is already text from _processPdf, not base64 - # Split by lines to create chunks - lines = content.split('\n') - chunks = [] - currentChunk = [] - currentSize = 0 - - for line in lines: - lineSize = len(line.encode('utf-8')) - if currentSize + lineSize > self.chunkSizes["pdf"]: - if currentChunk: - chunks.append('\n'.join(currentChunk)) - currentChunk = [line] - currentSize = lineSize - else: - currentChunk.append(line) - currentSize += lineSize - - if currentChunk: - chunks.append('\n'.join(currentChunk)) - - return chunks - except Exception: - return [content] - - async def _chunkDocx(self, content: str) -> List[str]: - """Chunk Word document content""" - try: - # Content is already text from _processDocx, not base64 - # Split by lines to create chunks - lines = content.split('\n') - chunks = [] - currentChunk = [] - currentSize = 0 - - for line in lines: - lineSize = len(line.encode('utf-8')) - if currentSize + lineSize > self.chunkSizes["docx"]: - if currentChunk: - chunks.append('\n'.join(currentChunk)) - currentChunk = [line] - currentSize = lineSize - else: - currentChunk.append(line) - currentSize += lineSize - - if currentChunk: - chunks.append('\n'.join(currentChunk)) - - return chunks - except Exception: - return [content] - - async def _chunkXlsx(self, content: str) -> List[str]: - """Chunk Excel document content""" - try: - # Content is already text (CSV format) from _processXlsx, not base64 - # Split by lines to create chunks - lines = content.split('\n') - chunks = [] - currentChunk = [] - currentSize = 0 - - for line in lines: - lineSize = len(line.encode('utf-8')) - if currentSize + lineSize > self.chunkSizes["xlsx"]: - if currentChunk: - chunks.append('\n'.join(currentChunk)) - currentChunk = [line] - currentSize = lineSize - else: - currentChunk.append(line) - currentSize += lineSize - - if currentChunk: - chunks.append('\n'.join(currentChunk)) - - return chunks - except Exception: - return 
[content] - - async def _chunkPptx(self, content: str) -> List[str]: - """Chunk PowerPoint document content""" - try: - # Content is already text from PowerPoint processing, not base64 - # Split by lines to create chunks - lines = content.split('\n') - chunks = [] - currentChunk = [] - currentSize = 0 - - for line in lines: - lineSize = len(line.encode('utf-8')) - if currentSize + lineSize > self.chunkSizes["pptx"]: - if currentChunk: - chunks.append('\n'.join(currentChunk)) - currentChunk = [line] - currentSize = lineSize - else: - currentChunk.append(line) - currentSize += lineSize - - if currentChunk: - chunks.append('\n'.join(currentChunk)) - - return chunks - except Exception: - return [content] - - def _chunkWordDocument(self, content: str) -> List[str]: - """Chunk Word document content with markdown formatting preservation""" - chunks = [] - currentChunk = [] - currentSize = 0 - - # Use larger chunk size for Word documents to preserve formatting - word_chunk_size = min(self.chunkSizes["docx"], 60000) # Max 60KB per chunk - - # Split by lines to preserve document structure - lines = content.split('\n') - for line in lines: - lineSize = len(line.encode('utf-8')) - - # Check if adding this line would exceed chunk size - if currentSize + lineSize > word_chunk_size: - if currentChunk: - chunks.append('\n'.join(currentChunk)) - currentChunk = [line] - currentSize = lineSize - else: - currentChunk.append(line) - currentSize += lineSize - - # Add the last chunk if it exists - if currentChunk: - chunks.append('\n'.join(currentChunk)) - - return chunks - - def _chunkExcelDocument(self, content: str) -> List[str]: - """Chunk Excel document content with data structure preservation""" - chunks = [] - currentChunk = [] - currentSize = 0 - - # Use larger chunk size for Excel documents to preserve table structure - excel_chunk_size = min(self.chunkSizes["xlsx"], 80000) # Max 80KB per chunk - - # Split by lines to preserve CSV structure - lines = content.split('\n') - for 
line in lines: - lineSize = len(line.encode('utf-8')) - - # Check if adding this line would exceed chunk size - if currentSize + lineSize > excel_chunk_size: - if currentChunk: - chunks.append('\n'.join(currentChunk)) - currentChunk = [line] - currentSize = lineSize - else: - currentChunk.append(line) - currentSize += lineSize - - # Add the last chunk if it exists - if currentChunk: - chunks.append('\n'.join(currentChunk)) - - return chunks - - \ No newline at end of file diff --git a/modules/chat/documents/documentGeneration.py b/modules/chat/documents/documentGeneration.py deleted file mode 100644 index a5a9ae59..00000000 --- a/modules/chat/documents/documentGeneration.py +++ /dev/null @@ -1,157 +0,0 @@ -import logging -from typing import Any, Dict, List, Optional -from datetime import datetime, UTC -import re -from modules.shared.timezoneUtils import get_utc_timestamp -from .documentUtility import ( - getFileExtension, - getMimeTypeFromExtension, - detectMimeTypeFromContent, - detectMimeTypeFromData, - convertDocumentDataToString -) - -logger = logging.getLogger(__name__) - -class DocumentGenerator: - def __init__(self, service): - self.service = service - - def processActionResultDocuments(self, action_result, action, workflow) -> List[Dict[str, Any]]: - """ - Process documents produced by AI actions and convert them to ChatDocument format. - This function handles AI-generated document data, not document references. - Returns a list of processed document dictionaries. 
- """ - try: - # Read documents from the standard documents field (not data.documents) - documents = action_result.documents if action_result and hasattr(action_result, 'documents') else [] - - if not documents: - logger.info(f"No documents found in action_result.documents for {action.execMethod}.{action.execAction}") - return [] - - logger.info(f"Processing {len(documents)} documents from action_result.documents") - - # Process each document from the AI action result - processed_documents = [] - for doc in documents: - processed_doc = self.processSingleDocument(doc, action) - if processed_doc: - processed_documents.append(processed_doc) - - logger.info(f"Successfully processed {len(processed_documents)} documents") - return processed_documents - except Exception as e: - logger.error(f"Error processing action result documents: {str(e)}") - return [] - - def processSingleDocument(self, doc: Any, action) -> Optional[Dict[str, Any]]: - """Process a single document from action result with simplified logic""" - try: - # ActionDocument objects have documentName, documentData, and mimeType - mime_type = doc.mimeType - if mime_type == "application/octet-stream": - content = doc.documentData - mime_type = detectMimeTypeFromContent(content, doc.documentName, self.service) - - return { - 'fileName': doc.documentName, - 'fileSize': len(str(doc.documentData)), - 'mimeType': mime_type, - 'content': doc.documentData, - 'document': doc - } - except Exception as e: - logger.error(f"Error processing single document: {str(e)}") - return None - - def createDocumentsFromActionResult(self, action_result, action, workflow, message_id=None) -> List[Any]: - """ - Create actual document objects from action result and store them in the system. - Returns a list of created document objects with proper workflow context. 
- """ - try: - logger.info(f"Creating documents from action result for {action.execMethod}.{action.execAction}") - logger.info(f"Action result documents count: {len(action_result.documents) if action_result.documents else 0}") - - processed_docs = self.processActionResultDocuments(action_result, action, workflow) - logger.info(f"Processed {len(processed_docs)} documents") - - created_documents = [] - for i, doc_data in enumerate(processed_docs): - try: - document_name = doc_data['fileName'] - document_data = doc_data['content'] - mime_type = doc_data['mimeType'] - - logger.info(f"Creating document {i+1}: {document_name} (mime: {mime_type}, content length: {len(str(document_data))})") - - # Convert document data to string content - content = convertDocumentDataToString(document_data, getFileExtension(document_name)) - - # Skip empty or minimal content - minimal_content_patterns = ['{}', '[]', 'null', '""', "''"] - if not content or content.strip() == "" or content.strip() in minimal_content_patterns: - logger.warning(f"Empty or minimal content for document {document_name}, skipping") - continue - - logger.info(f"Document {document_name} has content: {len(content)} characters") - - # Create document with file in one step - document = self.service.createDocument( - fileName=document_name, - mimeType=mime_type, - content=content, - base64encoded=False, - messageId=message_id - ) - if document: - # Set workflow context on the document if possible - self._setDocumentWorkflowContext(document, action, workflow) - created_documents.append(document) - logger.info(f"Successfully created ChatDocument: {document_name} (ID: {document.id if hasattr(document, 'id') else 'N/A'}, fileId: {document.fileId if hasattr(document, 'fileId') else 'N/A'})") - else: - logger.error(f"Failed to create ChatDocument object for {document_name}") - except Exception as e: - logger.error(f"Error creating document {doc_data.get('fileName', 'unknown')}: {str(e)}") - continue - - 
logger.info(f"Successfully created {len(created_documents)} documents") - return created_documents - except Exception as e: - logger.error(f"Error creating documents from action result: {str(e)}") - return [] - - def _setDocumentWorkflowContext(self, document, action, workflow): - """Set workflow context on a document for proper routing and labeling""" - try: - # Get current workflow context from service center - workflow_context = self.service.getWorkflowContext() - workflow_stats = self.service.getWorkflowStats() - - current_round = workflow_context.get('currentRound', 0) - current_task = workflow_context.get('currentTask', 0) - current_action = workflow_context.get('currentAction', 0) - - # Try to set workflow context attributes if they exist - if hasattr(document, 'roundNumber'): - document.roundNumber = current_round - if hasattr(document, 'taskNumber'): - document.taskNumber = current_task - if hasattr(document, 'actionNumber'): - document.actionNumber = current_action - if hasattr(document, 'actionId'): - document.actionId = action.id if hasattr(action, 'id') else None - - # Set additional workflow metadata if available - if hasattr(document, 'workflowId'): - document.workflowId = workflow_stats.get('workflowId', workflow.id if hasattr(workflow, 'id') else None) - if hasattr(document, 'workflowStatus'): - document.workflowStatus = workflow_stats.get('workflowStatus', workflow.status if hasattr(workflow, 'status') else 'unknown') - - logger.debug(f"Set workflow context on document: Round {current_round}, Task {current_task}, Action {current_action}") - logger.debug(f"Document workflow metadata: ID={document.workflowId if hasattr(document, 'workflowId') else 'N/A'}, Status={document.workflowStatus if hasattr(document, 'workflowStatus') else 'N/A'}") - - except Exception as e: - logger.warning(f"Could not set workflow context on document: {str(e)}") diff --git a/modules/chat/handling/executionState.py b/modules/chat/handling/executionState.py deleted file mode 
100644 index 1f806745..00000000 --- a/modules/chat/handling/executionState.py +++ /dev/null @@ -1,55 +0,0 @@ -# executionState.py -# Contains all execution state management logic extracted from managerChat.py - -import logging -from typing import List -from datetime import datetime, UTC -from modules.interfaces.interfaceChatModel import TaskStep, ActionResult - -logger = logging.getLogger(__name__) - -class TaskExecutionState: - """Manages execution state for a task with retry logic""" - - def __init__(self, task_step: TaskStep): - self.task_step = task_step - self.successful_actions: List[ActionResult] = [] # Preserved across retries - self.failed_actions: List[ActionResult] = [] # For analysis - self.current_action_index = 0 - self.retry_count = 0 - self.max_retries = 3 - - def addSuccessfulAction(self, action_result: ActionResult): - """Add a successful action to the state""" - self.successful_actions.append(action_result) - self.current_action_index += 1 - - def addFailedAction(self, action_result: ActionResult): - """Add a failed action to the state for analysis""" - self.failed_actions.append(action_result) - self.current_action_index += 1 - - def canRetry(self) -> bool: - """Check if task can be retried""" - return self.retry_count < self.max_retries - - def incrementRetryCount(self): - """Increment retry count""" - self.retry_count += 1 - - def getFailurePatterns(self) -> list: - """Analyze failure patterns from failed actions""" - patterns = [] - for action in self.failed_actions: - error = action.error.lower() if action.error else '' - if "timeout" in error: - patterns.append("timeout_issues") - elif "document_not_found" in error or "file not found" in error: - patterns.append("document_reference_issues") - elif "empty_result" in error or "no content" in error: - patterns.append("content_extraction_issues") - elif "invalid_format" in error or "wrong format" in error: - patterns.append("format_issues") - elif "permission" in error or "access denied" in 
error: - patterns.append("permission_issues") - return list(set(patterns)) \ No newline at end of file diff --git a/modules/chat/handling/handlingTasks.py b/modules/chat/handling/handlingTasks.py deleted file mode 100644 index 49d0b97c..00000000 --- a/modules/chat/handling/handlingTasks.py +++ /dev/null @@ -1,1524 +0,0 @@ -# handlingTasks.py -# Refactored for clarity and consolidation - -import asyncio -import logging -import json -import time -from typing import Dict, Any, Optional, List, Union -from datetime import datetime, UTC -from modules.interfaces.interfaceChatModel import ( - TaskStatus, TaskStep, TaskContext, TaskAction, ReviewResult, TaskPlan, WorkflowResult, TaskResult, ReviewContext, ActionResult -) -from modules.shared.timezoneUtils import get_utc_timestamp -from .executionState import TaskExecutionState -from .promptFactory import ( - createTaskPlanningPrompt, - createActionDefinitionPrompt, - createResultReviewPrompt -) -from modules.chat.documents.documentGeneration import DocumentGenerator -import uuid - -logger = logging.getLogger(__name__) - -class WorkflowStoppedException(Exception): - """Exception raised when a workflow is stopped by the user.""" - pass - -class HandlingTasks: - def __init__(self, chatInterface, service, workflow=None): - self.chatInterface = chatInterface - self.service = service - self.workflow = workflow - self.documentGenerator = DocumentGenerator(service) - - def _checkWorkflowStopped(self): - """ - Check if workflow has been stopped by user and raise exception if so. - This function centralizes all workflow stop checking logic to avoid code duplication. 
- """ - try: - # Get the current workflow status from the database to avoid stale data - current_workflow = self.chatInterface.getWorkflow(self.service.workflow.id) - if current_workflow and current_workflow.status == "stopped": - logger.info("Workflow stopped by user, aborting execution") - raise WorkflowStoppedException("Workflow was stopped by user") - except WorkflowStoppedException: - # Re-raise the WorkflowStoppedException immediately - raise - except Exception as e: - # If we can't get the current status due to other database issues, fall back to the in-memory object - logger.warning(f"Could not check current workflow status from database: {str(e)}") - if self.service.workflow.status == "stopped": - logger.info("Workflow stopped by user (from in-memory object), aborting execution") - raise WorkflowStoppedException("Workflow was stopped by user") - - async def generateTaskPlan(self, userInput: str, workflow) -> TaskPlan: - """Generate a high-level task plan for the workflow.""" - try: - # Check workflow status before generating task plan - self._checkWorkflowStopped() - - logger.info(f"=== STARTING TASK PLAN GENERATION ===") - logger.info(f"Workflow ID: {workflow.id}") - logger.info(f"User Input: {userInput}") - available_docs = self.service.getAvailableDocuments(workflow) - - # Check workflow status before calling AI service - self._checkWorkflowStopped() - - # Create proper context object for task planning - # For task planning, we need to create a minimal TaskStep since TaskContext requires it - from modules.interfaces.interfaceChatModel import TaskStep - planning_task_step = TaskStep( - id="planning", - objective=userInput, - dependencies=[], - success_criteria=[], - estimated_complexity="medium" - ) - - task_planning_context = TaskContext( - task_step=planning_task_step, - workflow=workflow, - workflow_id=workflow.id, - available_documents=available_docs, - available_connections=[], - previous_results=[], - previous_handover=None, - improvements=[], - 
retry_count=0, - previous_action_results=[], - previous_review_result=None, - is_regeneration=False, - failure_patterns=[], - failed_actions=[], - successful_actions=[], - criteria_progress={ - 'met_criteria': set(), - 'unmet_criteria': set(), - 'attempt_history': [] - } - ) - - # Generate the task planning prompt - task_planning_prompt = createTaskPlanningPrompt(task_planning_context, self.service) - - # Log the full task planning prompt being sent to AI for debugging - logger.info("=== TASK PLANNING PROMPT SENT TO AI ===") - logger.info(f"User Input: {userInput}") - logger.info(f"Available Documents: {available_docs}") - - prompt = await self.service.callAiTextAdvanced(task_planning_prompt) - - # Check if AI response is valid - if not prompt: - raise ValueError("AI service returned no response for task planning") - - # Log the full AI response for task planning - logger.info("=== TASK PLANNING AI RESPONSE RECEIVED ===") - logger.info(f"Response length: {len(prompt) if prompt else 0}") - logger.debug("=== FULL TASK PLANNING AI RESPONSE ===") - logger.debug(prompt) - logger.debug("=== END TASK PLANNING AI RESPONSE ===") - - # Inline _parseTaskPlanResponse logic - try: - json_start = prompt.find('{') - json_end = prompt.rfind('}') + 1 - if json_start == -1 or json_end == 0: - raise ValueError("No JSON found in response") - json_str = prompt[json_start:json_end] - task_plan_dict = json.loads(json_str) - - if 'tasks' not in task_plan_dict: - raise ValueError("Task plan missing 'tasks' field") - except Exception as e: - logger.error(f"Error parsing task plan response: {str(e)}") - task_plan_dict = {'tasks': []} - - if not self._validateTaskPlan(task_plan_dict): - logger.error("Generated task plan failed validation") - logger.error(f"AI Response: {prompt}") - logger.error(f"Parsed Task Plan: {json.dumps(task_plan_dict, indent=2)}") - raise Exception("AI-generated task plan failed validation - AI is required for task planning") - - if not task_plan_dict.get('tasks'): - 
raise ValueError("Task plan contains no tasks") - - # LANGUAGE DETECTION: Determine user language once for the entire workflow - # Priority: 1. languageUserDetected from AI response, 2. service.user.language, 3. "en" - detected_language = task_plan_dict.get('languageUserDetected', '').strip() - service_user_language = getattr(self.service.user, 'language', '') if self.service and self.service.user else '' - - if detected_language and len(detected_language) == 2: # Valid language code like "en", "de", "fr" - user_language = detected_language - logger.info(f"Using detected language from AI response: {user_language}") - elif service_user_language and len(service_user_language) == 2: - user_language = service_user_language - logger.info(f"Using language from service user object: {user_language}") - else: - user_language = "en" - logger.info(f"Using default language: {user_language}") - - # Set the detected language in the service for use throughout the workflow - if self.service and self.service.user: - self.service.user.language = user_language - logger.info(f"Set workflow user language to: {user_language}") - - tasks = [] - for i, task_dict in enumerate(task_plan_dict.get('tasks', [])): - if not isinstance(task_dict, dict): - logger.warning(f"Skipping invalid task {i+1}: not a dictionary") - continue - - # Map old 'description' field to new 'objective' field - if 'description' in task_dict and 'objective' not in task_dict: - task_dict['objective'] = task_dict.pop('description') - - try: - task = TaskStep(**task_dict) - tasks.append(task) - except Exception as e: - logger.warning(f"Skipping invalid task {i+1}: {str(e)}") - continue - - if not tasks: - raise ValueError("No valid tasks could be created from AI response") - - task_plan = TaskPlan( - overview=task_plan_dict.get('overview', ''), - tasks=tasks, - userMessage=task_plan_dict.get('userMessage', '') - ) - - # Set workflow totals for progress tracking - total_tasks = len(tasks) - if total_tasks == 0: - raise 
ValueError("Task plan contains no valid tasks") - - self.setWorkflowTotals(total_tasks=total_tasks) - - logger.info(f"Task plan generated successfully with {len(tasks)} tasks") - logger.info(f"Workflow user language set to: {user_language}") - - # PHASE 3: Create chat message containing the task plan - await self.createTaskPlanMessage(task_plan, workflow) - - return task_plan - except Exception as e: - logger.error(f"Error in generateTaskPlan: {str(e)}") - raise - - async def createTaskPlanMessage(self, task_plan: TaskPlan, workflow): - """Create a chat message containing the task plan with user-friendly messages""" - try: - # Build task plan summary - task_summary = f"📋 **Task Plan**\n\n" - - # Get overall user message from task plan if available - overall_message = task_plan.userMessage - if overall_message: - task_summary += f"{overall_message}\n\n" - - # Add each task with its user message - for i, task in enumerate(task_plan.tasks): - if task.userMessage: - task_summary += f"💬 {task.userMessage}\n" - task_summary += "\n" - - - # Create workflow message - message_data = { - "workflowId": workflow.id, - "role": "assistant", - "message": task_summary, - "status": "step", - "sequenceNr": len(workflow.messages) + 1, - "publishedAt": get_utc_timestamp(), - "documentsLabel": "task_plan", - "documents": [], - # Add workflow context fields - use current workflow round instead of hardcoded 1 - "roundNumber": workflow.currentRound, # Use current workflow round - "taskNumber": 1, # Task plan is before individual tasks; to keep 1, that UI not filtering the message - "actionNumber": 0, - # Add task progress status - "taskProgress": "pending" - } - - message = self.chatInterface.createMessage(message_data) - if message: - workflow.messages.append(message) - - # PHASE 4: Update workflow object after task plan created - # Set currentTask=1, currentAction=0, totalTasks=len(task_plan.tasks), totalActions=0 - self.updateWorkflowAfterTaskPlanCreated(len(task_plan.tasks)) - - 
logger.info(f"Task plan message created with {len(task_plan.tasks)} tasks") - else: - logger.error("Failed to create task plan message") - - except Exception as e: - logger.error(f"Error creating task plan message: {str(e)}") - - async def generateTaskActions(self, task_step, workflow, previous_results=None, enhanced_context=None) -> List[TaskAction]: - """Generate actions for a given task step.""" - try: - # Check workflow status before generating actions - self._checkWorkflowStopped() - - retry_info = f" (Retry #{enhanced_context.retry_count})" if enhanced_context and enhanced_context.retry_count > 0 else "" - logger.info(f"Generating actions for task: {task_step.objective}{retry_info}") - - # Log criteria progress if this is a retry - if enhanced_context and hasattr(enhanced_context, 'criteria_progress') and enhanced_context.criteria_progress is not None: - progress = enhanced_context.criteria_progress - logger.info(f"Retry attempt {enhanced_context.retry_count} - Criteria progress:") - if progress.get('met_criteria'): - logger.info(f" Met criteria: {', '.join(progress['met_criteria'])}") - if progress.get('unmet_criteria'): - logger.warning(f" Unmet criteria: {', '.join(progress['unmet_criteria'])}") - - # Show improvement trends - if progress.get('attempt_history'): - recent_attempts = progress['attempt_history'][-2:] # Last 2 attempts - if len(recent_attempts) >= 2: - prev_score = recent_attempts[0].get('quality_score', 0) - curr_score = recent_attempts[1].get('quality_score', 0) - if curr_score > prev_score: - logger.info(f" Quality improving: {prev_score} -> {curr_score}") - elif curr_score < prev_score: - logger.warning(f" Quality declining: {prev_score} -> {curr_score}") - else: - logger.info(f" Quality stable: {curr_score}") - - # Enhanced retry context logging - if enhanced_context and enhanced_context.retry_count > 0: - logger.info("=== RETRY CONTEXT FOR ACTION GENERATION ===") - logger.info(f"Retry Count: {enhanced_context.retry_count}") - 
logger.info(f"Previous Improvements: {enhanced_context.improvements}") - logger.info(f"Previous Review Result: {enhanced_context.previous_review_result}") - logger.info(f"Failure Patterns: {enhanced_context.failure_patterns}") - logger.info(f"Failed Actions: {enhanced_context.failed_actions}") - logger.info(f"Successful Actions: {enhanced_context.successful_actions}") - logger.info("=== END RETRY CONTEXT ===") - - available_docs = self.service.getAvailableDocuments(workflow) - available_connections = self.service.getConnectionReferenceList() - - # Log available resources for debugging - logger.info("=== AVAILABLE RESOURCES FOR ACTION GENERATION ===") - logger.info(f"Available Documents: {available_docs}") - # Note: available_docs is now a string description, not a list - logger.info(f"Available Connections: {len(available_connections) if available_connections else 0}") - if available_connections: - for i, conn in enumerate(available_connections[:5]): # Show first 5 - logger.info(f" Conn {i+1}: {conn}") - if len(available_connections) > 5: - logger.info(f" ... 
and {len(available_connections) - 5} more connections") - logger.info("=== END AVAILABLE RESOURCES ===") - - # Create proper context object for action definition - if enhanced_context and isinstance(enhanced_context, TaskContext): - # Use existing TaskContext if provided - action_context = TaskContext( - task_step=enhanced_context.task_step, - workflow=enhanced_context.workflow, - workflow_id=enhanced_context.workflow_id, - available_documents=enhanced_context.available_documents or available_docs, - available_connections=enhanced_context.available_connections or available_connections, - previous_results=enhanced_context.previous_results or previous_results or [], - previous_handover=enhanced_context.previous_handover, - improvements=enhanced_context.improvements or [], - retry_count=enhanced_context.retry_count or 0, - previous_action_results=enhanced_context.previous_action_results or [], - previous_review_result=enhanced_context.previous_review_result, - is_regeneration=enhanced_context.is_regeneration or False, - failure_patterns=enhanced_context.failure_patterns or [], - failed_actions=enhanced_context.failed_actions or [], - successful_actions=enhanced_context.successful_actions or [], - criteria_progress=enhanced_context.criteria_progress - ) - else: - # Create new context from scratch - action_context = TaskContext( - task_step=task_step, - workflow=workflow, - workflow_id=workflow.id, - available_documents=available_docs, - available_connections=available_connections, - previous_results=previous_results or [], - previous_handover=None, - improvements=[], - retry_count=0, - previous_action_results=[], - previous_review_result=None, - is_regeneration=False, - failure_patterns=[], - failed_actions=[], - successful_actions=[], - criteria_progress=None - ) - - # Check workflow status before calling AI service - self._checkWorkflowStopped() - - # Log the final action context being sent to AI - logger.info("=== FINAL ACTION CONTEXT FOR AI ===") - 
logger.info(f"Task Step ID: {action_context.task_step.id if action_context.task_step else 'None'}") - logger.info(f"Task Step Objective: {action_context.task_step.objective if action_context.task_step else 'None'}") - logger.info(f"Workflow ID: {action_context.workflow_id}") - logger.info(f"Available Documents: {action_context.available_documents or 'No documents available'}") - logger.info(f"Available Connections Count: {len(action_context.available_connections) if action_context.available_connections else 0}") - logger.info(f"Previous Results Count: {len(action_context.previous_results) if action_context.previous_results else 0}") - logger.info(f"Retry Count: {action_context.retry_count}") - logger.info(f"Is Regeneration: {action_context.is_regeneration}") - logger.info("=== END ACTION CONTEXT ===") - - # Generate the action definition prompt - action_prompt = await createActionDefinitionPrompt(action_context, self.service) - prompt = await self.service.callAiTextAdvanced(action_prompt) - - # Check if AI response is valid - if not prompt: - raise ValueError("AI service returned no response") - - # Log the full AI response for debugging - logger.debug("=== FULL AI RESPONSE ===") - logger.debug(prompt) - logger.debug("=== END AI RESPONSE ===") - - # Inline parseActionResponse logic here - json_start = prompt.find('{') - json_end = prompt.rfind('}') + 1 - if json_start == -1 or json_end == 0: - raise ValueError("No JSON found in response") - json_str = prompt[json_start:json_end] - - try: - action_data = json.loads(json_str) - except Exception as e: - logger.error(f"Error parsing action response JSON: {str(e)}") - action_data = {} - - if 'actions' not in action_data: - raise ValueError("Action response missing 'actions' field") - - actions = action_data['actions'] - if not actions: - raise ValueError("Action response contains empty actions list") - - if not isinstance(actions, list): - raise ValueError(f"Action response 'actions' field is not a list: 
{type(actions)}") - - if not self._validateActions(actions, action_context): - logger.error("Generated actions failed validation") - raise Exception("AI-generated actions failed validation - AI is required for action generation") - - # Convert to TaskAction objects - task_actions = [] - for i, a in enumerate(actions): - if not isinstance(a, dict): - logger.warning(f"Skipping invalid action {i+1}: not a dictionary") - continue - - task_action = self.createTaskAction({ - "execMethod": a.get('method', 'unknown'), - "execAction": a.get('action', 'unknown'), - "execParameters": a.get('parameters', {}), - "execResultLabel": a.get('resultLabel', ''), - "expectedDocumentFormats": a.get('expectedDocumentFormats', None), - "status": TaskStatus.PENDING, - # Extract user-friendly message if available - "userMessage": a.get('userMessage', None) - }) - - if task_action: - task_actions.append(task_action) - else: - logger.warning(f"Skipping invalid action {i+1}: failed to create TaskAction") - - valid_actions = [ta for ta in task_actions if ta] - - if not valid_actions: - raise ValueError("No valid actions could be created from AI response") - - return valid_actions - except Exception as e: - logger.error(f"Error in generateTaskActions: {str(e)}") - return [] - - async def executeTask(self, task_step, workflow, context, task_index=None, total_tasks=None) -> TaskResult: - """Execute all actions for a task step, with state management and retries.""" - logger.info(f"=== STARTING TASK {task_index or '?'}: {task_step.objective} ===") - - # PHASE 4: Update workflow object before executing task - # Set currentTask=task_number, currentAction=0, totalActions=0 - if task_index is not None: - self.updateWorkflowBeforeExecutingTask(task_index) - - # Update workflow context for this task - if task_index is not None: - self.service.setWorkflowContext(task_number=task_index) - # Remove the increment call that causes double-increment bug - - # Create database log entry for task start in format 
expected by frontend - if task_index is not None: - - # Create a task start message for the user - task_progress = f"{task_index}/{total_tasks}" if total_tasks is not None else str(task_index) - task_start_message = { - "workflowId": workflow.id, - "role": "assistant", - "message": f"🚀 **Task {task_progress}**", - "status": "step", - "sequenceNr": len(workflow.messages) + 1, - "publishedAt": get_utc_timestamp(), - "documentsLabel": f"task_{task_index}_start", - "documents": [], - # Add workflow context fields - "roundNumber": workflow.currentRound, # Use current workflow round - "taskNumber": task_index, - "actionNumber": 0, - # Add task progress status - "taskProgress": "running" - } - - # Add user-friendly message if available - if task_step.userMessage: - task_start_message["message"] += f"\n\n💬 {task_step.userMessage}" - - message = self.chatInterface.createMessage(task_start_message) - if message: - workflow.messages.append(message) - logger.info(f"Task start message created for task {task_index}") - - state = TaskExecutionState(task_step) - retry_context = context - max_retries = state.max_retries - for attempt in range(max_retries): - logger.info(f"Task execution attempt {attempt+1}/{max_retries}") - - # Check workflow status before starting task execution - self._checkWorkflowStopped() - - # Update retry context with current attempt information - if retry_context: - retry_context.retry_count = attempt + 1 - - actions = await self.generateTaskActions(task_step, workflow, previous_results=retry_context.previous_results, enhanced_context=retry_context) - - # Log total actions count for this task - total_actions = len(actions) if actions else 0 - logger.info(f"Task {task_index or '?'} has {total_actions} actions") - - # PHASE 4: Update workflow object after action planning - # Set totalActions=extracted_total_actions for THIS task - self.updateWorkflowAfterActionPlanning(total_actions) - - # Set workflow action total for this task (0 if no actions generated) - 
self.setWorkflowTotals(total_actions=total_actions) - - if not actions: - logger.error("No actions defined for task step, aborting task execution") - break - - action_results = [] - for action_idx, action in enumerate(actions): - # Check workflow status before each action execution - self._checkWorkflowStopped() - - # PHASE 4: Update workflow object before executing action - # Set currentAction=action_number - action_number = action_idx + 1 - self.updateWorkflowBeforeExecutingAction(action_number) - - # Update workflow context for this action - self.service.setWorkflowContext(action_number=action_number) - # Remove the increment call that causes double-increment bug - - # Log action start in format expected by frontend - logger.info(f"Task {task_index} - Starting action {action_number}/{total_actions}") - - # Create an action start message for the user - action_start_message = { - "workflowId": workflow.id, - "role": "assistant", - "message": f"⚡ **Action {action_number}/{total_actions}** (Method {action.execMethod}.{action.execAction})", - "status": "step", - "sequenceNr": len(workflow.messages) + 1, - "publishedAt": get_utc_timestamp(), - "documentsLabel": f"action_{action_number}_start", - "documents": [], - # Add action progress status - "actionProgress": "running" - } - - # Add user-friendly message if available - if action.userMessage: - action_start_message["message"] += f"\n\n💬 {action.userMessage}" - - # Add workflow context fields - use current workflow round instead of hardcoded 1 - action_start_message.update({ - "roundNumber": workflow.currentRound, # Use current workflow round - "taskNumber": task_index, - "actionNumber": action_number - }) - - message = self.chatInterface.createMessage(action_start_message) - if message: - workflow.messages.append(message) - logger.info(f"Action start message created for action {action_number}") - - # Pass action index to executeSingleAction with task context - result = await self.executeSingleAction(action, 
workflow, task_step, task_index, action_number, total_actions) - action_results.append(result) - if result.success: - state.addSuccessfulAction(result) - else: - state.addFailedAction(result) - - # Check workflow status before review - self._checkWorkflowStopped() - - review_result = await self.reviewTaskCompletion(task_step, actions, action_results, workflow) - success = review_result.status == 'success' - feedback = review_result.reason - error = None if success else review_result.reason - if success: - logger.info(f"=== TASK {task_index or '?'} COMPLETED SUCCESSFULLY: {task_step.objective} ===") - - # Create a task completion message for the user - task_progress = f"{task_index}/{total_tasks}" if total_tasks is not None else str(task_index) - - # Enhanced completion message with criteria details - completion_message = f"🎯 **Task {task_progress}**\n\n✅ {feedback or 'Task completed successfully'}" - - # Add criteria status if available - if hasattr(review_result, 'met_criteria') and review_result.met_criteria: - for criterion in review_result.met_criteria: - completion_message += f"\n• {criterion}" - - if hasattr(review_result, 'quality_score'): - completion_message += f"\n📊 Score {review_result.quality_score}/10" - - task_completion_message = { - "workflowId": workflow.id, - "role": "assistant", - "message": completion_message, - "status": "step", - "sequenceNr": len(workflow.messages) + 1, - "publishedAt": get_utc_timestamp(), - "documentsLabel": f"task_{task_index}_completion", - "documents": [], - # Add workflow context fields - "roundNumber": workflow.currentRound, # Use current workflow round - "taskNumber": task_index, - "actionNumber": 0, - # Add task progress status - "taskProgress": "success" - } - - message = self.chatInterface.createMessage(task_completion_message) - if message: - workflow.messages.append(message) - logger.info(f"Task completion message created for task {task_index}") - - return TaskResult( - taskId=task_step.id, - 
status=TaskStatus.COMPLETED, - success=True, - feedback=feedback, - error=None - ) - - elif review_result.status == 'retry' and state.canRetry(): - logger.warning(f"Task step '{task_step.objective}' requires retry: {review_result.improvements}") - - # Enhanced logging of criteria status - if review_result.met_criteria: - logger.info(f"Met criteria: {', '.join(review_result.met_criteria)}") - if review_result.unmet_criteria: - logger.warning(f"Unmet criteria: {', '.join(review_result.unmet_criteria)}") - - state.incrementRetryCount() - - # Update retry context with retry information and criteria tracking - if retry_context: - retry_context.retry_count = state.retry_count - retry_context.improvements = review_result.improvements - retry_context.previous_action_results = action_results - retry_context.previous_review_result = review_result - retry_context.is_regeneration = True - retry_context.failure_patterns = state.getFailurePatterns() - retry_context.failed_actions = state.failed_actions - retry_context.successful_actions = state.successful_actions - - # Track criteria progress across retries - if not hasattr(retry_context, 'criteria_progress'): - retry_context.criteria_progress = { - 'met_criteria': set(), - 'unmet_criteria': set(), - 'attempt_history': [] - } - - # Update criteria progress - convert lists to sets for deduplication - if review_result.met_criteria: - retry_context.criteria_progress['met_criteria'].update(review_result.met_criteria) - if review_result.unmet_criteria: - retry_context.criteria_progress['unmet_criteria'].update(review_result.unmet_criteria) - - # Record this attempt's criteria status - attempt_record = { - 'attempt': state.retry_count, - 'met_criteria': review_result.met_criteria or [], - 'unmet_criteria': review_result.unmet_criteria or [], - 'quality_score': review_result.quality_score, - 'improvements': review_result.improvements or [] - } - retry_context.criteria_progress['attempt_history'].append(attempt_record) - - 
logger.info(f"Criteria progress after {state.retry_count} attempts:") - logger.info(f" Total met: {len(retry_context.criteria_progress['met_criteria'])}") - logger.info(f" Total unmet: {len(retry_context.criteria_progress['unmet_criteria'])}") - if retry_context.criteria_progress['met_criteria']: - logger.info(f" Met criteria: {', '.join(retry_context.criteria_progress['met_criteria'])}") - if retry_context.criteria_progress['unmet_criteria']: - logger.info(f" Unmet criteria: {', '.join(retry_context.criteria_progress['unmet_criteria'])}") - - # Log retry summary for debugging - logger.info(f"=== RETRY #{state.retry_count} SUMMARY ===") - logger.info(f"Task: {task_step.objective}") - logger.info(f"Quality Score: {review_result.quality_score}/10") - logger.info(f"Status: {review_result.status}") - logger.info(f"Improvements Needed: {review_result.improvements}") - logger.info(f"Reason: {review_result.reason}") - logger.info("=== END RETRY SUMMARY ===") - - # Create retry message for user - retry_message = { - "workflowId": workflow.id, - "role": "assistant", - "message": f"🔄 **Task {task_index}** needs retry: {review_result.improvements}", - "status": "step", - "sequenceNr": len(workflow.messages) + 1, - "publishedAt": get_utc_timestamp(), - "documentsLabel": f"task_{task_index}_retry", - "documents": [], - "roundNumber": workflow.currentRound, - "taskNumber": task_index, - "actionNumber": 0, - "taskProgress": "retry" - } - - message = self.chatInterface.createMessage(retry_message) - if message: - workflow.messages.append(message) - - continue - else: - logger.error(f"=== TASK {task_index or '?'} FAILED: {task_step.objective} after {attempt+1} attempts ===") - task_progress = f"{task_index}/{total_tasks}" if total_tasks is not None else str(task_index) - - # Create user-facing error message for task failure - error_message = f"**Task {task_progress}**\n\n❌ '{task_step.objective}' {attempt+1}x failed\n\n" - - # Add specific error details if available - if 
review_result and hasattr(review_result, 'reason') and review_result.reason: - error_message += f"{review_result.reason}\n\n" - - # Add criteria progress information if available - if retry_context and hasattr(retry_context, 'criteria_progress'): - progress = retry_context.criteria_progress - error_message += f"📊 **Details**\n" - if progress.get('met_criteria'): - error_message += f"✅ Met criteria: {', '.join(progress['met_criteria'])}\n" - if progress.get('unmet_criteria'): - error_message += f"❌ Unmet criteria: {', '.join(progress['unmet_criteria'])}\n" - error_message += "\n" - - # Add retry information - error_message += f"Attempts: {attempt+1}\n" - error_message += f"Status: Will retry automatically\n\n" - error_message += "The system will attempt to retry this task. Please wait..." - - # Create workflow message for user - message_data = { - "workflowId": workflow.id, - "role": "assistant", - "message": error_message, - "status": "step", - "sequenceNr": len(workflow.messages) + 1, - "publishedAt": get_utc_timestamp(), - "actionId": None, - "actionMethod": "task", - "actionName": "task_retry", - "documentsLabel": None, - "documents": [], - # Add workflow context fields - "roundNumber": workflow.currentRound, # Use current workflow round - "taskNumber": task_index, - "actionNumber": 0, - # Add task progress status - "taskProgress": "retry" - } - - try: - message = self.chatInterface.createMessage(message_data) - if message: - workflow.messages.append(message) - logger.info(f"Created user-facing retry message for failed task: {task_step.objective}") - else: - logger.error(f"Failed to create user-facing retry message for failed task: {task_step.objective}") - except Exception as e: - logger.error(f"Error creating user-facing retry message: {str(e)}") - - return TaskResult( - taskId=task_step.id, - status=TaskStatus.FAILED, - success=False, - feedback=feedback, - error=review_result.reason if review_result and hasattr(review_result, 'reason') else "Task failed 
after retry attempts" - ) - logger.error(f"=== TASK {task_index or '?'} FAILED AFTER ALL RETRIES: {task_step.objective} ===") - - # Create user-facing error message for task failure - error_message = f"**Task {task_index or '?'}**\n\n❌ '{task_step.objective}' failed after all retries\n\n" - error_message += f"{task_step.objective}\n\n" - - # Add specific error details if available - if retry_context and hasattr(retry_context, 'previous_review_result') and retry_context.previous_review_result: - reason = retry_context.previous_review_result.reason or '' - if reason and reason != "Task failed after all retries.": - error_message += f"{reason}\n\n" - - # Add retry information - error_message += f"Retries attempted: {retry_context.retry_count if retry_context else 'Unknown'}\n" - error_message += f"Status: Task failed permanently" - - # Create workflow message for user - message_data = { - "workflowId": workflow.id, - "role": "assistant", - "message": error_message, - "status": "step", - "sequenceNr": len(workflow.messages) + 1, - "publishedAt": get_utc_timestamp(), - "actionId": None, - "actionMethod": "task", - "actionName": "task_failure", - "documentsLabel": None, - "documents": [], - # Add workflow context fields - "roundNumber": workflow.currentRound, # Use current workflow round - "taskNumber": task_index, - "actionNumber": 0, - # NEW: Add task progress status - "taskProgress": "fail" - } - - try: - message = self.chatInterface.createMessage(message_data) - if message: - workflow.messages.append(message) - logger.info(f"Created user-facing error message for failed task: {task_step.objective}") - else: - logger.error(f"Failed to create user-facing error message for failed task: {task_step.objective}") - except Exception as e: - logger.error(f"Error creating user-facing error message: {str(e)}") - - return TaskResult( - taskId=task_step.id, - status=TaskStatus.FAILED, - success=False, - feedback="Task failed after all retries.", - error="Task failed after all 
retries." - ) - - async def reviewTaskCompletion(self, task_step, task_actions, action_results, workflow): - try: - # Check workflow status before reviewing task completion - self._checkWorkflowStopped() - - logger.info(f"=== STARTING TASK COMPLETION REVIEW ===") - logger.info(f"Task: {task_step.objective}") - logger.info(f"Actions executed: {len(task_actions) if task_actions else 0}") - logger.info(f"Action results: {len(action_results) if action_results else 0}") - - # Create proper context object for result review - review_context = ReviewContext( - task_step=task_step, - task_actions=task_actions, - action_results=action_results, - step_result={ - 'successful_actions': sum(1 for result in action_results if result.success), - 'total_actions': len(action_results), - 'results': [self._extractResultText(result) for result in action_results if result.success], - 'errors': [result.error for result in action_results if not result.success], - 'documents': [ - { - 'action_index': i, - 'documents_count': len(result.documents) if result.documents else 0, - 'documents': result.documents if result.documents else [] - } - for i, result in enumerate(action_results) - ] - }, - workflow_id=workflow.id, - previous_results=[] - ) - - # Check workflow status before calling AI service - self._checkWorkflowStopped() - - # Use promptFactory for review prompt - prompt = createResultReviewPrompt(review_context, self.service) - - # Log the full result review prompt being sent to AI for debugging - logger.info("=== RESULT REVIEW PROMPT SENT TO AI ===") - logger.info(f"Task: {task_step.objective}") - logger.info(f"Action Results Count: {len(review_context.action_results) if review_context.action_results else 0}") - logger.info(f"Task Actions Count: {len(review_context.task_actions) if review_context.task_actions else 0}") - logger.info("=== FULL RESULT REVIEW PROMPT ===") - logger.info(prompt) - logger.info("=== END RESULT REVIEW PROMPT ===") - - response = await 
self.service.callAiTextAdvanced(prompt) - - # Log the full AI response for result review - logger.info("=== RESULT REVIEW AI RESPONSE RECEIVED ===") - logger.info(f"Response length: {len(response) if response else 0}") - logger.debug("=== FULL RESULT REVIEW AI RESPONSE ===") - logger.debug(response) - logger.debug("=== END RESULT REVIEW AI RESPONSE ===") - - # Inline parseReviewResponse logic here - json_start = response.find('{') - json_end = response.rfind('}') + 1 - if json_start == -1 or json_end == 0: - raise ValueError("No JSON found in review response") - json_str = response[json_start:json_end] - - try: - review = json.loads(json_str) - except Exception as e: - logger.error(f"Error parsing review response JSON: {str(e)}") - review = {} - if 'status' not in review: - raise ValueError("Review response missing 'status' field") - review.setdefault('status', 'unknown') - review.setdefault('reason', 'No reason provided') - review.setdefault('quality_score', 5) - - # Ensure improvements is a list - improvements = review.get('improvements', []) - if isinstance(improvements, str): - # Split string into list if it's a single improvement - improvements = [improvements.strip()] if improvements.strip() else [] - elif not isinstance(improvements, list): - improvements = [] - - # Ensure all list fields are properly typed - met_criteria = review.get('met_criteria', []) - if not isinstance(met_criteria, list): - met_criteria = [] - - unmet_criteria = review.get('unmet_criteria', []) - if not isinstance(unmet_criteria, list): - unmet_criteria = [] - - review_result = ReviewResult( - status=review.get('status', 'unknown'), - reason=review.get('reason', 'No reason provided'), - improvements=improvements, - quality_score=review.get('quality_score', 5), - missing_outputs=[], - met_criteria=met_criteria, - unmet_criteria=unmet_criteria, - confidence=review.get('confidence', 0.5), - # Extract user-friendly message if available - userMessage=review.get('userMessage', None) - ) - - 
# Enhanced validation logging - logger.info(f"VALIDATION RESULT - Task: '{task_step.objective}' - Status: {review_result.status.upper()}, Quality: {review_result.quality_score}/10") - if review_result.status == 'success': - logger.info(f"VALIDATION SUCCESS - Task completed successfully") - if review_result.met_criteria: - logger.info(f"Met criteria: {', '.join(review_result.met_criteria)}") - elif review_result.status == 'retry': - logger.warning(f"VALIDATION RETRY - Task requires retry: {review_result.improvements}") - if review_result.unmet_criteria: - logger.warning(f"Unmet criteria: {', '.join(review_result.unmet_criteria)}") - else: - logger.error(f"VALIDATION FAILED - Task failed: {review_result.reason}") - - logger.info(f"=== TASK COMPLETION REVIEW FINISHED ===") - logger.info(f"Final Status: {review_result.status}") - logger.info(f"Quality Score: {review_result.quality_score}/10") - logger.info(f"Improvements: {review_result.improvements}") - logger.info("=== END REVIEW ===") - - return review_result - except Exception as e: - logger.error(f"Error in reviewTaskCompletion: {str(e)}") - return ReviewResult( - status='failed', - reason=str(e), - quality_score=0 - ) - - async def prepareTaskHandover(self, task_step, task_actions, task_result, workflow): - try: - # Check workflow status before preparing task handover - self._checkWorkflowStopped() - - # Log handover status summary - status = task_result.status if task_result else 'unknown' - - # Handle both TaskResult and ReviewResult objects - if hasattr(task_result, 'met_criteria'): - # This is a ReviewResult object - met = task_result.met_criteria if task_result.met_criteria else [] - review_result = task_result.to_dict() - else: - # This is a TaskResult object - met = [] - review_result = { - 'status': task_result.status if task_result else 'unknown', - 'reason': task_result.error if task_result and hasattr(task_result, 'error') else None, - 'success': task_result.success if task_result else False - } - - 
handover_data = { - 'task_id': task_step.id, - 'task_description': task_step.objective, - 'actions': [action.to_dict() for action in task_actions], - 'review_result': review_result, - 'workflow_id': workflow.id, - 'handover_time': get_utc_timestamp() - } - logger.info(f"Prepared handover for task {task_step.id} in workflow {workflow.id}") - return handover_data - except Exception as e: - logger.error(f"Error in prepareTaskHandover: {str(e)}") - return {'error': str(e)} - - def createTaskAction(self, actionData: Dict[str, Any]) -> 'TaskAction': - """Creates a new task action.""" - try: - # Ensure ID is present - if "id" not in actionData or not actionData["id"]: - actionData["id"] = f"action_{uuid.uuid4()}" - - # Ensure required fields - if "status" not in actionData: - actionData["status"] = TaskStatus.PENDING - - if "execMethod" not in actionData: - logger.error("execMethod is required for task action") - return None - - if "execAction" not in actionData: - logger.error("execAction is required for task action") - return None - - if "execParameters" not in actionData: - actionData["execParameters"] = {} - - # Use generic field separation based on TaskAction model - simple_fields, object_fields = self.chatInterface._separate_object_fields(TaskAction, actionData) - - # Create action in database - createdAction = self.chatInterface.db.recordCreate(TaskAction, simple_fields) - - # Convert to TaskAction model - return TaskAction( - id=createdAction["id"], - execMethod=createdAction["execMethod"], - execAction=createdAction["execAction"], - execParameters=createdAction.get("execParameters", {}), - execResultLabel=createdAction.get("execResultLabel"), - expectedDocumentFormats=createdAction.get("expectedDocumentFormats"), - status=createdAction.get("status", TaskStatus.PENDING), - error=createdAction.get("error"), - retryCount=createdAction.get("retryCount", 0), - retryMax=createdAction.get("retryMax", 3), - processingTime=createdAction.get("processingTime"), - 
timestamp=float(createdAction.get("timestamp", get_utc_timestamp())), - result=createdAction.get("result"), - resultDocuments=createdAction.get("resultDocuments", []), - userMessage=createdAction.get("userMessage") - ) - - except Exception as e: - logger.error(f"Error creating task action: {str(e)}") - return None - - # --- Helper action handling methods --- - - async def executeSingleAction(self, action, workflow, task_step, task_index=None, action_index=None, total_actions=None): - """Execute a single action and return ActionResult with enhanced document processing""" - try: - # Check workflow status before executing action - self._checkWorkflowStopped() - - # Use passed indices or fallback to '?' - task_num = task_index if task_index is not None else '?' - action_num = action_index if action_index is not None else '?' - - logger.info(f"=== TASK {task_num} ACTION {action_num}: {action.execMethod}.{action.execAction} ===") - - # Log input parameters - input_docs = action.execParameters.get('documentList', []) - input_connections = action.execParameters.get('connections', []) - logger.info(f"Input documents: {input_docs} (type: {type(input_docs)})") - if input_connections: - logger.info(f"Input connections: {input_connections}") - - # Log all action parameters for debugging - logger.info(f"All action parameters: {action.execParameters}") - - enhanced_parameters = action.execParameters.copy() - if action.expectedDocumentFormats: - enhanced_parameters['expectedDocumentFormats'] = action.expectedDocumentFormats - logger.info(f"Expected formats: {action.expectedDocumentFormats}") - - # Check workflow status before executing the action - self._checkWorkflowStopped() - - result = await self.service.executeAction( - methodName=action.execMethod, - actionName=action.execAction, - parameters=enhanced_parameters - ) - result_label = action.execResultLabel - - # Process documents from the action result - created_documents = [] - if result.success: - action.setSuccess() - # 
Extract result text from documents if available, otherwise use empty string - action.result = "" - if result.documents and len(result.documents) > 0: - # Try to get text content from the first document - first_doc = result.documents[0] - if isinstance(first_doc.documentData, dict): - action.result = first_doc.documentData.get("result", "") - elif isinstance(first_doc.documentData, str): - action.result = first_doc.documentData - # Preserve the action's execResultLabel for document routing - # Action methods should NOT return resultLabel - this is managed by the action handler - if not action.execResultLabel: - logger.warning(f"Action {action.execMethod}.{action.execAction} has no execResultLabel set") - # Always use the action's execResultLabel for message creation to ensure proper document routing - message_result_label = action.execResultLabel - - # Create message first to get messageId, then create documents with messageId - message = await self.createActionMessage(action, result, workflow, message_result_label, [], task_step, task_index) - if message: - # Now create documents with the messageId - created_documents = self.documentGenerator.createDocumentsFromActionResult(result, action, workflow, message.id) - # Update the message with the created documents - if created_documents: - message.documents = created_documents - # Update the message in the database - self.chatInterface.updateMessage(message.id, {"documents": [doc.dict() for doc in created_documents]}) - - # Log action results - logger.info(f"Action completed successfully") - - if created_documents: - logger.info(f"Output documents ({len(created_documents)}):") - for i, doc in enumerate(created_documents): - logger.info(f" {i+1}. 
{doc.fileName}") - - # Log document details for debugging - logger.info("Document details:") - for i, doc in enumerate(created_documents): - logger.info(f" Doc {i+1}: fileName={doc.fileName}, type={type(doc)}") - logger.info(f" ID: {doc.id}") - logger.info(f" File ID: {doc.fileId}") - else: - logger.info("Output: No documents created") - else: - action.setError(result.error or "Action execution failed") - logger.error(f"Action failed: {result.error}") - - # ⚠️ IMPORTANT: Create error message for failed actions so user can see what went wrong - message = await self.createActionMessage(action, result, workflow, result_label, [], task_step, task_index) - - # Create database log entry for action failure - self.chatInterface.createLog({ - "workflowId": workflow.id, - "message": f"❌ **Task {task_num}**\n\n❌ **Action {action_num}/{total_actions}** failed: {result.error}", - "type": "error" - }) - - # Log action summary - logger.info(f"=== TASK {task_num} ACTION {action_num} COMPLETED ===") - - # Preserve the original documents field from the method result - # This ensures the standard document format is maintained - original_documents = result.documents - - # Extract result text from documents if available - result_text = self._extractResultText(result) - - return ActionResult( - success=result.success, - documents=original_documents, # Preserve original documents field from method result - resultLabel=action.execResultLabel, # Always use action's execResultLabel - error=result.error or "" - ) - except Exception as e: - logger.error(f"Error executing single action: {str(e)}") - action.setError(str(e)) - return ActionResult( - success=False, - documents=[], # Empty documents for error case - resultLabel=action.execResultLabel, - error=str(e) - ) - - async def createActionMessage(self, action, result, workflow, result_label=None, created_documents=None, task_step=None, task_index=None): - """Create and store a message for the action result in the workflow with enhanced 
document processing""" - try: - # Check workflow status before creating action message - self._checkWorkflowStopped() - - if result_label is None: - result_label = action.execResultLabel - - # Log delivered documents - if created_documents: - logger.info(f"Result label: {result_label} - {len(created_documents)} documents") - else: - logger.info(f"Result label: {result_label} - No documents") - - # Get current workflow context and stats - workflow_context = self.service.getWorkflowContext() - workflow_stats = self.service.getWorkflowStats() - - # Create a more meaningful message that includes task context - task_objective = task_step.objective if task_step else 'Unknown task' - - # Add comprehensive workflow context - current_round = workflow_context.get('currentRound', 0) - current_task = workflow_context.get('currentTask', 0) - total_tasks = workflow_stats.get('totalTasks', 0) - current_action = workflow_context.get('currentAction', 0) - total_actions = workflow_stats.get('totalActions', 0) - - # Build a user-friendly message based on success/failure - if result.success: - message_text = f"**Action {current_action}/{total_actions} ({action.execMethod}.{action.execAction})**\n\n" - message_text += f"✅ {task_objective}\n\n" - else: - # ⚠️ FAILURE MESSAGE - Show error details to user - error_details = result.error if result.error else "Unknown error occurred" - message_text = f"**Action {current_action}/{total_actions} ({action.execMethod}.{action.execAction})**\n\n" - message_text += f"❌ {task_objective}\n\n" - message_text += f"{error_details}\n\n" - - message_data = { - "workflowId": workflow.id, - "role": "assistant", - "message": message_text, - "status": "step", - "sequenceNr": len(workflow.messages) + 1, - "publishedAt": get_utc_timestamp(), - "actionId": action.id, - "actionMethod": action.execMethod, - "actionName": action.execAction, - "documentsLabel": result_label, - "documents": created_documents, - # Add workflow context fields - extract from 
result_label to match document reference - "roundNumber": current_round, - "taskNumber": current_task, - "actionNumber": current_action, - "actionProgress": "success" if result.success else "fail" - } - - # Add debugging for error messages - if not result.success: - logger.info(f"Creating ERROR message: {message_text}") - logger.info(f"Message data: {message_data}") - - message = self.chatInterface.createMessage(message_data) - if message: - workflow.messages.append(message) - logger.info(f"Message created: {action.execMethod}.{action.execAction}") - return message - else: - logger.error(f"Failed to create workflow message for action {action.execMethod}.{action.execAction}") - return None - except Exception as e: - logger.error(f"Error creating action message: {str(e)}") - return None - - # --- Helper validation methods --- - - def _validateTaskPlan(self, task_plan: Dict[str, Any]) -> bool: - try: - - - if not isinstance(task_plan, dict): - logger.error("Task plan is not a dictionary") - return False - - if 'tasks' not in task_plan or not isinstance(task_plan['tasks'], list): - logger.error(f"Task plan missing 'tasks' field or not a list. 
Found: {type(task_plan.get('tasks', 'MISSING'))}") - return False - - # First pass: collect all task IDs to validate dependencies - task_ids = set() - for task in task_plan['tasks']: - if not isinstance(task, dict): - logger.error(f"Task is not a dictionary: {type(task)}") - return False - if 'id' not in task: - logger.error(f"Task missing 'id' field: {task}") - return False - task_ids.add(task['id']) - - # Second pass: validate each task - for i, task in enumerate(task_plan['tasks']): - - - if not isinstance(task, dict): - logger.error(f"Task {i} is not a dictionary: {type(task)}") - return False - - required_fields = ['id', 'objective', 'success_criteria'] - missing_fields = [field for field in required_fields if field not in task] - if missing_fields: - logger.error(f"Task {i} missing required fields: {missing_fields}") - return False - - # Check for duplicate IDs (shouldn't happen after first pass, but safety check) - if task['id'] in task_ids and list(task_plan['tasks']).count(task['id']) > 1: - logger.error(f"Task {i} has duplicate ID: {task['id']}") - return False - - dependencies = task.get('dependencies', []) - if not isinstance(dependencies, list): - logger.error(f"Task {i} dependencies is not a list: {type(dependencies)}") - return False - - for dep in dependencies: - if dep not in task_ids and dep != 'task_0': - logger.error(f"Task {i} has invalid dependency: {dep} (available: {list(task_ids) + ['task_0']})") - return False - - logger.info(f"Task plan validation successful with {len(task_ids)} tasks") - return True - - except Exception as e: - logger.error(f"Error validating task plan: {str(e)}") - return False - - def _extractActionNumberFromLabel(self, label: str) -> int: - """Extract action number from a document label like 'round1_task1_action1_diagram_analysis'""" - try: - if not label or not isinstance(label, str): - return 0 - - # Parse label format: round{round}_task{task}_action{action}_{context} - if '_action' in label: - action_part = 
label.split('_action')[1] - if action_part and '_' in action_part: - action_number = action_part.split('_')[0] - return int(action_number) - - return 0 - except Exception as e: - logger.warning(f"Could not extract action number from label '{label}': {str(e)}") - return 0 - - def _validateActions(self, actions: List[Dict[str, Any]], context) -> bool: - try: - if not isinstance(actions, list): - logger.error("Actions must be a list") - return False - if len(actions) == 0: - logger.warning("No actions generated") - return False - for i, action in enumerate(actions): - if not isinstance(action, dict): - logger.error(f"Action {i} must be a dictionary") - return False - required_fields = ['method', 'action', 'parameters', 'resultLabel'] - missing_fields = [] - for field in required_fields: - if field not in action or not action[field]: - missing_fields.append(field) - if missing_fields: - logger.error(f"Action {i} missing required fields: {missing_fields}") - return False - result_label = action.get('resultLabel', '') - if not result_label.startswith('round'): - logger.error(f"Action {i} result label must start with 'round': {result_label}") - return False - parameters = action.get('parameters', {}) - if not isinstance(parameters, dict): - logger.error(f"Action {i} parameters must be a dictionary") - return False - logger.info(f"Successfully validated {len(actions)} actions") - return True - except Exception as e: - logger.error(f"Error validating actions: {str(e)}") - return False - - def _extractResultText(self, result: ActionResult) -> str: - """Extract result text from ActionResult documents""" - if not result.success or not result.documents: - return "" - - # Try to get text content from the first document - first_doc = result.documents[0] - if isinstance(first_doc.documentData, dict): - return first_doc.documentData.get("result", "") - elif isinstance(first_doc.documentData, str): - return first_doc.documentData - return "" - - # PHASE 4: Workflow Object Update 
Rules Implementation - - def updateWorkflowAfterTaskPlanCreated(self, total_tasks: int): - """ - Update workflow object after task plan is created. - Rule: Set currentTask=1, currentAction=0, totalTasks=extracted_total_tasks, totalActions=0 - """ - try: - update_data = { - "currentTask": 1, - "currentAction": 0, - "totalTasks": total_tasks, - "totalActions": 0 - } - - # Update workflow object - self.workflow.currentTask = 1 - self.workflow.currentAction = 0 - self.workflow.totalTasks = total_tasks - self.workflow.totalActions = 0 - - # Update in database - self.chatInterface.updateWorkflow(self.workflow.id, update_data) - logger.info(f"Updated workflow {self.workflow.id} after task plan created: {update_data}") - - except Exception as e: - logger.error(f"Error updating workflow after task plan created: {str(e)}") - - def updateWorkflowBeforeExecutingTask(self, task_number: int): - """ - Update workflow object before executing a task. - Rule: Set currentTask=task_number, currentAction=0, totalActions=0 - """ - try: - update_data = { - "currentTask": task_number, - "currentAction": 0, - "totalActions": 0 - } - - # Update workflow object - self.workflow.currentTask = task_number - self.workflow.currentAction = 0 - self.workflow.totalActions = 0 - - # Update in database - self.chatInterface.updateWorkflow(self.workflow.id, update_data) - logger.info(f"Updated workflow {self.workflow.id} before executing task {task_number}: {update_data}") - - except Exception as e: - logger.error(f"Error updating workflow before executing task: {str(e)}") - - def updateWorkflowAfterActionPlanning(self, total_actions: int): - """ - Update workflow object after action planning for current task. 
- Rule: Set totalActions=extracted_total_actions for THIS task - """ - try: - update_data = { - "totalActions": total_actions - } - - # Update workflow object - self.workflow.totalActions = total_actions - - # Update in database - self.chatInterface.updateWorkflow(self.workflow.id, update_data) - logger.info(f"Updated workflow {self.workflow.id} after action planning: {update_data}") - - except Exception as e: - logger.error(f"Error updating workflow after action planning: {str(e)}") - - def updateWorkflowBeforeExecutingAction(self, action_number: int): - """ - Update workflow object before executing an action. - Rule: Set currentAction=action_number - """ - try: - update_data = { - "currentAction": action_number - } - - # Update workflow object - self.workflow.currentAction = action_number - - # Update in database - self.chatInterface.updateWorkflow(self.workflow.id, update_data) - logger.info(f"Updated workflow {self.workflow.id} before executing action {action_number}: {update_data}") - - except Exception as e: - logger.error(f"Error updating workflow before executing action: {str(e)}") - - def setWorkflowTotals(self, total_tasks: int = None, total_actions: int = None): - """Set total counts for workflow progress tracking and update database""" - try: - update_data = {} - - if total_tasks is not None: - self.workflow.totalTasks = total_tasks - update_data["totalTasks"] = total_tasks - - if total_actions is not None: - self.workflow.totalActions = total_actions - update_data["totalActions"] = total_actions - - # Update workflow object in database if we have changes - if update_data: - self.chatInterface.updateWorkflow(self.workflow.id, update_data) - logger.info(f"Updated workflow {self.workflow.id} totals in database: {update_data}") - - logger.debug(f"Updated workflow totals: Tasks {self.workflow.totalTasks if hasattr(self.workflow, 'totalTasks') else 'N/A'}, Actions {self.workflow.totalActions if hasattr(self.workflow, 'totalActions') else 'N/A'}") - except 
Exception as e: - logger.error(f"Error setting workflow totals: {str(e)}") - - def resetWorkflowForNewSession(self): - """Reset workflow values for a new workflow session""" - try: - # Reset all workflow progress values to initial state - self.workflow.currentRound = 0 - self.workflow.currentTask = 0 - self.workflow.currentAction = 0 - self.workflow.totalTasks = 0 - self.workflow.totalActions = 0 - self.workflow.status = 'ready' - - # Update workflow object in database with reset values - self.chatInterface.updateWorkflow(self.workflow.id, { - "currentRound": 0, - "currentTask": 0, - "currentAction": 0, - "totalTasks": 0, - "totalActions": 0, - "status": "ready" - }) - - logger.info("Workflow reset for new session - all values set to initial state and updated in database") - except Exception as e: - logger.error(f"Error resetting workflow for new session: {str(e)}") \ No newline at end of file diff --git a/modules/chat/handling/promptFactory.py b/modules/chat/handling/promptFactory.py deleted file mode 100644 index ada386ba..00000000 --- a/modules/chat/handling/promptFactory.py +++ /dev/null @@ -1,770 +0,0 @@ -# promptFactory.py -# Contains all prompt creation functions extracted from managerChat.py - -import json -import logging -from typing import Any, Dict -from modules.interfaces.interfaceChatModel import TaskContext, ReviewContext - -# Set up logger -logger = logging.getLogger(__name__) - -# Prompt creation helpers extracted from managerChat.py - -def _getPreviousRoundContext(service, workflow) -> str: - """Get context from previous workflow rounds to help understand follow-up prompts""" - try: - if not workflow or not hasattr(workflow, 'messages') or not workflow.messages: - return "" - - # Get current round number - current_round = getattr(workflow, 'currentRound', 0) - - # If this is round 0 or 1, there's no previous context - if current_round <= 1: - return "" - - # Find messages from previous rounds (rounds before current) - previous_messages = [] - for 
message in workflow.messages: - message_round = getattr(message, 'roundNumber', 0) - if message_round > 0 and message_round < current_round: - previous_messages.append(message) - - if not previous_messages: - return "" - - # Sort by round number and sequence to get chronological order - previous_messages.sort(key=lambda msg: (getattr(msg, 'roundNumber', 0), getattr(msg, 'sequenceNr', 0))) - - # Build context summary - context_parts = [] - current_round_context = {} - - for message in previous_messages: - round_num = getattr(message, 'roundNumber', 0) - if round_num not in current_round_context: - current_round_context[round_num] = { - 'user_inputs': [], - 'assistant_responses': [], - 'task_outcomes': [], - 'documents_processed': [] - } - - # Categorize messages - if message.role == 'user': - current_round_context[round_num]['user_inputs'].append(message.message) - elif message.role == 'assistant': - # Check if it's a task completion or error message - if 'task' in message.message.lower() and ('completed' in message.message.lower() or 'failed' in message.message.lower() or 'error' in message.message.lower()): - current_round_context[round_num]['task_outcomes'].append(message.message) - else: - current_round_context[round_num]['assistant_responses'].append(message.message) - - # Check for document processing - if hasattr(message, 'documents') and message.documents: - doc_names = [doc.fileName for doc in message.documents if hasattr(doc, 'fileName')] - if doc_names: - current_round_context[round_num]['documents_processed'].extend(doc_names) - - # Build context summary - for round_num in sorted(current_round_context.keys()): - round_data = current_round_context[round_num] - context_parts.append(f"ROUND {round_num} CONTEXT:") - - if round_data['user_inputs']: - context_parts.append(f" User requests: {'; '.join(round_data['user_inputs'])}") - - if round_data['task_outcomes']: - context_parts.append(f" Task outcomes: {'; '.join(round_data['task_outcomes'])}") - - if 
round_data['documents_processed']: - context_parts.append(f" Documents processed: {', '.join(set(round_data['documents_processed']))}") - - if context_parts: - return "\n".join(context_parts) - else: - return "" - - except Exception as e: - logger.error(f"Error getting previous round context: {str(e)}") - return "" - -def createTaskPlanningPrompt(context: TaskContext, service) -> str: - """Create enhanced prompt for task planning with user-friendly message generation and language detection""" - # Get user language directly from service.user.language - user_language = service.user.language if service and service.user else 'en' - - # Extract user request from context - use Pydantic model directly - user_request = context.task_step.objective if context.task_step else 'No request specified' - - # Extract available documents from context - use Pydantic model directly - available_documents = context.available_documents or "No documents available" - - # Get previous workflow round context for better understanding of follow-up prompts - previous_round_context = _getPreviousRoundContext(service, context.workflow) - - return f"""You are a task planning AI that analyzes user requests and creates structured task plans with user-friendly feedback messages. - -USER REQUEST: {user_request} - -AVAILABLE DOCUMENTS: {available_documents} - -PREVIOUS WORKFLOW ROUNDS CONTEXT: -{previous_round_context if previous_round_context else "No previous workflow rounds - this is the first round."} - -INSTRUCTIONS: -1. Analyze the user request, available documents, and previous workflow rounds context -2. If the user request appears to be a follow-up (like "try again", "versuche es nochmals", "retry", etc.), - use the PREVIOUS WORKFLOW ROUNDS CONTEXT to understand what the user wants to retry or continue -3. Group related topics and sequential steps into single, comprehensive tasks -4. Focus on business outcomes, not technical operations -5. 
Each task should produce meaningful, usable outputs -6. Ensure proper handover between tasks using result labels -7. Detect the language of the user request and include it in languageUserDetected -8. Generate user-friendly messages for each task in the user's request language -9. Return a JSON object with the exact structure shown below - -TASK GROUPING PRINCIPLES: -- COMBINE RELATED TOPICS: Group related subjects, sequential steps, or workflow-structured activities into single tasks -- SEQUENTIAL WORKFLOWS: If the user says "first do this, then that, then that" → create ONE task that handles the entire sequence -- SIMILAR CONTENT: If multiple items deal with the same subject matter → combine into ONE comprehensive task -- ONLY SPLIT WHEN DIFFERENT: Create separate tasks ONLY when the user explicitly wants different, independent things - -EXAMPLES OF GOOD TASK GROUPING: - -COMBINE INTO ONE TASK: -- "Analyze the documents, extract key insights, and create a summary report" → ONE task: "Analyze documents and create comprehensive summary report" -- "First check my emails, then respond to urgent ones, then organize my inbox" → ONE task: "Process and organize email inbox with priority responses" -- "Review the budget, analyze spending patterns, and suggest cost-cutting measures" → ONE task: "Comprehensive budget analysis with optimization recommendations" -- "Create a business strategy, develop marketing plan, and prepare presentation" → ONE task: "Develop complete business strategy with marketing plan and presentation" - -SPLIT INTO MULTIPLE TASKS: -- "Create a business strategy for Q4" AND "Check my emails for messages from my assistant" → TWO separate tasks (different subjects) -- "Analyze customer feedback" AND "Prepare quarterly financial report" → TWO separate tasks (different business areas) -- "Review project timeline" AND "Update employee handbook" → TWO separate tasks (unrelated activities) - -TASK PLANNING PRINCIPLES: -- Break down complex requests into 
logical, sequential steps -- Focus on business value and outcomes -- Keep tasks at a meaningful level of abstraction -- Each task should produce results that can be used by subsequent tasks -- Ensure clear dependencies and handovers between tasks -- Provide clear, actionable user messages in the user's request language -- Group related activities to minimize task fragmentation -- Only create multiple tasks when dealing with truly different, independent objectives - -FOLLOW-UP PROMPT HANDLING: -- If the user request is a follow-up (e.g., "try again", "versuche es nochmals", "retry", "continue", "proceed"), - analyze the PREVIOUS WORKFLOW ROUNDS CONTEXT to understand what failed or was incomplete -- Use the previous round's user requests and task outcomes to determine what the user wants to retry -- If previous rounds failed due to missing documents, and documents are now available, - create tasks that use the newly available documents to accomplish the original request -- Maintain the same business objective from previous rounds but adapt to current available resources - -SPECIFIC SCENARIO HANDLING: -- If previous round failed with "documents missing" error and current round has documents available, - the user likely wants to retry the same operation with the newly provided documents -- Example: Previous round "speichere mir die 3 dokumente im sharepoint unter xxx" failed due to missing documents, - current round "versuche es nochmals" with documents should retry the SharePoint save operation -- Always check if the current request is a retry by looking for retry keywords and previous round context - -REQUIRED JSON STRUCTURE: -{{ - "overview": "Brief description of the overall plan", - "languageUserDetected": "en", // Language code detected from user request (en, de, fr, it, es, etc.) 
- "userMessage": "User-friendly message explaining the task plan in user's request language", - "tasks": [ - {{ - "id": "task_1", - "objective": "Clear business objective this task accomplishes (combining related activities)", - "dependencies": ["task_0"], // IDs of tasks that must complete first - "success_criteria": ["criteria1", "criteria2"], - "estimated_complexity": "low|medium|high", - "userMessage": "User-friendly message explaining what this task will accomplish in user's request language" - }} - ] -}} - -EXAMPLES OF GOOD TASK OBJECTIVES (COMBINING RELATED ACTIVITIES): -- "Analyze documents and extract key insights for business communication" -- "Create professional business communication incorporating analyzed information" -- "Execute business communication using specified channels and document outcomes" -- "Develop comprehensive business strategy with implementation roadmap and success metrics" - -EXAMPLES OF GOOD SUCCESS CRITERIA: -- "Key insights extracted and ready for business use" -- "Professional communication created with clear business value" -- "Business communication successfully delivered and documented" -- "All outcomes properly documented and accessible" - -EXAMPLES OF BAD TASK OBJECTIVES: -- "Read the PDF file" (too granular - should be "Analyze document content") -- "Convert data to CSV" (implementation detail - should be "Structure data for analysis") -- "Send email" (too specific - should be "Deliver business communication") - -LANGUAGE DETECTION: -- Analyze the user request text to identify the language -- Use standard language codes: en (English), de (German), fr (French), it (Italian), es (Spanish), etc. -- If the language cannot be determined, use "en" as default -- Include the detected language in the languageUserDetected field - -NOTE: Respond with ONLY the JSON object. 
Do not include any explanatory text.""" - -async def createActionDefinitionPrompt(context: TaskContext, service) -> str: - """Create enhanced prompt for action generation with user-friendly messages and enhanced document context""" - methodList = service.getMethodsList() - method_actions = {} - for sig in methodList: - if '.' in sig: - method, rest = sig.split('.', 1) - action = rest.split('(')[0] - method_actions.setdefault(method, []).append((action, sig)) - - messageSummary = await service.summarizeChat(context.workflow.messages) if context.workflow else "" - - # Get enhanced document context using the new method - available_documents_str = service.getEnhancedDocumentContext() - - connRefs = service.getConnectionReferenceList() - - # Create a structured JSON format for better AI parsing - # This replaces the old hard-to-read format with a clean JSON structure - # that the AI can easily parse and understand - available_methods_json = {} - for method, actions in method_actions.items(): - available_methods_json[method] = {} - # Get the method instance for accessing docstrings - method_instance = service.methods.get(method, {}).get('instance') if hasattr(service, 'methods') else None - - for action, sig in actions: - # Parse the signature to extract parameters - if '(' in sig and ')' in sig: - # Extract parameters from signature - params_start = sig.find('(') - params_end = sig.find(')') - params_str = sig[params_start+1:params_end] - - # Parse parameters directly from the docstring - much simpler and more reliable! 
- parameters = [] - - # Get the actual function's docstring - if method_instance and hasattr(method_instance, action): - func = getattr(method_instance, action) - if hasattr(func, '__doc__') and func.__doc__: - docstring = func.__doc__ - - # Parse Parameters section from docstring - lines = docstring.split('\n') - in_parameters = False - for i, line in enumerate(lines): - original_line = line - line = line.strip() - - if line == 'Parameters:': - in_parameters = True - continue - elif in_parameters and line and not original_line.startswith(' ') and not original_line.startswith('\t'): - # End of parameters section - break - elif in_parameters and (original_line.startswith(' ') or original_line.startswith('\t')): - # This is a parameter line - already stripped - # Format: "paramName (type): description" - if ':' in line: - # Find the colon that separates param from description - colon_pos = line.find(':') - param_part = line[:colon_pos].strip() - description = line[colon_pos+1:].strip() - - # Parse parameter name and type - if '(' in param_part and ')' in param_part: - param_name = param_part.split('(')[0].strip() - type_part = param_part[param_part.find('(')+1:param_part.find(')')].strip() - - # Check if optional - is_optional = 'optional' in type_part - param_type = type_part.replace('optional', '').strip().rstrip(',').strip() - - parameters.append({ - "name": param_name, - "type": param_type, - "description": description, - "required": not is_optional - }) - - available_methods_json[method][action] = { - "signature": sig, - "parameters": parameters, - "description": f"{method}.{action} action" - } - - # Convert to a compact, AI-friendly format - available_methods_str = f""" -AVAILABLE ACTIONS (JSON format for better AI parsing): -{json.dumps(available_methods_json, indent=1, separators=(',', ':'))} -""" - retry_context = "" - if context.retry_count and context.retry_count > 0: - retry_context = f""" -RETRY CONTEXT (Attempt {context.retry_count}): -Previous action 
results that failed or were incomplete: -""" - for i, result in enumerate(context.previous_action_results or []): - retry_context += f"- Action {i+1}: ActionResult\n" - retry_context += f" Status: {result.success and 'success' or 'failed'}\n" - retry_context += f" Error: {result.error or 'None'}\n" - # Check if result has documents and show document info - if result.documents: - doc_info = f"Documents: {len(result.documents)} document(s)" - if result.documents[0].documentName: - doc_info += f" - {result.documents[0].documentName}" - retry_context += f" {doc_info}\n" - else: - retry_context += f" Documents: None\n" - - if context.previous_review_result: - retry_context += f""" -Previous review feedback: -- Status: {context.previous_review_result.status or 'unknown'} -- Reason: {context.previous_review_result.reason or 'No reason provided'} -- Quality Score: {context.previous_review_result.quality_score or 0}/10 -- Unmet Criteria: {', '.join(context.previous_review_result.unmet_criteria or [])} -""" - - # Use Pydantic model directly - no need for getattr - success_criteria_str = ', '.join(context.task_step.success_criteria) if context.task_step and context.task_step.success_criteria else 'No criteria specified' - previous_results_str = ', '.join(context.previous_results) if context.previous_results else 'None' - improvements_str = str(context.improvements) if context.improvements else 'None' - available_connections_str = '\n'.join(f"- {conn}" for conn in connRefs) - - # Get user language from service - this is the correct way - user_language = service.user.language if service and service.user else 'en' - - # Get current workflow context for dynamic examples - workflow_context = service.getWorkflowContext() - current_round = workflow_context.get('currentRound', 0) - current_task = workflow_context.get('currentTask', 1) - - prompt = f""" -You are an action generation AI that creates specific actions to accomplish a task step with user-friendly messages. 
- -DOCUMENT REFERENCE TYPES: -- docItem: Reference to a single document -- docList: Reference to a group of documents -- round{{round_number}}_task{{task_number}}_action{{action_number}}_{{context}}: Reference to resulting document list from previous action - -USAGE GUIDE: -- Use docItem when you need a specific document: "docItem:doc_123:component_diagram.pdf" -- Use docList when you need all documents in a group: "docList:msg_456:AnalysisResults" -- Use round/task/action format when referencing outputs from previous actions: "round{current_round}_task{current_task}_action2_AnalysisResults" - -CRITICAL DOCUMENT REFERENCE RULES: -- ONLY use the exact labels listed in AVAILABLE DOCUMENTS below, or result labels from previous actions -- When generating multiple actions, you may only use as input documents those that are already present in AVAILABLE DOCUMENTS or produced by actions that come earlier in the list. Do NOT use as input any document label that will be produced by a later action. -- If AVAILABLE DOCUMENTS shows "NO DOCUMENTS AVAILABLE", you CANNOT create document extraction actions. Instead, create actions that generate new content or inform the user that documents are needed, if you miss something. 
- -CURRENT WORKFLOW CONTEXT: -- Current Round: {current_round} -- Current Task: {current_task} -- Use these values when creating resultLabel references - -TASK STEP: {context.task_step.objective if context.task_step else 'No task step specified'} (ID: {context.task_step.id if context.task_step else 'unknown'}) - -SUCCESS CRITERIA: {success_criteria_str} - -CONTEXT - Chat History: -{messageSummary} - -WORKFLOW CONTEXT - Previous Messages Summary: -The following summarizes key information from previous workflow interactions to provide context for continued workflows: -- Previous user inputs and their outcomes -- Key decisions and findings from earlier tasks -- Document processing results and insights -- User preferences and requirements established - -This context helps ensure your actions build upon previous work and maintain consistency with the overall workflow objectives. - -AVAILABLE METHODS AND ACTIONS (with signatures): -{available_methods_str} - -AVAILABLE CONNECTIONS: -{available_connections_str} - -AVAILABLE DOCUMENTS: -{available_documents_str} - -DOCUMENT REFERENCE EXAMPLES: -✅ CORRECT: Use exact references from AVAILABLE DOCUMENTS above or result labels from previous actions -- "docList:msg_456:diagram_analysis_results" (access all documents in a list) -- "docItem:doc_123:component_diagram.pdf" (access specific document) -- "round{current_round}_task{current_task}_action3_contextinfo" (document list from previous action) - -❌ INCORRECT: These will cause errors -- "msg_xxx:documents" (invalid format - missing docList/docItem prefix) -- "task_2_results" (not a valid reference - use exact references from AVAILABLE DOCUMENTS) -- Inventing document IDs not produces from a preceeding action - -PREVIOUS RESULTS: {previous_results_str} -IMPROVEMENTS NEEDED: {improvements_str} - -PREVIOUS TASK HANDOVER CONTEXT: -{context.previous_handover.workflowSummary if context.previous_handover and context.previous_handover.workflowSummary else 'No previous task handover 
available'} - -{retry_context} - -ACTION GENERATION PRINCIPLES: -- Create meaningful actions per task step -- Use comprehensive AI prompts for document processing -- Focus on business outcomes, not technical operations -- Combine related operations into single actions when possible -- Use the task's AI prompt if provided, or create a comprehensive one -- Each action should produce meaningful, usable outputs -- For document extraction, ensure prompts are specific and detailed -- Include validation steps in extraction prompts -- If this is a retry, learn from previous failures and improve the approach -- Address specific issues mentioned in previous review feedback -- When specifying expectedDocumentFormats, ensure AI prompts explicitly request pure data without markdown formatting -- Generate user-friendly messages for each action in the user's language ({user_language}) - -USER LANGUAGE: {user_language} - All user messages must be generated in this language. - -DOCUMENT ROUTING GUIDANCE: -- Each action should produce documents with a clear resultLabel for routing -- Use consistent naming: "round{current_round}_task{{task_id}}_action{{action_number}}_{{descriptive_label}}" -- Ensure document flow: Action A produces documents that Action B can consume -- Document labels should be descriptive of content, not just "results" or "output" -- Consider what subsequent actions will need and structure outputs accordingly - -INSTRUCTIONS: -- Generate actions to accomplish this task step using available documents, connections, and previous results -- Use docItem for single documents and docList for groups of documents as shown in AVAILABLE DOCUMENTS -- If AVAILABLE DOCUMENTS shows "NO DOCUMENTS AVAILABLE", you cannot create document extraction actions. Instead, create actions that generate new content or inform the user that documents are needed. 
-- Always pass documentList as a LIST of references (docItem and/or docList) - this list CANNOT be empty for document extraction actions -- For referencing documents from previous actions, use the format "round{{round_number}}_task{{task_number}}_action{{action_number}}_{{context}}" -- For resultLabel, use the format: "round{current_round}_task{{task_id}}_action{{action_number}}_{{short_label}}" where: - - {{round_number}} = the current round number ({current_round}) - - {{task_id}} = the current task's id ({current_task}) - - {{action_number}} = the sequence number of the action within the task (e.g., 1, 2, 3) - - {{short_label}} = a short, descriptive label for the output (e.g., "AnalysisResults") - Example: "round{current_round}_task{current_task}_action1_AnalysisResults" -- If this is a retry, ensure the new actions address the specific issues from previous attempts -- Follow the JSON structure below. All fields are required. - -REQUIRED JSON STRUCTURE: -{{ - "actions": [ - {{ - "method": "method_name", // Use only the method name (e.g., "document") - "action": "action_name", // Use only the action name (e.g., "extract") - "parameters": {{ - "documentList": ["docItem:doc_abc:round{current_round}_task{current_task}_action1_AnalysisResults", "round{current_round}_task{current_task}_action1_input"], - "aiPrompt": "Comprehensive AI prompt describing what to accomplish" - }}, - "resultLabel": "round{current_round}_task{current_task}_action2_AnalysisResults", - "expectedDocumentFormats": [ // OPTIONAL: Specify expected document formats when needed - {{ - "extension": ".txt", - "mimeType": "text/plain", - "description": "Structured data output" - }} - ], - "description": "What this action accomplishes (business outcome)", - "userMessage": "User-friendly message explaining what this action will do in the user's language" - }} - ] -}} - -FIELD REQUIREMENTS: -- "method": Must be from AVAILABLE METHODS -- "action": Must be valid for the method -- "parameters": 
Method-specific, must include documentList as a list if required by the signature -- "resultLabel": Must follow the format above (e.g., "round{current_round}_task{current_task}_action3_AnalysisResults") -- "expectedDocumentFormats": OPTIONAL - Only specify when you need to control output format - - Use when you need specific file types (e.g., CSV for data, JSON for structured output) - - Omit when format is flexible (e.g., folder queries with mixed file types) - - Each format should specify: extension, mimeType, description - - When using expectedDocumentFormats, ensure the aiPrompt explicitly requests pure data without markdown formatting -- "description": Clear summary of the business outcome -- "userMessage": User-friendly message explaining what the action will accomplish in the user's language - -EXAMPLES OF GOOD ACTIONS: - -1. Document analysis with specific output format and user message: -{{ - "method": "document", - "action": "extract", - "parameters": {{ - "documentList": ["docItem:doc_57520394-6b6d-41c2-b641-bab3fc6d7f4b:candidate_profile.txt"], - "aiPrompt": "Extract and analyze the candidate's qualifications, experience, skills, and suitability for the product designer position. Identify key strengths, relevant experience, technical skills, and any areas of concern. Provide a comprehensive assessment that can be used for evaluation." - }}, - "resultLabel": "round{current_round}_task{current_task}_action2_candidate_analysis", - "expectedDocumentFormats": [ - {{ - "extension": ".json", - "mimeType": "application/json", - "description": "Structured candidate analysis data" - }} - ], - "description": "Comprehensive analysis of candidate profile for evaluation", - "userMessage": "Ich analysiere das Kandidatenprofil und extrahiere alle wichtigen Informationen für die Bewertung." -}} - -2. 
Multi-document processing with user message: -{{ - "method": "document", - "action": "extract", - "parameters": {{ - "documentList": ["docList:msg_456:candidate_analysis_results"], - "aiPrompt": "Compare all candidate profiles and create an evaluation matrix. Rate each candidate on technical skills, experience level, cultural fit, portfolio quality, and communication skills. Provide clear rankings and recommendations for the product designer position." - }}, - "resultLabel": "round{current_round}_task{current_task}_action5_evaluation_matrix", - "description": "Create comprehensive evaluation matrix comparing all candidates", - "userMessage": "Ich vergleiche alle Kandidatenprofile und erstelle eine umfassende Bewertungsmatrix mit klaren Empfehlungen." -}} - -3. Data extraction with specific CSV format and user message: -{{ - "method": "document", - "action": "extract", - "parameters": {{ - "documentList": ["docItem:doc_abc:table_data.pdf"], - "aiPrompt": "Extract all table data and convert to structured CSV format with proper headers and data types. IMPORTANT: Deliver pure CSV data without any markdown formatting, code blocks, or additional text. Output only the CSV content with proper headers and data rows." - }}, - "resultLabel": "round{current_round}_task{current_task}_action2_structured_data", - "expectedDocumentFormats": [ - {{ - "extension": ".csv", - "mimeType": "text/csv", - "description": "Structured table data in CSV format" - }} - ], - "description": "Extract and structure table data for analysis", - "userMessage": "Ich extrahiere alle Tabellendaten und konvertiere sie in ein strukturiertes CSV-Format für die weitere Analyse." -}} - -4. 
Comprehensive summary report with user message: -{{ - "method": "document", - "action": "generateReport", - "parameters": {{ - "documentList": ["docList:msg_456:candidate_analysis_results"], - "title": "Comprehensive Candidate Evaluation Report" - }}, - "resultLabel": "round{current_round}_task{current_task}_action6_summary_report", - "description": "Generate a comprehensive, professional HTML report consolidating all candidate analyses and findings", - "userMessage": "Ich erstelle einen umfassenden, professionellen Bericht, der alle Kandidatenanalysen und Erkenntnisse zusammenfasst." -}} - -5. Correct chaining of actions within a task: -{{ - "actions": [ - {{ - "method": "document", - "action": "extract", - "parameters": {{ - "documentList": ["docItem:doc_abc:round{current_round}_task{current_task}_action1_file1.txt"], - "aiPrompt": "Extract data from file1." - }}, - "resultLabel": "round{current_round}_task{current_task}_action1_extracted_data", - "description": "Extract data from file1.", - "userMessage": "Ich extrahiere die Daten aus der Datei." - }}, - {{ - "method": "document", - "action": "generateReport", - "parameters": {{ - "documentList": ["round{current_round}_task{current_task}_action1_extracted_data"], - "title": "Report" - }}, - "resultLabel": "round{current_round}_task{current_task}_action2_report", - "description": "Generate report from extracted data.", - "userMessage": "Ich erstelle einen Bericht basierend auf den extrahierten Daten." - }} - ] -}} - -6. 
When no documents are available (NO DOCUMENTS AVAILABLE scenario): -{{ - "method": "document", - "action": "generateReport", - "parameters": {{ - "documentList": [], - "title": "Workflow Status Report" - }}, - "resultLabel": "round{current_round}_task{current_task}_action1_status_report", - "description": "Generate a status report informing the user that no documents are available for processing and requesting document upload or alternative input.", - "userMessage": "Ich erstelle einen Statusbericht, der Sie darüber informiert, dass keine Dokumente zur Verarbeitung verfügbar sind und um Dokumente oder alternative Eingaben bittet." -}} - -IMPORTANT NOTES: -- Respond with ONLY the JSON object. Do not include any explanatory text. -- Before creating any document extraction action, verify that AVAILABLE DOCUMENTS contains actual document references. -- If AVAILABLE DOCUMENTS shows "NO DOCUMENTS AVAILABLE", use example 6 above to create a status report action instead of document extraction. -- Always include a user-friendly userMessage for each action in the user's language ({user_language}). 
-- The examples above show German user messages as reference - adapt the language to match the USER LANGUAGE specified above.""" - - logging.debug(f"[ACTION PLAN PROMPT] Enhanced Document Context:\n{available_documents_str}\nUser Connections Section:\n{available_connections_str}\nAvailable Methods (detailed):\n{available_methods_str}") - - return prompt - -def createResultReviewPrompt(context: ReviewContext, service) -> str: - """Create enhanced prompt for result review with user-friendly messages and document context""" - # Build comprehensive action and result summary - action_summary = "" - for i, action in enumerate(context.task_actions or []): - action_summary += f"\nACTION {i+1}: {action.execMethod}.{action.execAction}\n" - action_summary += f" Status: {action.status}\n" - if action.error: - action_summary += f" Error: {action.error}\n" - if action.resultDocuments: - action_summary += f" Documents: {len(action.resultDocuments)} document(s)\n" - for doc in action.resultDocuments: - # Use Pydantic model properties directly - fileName = doc.fileName - fileSize = doc.fileSize - mimeType = doc.mimeType - - action_summary += f" - {fileName} ({fileSize} bytes, {mimeType})\n" - else: - action_summary += f" Documents: None\n" - - # Build result summary with SIMPLE DOCUMENT VALIDATION - result_summary = "" - document_validation_summary = "" - document_access_warnings = [] - - if context.action_results: - for i, result in enumerate(context.action_results): - result_summary += f"\nRESULT {i+1}:\n" - result_summary += f" Success: {result.success}\n" - if result.error: - result_summary += f" Error: {result.error}\n" - - if result.documents: - result_summary += f" Documents: {len(result.documents)} document(s)\n" - for doc in result.documents: - # Use correct ActionDocument attributes - doc_name = getattr(doc, 'documentName', 'Unknown') - doc_mime = getattr(doc, 'mimeType', 'Unknown') - doc_data = getattr(doc, 'documentData', None) - - result_summary += f" - {doc_name} 
({doc_mime})\n" - - # SIMPLE VALIDATION: Check if documents exist and have basic properties - validation_status = "✅ Valid" - if not doc_name or str(doc_name).strip() == "": - validation_status = "❌ Missing document name" - elif not doc_mime or str(doc_mime).strip() == "": - validation_status = "❌ Missing MIME type" - elif doc_data is None: - validation_status = "⚠️ No document data" - elif hasattr(doc_data, '__len__') and len(doc_data) == 0: - validation_status = "⚠️ Empty document data" - - document_validation_summary += f" - {doc_name}: {validation_status}\n" - else: - result_summary += f" Documents: None\n" - document_validation_summary += f" - No documents produced\n" - - # Get enhanced document context using the new method - document_context = service.getEnhancedDocumentContext() - - # Get user language from service - user_language = service.user.language if service and service.user else 'en' - - # Build warnings section (only for critical issues) - warnings_section = "" - if document_access_warnings: - warnings_section = f""" -⚠️ DOCUMENT VALIDATION ISSUES: -{chr(10).join(f"- {warning}" for warning in document_access_warnings)} -""" - - prompt = f""" -You are a result review AI that evaluates task execution results and provides feedback with user-friendly messages. - -TASK OBJECTIVE: {context.task_step.objective if context.task_step else 'No task objective specified'} -SUCCESS CRITERIA: {', '.join(context.task_step.success_criteria) if context.task_step and context.task_step.success_criteria else 'No success criteria specified'} - -EXECUTION SUMMARY: -{action_summary} - -RESULT SUMMARY: -{result_summary} - -{warnings_section} - -DOCUMENT VALIDATION SUMMARY: -{document_validation_summary if document_validation_summary else "No documents to validate"} - -DOCUMENT CONTEXT (Available Documents): -{document_context} - -PREVIOUS RESULTS: {', '.join(context.previous_results) if context.previous_results else 'None'} - -REVIEW INSTRUCTIONS: -1. 
Evaluate if the task step was completed successfully -2. Check if all success criteria were met -3. Assess the quality and completeness of outputs -4. Identify any missing or incomplete results -5. Provide specific improvement suggestions -6. Generate user-friendly messages explaining the results -7. Return a JSON object with the exact structure shown below - -DOCUMENT VALIDATION FOCUS: -- Check if the agreed result documents label is correct (matches expected format) -- Verify that documents are actually present and have basic properties -- Do NOT attempt to analyze document content deeply -- Focus on document existence and basic metadata validation - -REQUIRED JSON STRUCTURE: -{{ - "status": "success|retry|failed", - "reason": "Brief explanation of the status", - "improvements": ["improvement1", "improvement2"], - "quality_score": 8, // 1-10 scale - "missing_outputs": ["missing_output1", "missing_output2"], - "met_criteria": ["criteria1", "criteria2"], - "unmet_criteria": ["criteria3", "criteria4"], - "confidence": 0.85, // 0.0-1.0 confidence level in this assessment - "userMessage": "User-friendly message explaining the review results in the user's language" -}} - -FIELD REQUIREMENTS: -- "status": Overall task completion status - - "success": All criteria met, high-quality outputs - - "retry": Some criteria met, outputs need improvement and retry - - "failed": Most criteria unmet, significant issues -- "reason": Clear explanation of why this status was assigned -- "improvements": List of specific, actionable improvements -- "quality_score": 1-10 rating of output quality -- "missing_outputs": List of expected outputs that were not produced -- "met_criteria": List of success criteria that were fully met -- "unmet_criteria": List of success criteria that were not met -- "confidence": 0.0-1.0 confidence level in this assessment -- "userMessage": User-friendly explanation of results in the user's language - -EXAMPLES OF GOOD IMPROVEMENTS: -- "Increase AI prompt 
specificity for better data extraction" -- "Add validation steps to ensure output completeness" -- "Improve error handling for failed document processing" -- "Enhance document format specifications for better output quality" - -EXAMPLES OF GOOD MISSING OUTPUTS: -- "Structured analysis report in JSON format" -- "Comparison matrix of candidate profiles" -- "Data validation summary with quality metrics" -- "Professional business communication document" - -QUALITY SCORE GUIDELINES: -- 9-10: Exceptional quality, exceeds expectations -- 7-8: Good quality, meets all requirements -- 5-6: Acceptable quality, minor issues -- 3-4: Poor quality, significant issues -- 1-2: Very poor quality, major problems - -USER LANGUAGE: {user_language} - All user messages must be generated in this language. - -NOTE: Respond with ONLY the JSON object. Do not include any explanatory text.""" - - return prompt diff --git a/modules/chat/managerChat.py b/modules/chat/managerChat.py deleted file mode 100644 index 6be21739..00000000 --- a/modules/chat/managerChat.py +++ /dev/null @@ -1,118 +0,0 @@ -import logging -from typing import Dict, Any, List -from modules.interfaces.interfaceAppModel import User -from modules.interfaces.interfaceChatModel import ChatWorkflow, UserInputRequest, TaskStep, TaskAction, ActionResult, ReviewResult, TaskPlan, WorkflowResult, TaskContext -from modules.chat.serviceCenter import ServiceCenter -from modules.interfaces.interfaceChatObjects import ChatObjects -from .handling.handlingTasks import HandlingTasks, WorkflowStoppedException - -logger = logging.getLogger(__name__) - -# ===== STATE MANAGEMENT AND VALIDATION CLASSES ===== - -class ChatManager: - """Chat manager with improved AI integration and method handling""" - - def __init__(self, currentUser: User, chatInterface: ChatObjects): - self.currentUser = currentUser - self.chatInterface = chatInterface - self.service: ServiceCenter = None - self.workflow: ChatWorkflow = None - self.handlingTasks: HandlingTasks = 
None - - async def initialize(self, workflow: ChatWorkflow) -> None: - """Initialize chat manager with workflow""" - self.workflow = workflow - self.service = ServiceCenter(self.currentUser, self.workflow) - self.handlingTasks = HandlingTasks(self.chatInterface, self.service, self.workflow) - - async def executeUnifiedWorkflow(self, userInput: UserInputRequest, workflow: ChatWorkflow) -> WorkflowResult: - """Unified Workflow Execution""" - try: - logger.info(f"Starting unified workflow execution for workflow {workflow.id}") - logger.debug(f"User request: {userInput.prompt}") - - # Phase 1: High-Level Task Planning - logger.info("Phase 1: Generating task plan") - task_plan = await self.handlingTasks.generateTaskPlan(userInput.prompt, workflow) - if not task_plan or not task_plan.tasks: - raise Exception("No tasks generated in task plan.") - - # Phase 2-5: For each task, execute and get results - total_tasks = len(task_plan.tasks) - logger.info(f"Phase 2: Executing {total_tasks} tasks") - all_task_results = [] - previous_results = [] - for idx, task_step in enumerate(task_plan.tasks): - # Pass task index to executeTask method - current_task_index = idx + 1 - - logger.info(f"Task {idx+1}/{total_tasks}: {task_step.objective}") - - # Create proper context object for this task - task_context = TaskContext( - task_step=task_step, - workflow=workflow, - workflow_id=workflow.id, - available_documents=self.service.getAvailableDocuments(workflow), - available_connections=self.service.getConnectionReferenceList(), - previous_results=previous_results, - previous_handover=None, - improvements=[], - retry_count=0, - previous_action_results=[], - previous_review_result=None, - is_regeneration=False, - failure_patterns=[], - failed_actions=[], - successful_actions=[], - criteria_progress={ - 'met_criteria': set(), - 'unmet_criteria': set(), - 'attempt_history': [] - } - ) - - # Execute task (this handles action generation, execution, and review internally) - task_result = await 
self.handlingTasks.executeTask(task_step, workflow, task_context, current_task_index, total_tasks) - # Handover - handover_data = await self.handlingTasks.prepareTaskHandover(task_step, [], task_result, workflow) - # Collect results - all_task_results.append({ - 'task_step': task_step, - 'task_result': task_result, - 'handover_data': handover_data - }) - # Update previous results for next task - if task_result.success and task_result.feedback: - previous_results.append(task_result.feedback) - - # Final workflow result - workflow_result = WorkflowResult( - status="completed", - completed_tasks=len(all_task_results), - total_tasks=len(task_plan.tasks), - execution_time=0.0, # TODO: Calculate actual execution time - final_results_count=len(all_task_results) - ) - logger.info(f"Unified workflow execution completed successfully for workflow {workflow.id}") - return workflow_result - except WorkflowStoppedException: - logger.info(f"Workflow {workflow.id} was stopped by user") - return WorkflowResult( - status="stopped", - completed_tasks=0, - total_tasks=0, - execution_time=0.0, - final_results_count=0 - ) - except Exception as e: - logger.error(f"Error in executeUnifiedWorkflow: {str(e)}") - return WorkflowResult( - status="failed", - completed_tasks=0, - total_tasks=0, - execution_time=0.0, - final_results_count=0, - error=str(e) - ) diff --git a/modules/chat/serviceCenter.py b/modules/chat/serviceCenter.py deleted file mode 100644 index 9160a3ae..00000000 --- a/modules/chat/serviceCenter.py +++ /dev/null @@ -1,1190 +0,0 @@ -import logging -import importlib -import pkgutil -import inspect -import os -from typing import Dict, Any, List, Optional -from modules.interfaces.interfaceAppModel import User, UserConnection -from modules.interfaces.interfaceChatModel import ( - TaskStatus, ChatDocument, TaskItem, TaskAction, TaskResult, ChatStat, ChatLog, ChatMessage, ChatWorkflow, DocumentExchange, ExtractedContent -) -from modules.interfaces.interfaceAiCalls import AiCalls 
-from modules.interfaces.interfaceChatObjects import getInterface as getChatObjects -from modules.interfaces.interfaceChatModel import ActionResult -from modules.interfaces.interfaceComponentObjects import getInterface as getComponentObjects -from modules.interfaces.interfaceAppObjects import getInterface as getAppObjects -from modules.chat.documents.documentExtraction import DocumentExtraction -from modules.chat.documents.documentUtility import getFileExtension, getMimeTypeFromExtension, detectContentTypeFromData -from modules.chat.methodBase import MethodBase -from modules.shared.timezoneUtils import get_utc_timestamp -import uuid - -import asyncio - -logger = logging.getLogger(__name__) - -class ServiceCenter: - """Service center that provides access to all services and their functions""" - - def __init__(self, currentUser: User, workflow: ChatWorkflow): - # Core services - self.user = currentUser - self.workflow = workflow - self.tasks = workflow.tasks - self.statusEnums = TaskStatus - self.currentTask = None # Initialize current task as None - - # Initialize managers - self.interfaceChat = getChatObjects(currentUser) - self.interfaceComponent = getComponentObjects(currentUser) - self.interfaceApp = getAppObjects(currentUser) - self.interfaceAiCalls = AiCalls() - self.documentProcessor = DocumentExtraction(self) - - # Initialize methods catalog - self.methods = {} - # Discover additional methods - self._discoverMethods() - - def _discoverMethods(self): - """Dynamically discover all method classes and their actions in modules.methods package""" - try: - # Import the methods package - methodsPackage = importlib.import_module('modules.methods') - - # Discover all modules in the package - for _, name, isPkg in pkgutil.iter_modules(methodsPackage.__path__): - if not isPkg and name.startswith('method'): - try: - # Import the module - module = importlib.import_module(f'modules.methods.{name}') - - # Find all classes in the module that inherit from MethodBase - for 
itemName, item in inspect.getmembers(module): - if (inspect.isclass(item) and - issubclass(item, MethodBase) and - item != MethodBase): - # Instantiate the method - methodInstance = item(self) - - # Discover actions from public methods - actions = {} - for methodName, method in inspect.getmembers(type(methodInstance), predicate=inspect.iscoroutinefunction): - if not methodName.startswith('_'): - # Bind the method to the instance - bound_method = method.__get__(methodInstance, type(methodInstance)) - sig = inspect.signature(method) - params = {} - for paramName, param in sig.parameters.items(): - if paramName not in ['self']: - # Get parameter type - paramType = param.annotation if param.annotation != param.empty else Any - - # Get parameter description from docstring or default - paramDesc = None - if param.default != param.empty and hasattr(param.default, '__doc__'): - paramDesc = param.default.__doc__ - - params[paramName] = { - 'type': paramType, - 'required': param.default == param.empty, - 'description': paramDesc, - 'default': param.default if param.default != param.empty else None - } - - actions[methodName] = { - 'description': method.__doc__ or '', - 'parameters': params, - 'method': bound_method - } - - # Add method instance with discovered actions - self.methods[methodInstance.name] = { - 'instance': methodInstance, - 'description': methodInstance.description, - 'actions': actions - } - logger.info(f"Discovered method: {methodInstance.name} with {len(actions)} actions") - - except Exception as e: - logger.error(f"Error loading method module {name}: {str(e)}", exc_info=True) - - except Exception as e: - logger.error(f"Error discovering methods: {str(e)}") - - - - # ===== Functions for Prompts: Context ===== - - def getMethodsList(self) -> List[str]: - """Get list of available methods with their signatures in the required format""" - methodList = [] - for methodName, method in self.methods.items(): - methodInstance = method['instance'] - for actionName, 
action in method['actions'].items(): - # Use the new signature format from MethodBase - signature = methodInstance.getActionSignature(actionName) - if signature: - methodList.append(signature) - return methodList - - async def summarizeChat(self, messages: List[ChatMessage]) -> str: - """ - Summarize chat messages from last to first message with status="first" - - Args: - messages: List of chat messages to summarize - - Returns: - str: Summary of the chat in user's language - """ - try: - # Get messages from last to first, stopping at first message with status="first" - relevantMessages = [] - for msg in reversed(messages): - relevantMessages.append(msg) - if msg.status == "first": - break - - # Create prompt for AI - prompt = f"""You are an AI assistant providing a summary of a chat conversation. -Please respond in '{self.user.language}' language. - -Chat History: -{chr(10).join(f"- {msg.message}" for msg in reversed(relevantMessages))} - -Instructions: -1. Summarize the conversation's key points and outcomes -2. Be concise but informative -3. Use a professional but friendly tone -4. 
Focus on important decisions and next steps if any - -Please provide a comprehensive summary of this conversation.""" - - # Get summary using AI - return await self.callAiTextBasic(prompt) - - except Exception as e: - logger.error(f"Error summarizing chat: {str(e)}") - return f"Error summarizing chat: {str(e)}" - - # ===== Functions for Prompts + Actions: Document References generation and resolution ===== - - def getEnhancedDocumentContext(self) -> str: - """Get enhanced document context formatted for action planning prompts with proper docList and docItem references""" - try: - document_list = self.getDocumentReferenceList() - - # Build technical context string for AI action planning - context = "AVAILABLE DOCUMENTS:\n\n" - - # Process chat exchanges (current round) - if document_list["chat"]: - context += "CURRENT ROUND DOCUMENTS:\n" - for exchange in document_list["chat"]: - # Generate docList reference for the exchange (using message ID and label) - # Find the message that corresponds to this exchange - message_id = None - for message in self.workflow.messages: - if hasattr(message, 'documentsLabel') and message.documentsLabel == exchange.documentsLabel: - message_id = message.id - break - - if message_id: - doc_list_ref = f"docList:{message_id}:{exchange.documentsLabel}" - else: - # Fallback to label-only format if message ID not found - doc_list_ref = f"docList:{exchange.documentsLabel}" - - logger.debug(f"Using document label for action planning: {exchange.documentsLabel} (message_id: {message_id})") - context += f"- {doc_list_ref} contains:\n" - # Generate docItem references for each document in the list - for doc_ref in exchange.documents: - if doc_ref.startswith("docItem:"): - context += f" - {doc_ref}\n" - else: - # Convert to proper docItem format if needed - context += f" - docItem:{doc_ref}\n" - context += "\n" - - # Process history exchanges (previous rounds) - if document_list["history"]: - context += "WORKFLOW HISTORY DOCUMENTS:\n" - for exchange 
in document_list["history"]: - # Generate docList reference for the exchange (using message ID and label) - # Find the message that corresponds to this exchange - message_id = None - for message in self.workflow.messages: - if hasattr(message, 'documentsLabel') and message.documentsLabel == exchange.documentsLabel: - message_id = message.id - break - - if message_id: - doc_list_ref = f"docList:{message_id}:{exchange.documentsLabel}" - else: - # Fallback to label-only format if message ID not found - doc_list_ref = f"docList:{exchange.documentsLabel}" - - logger.debug(f"Using history document label for action planning: {exchange.documentsLabel} (message_id: {message_id})") - context += f"- {doc_list_ref} contains:\n" - # Generate docItem references for each document in the list - for doc_ref in exchange.documents: - if doc_ref.startswith("docItem:"): - context += f" - {doc_ref}\n" - else: - # Convert to proper docItem format if needed - context += f" - docItem:{doc_ref}\n" - context += "\n" - - if not document_list["chat"] and not document_list["history"]: - context += "NO DOCUMENTS AVAILABLE - This workflow has no documents to process.\n" - - return context - - except Exception as e: - logger.error(f"Error generating enhanced document context: {str(e)}") - return "NO DOCUMENTS AVAILABLE - Error generating document context." 
- - def getDocumentReferenceList(self) -> Dict[str, List[DocumentExchange]]: - """Get list of document exchanges with new labeling format, sorted by recency""" - # Collect all documents first and refresh their attributes - all_documents = [] - for message in self.workflow.messages: - if message.documents: - all_documents.extend(message.documents) - - # Refresh file attributes for all documents - if all_documents: - self._refreshDocumentFileAttributes(all_documents) - - chat_exchanges = [] - history_exchanges = [] - - # Process messages in reverse order; "first" marks boundary - in_current_round = True - for message in reversed(self.workflow.messages): - is_first = message.status == "first" if hasattr(message, 'status') else False - - # Build a DocumentExchange if message has documents - doc_exchange = None - if message.documents: - if message.actionId and message.documentsLabel: - # Validate that we use the same label as in the message - validated_label = self._validateDocumentLabelConsistency(message) - - # Use the message's actual documentsLabel - doc_refs = [] - for doc in message.documents: - doc_ref = self._getDocumentReferenceFromChatDocument(doc, message) - doc_refs.append(doc_ref) - - doc_exchange = DocumentExchange( - documentsLabel=validated_label, - documents=doc_refs - ) - else: - # Generate new labels for documents without explicit labels - doc_refs = [] - for doc in message.documents: - doc_ref = self._getDocumentReferenceFromChatDocument(doc, message) - doc_refs.append(doc_ref) - - if doc_refs: - # Create a label based on message context - context_prefix = self._generateWorkflowContextPrefix(message) - context_label = f"{context_prefix}_context" - - doc_exchange = DocumentExchange( - documentsLabel=context_label, - documents=doc_refs - ) - - # Append to appropriate container based on boundary - if doc_exchange: - if in_current_round: - chat_exchanges.append(doc_exchange) - else: - history_exchanges.append(doc_exchange) - - # Flip boundary after 
including the "first" message in chat - if in_current_round and is_first: - in_current_round = False - - # Sort by recency: most recent first, then current round, then earlier rounds - # Sort chat exchanges by message sequence number (most recent first) - chat_exchanges.sort(key=lambda x: self._getMessageSequenceForExchange(x), reverse=True) - # Sort history exchanges by message sequence number (most recent first) - history_exchanges.sort(key=lambda x: self._getMessageSequenceForExchange(x), reverse=True) - - return { - "chat": chat_exchanges, - "history": history_exchanges - } - - def _refreshDocumentFileAttributes(self, documents: List[ChatDocument]) -> None: - """Update file attributes (fileName, fileSize, mimeType) for documents""" - for doc in documents: - try: - file_item = self.interfaceComponent.getFile(doc.fileId) - if file_item: - doc.fileName = file_item.fileName - doc.fileSize = file_item.fileSize - doc.mimeType = file_item.mimeType - else: - logger.warning(f"File not found for document {doc.id}, fileId: {doc.fileId}") - except Exception as e: - logger.error(f"Error refreshing file attributes for document {doc.id}: {e}") - - def _generateWorkflowContextPrefix(self, message: ChatMessage) -> str: - """Generate workflow context prefix: round{num}_task{num}_action{num}""" - round_num = message.roundNumber if hasattr(message, 'roundNumber') else 1 - task_num = message.taskNumber if hasattr(message, 'taskNumber') else 0 - action_num = message.actionNumber if hasattr(message, 'actionNumber') else 0 - return f"round{round_num}_task{task_num}_action{action_num}" - - def _getDocumentReferenceFromChatDocument(self, document: ChatDocument, message: ChatMessage) -> str: - """Get document reference using document ID and filename.""" - try: - # Use document ID and filename for simple reference - return f"docItem:{document.id}:{document.fileName}" - except Exception as e: - logger.error(f"Critical error creating document reference for document {document.id}: {str(e)}") 
- # Re-raise the error to prevent workflow from continuing with invalid data - raise - - def _getMessageSequenceForExchange(self, exchange: DocumentExchange) -> int: - """Get message sequence number for sorting exchanges by recency""" - try: - # Extract message ID from the first document reference - if exchange.documents and len(exchange.documents) > 0: - first_doc_ref = exchange.documents[0] - if first_doc_ref.startswith("docItem:"): - # docItem::