From 82eb4f62b9fe545620d828489a6c1e4cf919af7d Mon Sep 17 00:00:00 2001
From: ValueOn AG
Date: Tue, 14 Oct 2025 01:13:25 +0200
Subject: [PATCH] Refactored AI Service components - ready for UI workflow testing

---
 env_dev.20251012_121418.backup                |   90 -
 env_dev.20251013_140140.backup                |   94 -
 env_int.20251012_121418.backup                |   90 -
 env_int.20251013_140140.backup                |   94 -
 env_prod.20251012_121418.backup               |   90 -
 env_prod.20251013_140140.backup               |   94 -
 modules/services/serviceAi/mainServiceAi.py   | 2382 +----------------
 modules/services/serviceAi/subCoreAi.py       |  596 +++++
 .../serviceAi/subDocumentGeneration.py        |  459 ++++
 .../serviceAi/subDocumentProcessing.py        | 1042 +++++++
 modules/services/serviceAi/subUtilities.py    |  316 +++
 modules/services/serviceAi/subWebResearch.py  |  384 +++
 test_extractor_formats.py                     |  117 -
 test_image_processing.py                      |   83 -
 test_multifile_processing.py                  |  263 --
 15 files changed, 2874 insertions(+), 3320 deletions(-)
 delete mode 100644 env_dev.20251012_121418.backup
 delete mode 100644 env_dev.20251013_140140.backup
 delete mode 100644 env_int.20251012_121418.backup
 delete mode 100644 env_int.20251013_140140.backup
 delete mode 100644 env_prod.20251012_121418.backup
 delete mode 100644 env_prod.20251013_140140.backup
 create mode 100644 modules/services/serviceAi/subCoreAi.py
 create mode 100644 modules/services/serviceAi/subDocumentGeneration.py
 create mode 100644 modules/services/serviceAi/subDocumentProcessing.py
 create mode 100644 modules/services/serviceAi/subUtilities.py
 create mode 100644 modules/services/serviceAi/subWebResearch.py
 delete mode 100644 test_extractor_formats.py
 delete mode 100644 test_image_processing.py
 delete mode 100644 test_multifile_processing.py

diff --git a/env_dev.20251012_121418.backup b/env_dev.20251012_121418.backup
deleted file mode 100644
index 9ebbb93b..00000000
--- a/env_dev.20251012_121418.backup
+++ /dev/null
@@ -1,90 +0,0 @@
-# Development Environment Configuration
-
-# System Configuration
-APP_ENV_TYPE = dev
-APP_ENV_LABEL = Development Instance Patrick
-APP_API_URL = http://localhost:8000
-APP_KEY_SYSVAR = D:/Athi/Local/Web/poweron/local/key.txt
-APP_INIT_PASS_ADMIN_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEeFFtRGtQeVUtcjlrU3dab1ZxUm9WSks0MlJVYUtERFlqUElHemZrOGNENk1tcmJNX3Vxc01UMDhlNU40VzZZRVBpUGNmT3podzZrOGhOeEJIUEt4eVlSWG5UYXA3d09DVXlLT21Kb1JYSUU9
-APP_INIT_PASS_EVENT_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpERzZjNm56WGVBdjJTeG5Udjd6OGQwUVotYXUzQjJ1YVNyVXVBa3NZVml3ODU0MVNkZjhWWmJwNUFkc19BcHlHMTU1Q3BRcHU0cDBoZkFlR2l6UEZQU3d2U3MtMDh5UDZteGFoQ0EyMUE1ckE9
-
-# PostgreSQL Storage (new)
-DB_APP_HOST=localhost
-DB_APP_DATABASE=poweron_app
-DB_APP_USER=poweron_dev
-DB_APP_PASSWORD_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEcUIxNEFfQ2xnS0RrSC1KNnUxTlVvTGZoMHgzaEI4Z3NlVzVROTVLak5Ubi1vaEZubFZaMTFKMGd6MXAxekN2d2NvMy1hRjg2UVhybktlcFA5anZ1WjFlQmZhcXdwaGhWdzRDc3ExeUhzWTg9
-DB_APP_PORT=5432
-
-# PostgreSQL Storage (new)
-DB_CHAT_HOST=localhost
-DB_CHAT_DATABASE=poweron_chat
-DB_CHAT_USER=poweron_dev
-DB_CHAT_PASSWORD_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpERFNzNVhoalpCR0QxYXAwdEpXWXVVOTdZdWtqWW5FNXFGcFl2amNYLWYwYl9STXltRlFxLWNzVWlMVnNYdXk0RklnRExFT0FaQjg2aGswNnhhSGhCN29KN2VEb2FlUV9NTlV3b0tLelplSVU9
-DB_CHAT_PORT=5432
-
-# PostgreSQL Storage (new)
-DB_MANAGEMENT_HOST=localhost
-DB_MANAGEMENT_DATABASE=poweron_management
-DB_MANAGEMENT_USER=poweron_dev
-DB_MANAGEMENT_PASSWORD_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEUldqSTVpUnFqdGhITDYzT3RScGlMYVdTMmZhOXdudDRCc3dhdllOd3l6MS1vWHY2MjVsTUF1Sk9saEJOSk9ONUlBZjQwb2c2T1gtWWJhcXFzVVVXd01xc0U0b0lJX0JyVDRxaDhNS01JcWs9
-DB_MANAGEMENT_PORT=5432
-
-# Security Configuration
-APP_JWT_KEY_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpERjlrSktmZHVuQnJ1VVJDdndLaUcxZGJsT2ZlUFRlcFdOZ001RnlzM2FhLWhRV2tjWWFhaWQwQ3hkcUFvbThMcndxSjFpYTdfRV9OZGhTcksxbXFTZWg5MDZvOHpCVXBHcDJYaHlJM0tyNWRZckZsVHpQcmxTZHJoZUs1M3lfU2ljRnJaTmNSQ0w0X085OXI0QW80M2xfQnJqZmZ6VEh3TUltX0xzeE42SGtZPQ==
-APP_TOKEN_EXPIRY=300
-
-# CORS Configuration
-APP_ALLOWED_ORIGINS=http://localhost:8080,https://playground.poweron-center.net
-
-# Logging configuration
-APP_LOGGING_LOG_LEVEL = DEBUG
-APP_LOGGING_LOG_DIR = D:/Athi/Local/Web/poweron/local/logs
-APP_LOGGING_FORMAT = %(asctime)s - %(levelname)s - %(name)s - %(message)s
-APP_LOGGING_DATE_FORMAT = %Y-%m-%d %H:%M:%S
-APP_LOGGING_CONSOLE_ENABLED = True
-APP_LOGGING_FILE_ENABLED = True
-APP_LOGGING_ROTATION_SIZE = 10485760
-APP_LOGGING_BACKUP_COUNT = 5
-
-# Service Redirects
-Service_MSFT_REDIRECT_URI = http://localhost:8000/api/msft/auth/callback
-Service_GOOGLE_REDIRECT_URI = http://localhost:8000/api/google/auth/callback
-
-# OpenAI configuration
-Connector_AiOpenai_API_URL = https://api.openai.com/v1/chat/completions
-Connector_AiOpenai_API_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEajBuZmtYTVdqLTBpQm9KZ2pCXzRCV3VhZzlYTEhKb1FqWXNrV3lyb25uZUN1WVVQUEY3dGYtejludV9MNGlKeVREanZGOGloV09mY2ttQ3k5SjBFOGFac2ZQTkNKNUZWVnRINVQyeWhsR2wyYnVrRDNzV2NqSHB0ajQ4UWtGeGZtbmR0Q3VvS0hDZlphVmpSc2Z6RG5nPT0=
-Connector_AiOpenai_MODEL_NAME = gpt-4o
-Connector_AiOpenai_TEMPERATURE = 0.2
-Connector_AiOpenai_MAX_TOKENS = 2000
-
-# Anthropic configuration
-Connector_AiAnthropic_API_URL = https://api.anthropic.com/v1/messages
-Connector_AiAnthropic_API_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpENmFBWG16STFQUVZxNzZZRzRLYTA4X3lRanF1VkF4cU45OExNMzlsQmdISGFxTUxud1dXODBKcFhMVG9KNjdWVnlTTFFROVc3NDlsdlNHLUJXeG41NDBHaXhHR0VHVWl5UW9RNkVWbmlhakRKVW5pM0R4VHk0LUw0TV9LdkljNHdBLXJua21NQkl2b3l4UkVkMGN1YjBrMmJEeWtMay1jbmxrYWJNbUV0aktCXzU1djR2d2RSQXZORTNwcG92ZUVvVGMtQzQzTTVncEZTRGRtZUFIZWQ0dz09
-Connector_AiAnthropic_MODEL_NAME = claude-3-5-sonnet-20241022
-Connector_AiAnthropic_TEMPERATURE = 0.2
-Connector_AiAnthropic_MAX_TOKENS = 2000
-
-# Perplexity AI configuration
-Connector_AiPerplexity_API_URL = https://api.perplexity.ai/chat/completions
-Connector_AiPerplexity_API_SECRET = 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
-Connector_AiPerplexity_MODEL_NAME = sonar
-Connector_AiPerplexity_TEMPERATURE = 0.2
-Connector_AiPerplexity_MAX_TOKENS = 2000
-
-# Agent Mail configuration
-Service_MSFT_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
-Service_MSFT_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEQk4xYnpmbnItUEU3dHU4eHB5dzVYay1WT012RTRLUWJDTlBILVY5dC1FX3VMNjZmLThrbDRFNWFSNGprY3RRTlpYNGlubVBpNnY3MjNJcGtzVk9PMzRacl9LUlM2RU5vTVVZWHJvaUhWSHVfc1pNR0pfQmI5SEprOG5KdlB1QnQ=
-Service_MSFT_TENANT_ID = common
-
-# Google Service configuration
-Service_GOOGLE_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
-Service_GOOGLE_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpETDJhbGVQMHlFQzNPVFI1ZzBMa3pNMGlQUHhaQm10eVl1bFlSeTBybzlTOWE2MURXQ0hkRlo0NlNGbHQxWEl1OVkxQnVKYlhhOXR1cUF4T3k0WDdscktkY1oyYllRTmdDTWpfbUdwWGtSd1JvNlYxeTBJdEtaaS1vYnItcW0yaFM=
-
-# Tavily Web Search configuration
-Connector_WebTavily_API_KEY_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEQTdnUHMwd2pIaXNtMmtCTFREd0pyQXRKb1F5eGtHSnkyOGZiUnlBOFc0b3Vzcndrc3ViRm1nMDJIOEZKYWxqdWNkZGh5N0Z4R0JlQmxXSG5pVnJUR2VYckZhMWNMZ1FNeXJ3enJLVlpiblhOZTNleUg3ZzZyUzRZanFSeDlVMkI=
-
-# Google Cloud Speech Services configuration
-Connector_GoogleSpeech_API_KEY_SECRET = 
DEV_ENC:Z0FBQUFBQm8xSUpETk5FWWM3Q0JKMzhIYTlyMkhuNjA4NlF4dk82U2NScHhTVGY3UG83NkhfX3RrcWVtWWcyLXRjU1dTT21zWEl6YWRMMUFndXpsUnJOeHh3QThsNDZKRXROTzdXRUdsT0JZajZJNVlfb0gtMXkwWm9DOERPVnpjU0pyUEZfOGJsUnprT3ltMVVhalUyUm9hMUFtZEtHUnJqOGZ4dEZjZm5SWVVTckVCWnY1UkdVSHVmUlgwbnAyc0xDQW84R3ViSko5OHVCVWZRUVNiaG1pVFB6X3EwS0FPd2dUYjhiSmRjcXh2WEZiXzI4SFZqT21tbDduUWRyVWdFZXpmcVM5ZDR0VWtzZnF5UER6cGwwS2JlLV9CSTZ0Z0IyQ1h0YW9TcmhRTXZEckp4bWhmTkt6UTNYMk4zVkpnbUJmaDIxZnoyR2dWTEYwTUFEV0w2eUdUUGpoZk9XRkt4RVF1Z1NPdUpBeTcyWV9PY1Ffd2s0ZEdVekxGekhoeEl4TmNqaXYtbUJuSVdycFducERWdWtZajZnX011Q2w4eE9VMTBqQ1ZxRmdScWhXY1E3WWhzX1JZcHhxam9FbDVPN3Q1MWtrMUZuTUg3LVFQVHp1T1hpQWNDMzEzekVJWk9ybl91YUVjSkFob1VaMi1ONEtuMnRSOEg1S3QybUMwbVZDejItajBLTjM2Zy1hNzZQMW5LLVVDVGdFWm5BZUxNeEFnUkZzU3dxV0lCUlc0LWo4b05GczVpOGZSV2ZxbFBwUml6OU5tYjdnTks3Y3hrVEZVTHlmc1NPdFh4WE5pWldEZklOQUxBbjBpMTlkX3FFQVJ6c2NSZGdzTThycE92VW82enZKamhiRGFnU25aZGlHZHhZd2lUUmhuTVptNjhoWVlJQkxIOEkzbzJNMjZCZFJyM25tdXBnQ2ZWaHV3b2p6UWJpdk9xUEhBc1dyTlNmeF9wbm5yYUhHV01UZnVXWDFlNzBkdXlWUWhvcmJpSmljbmE3LUpUZEg4VzRwZ2JVSjdYUm1sODViQXVxUzdGTmZFbVpiN2V1YW5XV3U4b2VRWmxldGVGVHZsSldoekhVLU9wZ2V0cGZIYkNqM2pXVGctQVAyUm4xTHhpd1VVLXFhcnVEV21Rby1hbTlqTl84TjVveHdYTExUVkhHQ0ltaTB2WXJnY1NQVE5PbWg3ejgySElYc1JSTlQ3NDlFUWR6STZVUjVqaXFRN200NF9LY1ljQ0R2UldlWUtKY1NQVnJ4QXRyYTBGSWVuenhyM0Z0cWtndTd1eG8xRzY5a2dNZ1hkQm5MV3BHVzA2N1QwUkd6WlRGYTZQOUhnVWQ2S0Y5U0s1dXFNVXh5Q2pLWVUxSUQ2MlR1ak52NmRIZ2hlYTk1SGZGWS1RV3hWVU9rR3d1Rk9MLS11REZXbzhqMHpsSm1HYW1jMUNLT29YOHZsRWNaLTVvOFpmT3l3MHVwaERTT0dNLWFjcGRYZ25qT2szTkVFUnRFR3JWYS1aNXFIRnMyalozTlQzNFF2NXJLVHVPVF9zdTF6ZjlkbzJ4RFc2ZENmNFFxZDZzTzhfMUl0bW96V0lPZkh1dXFYZlEteFBlSG84Si1FNS1TTi1OMkFnX2pOYW8xY3MxMVJnVC02MDUyaXZfMEVHWDQtVlRpcENmV0h3V0dCWEFRS2prQXdNRlQ5dnRFVHU0Q1dNTmh0SlBCaU55bFMydWM1TTFFLW96ODBnV3dNZHFZTWZhRURYSHlrdzF3RlRuWDBoQUhSOUJWemtRM3pxcDJFbGJoaTJ3ZktRTlJxbXltaHBoZXVJVDlxS3cxNWo2c0ZBV0NzaUstRWdsMW1xLXFkanZGYUFiU0tSLXFQa0tkcDFoMV9kak41ZjQ0R214UmtOR1ZBanRuemY3Mmw1SkZ5aDZodGIzT3N2aV85MW9kcld6c0g0ZDgtTWo3b3Y3VjJCRnR2U2tMVm9rUXNVRnVHbzZXVTZ6RmI2RkNmajBfMWVnODVFbnpkT0oyci15czJHU0p1cUowTGZJMzVnd3hIRjQyTVhKOGRkcFRKdVpyQ3Yzd01Jb1lSajFmV0paeEV0cjk1SmpmdWpDVFJMUmMtUFctOGhaTmlKQXNRVlVUNlhJemxudHZCR056SVlBb3NOTEYxRTRLaFlVd2d3TWtxVlB6ZEtQLTkxOGMyY3N0a2pYRFUweDBNaGhja2xSSklPOUZla1dKTWRNbG8tUGdSNEV5cW90OWlOZFlIUExBd3U2b2hyS1owbXVMM3p0Qm41cUtzWUxYNzB1N3JpUTNBSGdsT0NuamNTb1lIbXR4MG1sakNPVkxBUXRLVE1xX0YxWDhOcERIY1lTQVFqS01CaXZKNllFaXlIR0JsM1pKMmV1OUo3TGI1WkRaVnYxUTl1LTM0SU1qN1V1b0RCT0x0VHNLTmNLZnk1S0MxYnBBcm03WnVua0xqaEhGUzhOU253ZkppRzdudXBSVlMxeFVOSWxtZ1o2RVBSQUhEUEFuQ1hxSVZMME4yWUtaU3VyRGo3RkUyRUNjT0pNcE1BdE1ZRzdXVl8ydUtXZjdMdHdEVW4teHUtTi1HSGliLUxud21TX0NtcGVkRFBHNkZ1WTlNczR4OUJfUVluc1BoV09oWS1scUdsNnB5d1U5M1huX3k4QzAyNldtb2hybktYN2xKZ1NTNWFsaWwzV3pCRVhkaGR5eTNlV1d6ZzFfaFZTT0E4UjRpQ3pKdEZxUlJ6UFZXM3laUndyWEk2NlBXLUpoajVhZzVwQXpWVzUtVjVNZFBwdWdQa3AxZC1KdGdqNnhibjN4dmFYb2cxcEVwc1g5R09zRUdINUZtOE5QRjVUU0dpZy1QVl9odnFtVDNuWFZLSURtMXlSMlhRNTBWSVFJbEdOOWpfVWV0SmdRWDdlUXZZWE8xRUxDN1I0aEN6MHYwNzM1cmpJS0ZpMnBYWkxfb3FsbEV1VnlqWGxqdVJ6SHlwSjAzRlMycTBaQ295NXNnZERpUnJQcjhrUUd3bkI4bDVzRmxQblhkaFJPTTdISnVUQmhET3BOMTM4bjVvUEc2VmZhb2lrR1FyTUl2RWNEeGg0U0dsNnV6eU5zOUxiNDY5SXBxR0hBS00wOTgyWTFnWkQyaEtLVUloT3ZxZGh0RWVGRmJzenFsaUtfZENQM0JzdkVVeTdXR3hUSmJST1NBMUI1NkVFWncwNW5JZVVLX1p1RXdqVnFfQWpvQ08yQjZhN1NkTkpTSnUxOVRXZXE0WFEtZWxhZW1NNXYtQ2sya0VGLURmS01lMkctNVY3c2ZhN0ZGRFgwWHlabTFkeS1hcUZ1dDZ3cnpPQ3hha2IzVE11M0pqbklmU0diczBqTFBNZC1QZGp6VzNTSnJVSjJoWkJUQjVORG4tYUJmMEJtSUNUdVpEaGt6OTM3TjFOdVhXUHItZjRtZ25nU3NhZC1sVTVXNTRDTmxZbnlfeHNsdkpuMXhUYnE1MnpVQ0ZOclRWM1M4eHdXTzRXbFRZZVQtTS1iRVdXVWZMSGotcWg3MUxUYTFnSEE
tanBCRHlZRUNIdGdpUFhsYjdYUndCZnRITzhMZVJ1dHFoVlVNb0duVjlxd0U4OGRuQVV3MG90R0hiYW5MWkxWVklzbWFRNzBfSUNrdzc5bVdtTXg0dExEYnRCaDI3c1I4TWFwLXZKR0wxSjRZYjZIV3ZqZjNqTWhFT0RGSDVMc1A1UzY2bDBiMGFSUy1fNVRQRzRJWDVydUpqb1ZfSHNVbldVeUN2YlAxSW5WVDdxVzJ1WHpLeUdmb0xWMDNHN05oQzY3YnhvUUdhS2xaOHNidkVvbTZtSHFlblhOYmwyR3NQdVJDRUdxREhWdF9ZcXhwUWxHc2hyLW5vUGhIUVhJNUNhY0hFU0ptVnI0TFVhZDE1TFBBUEstSkRoZWJ5MHJhUmZrR1ZrRlFtRGpxS1pOMmFMQjBsdjluY3FiYUU4eGJVVXlZVEpuNWdHVVhJMGtwaTdZR2NDbXd2eHpOQ09SeTV6N1BaVUpsR1pQVDBZcElJUUt6VnVpQmxSYnE4Y1BCWV9IRWdVV0p3enBGVHItdnBGN3NyNWFBWmkySnByWThsbDliSlExQmp3LVlBaDIyZXp6UnR6cU9rTzJmTDBlSVpON0tiWllMdm1oME1zTFl2S2ZYYllhQlY2VHNZRGtHUDY4U1lIVExLZTU4VzZxSTZrZHl1ZTBDc0g4SjI4WGYyZHV1bm9wQ3R2Z09ld1ZmUkN5alJGeHZKSHl1bWhQVXpNMzdjblpLcUhfSm02Qlh5S1FVN3lIcHl0NnlRPT0= - -# Feature SyncDelta JIRA configuration -Feature_SyncDelta_JIRA_DELTA_TOKEN_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEbm0yRUJ6VUJKbUwyRW5kMnRaNW4wM2YxMkJUTXVXZUdmdVRCaUZIVHU2TTV2RWZLRmUtZkcwZE4yRUNlNDQ0aUJWYjNfdVg5YjV5c2JwMHhoUUYxZWdkeS11bXR0eGxRLWRVaVU3cUVQZWJlNDRtY1lWUDdqeDVFSlpXS0VFX21WajlRS3lHQjc0bS11akkybWV3QUFlR2hNWUNYLUdiRjZuN2dQODdDSExXWG1Dd2ZGclI2aUhlSWhETVZuY3hYdnhkb2c2LU1JTFBvWFpTNmZtMkNVOTZTejJwbDI2eGE0OS1xUlIwQnlCSmFxRFNCeVJNVzlOMDhTR1VUamx4RDRyV3p6Tk9qVHBrWWdySUM3TVRaYjd3N0JHMFhpdzFhZTNDLTFkRVQ2RVE4U19COXRhRWtNc0NVOHRqUS1CRDFpZ19xQmtFLU9YSDU3TXBZQXpVcld3PT0= diff --git a/env_dev.20251013_140140.backup b/env_dev.20251013_140140.backup deleted file mode 100644 index f8913497..00000000 --- a/env_dev.20251013_140140.backup +++ /dev/null @@ -1,94 +0,0 @@ -# Development Environment Configuration - -# System Configuration -APP_ENV_TYPE = dev -APP_ENV_LABEL = Development Instance Patrick -APP_API_URL = http://localhost:8000 -APP_KEY_SYSVAR = D:/Athi/Local/Web/poweron/local/key.txt -APP_INIT_PASS_ADMIN_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEeFFtRGtQeVUtcjlrU3dab1ZxUm9WSks0MlJVYUtERFlqUElHemZrOGNENk1tcmJNX3Vxc01UMDhlNU40VzZZRVBpUGNmT3podzZrOGhOeEJIUEt4eVlSWG5UYXA3d09DVXlLT21Kb1JYSUU9 -APP_INIT_PASS_EVENT_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpERzZjNm56WGVBdjJTeG5Udjd6OGQwUVotYXUzQjJ1YVNyVXVBa3NZVml3ODU0MVNkZjhWWmJwNUFkc19BcHlHMTU1Q3BRcHU0cDBoZkFlR2l6UEZQU3d2U3MtMDh5UDZteGFoQ0EyMUE1ckE9 - -# PostgreSQL Storage (new) -DB_APP_HOST=localhost -DB_APP_DATABASE=poweron_app -DB_APP_USER=poweron_dev -DB_APP_PASSWORD_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEcUIxNEFfQ2xnS0RrSC1KNnUxTlVvTGZoMHgzaEI4Z3NlVzVROTVLak5Ubi1vaEZubFZaMTFKMGd6MXAxekN2d2NvMy1hRjg2UVhybktlcFA5anZ1WjFlQmZhcXdwaGhWdzRDc3ExeUhzWTg9 -DB_APP_PORT=5432 - -# PostgreSQL Storage (new) -DB_CHAT_HOST=localhost -DB_CHAT_DATABASE=poweron_chat -DB_CHAT_USER=poweron_dev -DB_CHAT_PASSWORD_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpERFNzNVhoalpCR0QxYXAwdEpXWXVVOTdZdWtqWW5FNXFGcFl2amNYLWYwYl9STXltRlFxLWNzVWlMVnNYdXk0RklnRExFT0FaQjg2aGswNnhhSGhCN29KN2VEb2FlUV9NTlV3b0tLelplSVU9 -DB_CHAT_PORT=5432 - -# PostgreSQL Storage (new) -DB_MANAGEMENT_HOST=localhost -DB_MANAGEMENT_DATABASE=poweron_management -DB_MANAGEMENT_USER=poweron_dev -DB_MANAGEMENT_PASSWORD_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEUldqSTVpUnFqdGhITDYzT3RScGlMYVdTMmZhOXdudDRCc3dhdllOd3l6MS1vWHY2MjVsTUF1Sk9saEJOSk9ONUlBZjQwb2c2T1gtWWJhcXFzVVVXd01xc0U0b0lJX0JyVDRxaDhNS01JcWs9 -DB_MANAGEMENT_PORT=5432 - -# Security Configuration -APP_JWT_KEY_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpERjlrSktmZHVuQnJ1VVJDdndLaUcxZGJsT2ZlUFRlcFdOZ001RnlzM2FhLWhRV2tjWWFhaWQwQ3hkcUFvbThMcndxSjFpYTdfRV9OZGhTcksxbXFTZWg5MDZvOHpCVXBHcDJYaHlJM0tyNWRZckZsVHpQcmxTZHJoZUs1M3lfU2ljRnJaTmNSQ0w0X085OXI0QW80M2xfQnJqZmZ6VEh3TUltX0xzeE42SGtZPQ== -APP_TOKEN_EXPIRY=300 - -# CORS Configuration 
-APP_ALLOWED_ORIGINS=http://localhost:8080,https://playground.poweron-center.net
-
-# Logging configuration
-APP_LOGGING_LOG_LEVEL = DEBUG
-APP_LOGGING_LOG_DIR = D:/Athi/Local/Web/poweron/local/logs
-APP_LOGGING_FORMAT = %(asctime)s - %(levelname)s - %(name)s - %(message)s
-APP_LOGGING_DATE_FORMAT = %Y-%m-%d %H:%M:%S
-APP_LOGGING_CONSOLE_ENABLED = True
-APP_LOGGING_FILE_ENABLED = True
-APP_LOGGING_ROTATION_SIZE = 10485760
-APP_LOGGING_BACKUP_COUNT = 5
-
-# Service Redirects
-Service_MSFT_REDIRECT_URI = http://localhost:8000/api/msft/auth/callback
-Service_GOOGLE_REDIRECT_URI = http://localhost:8000/api/google/auth/callback
-
-# OpenAI configuration
-Connector_AiOpenai_API_URL = https://api.openai.com/v1/chat/completions
-Connector_AiOpenai_API_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEajBuZmtYTVdqLTBpQm9KZ2pCXzRCV3VhZzlYTEhKb1FqWXNrV3lyb25uZUN1WVVQUEY3dGYtejludV9MNGlKeVREanZGOGloV09mY2ttQ3k5SjBFOGFac2ZQTkNKNUZWVnRINVQyeWhsR2wyYnVrRDNzV2NqSHB0ajQ4UWtGeGZtbmR0Q3VvS0hDZlphVmpSc2Z6RG5nPT0=
-Connector_AiOpenai_MODEL_NAME = gpt-4o
-Connector_AiOpenai_TEMPERATURE = 0.2
-Connector_AiOpenai_MAX_TOKENS = 2000
-
-# Anthropic configuration
-Connector_AiAnthropic_API_URL = https://api.anthropic.com/v1/messages
-Connector_AiAnthropic_API_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpENmFBWG16STFQUVZxNzZZRzRLYTA4X3lRanF1VkF4cU45OExNMzlsQmdISGFxTUxud1dXODBKcFhMVG9KNjdWVnlTTFFROVc3NDlsdlNHLUJXeG41NDBHaXhHR0VHVWl5UW9RNkVWbmlhakRKVW5pM0R4VHk0LUw0TV9LdkljNHdBLXJua21NQkl2b3l4UkVkMGN1YjBrMmJEeWtMay1jbmxrYWJNbUV0aktCXzU1djR2d2RSQXZORTNwcG92ZUVvVGMtQzQzTTVncEZTRGRtZUFIZWQ0dz09
-Connector_AiAnthropic_MODEL_NAME = claude-3-5-sonnet-20241022
-Connector_AiAnthropic_TEMPERATURE = 0.2
-Connector_AiAnthropic_MAX_TOKENS = 2000
-
-# Perplexity AI configuration
-Connector_AiPerplexity_API_URL = https://api.perplexity.ai/chat/completions
-Connector_AiPerplexity_API_SECRET = DEV_ENC:Z0FBQUFBQm82Mzk2Q1MwZ0dNcUVBcUtuRDJIcTZkMXVvYnpjM3JEMzJiT1NKSHljX282ZDIyZTJYc09VSTdVNXAtOWU2UXp5S193NTk5dHJsWlFjRjhWektFOG1DVGY4ZUhHTXMzS0RPN1lNcF9nSlVWbW5BZ1hkZDVTejl6bVZNRFVvX29xamJidWRFMmtjQmkyRUQ2RUh6UTN1aWNPSUJBPT0=
-Connector_AiPerplexity_MODEL_NAME = sonar
-Connector_AiPerplexity_TEMPERATURE = 0.2
-Connector_AiPerplexity_MAX_TOKENS = 2000
-
-# Agent Mail configuration
-Service_MSFT_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
-Service_MSFT_CLIENT_SECRET = KDH8Q~H2OCtdvYy5yx6HOCYEbdnJCq90G21vTcPw
-Service_MSFT_TENANT_ID = common
-
-# Google Service configuration
-Service_GOOGLE_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
-Service_GOOGLE_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpETDJhbGVQMHlFQzNPVFI1ZzBMa3pNMGlQUHhaQm10eVl1bFlSeTBybzlTOWE2MURXQ0hkRlo0NlNGbHQxWEl1OVkxQnVKYlhhOXR1cUF4T3k0WDdscktkY1oyYllRTmdDTWpfbUdwWGtSd1JvNlYxeTBJdEtaaS1vYnItcW0yaFM=
-
-# Tavily Web Search configuration
-Connector_WebTavily_API_KEY_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEQTdnUHMwd2pIaXNtMmtCTFREd0pyQXRKb1F5eGtHSnkyOGZiUnlBOFc0b3Vzcndrc3ViRm1nMDJIOEZKYWxqdWNkZGh5N0Z4R0JlQmxXSG5pVnJUR2VYckZhMWNMZ1FNeXJ3enJLVlpiblhOZTNleUg3ZzZyUzRZanFSeDlVMkI=
-
-# Google Cloud Speech Services configuration
-Connector_GoogleSpeech_API_KEY_SECRET = 
DEV_ENC:Z0FBQUFBQm8xSUpETk5FWWM3Q0JKMzhIYTlyMkhuNjA4NlF4dk82U2NScHhTVGY3UG83NkhfX3RrcWVtWWcyLXRjU1dTT21zWEl6YWRMMUFndXpsUnJOeHh3QThsNDZKRXROTzdXRUdsT0JZajZJNVlfb0gtMXkwWm9DOERPVnpjU0pyUEZfOGJsUnprT3ltMVVhalUyUm9hMUFtZEtHUnJqOGZ4dEZjZm5SWVVTckVCWnY1UkdVSHVmUlgwbnAyc0xDQW84R3ViSko5OHVCVWZRUVNiaG1pVFB6X3EwS0FPd2dUYjhiSmRjcXh2WEZiXzI4SFZqT21tbDduUWRyVWdFZXpmcVM5ZDR0VWtzZnF5UER6cGwwS2JlLV9CSTZ0Z0IyQ1h0YW9TcmhRTXZEckp4bWhmTkt6UTNYMk4zVkpnbUJmaDIxZnoyR2dWTEYwTUFEV0w2eUdUUGpoZk9XRkt4RVF1Z1NPdUpBeTcyWV9PY1Ffd2s0ZEdVekxGekhoeEl4TmNqaXYtbUJuSVdycFducERWdWtZajZnX011Q2w4eE9VMTBqQ1ZxRmdScWhXY1E3WWhzX1JZcHhxam9FbDVPN3Q1MWtrMUZuTUg3LVFQVHp1T1hpQWNDMzEzekVJWk9ybl91YUVjSkFob1VaMi1ONEtuMnRSOEg1S3QybUMwbVZDejItajBLTjM2Zy1hNzZQMW5LLVVDVGdFWm5BZUxNeEFnUkZzU3dxV0lCUlc0LWo4b05GczVpOGZSV2ZxbFBwUml6OU5tYjdnTks3Y3hrVEZVTHlmc1NPdFh4WE5pWldEZklOQUxBbjBpMTlkX3FFQVJ6c2NSZGdzTThycE92VW82enZKamhiRGFnU25aZGlHZHhZd2lUUmhuTVptNjhoWVlJQkxIOEkzbzJNMjZCZFJyM25tdXBnQ2ZWaHV3b2p6UWJpdk9xUEhBc1dyTlNmeF9wbm5yYUhHV01UZnVXWDFlNzBkdXlWUWhvcmJpSmljbmE3LUpUZEg4VzRwZ2JVSjdYUm1sODViQXVxUzdGTmZFbVpiN2V1YW5XV3U4b2VRWmxldGVGVHZsSldoekhVLU9wZ2V0cGZIYkNqM2pXVGctQVAyUm4xTHhpd1VVLXFhcnVEV21Rby1hbTlqTl84TjVveHdYTExUVkhHQ0ltaTB2WXJnY1NQVE5PbWg3ejgySElYc1JSTlQ3NDlFUWR6STZVUjVqaXFRN200NF9LY1ljQ0R2UldlWUtKY1NQVnJ4QXRyYTBGSWVuenhyM0Z0cWtndTd1eG8xRzY5a2dNZ1hkQm5MV3BHVzA2N1QwUkd6WlRGYTZQOUhnVWQ2S0Y5U0s1dXFNVXh5Q2pLWVUxSUQ2MlR1ak52NmRIZ2hlYTk1SGZGWS1RV3hWVU9rR3d1Rk9MLS11REZXbzhqMHpsSm1HYW1jMUNLT29YOHZsRWNaLTVvOFpmT3l3MHVwaERTT0dNLWFjcGRYZ25qT2szTkVFUnRFR3JWYS1aNXFIRnMyalozTlQzNFF2NXJLVHVPVF9zdTF6ZjlkbzJ4RFc2ZENmNFFxZDZzTzhfMUl0bW96V0lPZkh1dXFYZlEteFBlSG84Si1FNS1TTi1OMkFnX2pOYW8xY3MxMVJnVC02MDUyaXZfMEVHWDQtVlRpcENmV0h3V0dCWEFRS2prQXdNRlQ5dnRFVHU0Q1dNTmh0SlBCaU55bFMydWM1TTFFLW96ODBnV3dNZHFZTWZhRURYSHlrdzF3RlRuWDBoQUhSOUJWemtRM3pxcDJFbGJoaTJ3ZktRTlJxbXltaHBoZXVJVDlxS3cxNWo2c0ZBV0NzaUstRWdsMW1xLXFkanZGYUFiU0tSLXFQa0tkcDFoMV9kak41ZjQ0R214UmtOR1ZBanRuemY3Mmw1SkZ5aDZodGIzT3N2aV85MW9kcld6c0g0ZDgtTWo3b3Y3VjJCRnR2U2tMVm9rUXNVRnVHbzZXVTZ6RmI2RkNmajBfMWVnODVFbnpkT0oyci15czJHU0p1cUowTGZJMzVnd3hIRjQyTVhKOGRkcFRKdVpyQ3Yzd01Jb1lSajFmV0paeEV0cjk1SmpmdWpDVFJMUmMtUFctOGhaTmlKQXNRVlVUNlhJemxudHZCR056SVlBb3NOTEYxRTRLaFlVd2d3TWtxVlB6ZEtQLTkxOGMyY3N0a2pYRFUweDBNaGhja2xSSklPOUZla1dKTWRNbG8tUGdSNEV5cW90OWlOZFlIUExBd3U2b2hyS1owbXVMM3p0Qm41cUtzWUxYNzB1N3JpUTNBSGdsT0NuamNTb1lIbXR4MG1sakNPVkxBUXRLVE1xX0YxWDhOcERIY1lTQVFqS01CaXZKNllFaXlIR0JsM1pKMmV1OUo3TGI1WkRaVnYxUTl1LTM0SU1qN1V1b0RCT0x0VHNLTmNLZnk1S0MxYnBBcm03WnVua0xqaEhGUzhOU253ZkppRzdudXBSVlMxeFVOSWxtZ1o2RVBSQUhEUEFuQ1hxSVZMME4yWUtaU3VyRGo3RkUyRUNjT0pNcE1BdE1ZRzdXVl8ydUtXZjdMdHdEVW4teHUtTi1HSGliLUxud21TX0NtcGVkRFBHNkZ1WTlNczR4OUJfUVluc1BoV09oWS1scUdsNnB5d1U5M1huX3k4QzAyNldtb2hybktYN2xKZ1NTNWFsaWwzV3pCRVhkaGR5eTNlV1d6ZzFfaFZTT0E4UjRpQ3pKdEZxUlJ6UFZXM3laUndyWEk2NlBXLUpoajVhZzVwQXpWVzUtVjVNZFBwdWdQa3AxZC1KdGdqNnhibjN4dmFYb2cxcEVwc1g5R09zRUdINUZtOE5QRjVUU0dpZy1QVl9odnFtVDNuWFZLSURtMXlSMlhRNTBWSVFJbEdOOWpfVWV0SmdRWDdlUXZZWE8xRUxDN1I0aEN6MHYwNzM1cmpJS0ZpMnBYWkxfb3FsbEV1VnlqWGxqdVJ6SHlwSjAzRlMycTBaQ295NXNnZERpUnJQcjhrUUd3bkI4bDVzRmxQblhkaFJPTTdISnVUQmhET3BOMTM4bjVvUEc2VmZhb2lrR1FyTUl2RWNEeGg0U0dsNnV6eU5zOUxiNDY5SXBxR0hBS00wOTgyWTFnWkQyaEtLVUloT3ZxZGh0RWVGRmJzenFsaUtfZENQM0JzdkVVeTdXR3hUSmJST1NBMUI1NkVFWncwNW5JZVVLX1p1RXdqVnFfQWpvQ08yQjZhN1NkTkpTSnUxOVRXZXE0WFEtZWxhZW1NNXYtQ2sya0VGLURmS01lMkctNVY3c2ZhN0ZGRFgwWHlabTFkeS1hcUZ1dDZ3cnpPQ3hha2IzVE11M0pqbklmU0diczBqTFBNZC1QZGp6VzNTSnJVSjJoWkJUQjVORG4tYUJmMEJtSUNUdVpEaGt6OTM3TjFOdVhXUHItZjRtZ25nU3NhZC1sVTVXNTRDTmxZbnlfeHNsdkpuMXhUYnE1MnpVQ0ZOclRWM1M4eHdXTzRXbFRZZVQtTS1iRVdXVWZMSGotcWg3MUxUYTFnSEE
tanBCRHlZRUNIdGdpUFhsYjdYUndCZnRITzhMZVJ1dHFoVlVNb0duVjlxd0U4OGRuQVV3MG90R0hiYW5MWkxWVklzbWFRNzBfSUNrdzc5bVdtTXg0dExEYnRCaDI3c1I4TWFwLXZKR0wxSjRZYjZIV3ZqZjNqTWhFT0RGSDVMc1A1UzY2bDBiMGFSUy1fNVRQRzRJWDVydUpqb1ZfSHNVbldVeUN2YlAxSW5WVDdxVzJ1WHpLeUdmb0xWMDNHN05oQzY3YnhvUUdhS2xaOHNidkVvbTZtSHFlblhOYmwyR3NQdVJDRUdxREhWdF9ZcXhwUWxHc2hyLW5vUGhIUVhJNUNhY0hFU0ptVnI0TFVhZDE1TFBBUEstSkRoZWJ5MHJhUmZrR1ZrRlFtRGpxS1pOMmFMQjBsdjluY3FiYUU4eGJVVXlZVEpuNWdHVVhJMGtwaTdZR2NDbXd2eHpOQ09SeTV6N1BaVUpsR1pQVDBZcElJUUt6VnVpQmxSYnE4Y1BCWV9IRWdVV0p3enBGVHItdnBGN3NyNWFBWmkySnByWThsbDliSlExQmp3LVlBaDIyZXp6UnR6cU9rTzJmTDBlSVpON0tiWllMdm1oME1zTFl2S2ZYYllhQlY2VHNZRGtHUDY4U1lIVExLZTU4VzZxSTZrZHl1ZTBDc0g4SjI4WGYyZHV1bm9wQ3R2Z09ld1ZmUkN5alJGeHZKSHl1bWhQVXpNMzdjblpLcUhfSm02Qlh5S1FVN3lIcHl0NnlRPT0= - -# Feature SyncDelta JIRA configuration -Feature_SyncDelta_JIRA_DELTA_TOKEN_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEbm0yRUJ6VUJKbUwyRW5kMnRaNW4wM2YxMkJUTXVXZUdmdVRCaUZIVHU2TTV2RWZLRmUtZkcwZE4yRUNlNDQ0aUJWYjNfdVg5YjV5c2JwMHhoUUYxZWdkeS11bXR0eGxRLWRVaVU3cUVQZWJlNDRtY1lWUDdqeDVFSlpXS0VFX21WajlRS3lHQjc0bS11akkybWV3QUFlR2hNWUNYLUdiRjZuN2dQODdDSExXWG1Dd2ZGclI2aUhlSWhETVZuY3hYdnhkb2c2LU1JTFBvWFpTNmZtMkNVOTZTejJwbDI2eGE0OS1xUlIwQnlCSmFxRFNCeVJNVzlOMDhTR1VUamx4RDRyV3p6Tk9qVHBrWWdySUM3TVRaYjd3N0JHMFhpdzFhZTNDLTFkRVQ2RVE4U19COXRhRWtNc0NVOHRqUS1CRDFpZ19xQmtFLU9YSDU3TXBZQXpVcld3PT0= - -# Debug Configuration -APP_DEBUG_CHAT_WORKFLOW_ENABLED = True -APP_DEBUG_CHAT_WORKFLOW_DIR = ./test-chat \ No newline at end of file diff --git a/env_int.20251012_121418.backup b/env_int.20251012_121418.backup deleted file mode 100644 index 4a0f3e39..00000000 --- a/env_int.20251012_121418.backup +++ /dev/null @@ -1,90 +0,0 @@ -# Integration Environment Configuration - -# System Configuration -APP_ENV_TYPE = int -APP_ENV_LABEL = Integration Instance -APP_API_URL = https://gateway-int.poweron-center.net -APP_KEY_SYSVAR = CONFIG_KEY -APP_INIT_PASS_ADMIN_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjWm41MWZ4TUZGaVlrX3pWZWNwakJsY3Facm0wLVZDd1VKeTFoZEVZQnItcEdUUnVJS1NXeDBpM2xKbGRsYmxOSmRhc29PZjJSU2txQjdLbUVrTTE1NEJjUXBHbV9NOVJWZUR3QlJkQnJvTEU9 -APP_INIT_PASS_EVENT_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjdmtrakgxa0djekZVNGtTZV8wM2I5UUpCZllveVBMWXROYk5yS3BiV3JEelJSM09VYTRONHpnY3VtMGxDRk5JTEZSRFhtcDZ0RVRmZ1RicTFhb3c5dVZRQ1o4SmlkLVpPTW5MMTU2eTQ0Vkk9 - -# PostgreSQL Storage (new) -DB_APP_HOST=gateway-int-server.postgres.database.azure.com -DB_APP_DATABASE=poweron_app -DB_APP_USER=heeshkdlby -DB_APP_PASSWORD_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjb2dka2pnN0tUbW1EU0w1Rk1jNERKQ0Z1U3JkVDhuZWZDM0g5M0kwVDE5VHdubkZna3gtZVAxTnl4MDdrR1c1ZXJ3ejJHYkZvcGUwbHJaajBGOWJob0EzRXVHc0JnZkJyNGhHZTZHOXBxd2c9 -DB_APP_PORT=5432 - -# PostgreSQL Storage (new) -DB_CHAT_HOST=gateway-int-server.postgres.database.azure.com -DB_CHAT_DATABASE=poweron_chat -DB_CHAT_USER=heeshkdlby -DB_CHAT_PASSWORD_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjczYzOUtTa21MMGJVTUQ5UmFfdWc3YlhCbWZOeXFaNEE1QzdJV3BLVjhnalBkLVVCMm5BZzdxdlFXQXc2RHYzLWtPSFZkZE1iWG9rQ1NkVWlpRnF5TURVbnl1cm9iYXlSMGYxd1BGYVc0VDA9 -DB_CHAT_PORT=5432 - -# PostgreSQL Storage (new) -DB_MANAGEMENT_HOST=gateway-int-server.postgres.database.azure.com -DB_MANAGEMENT_DATABASE=poweron_management -DB_MANAGEMENT_USER=heeshkdlby -DB_MANAGEMENT_PASSWORD_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjTnJKNlJMNmEwQ0Y5dVNrR3pkZk9SQXVvLTRTNW9lQ1g3TTE5cFhBNTd5UENqWW9qdWd3NWNseWhnUHJveDJyd1Z3X1czS3VuZnAwZHBXYVNQWlZsRy12ME42NndEVlR5X3ZPdFBNNmhLYm89 -DB_MANAGEMENT_PORT=5432 - -# Security Configuration -APP_JWT_KEY_SECRET = 
INT_ENC:Z0FBQUFBQm8xSVRjNUctb2RwU25iR3ZnanBOdHZhWUtIajZ1RnZzTEp4aDR0MktWRjNoeVBrY1Npd1R0VE9YVHp3M2w1cXRzbUxNaU82QUJvaDNFeVQyN05KblRWblBvbWtoT0VXbkNBbDQ5OHhwSUFnaDZGRG10Vmgtdm1YUkRsYUhFMzRVZURmSFlDTFIzVWg4MXNueDZyMGc5aVpFdWRxY3dkTExGM093ZTVUZVl5LUhGWnlRPQ==
-APP_TOKEN_EXPIRY=300
-
-# CORS Configuration
-APP_ALLOWED_ORIGINS=http://localhost:8080,https://playground.poweron-center.net,https://playground-int.poweron-center.net,http://localhost:5176,https://nyla.poweron-center.net, https://nyla-int.poweron-center.net
-
-# Logging configuration
-APP_LOGGING_LOG_LEVEL = DEBUG
-APP_LOGGING_LOG_DIR = /home/site/wwwroot/
-APP_LOGGING_FORMAT = %(asctime)s - %(levelname)s - %(name)s - %(message)s
-APP_LOGGING_DATE_FORMAT = %Y-%m-%d %H:%M:%S
-APP_LOGGING_CONSOLE_ENABLED = True
-APP_LOGGING_FILE_ENABLED = True
-APP_LOGGING_ROTATION_SIZE = 10485760
-APP_LOGGING_BACKUP_COUNT = 5
-
-# Service Redirects
-Service_MSFT_REDIRECT_URI = https://gateway-int.poweron-center.net/api/msft/auth/callback
-Service_GOOGLE_REDIRECT_URI = https://gateway-int.poweron-center.net/api/google/auth/callback
-
-# OpenAI configuration
-Connector_AiOpenai_API_URL = https://api.openai.com/v1/chat/completions
-Connector_AiOpenai_API_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjSDBNYkptSkQxTUotYVVpZVNZc0dxNGNwSEtkOEE0T3RZWjROTEhSRlRXdlZmQUxxZ0w3Y0xOV2JNV19LNF9yTUZiU1pUNG15U2VDUDdSVlI4VlpnR3JXVFFtcXBaTEZiaUtSclVFd0lCZG1rWVhra1dfWTVQOTBEYUU0MjByYVNEMTFmeXNOcmpUT216MmJKdlVPeW5nPT0=
-Connector_AiOpenai_MODEL_NAME = gpt-4o
-Connector_AiOpenai_TEMPERATURE = 0.2
-Connector_AiOpenai_MAX_TOKENS = 2000
-
-# Anthropic configuration
-Connector_AiAnthropic_API_URL = https://api.anthropic.com/v1/messages
-Connector_AiAnthropic_API_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjT1ZlRWVJdVZMT3ljSFJDcFdxRFBRVkZhS204NnN5RDBlQ0tpenhTM0FFVktuWW9mWHNwRWx2dHB0eDBSZ0JFQnZKWlp6c01pVGREWHd1eGpERnU0Q2xhaks1clQ1ZXVsdnd2ZzhpNXNQS1BhY3FjSkdkVEhHalNaRGR4emhpakZncnpDQUVxOHVXQzVUWmtQc0FsYmFwTF9TSG5FOUFtWk5Ick1NcHFvY2s1T1c2WXlRUFFJZnh6TWhuaVpMYmppcDR0QUx0a0R6RXlwbGRYb1R4dzJkUT09
-Connector_AiAnthropic_MODEL_NAME = claude-3-5-sonnet-20241022
-Connector_AiAnthropic_TEMPERATURE = 0.2
-Connector_AiAnthropic_MAX_TOKENS = 2000
-
-# Perplexity AI configuration
-Connector_AiPerplexity_API_URL = https://api.perplexity.ai/chat/completions
-Connector_AiPerplexity_API_SECRET = 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
-Connector_AiPerplexity_MODEL_NAME = sonar
-Connector_AiPerplexity_TEMPERATURE = 0.2
-Connector_AiPerplexity_MAX_TOKENS = 2000
-
-# Agent Mail configuration
-Service_MSFT_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
-Service_MSFT_CLIENT_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjNzB2M3ZjaE1SVE9ON2FKam9yVURxcHl1Ym5VNVUtS0MyWUpNVXVlaWpWS2U3VVd3em9vQl9lcnVYay03bS04YjNBbDZZNTB4eUtjT3ppQjJjY3dOT0FNLW9LeDhIUU5iaTNqNURUWE5La3kzaHNGcU9yNVI0YjhWZTZRRFktcTk=
-Service_MSFT_TENANT_ID = common
-
-# Google Service configuration
-Service_GOOGLE_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
-Service_GOOGLE_CLIENT_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjNThGeVRNd3hacThtRnE0bzlDa0JPUWQyaEd6QjlFckdsMGZjRlRfUks2bXV3aDdVRTF3LVRlZVY5WjVzSXV4ZGNnX002RDl3dkNYdGFzZkxVUW01My1wTHRCanVCLUozZEx4TlduQlB5MnpvNTR2SGlvbFl1YkhzTEtsSi1SOEo=
-
-# Tavily Web Search configuration
-Connector_WebTavily_API_KEY_SECRET = INT_ENC:Z0FBQUFBQm8xSVRkdkJMTDY0akhXNzZDWHVYSEt1cDZoOWEzSktneHZEV2JndTNmWlNSMV9KbFNIZmQzeVlrNE5qUEIwcUlBSGM1a0hOZ3J6djIyOVhnZzI3M1dIUkdicl9FVXF3RGktMmlEYmhnaHJfWTdGUkktSXVUSGdQMC1vSEV6VE8zR2F1SVk=
-
-# Google Cloud Speech Services configuration
-Connector_GoogleSpeech_API_KEY_SECRET = INT_ENC:Z0FBQUFBQm8xSVRkNmVXZ1pWcHcydTF2MXF0ZGJoWHBydF85bTczTktiaEJ3Wk1vMW1mZVhDSG1yd0ZxR2ZuSGJTX0N3MWptWXFJTkNTWjh1SUVVTXI4UDVzcGdLMkU5SHJ2TUpkRlRoRWdnSldtYjNTQkh4UDJHY2xmdTdZQ1ZiMTZZcGZxS3RzaHdjV3dtVkZUcEpJcWx0b2xuQVR6ZmpoVFZPY1hNMTV2SnhDaC1IZEh4UUpLTy1ILXA4RG1zamJTbUJ4X0t2M2NkdzJPbEJxSmFpRzV3WC0wZThoVzlxcmpHZ3ZkLVlVY3REZk1vV19WQ05BOWN6cnJ4MWNYYnNiQ0FQSUVnUlpfM3BhMnlsVlZUOG5wM3pzM1lSN1UzWlZKUXRLczlHbjI1LTFvSUJ4SlVXMy1BNk43bE5Hb0RfTTVlWk9oZnFIaVg0SW5pbm9EcXRTTzU1RFlYY3dTcnpKWWNyNjN5T1BGZ0FmX253cEFncmhvZVRuM05KYzhkOEhFMFJsc2NBSEwzZVZ1R0JMOGxsekVwUE55alZaRXFrdzNWWVNGWXNmbnhKeWhQSFo2VXBTUlRPeHdvdVdncEFuOWgydEtsSUFneUN6cGVaTnBSdjNCdVJseGJFdmlMc203UFhLVlYyTENkaGg2dVN6Z2xwT1ZmTmN5bVZGUkM3ZWcyVkt2ckFUVVd3WFFwYnJjNVRobEh2SkVJbXRwUUpEOFJKQ1NUc0Q4NHNqUFhPSDh5cTV6MEcwSDEwRUJCQ2JiTTJlOE5nd3pMMkJaQ1dVYjMwZVVWWnlETmp2dkZ3aXEtQ29WNkxZTFkzYUkxdTlQUU1OTnhWWU12YU9MVnJQa1d2ZjRtUlhneTNubEMxTmp1eUNPOThSMlB3Y1F0T2tCdFNsNFlKalZPV25yR2QycVBUb096RmZ1V0FTaGsxLV9FWDBmenBIOXpMdGpLcUc0TWRoY2hlMFhYTzlET1ZRekw0ZHNwUVBQdVJBX2h6Q2ZzWVZJWTNybTJiekp3WmhmWF9SUFBXQzlqUjctcVlHWWVMZWVQallzR0JGTVF0WmtnWlg1aTM1bFprNVExZXY5dnNvWF93UjhwbkJ3RzNXaVJ2d2RRU3JJVlBvaVh4eTlBRUtqWkJia3dJQVVBV2Nqdm9FUTRUVW1TaHp2ZUwxT0N2ZndxQ2Nka1RYWXF0LWxIWFE0dTFQcVhncFFPM0hFdUUtYlFnemx3WkF4bjA1aDFULUdrZlVZbEJtRGRCdjJyVkdJSXozd0I0dF9zbWhOeHFqRDA4T1NVaWR5cjBwSVgwbllPU294NjZGTnM1bFhIdGpNQUxFOENWd3FCbGpSRFRmRXotQnU0N2lCVEU5RGF6Qi10S2U2NGdadDlrRjZtVE5oZkw5ZWFjXzhCTmxXQzNFTFgxRXVYY3J3YkxnbnlBSm9PY3h4MlM1NVFQbVNDRW5Ld1dvNWMxSmdoTXJuaE1pT2VFeXYwWXBHZ29MZDVlN2lwUUNIeGNCVVdQVi1rRXdJMWFncUlPTXR0MmZVQ1l0d09mZTdzWGFBWUJMUFd3b0RSOU8zeER2UWpNdzAxS0ZJWnB5S3FJdU9wUDJnTTNwMWw3VFVqVXQ3ZGZnU1RkUktkc0NhUHJ0SGFxZ0lVWDEzYjNtU2JfMGNWM1Y0dHlCTzNESEdENC1jUWF5MVppRzR1QlBNSUJySjFfRi1ENHEwcmJ4S3hQUFpXVHA0TG9DZWdoUlo5WnNSM1lCZm1KbEs2ak1yUUU4Wk9JcVJGUkJwc0NvUkMyTjhoTWxtZmVQeDREZVRKZkhYN2duLVNTeGZzdFdBVnhEandJSXB5QjM0azF0ckI3Tk1wSzFhNGVOUVRrNjU0cG9JQ29pN09xOFkwR1lMTlktaGp4TktxdTVtTnNEcldsV2pEZm5nQWpJc2hxY0hjQnVSWUR5VVdaUXBHWUloTzFZUC1oNzJ4UjZ1dnpLcDJxWEZtQlNIMWkzZ0hXWXdKeC1iLXdZWVJhcU04VFlpMU5pd2ZIdTdCdkVWVFVBdmJuRk16bEFFQTh4alBrcTV2RzliT2hGdTVPOXlRMjFuZktiRTZIamQ1VFVqS0hRTXhxcU1mdkgyQ1NjQmZfcjl4c3NJd0RIeDVMZUFBbHJqdEJxWWl3aWdGUEQxR3ZnMkNGdVB4RUxkZi1xOVlFQXh1NjRfbkFEaEJ5TVZlUGFrWVhSTVRPeGxqNlJDTHNsRWRrei1pYjhnUmZrb3BvWkQ2QXBzYjFHNXZoWU1LSExhLWtlYlJTZlJmYUM5Y1Rhb1pkMVYyWTByM3NTS0VXMG1ybm1BTVN2QXRYaXZqX2dKSkZrajZSS2cyVlNOQnd5Y29zMlVyaWlNbTJEb3FuUFFtbWNTNVpZTktUenFZSl91cVFXZjRkQUZyYmtPczU2S1RKQ19ONGFOTHlwX2hOOEE1UHZEVjhnT0xxRjMxTEE4SHhRbmlmTkZwVXJBdlJDbU5oZS05SzI4QVhEWDZaN2ZiSlFwUGRXSnB5TE9MZV9ia3pYcmZVa1dicG5FMHRXUFZXMWJQVDAwOEdDQzJmZEl0ZDhUOEFpZXZWWXl5Q2xwSmFienNCMldlb2NKb2ZRYV9KbUdHRzNUcjU1VUFhMzk1a2J6dDVuNTl6NTdpM0hGa3k0UWVtbF9pdDVsQVp2cndDLUU5dnNYOF9CLS0ySXhBSFdCSnpqV010bllBb3U0cEZZYVF5R2tSNFM5NlRhdS1fb1NqbDBKMkw0V2N0VEZhNExtQlR3ckZ3cVlCeHVXdXJ6X0s4cEtsaG5rVUxCN2RRbHQxTmcyVFBqYUxyOHJzeFBXVUJaRHpXbUoxdHZzMFBzQk1UTUFvX1pGNFNMNDFvZWdTdEUtMUNKMXNIeVlvQk1CeEdpZVdmN0tsSDVZZHJXSGt5c2o2MHdwSTZIMVBhRzM1eU43Q2FtcVNidExxczNJeUx5U2RuUG5EeHpCTlg2SV9WNk1ET3BRNXFuc0pNWlVvZUYtY21oRGtJSmwxQ09QbHBUV3BuS3B5NE9RVkhfellqZjJUQ0diSV94QlhQWmdaaC1TRWxsMUVWSXB0aE1McFZDZDNwQUVKZ2t5cXRTXzlRZVJwN0pZSnJSV21XMlh0TzFRVEl0c2I4QjBxOGRCYkNxek04a011X1lrb2poQ3h2LUhKTGJiUlhneHp5QWFBcE5nMElkNTVzM3JGOWtUQ19wNVBTaVVHUHFDNFJnNXJaWDNBSkMwbi1WbTdtSnFySkhNQl9ZQjZrR2xDcXhTRExhMmNHcGlyWjR3ZU9SSjRZd1l4ZjVPeHNiYk53SW5SYnZPTzNkd1lnZmFseV9tQ3BxM3lNYVBHT0J0elJnMTByZ3VHemxta0tVQzZZRllmQ2VLZ1ZCNDhUUTc3LWNCZXBMekFwWW1fQkQ1NktzNGFMYUdYTU0xbXprY1FONUNlUHNMY3h2NFJMMmhNa3VNdzF4TVFWQk9odnJUMjFJMVd3Z2N6Sm
s5aEM2SWlWZFViZ0JWTEpUWWM5NmIzOS1oQmRqdkt1NUUycFlVcUxERUZGbnZqTUxIYnJmMDBHZDEzbnJsWEEzSUo3UmNPUDg1dnRUU1FzcWtjTWZwUG9zM0JTY3RqMDdST2UxcXFTM0d0bGkwdFhnMk5LaUlxNWx3V1pLaVlLUFJXZzBzVl9Ia1V1OHdYUEFWOU50UndycGtCdzM0Q0NQamp2VTNqbFBLaGhsbUk5dUI5MjU5OHVySk1oY0drUWtXUloyVVRvOWJmbUVYRzFVeWNQczh2NXJCeVppRlZiWDNJaDhOSmRmX2lURTNVS3NXQXFZT1QtUmdvMWJoVWYxU3lqUUJhbzEyX3I3TXhwbm9wc1FoQ1ZUTlNBRjMyQTBTY2tzbHZ3RFUtTjVxQ0o1QXRTVks2WENwMGZCRGstNU1jN3FhUFJCQThyaFhhMVRsbnlSRXNGRmt3Yk01X21ldmV3bTItWm1JaGpZQWZROEFtT1d1UUtPQlhYVVFqT2NxLUxQenJHX3JfMEdscDRiMXcyZ1ZmU3NFMzVoelZJaDlvT0ZoRGQ2bmtlM0M5ZHlCd2ZMbnRZRkZUWHVBUEx4czNfTmtMckh5eXZrZFBzOEItOGRYOEhsMzBhZ0xlOWFjZzgteVBsdnpPT1pYdUxnbFNXYnhKaVB6QUxVdUJCOFpvU2x2c1FHZV94MDBOVWJhYkxISkswc0U5UmdPWFJLXzZNYklHTjN1QzRKaldKdEVHb0pOU284N3c2LXZGMGVleEZ5NGZ6OGV1dm1tM0J0aTQ3VFlNOEJrdEh3PT0= - -# Feature SyncDelta JIRA configuration -Feature_SyncDelta_JIRA_DELTA_TOKEN_SECRET = INT_ENC:Z0FBQUFBQm8xSVRkTUNsWm4wX0p6eXFDZmJ4dFdHNEs1MV9MUzdrb3RzeC1jVWVYZ0REWHRyZkFiaGZLcUQtTXFBZzZkNzRmQ0gxbEhGbUNlVVFfR1JEQTc0aldkZkgyWnBOcjdlUlZxR0tDTEdKRExULXAyUEtsVmNTMkRKU1BJNnFiM0hlMXo4YndMcHlRMExtZDQ3Zm9vNFhMcEZCcHpBPT0= diff --git a/env_int.20251013_140140.backup b/env_int.20251013_140140.backup deleted file mode 100644 index 28188ffb..00000000 --- a/env_int.20251013_140140.backup +++ /dev/null @@ -1,94 +0,0 @@ -# Integration Environment Configuration - -# System Configuration -APP_ENV_TYPE = int -APP_ENV_LABEL = Integration Instance -APP_API_URL = https://gateway-int.poweron-center.net -APP_KEY_SYSVAR = CONFIG_KEY -APP_INIT_PASS_ADMIN_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjWm41MWZ4TUZGaVlrX3pWZWNwakJsY3Facm0wLVZDd1VKeTFoZEVZQnItcEdUUnVJS1NXeDBpM2xKbGRsYmxOSmRhc29PZjJSU2txQjdLbUVrTTE1NEJjUXBHbV9NOVJWZUR3QlJkQnJvTEU9 -APP_INIT_PASS_EVENT_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjdmtrakgxa0djekZVNGtTZV8wM2I5UUpCZllveVBMWXROYk5yS3BiV3JEelJSM09VYTRONHpnY3VtMGxDRk5JTEZSRFhtcDZ0RVRmZ1RicTFhb3c5dVZRQ1o4SmlkLVpPTW5MMTU2eTQ0Vkk9 - -# PostgreSQL Storage (new) -DB_APP_HOST=gateway-int-server.postgres.database.azure.com -DB_APP_DATABASE=poweron_app -DB_APP_USER=heeshkdlby -DB_APP_PASSWORD_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjb2dka2pnN0tUbW1EU0w1Rk1jNERKQ0Z1U3JkVDhuZWZDM0g5M0kwVDE5VHdubkZna3gtZVAxTnl4MDdrR1c1ZXJ3ejJHYkZvcGUwbHJaajBGOWJob0EzRXVHc0JnZkJyNGhHZTZHOXBxd2c9 -DB_APP_PORT=5432 - -# PostgreSQL Storage (new) -DB_CHAT_HOST=gateway-int-server.postgres.database.azure.com -DB_CHAT_DATABASE=poweron_chat -DB_CHAT_USER=heeshkdlby -DB_CHAT_PASSWORD_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjczYzOUtTa21MMGJVTUQ5UmFfdWc3YlhCbWZOeXFaNEE1QzdJV3BLVjhnalBkLVVCMm5BZzdxdlFXQXc2RHYzLWtPSFZkZE1iWG9rQ1NkVWlpRnF5TURVbnl1cm9iYXlSMGYxd1BGYVc0VDA9 -DB_CHAT_PORT=5432 - -# PostgreSQL Storage (new) -DB_MANAGEMENT_HOST=gateway-int-server.postgres.database.azure.com -DB_MANAGEMENT_DATABASE=poweron_management -DB_MANAGEMENT_USER=heeshkdlby -DB_MANAGEMENT_PASSWORD_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjTnJKNlJMNmEwQ0Y5dVNrR3pkZk9SQXVvLTRTNW9lQ1g3TTE5cFhBNTd5UENqWW9qdWd3NWNseWhnUHJveDJyd1Z3X1czS3VuZnAwZHBXYVNQWlZsRy12ME42NndEVlR5X3ZPdFBNNmhLYm89 -DB_MANAGEMENT_PORT=5432 - -# Security Configuration -APP_JWT_KEY_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjNUctb2RwU25iR3ZnanBOdHZhWUtIajZ1RnZzTEp4aDR0MktWRjNoeVBrY1Npd1R0VE9YVHp3M2w1cXRzbUxNaU82QUJvaDNFeVQyN05KblRWblBvbWtoT0VXbkNBbDQ5OHhwSUFnaDZGRG10Vmgtdm1YUkRsYUhFMzRVZURmSFlDTFIzVWg4MXNueDZyMGc5aVpFdWRxY3dkTExGM093ZTVUZVl5LUhGWnlRPQ== -APP_TOKEN_EXPIRY=300 - -# CORS Configuration 
-APP_ALLOWED_ORIGINS=http://localhost:8080,https://playground.poweron-center.net,https://playground-int.poweron-center.net,http://localhost:5176,https://nyla.poweron-center.net, https://nyla-int.poweron-center.net
-
-# Logging configuration
-APP_LOGGING_LOG_LEVEL = DEBUG
-APP_LOGGING_LOG_DIR = /home/site/wwwroot/
-APP_LOGGING_FORMAT = %(asctime)s - %(levelname)s - %(name)s - %(message)s
-APP_LOGGING_DATE_FORMAT = %Y-%m-%d %H:%M:%S
-APP_LOGGING_CONSOLE_ENABLED = True
-APP_LOGGING_FILE_ENABLED = True
-APP_LOGGING_ROTATION_SIZE = 10485760
-APP_LOGGING_BACKUP_COUNT = 5
-
-# Service Redirects
-Service_MSFT_REDIRECT_URI = https://gateway-int.poweron-center.net/api/msft/auth/callback
-Service_GOOGLE_REDIRECT_URI = https://gateway-int.poweron-center.net/api/google/auth/callback
-
-# OpenAI configuration
-Connector_AiOpenai_API_URL = https://api.openai.com/v1/chat/completions
-Connector_AiOpenai_API_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjSDBNYkptSkQxTUotYVVpZVNZc0dxNGNwSEtkOEE0T3RZWjROTEhSRlRXdlZmQUxxZ0w3Y0xOV2JNV19LNF9yTUZiU1pUNG15U2VDUDdSVlI4VlpnR3JXVFFtcXBaTEZiaUtSclVFd0lCZG1rWVhra1dfWTVQOTBEYUU0MjByYVNEMTFmeXNOcmpUT216MmJKdlVPeW5nPT0=
-Connector_AiOpenai_MODEL_NAME = gpt-4o
-Connector_AiOpenai_TEMPERATURE = 0.2
-Connector_AiOpenai_MAX_TOKENS = 2000
-
-# Anthropic configuration
-Connector_AiAnthropic_API_URL = https://api.anthropic.com/v1/messages
-Connector_AiAnthropic_API_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjT1ZlRWVJdVZMT3ljSFJDcFdxRFBRVkZhS204NnN5RDBlQ0tpenhTM0FFVktuWW9mWHNwRWx2dHB0eDBSZ0JFQnZKWlp6c01pVGREWHd1eGpERnU0Q2xhaks1clQ1ZXVsdnd2ZzhpNXNQS1BhY3FjSkdkVEhHalNaRGR4emhpakZncnpDQUVxOHVXQzVUWmtQc0FsYmFwTF9TSG5FOUFtWk5Ick1NcHFvY2s1T1c2WXlRUFFJZnh6TWhuaVpMYmppcDR0QUx0a0R6RXlwbGRYb1R4dzJkUT09
-Connector_AiAnthropic_MODEL_NAME = claude-3-5-sonnet-20241022
-Connector_AiAnthropic_TEMPERATURE = 0.2
-Connector_AiAnthropic_MAX_TOKENS = 2000
-
-# Perplexity AI configuration
-Connector_AiPerplexity_API_URL = https://api.perplexity.ai/chat/completions
-Connector_AiPerplexity_API_SECRET = INT_ENC:Z0FBQUFBQm82Mzk2UWZJdUFhSW8yc3RKc0tKRXphd0xWMkZOVlFpSGZ4SGhFWnk0cTF5VjlKQVZjdS1QSWdkS0pUSWw4OFU5MjUxdTVQel9aeWVIZTZ5TXRuVmFkZG0zWEdTOGdHMHpsTzI0TGlWYURKU1Q0VVpKTlhxUk5FTmN6SUJScDZ3ZldIaUJZcWpaQVRiSEpyQm9tRTNDWk9KTnZBPT0=
-Connector_AiPerplexity_MODEL_NAME = sonar
-Connector_AiPerplexity_TEMPERATURE = 0.2
-Connector_AiPerplexity_MAX_TOKENS = 2000
-
-# Agent Mail configuration
-Service_MSFT_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
-Service_MSFT_CLIENT_SECRET = KDH8Q~H2OCtdvYy5yx6HOCYEbdnJCq90G21vTcPw
-Service_MSFT_TENANT_ID = common
-
-# Google Service configuration
-Service_GOOGLE_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
-Service_GOOGLE_CLIENT_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjNThGeVRNd3hacThtRnE0bzlDa0JPUWQyaEd6QjlFckdsMGZjRlRfUks2bXV3aDdVRTF3LVRlZVY5WjVzSXV4ZGNnX002RDl3dkNYdGFzZkxVUW01My1wTHRCanVCLUozZEx4TlduQlB5MnpvNTR2SGlvbFl1YkhzTEtsSi1SOEo=
-
-# Tavily Web Search configuration
-Connector_WebTavily_API_KEY_SECRET = INT_ENC:Z0FBQUFBQm8xSVRkdkJMTDY0akhXNzZDWHVYSEt1cDZoOWEzSktneHZEV2JndTNmWlNSMV9KbFNIZmQzeVlrNE5qUEIwcUlBSGM1a0hOZ3J6djIyOVhnZzI3M1dIUkdicl9FVXF3RGktMmlEYmhnaHJfWTdGUkktSXVUSGdQMC1vSEV6VE8zR2F1SVk=
-
-# Google Cloud Speech Services configuration
-Connector_GoogleSpeech_API_KEY_SECRET = 
INT_ENC:Z0FBQUFBQm8xSVRkNmVXZ1pWcHcydTF2MXF0ZGJoWHBydF85bTczTktiaEJ3Wk1vMW1mZVhDSG1yd0ZxR2ZuSGJTX0N3MWptWXFJTkNTWjh1SUVVTXI4UDVzcGdLMkU5SHJ2TUpkRlRoRWdnSldtYjNTQkh4UDJHY2xmdTdZQ1ZiMTZZcGZxS3RzaHdjV3dtVkZUcEpJcWx0b2xuQVR6ZmpoVFZPY1hNMTV2SnhDaC1IZEh4UUpLTy1ILXA4RG1zamJTbUJ4X0t2M2NkdzJPbEJxSmFpRzV3WC0wZThoVzlxcmpHZ3ZkLVlVY3REZk1vV19WQ05BOWN6cnJ4MWNYYnNiQ0FQSUVnUlpfM3BhMnlsVlZUOG5wM3pzM1lSN1UzWlZKUXRLczlHbjI1LTFvSUJ4SlVXMy1BNk43bE5Hb0RfTTVlWk9oZnFIaVg0SW5pbm9EcXRTTzU1RFlYY3dTcnpKWWNyNjN5T1BGZ0FmX253cEFncmhvZVRuM05KYzhkOEhFMFJsc2NBSEwzZVZ1R0JMOGxsekVwUE55alZaRXFrdzNWWVNGWXNmbnhKeWhQSFo2VXBTUlRPeHdvdVdncEFuOWgydEtsSUFneUN6cGVaTnBSdjNCdVJseGJFdmlMc203UFhLVlYyTENkaGg2dVN6Z2xwT1ZmTmN5bVZGUkM3ZWcyVkt2ckFUVVd3WFFwYnJjNVRobEh2SkVJbXRwUUpEOFJKQ1NUc0Q4NHNqUFhPSDh5cTV6MEcwSDEwRUJCQ2JiTTJlOE5nd3pMMkJaQ1dVYjMwZVVWWnlETmp2dkZ3aXEtQ29WNkxZTFkzYUkxdTlQUU1OTnhWWU12YU9MVnJQa1d2ZjRtUlhneTNubEMxTmp1eUNPOThSMlB3Y1F0T2tCdFNsNFlKalZPV25yR2QycVBUb096RmZ1V0FTaGsxLV9FWDBmenBIOXpMdGpLcUc0TWRoY2hlMFhYTzlET1ZRekw0ZHNwUVBQdVJBX2h6Q2ZzWVZJWTNybTJiekp3WmhmWF9SUFBXQzlqUjctcVlHWWVMZWVQallzR0JGTVF0WmtnWlg1aTM1bFprNVExZXY5dnNvWF93UjhwbkJ3RzNXaVJ2d2RRU3JJVlBvaVh4eTlBRUtqWkJia3dJQVVBV2Nqdm9FUTRUVW1TaHp2ZUwxT0N2ZndxQ2Nka1RYWXF0LWxIWFE0dTFQcVhncFFPM0hFdUUtYlFnemx3WkF4bjA1aDFULUdrZlVZbEJtRGRCdjJyVkdJSXozd0I0dF9zbWhOeHFqRDA4T1NVaWR5cjBwSVgwbllPU294NjZGTnM1bFhIdGpNQUxFOENWd3FCbGpSRFRmRXotQnU0N2lCVEU5RGF6Qi10S2U2NGdadDlrRjZtVE5oZkw5ZWFjXzhCTmxXQzNFTFgxRXVYY3J3YkxnbnlBSm9PY3h4MlM1NVFQbVNDRW5Ld1dvNWMxSmdoTXJuaE1pT2VFeXYwWXBHZ29MZDVlN2lwUUNIeGNCVVdQVi1rRXdJMWFncUlPTXR0MmZVQ1l0d09mZTdzWGFBWUJMUFd3b0RSOU8zeER2UWpNdzAxS0ZJWnB5S3FJdU9wUDJnTTNwMWw3VFVqVXQ3ZGZnU1RkUktkc0NhUHJ0SGFxZ0lVWDEzYjNtU2JfMGNWM1Y0dHlCTzNESEdENC1jUWF5MVppRzR1QlBNSUJySjFfRi1ENHEwcmJ4S3hQUFpXVHA0TG9DZWdoUlo5WnNSM1lCZm1KbEs2ak1yUUU4Wk9JcVJGUkJwc0NvUkMyTjhoTWxtZmVQeDREZVRKZkhYN2duLVNTeGZzdFdBVnhEandJSXB5QjM0azF0ckI3Tk1wSzFhNGVOUVRrNjU0cG9JQ29pN09xOFkwR1lMTlktaGp4TktxdTVtTnNEcldsV2pEZm5nQWpJc2hxY0hjQnVSWUR5VVdaUXBHWUloTzFZUC1oNzJ4UjZ1dnpLcDJxWEZtQlNIMWkzZ0hXWXdKeC1iLXdZWVJhcU04VFlpMU5pd2ZIdTdCdkVWVFVBdmJuRk16bEFFQTh4alBrcTV2RzliT2hGdTVPOXlRMjFuZktiRTZIamQ1VFVqS0hRTXhxcU1mdkgyQ1NjQmZfcjl4c3NJd0RIeDVMZUFBbHJqdEJxWWl3aWdGUEQxR3ZnMkNGdVB4RUxkZi1xOVlFQXh1NjRfbkFEaEJ5TVZlUGFrWVhSTVRPeGxqNlJDTHNsRWRrei1pYjhnUmZrb3BvWkQ2QXBzYjFHNXZoWU1LSExhLWtlYlJTZlJmYUM5Y1Rhb1pkMVYyWTByM3NTS0VXMG1ybm1BTVN2QXRYaXZqX2dKSkZrajZSS2cyVlNOQnd5Y29zMlVyaWlNbTJEb3FuUFFtbWNTNVpZTktUenFZSl91cVFXZjRkQUZyYmtPczU2S1RKQ19ONGFOTHlwX2hOOEE1UHZEVjhnT0xxRjMxTEE4SHhRbmlmTkZwVXJBdlJDbU5oZS05SzI4QVhEWDZaN2ZiSlFwUGRXSnB5TE9MZV9ia3pYcmZVa1dicG5FMHRXUFZXMWJQVDAwOEdDQzJmZEl0ZDhUOEFpZXZWWXl5Q2xwSmFienNCMldlb2NKb2ZRYV9KbUdHRzNUcjU1VUFhMzk1a2J6dDVuNTl6NTdpM0hGa3k0UWVtbF9pdDVsQVp2cndDLUU5dnNYOF9CLS0ySXhBSFdCSnpqV010bllBb3U0cEZZYVF5R2tSNFM5NlRhdS1fb1NqbDBKMkw0V2N0VEZhNExtQlR3ckZ3cVlCeHVXdXJ6X0s4cEtsaG5rVUxCN2RRbHQxTmcyVFBqYUxyOHJzeFBXVUJaRHpXbUoxdHZzMFBzQk1UTUFvX1pGNFNMNDFvZWdTdEUtMUNKMXNIeVlvQk1CeEdpZVdmN0tsSDVZZHJXSGt5c2o2MHdwSTZIMVBhRzM1eU43Q2FtcVNidExxczNJeUx5U2RuUG5EeHpCTlg2SV9WNk1ET3BRNXFuc0pNWlVvZUYtY21oRGtJSmwxQ09QbHBUV3BuS3B5NE9RVkhfellqZjJUQ0diSV94QlhQWmdaaC1TRWxsMUVWSXB0aE1McFZDZDNwQUVKZ2t5cXRTXzlRZVJwN0pZSnJSV21XMlh0TzFRVEl0c2I4QjBxOGRCYkNxek04a011X1lrb2poQ3h2LUhKTGJiUlhneHp5QWFBcE5nMElkNTVzM3JGOWtUQ19wNVBTaVVHUHFDNFJnNXJaWDNBSkMwbi1WbTdtSnFySkhNQl9ZQjZrR2xDcXhTRExhMmNHcGlyWjR3ZU9SSjRZd1l4ZjVPeHNiYk53SW5SYnZPTzNkd1lnZmFseV9tQ3BxM3lNYVBHT0J0elJnMTByZ3VHemxta0tVQzZZRllmQ2VLZ1ZCNDhUUTc3LWNCZXBMekFwWW1fQkQ1NktzNGFMYUdYTU0xbXprY1FONUNlUHNMY3h2NFJMMmhNa3VNdzF4TVFWQk9odnJUMjFJMVd3Z2N6Sms5aEM2SWlWZFViZ0JWTEpUWWM5NmIzOS1oQmRqdkt
1NUUycFlVcUxERUZGbnZqTUxIYnJmMDBHZDEzbnJsWEEzSUo3UmNPUDg1dnRUU1FzcWtjTWZwUG9zM0JTY3RqMDdST2UxcXFTM0d0bGkwdFhnMk5LaUlxNWx3V1pLaVlLUFJXZzBzVl9Ia1V1OHdYUEFWOU50UndycGtCdzM0Q0NQamp2VTNqbFBLaGhsbUk5dUI5MjU5OHVySk1oY0drUWtXUloyVVRvOWJmbUVYRzFVeWNQczh2NXJCeVppRlZiWDNJaDhOSmRmX2lURTNVS3NXQXFZT1QtUmdvMWJoVWYxU3lqUUJhbzEyX3I3TXhwbm9wc1FoQ1ZUTlNBRjMyQTBTY2tzbHZ3RFUtTjVxQ0o1QXRTVks2WENwMGZCRGstNU1jN3FhUFJCQThyaFhhMVRsbnlSRXNGRmt3Yk01X21ldmV3bTItWm1JaGpZQWZROEFtT1d1UUtPQlhYVVFqT2NxLUxQenJHX3JfMEdscDRiMXcyZ1ZmU3NFMzVoelZJaDlvT0ZoRGQ2bmtlM0M5ZHlCd2ZMbnRZRkZUWHVBUEx4czNfTmtMckh5eXZrZFBzOEItOGRYOEhsMzBhZ0xlOWFjZzgteVBsdnpPT1pYdUxnbFNXYnhKaVB6QUxVdUJCOFpvU2x2c1FHZV94MDBOVWJhYkxISkswc0U5UmdPWFJLXzZNYklHTjN1QzRKaldKdEVHb0pOU284N3c2LXZGMGVleEZ5NGZ6OGV1dm1tM0J0aTQ3VFlNOEJrdEh3PT0= - -# Feature SyncDelta JIRA configuration -Feature_SyncDelta_JIRA_DELTA_TOKEN_SECRET = INT_ENC:Z0FBQUFBQm8xSVRkTUNsWm4wX0p6eXFDZmJ4dFdHNEs1MV9MUzdrb3RzeC1jVWVYZ0REWHRyZkFiaGZLcUQtTXFBZzZkNzRmQ0gxbEhGbUNlVVFfR1JEQTc0aldkZkgyWnBOcjdlUlZxR0tDTEdKRExULXAyUEtsVmNTMkRKU1BJNnFiM0hlMXo4YndMcHlRMExtZDQ3Zm9vNFhMcEZCcHpBPT0= - -# Debug Configuration -APP_DEBUG_CHAT_WORKFLOW_ENABLED = FALSE -APP_DEBUG_CHAT_WORKFLOW_DIR = ./test-chat \ No newline at end of file diff --git a/env_prod.20251012_121418.backup b/env_prod.20251012_121418.backup deleted file mode 100644 index c1ba8086..00000000 --- a/env_prod.20251012_121418.backup +++ /dev/null @@ -1,90 +0,0 @@ -# Production Environment Configuration - -# System Configuration -APP_ENV_TYPE = prod -APP_ENV_LABEL = Production Instance -APP_API_URL = https://gateway.poweron-center.net -APP_KEY_SYSVAR = CONFIG_KEY -APP_INIT_PASS_ADMIN_SECRET = PROD_ENC:Z0FBQUFBQm8xSU5pSXoyVEVwNDZ6cmthQTROUkxGUjh1UWF2UU5zaWRuX3p2aHJCVFo2NEstR0RqdnQ5clZmeVliRlhHZGFHTlhZV2dzMmRPZFVEemVlSHd5VHR3cmpNUXRaRlhZSFZ6d1dsX2Y5Zl9lOXdYdEU9 -APP_INIT_PASS_EVENT_SECRET = PROD_ENC:Z0FBQUFBQm8xSU5peGNMWExjWGZxQ2VndXVOSUVGcWhQTWd0N3d0blU3bGJvNjgzNVVNNktCQnZlTEtVckV5RUtQMjMwRTBkdmxEMlZwX0k1M1hlOFFNY3hjaWsyd2JmRGl2UWxfSXEwenVnQ3NmaTlxckp2VXM9 - -# PostgreSQL Storage (new) -DB_APP_HOST=gateway-prod-server.postgres.database.azure.com -DB_APP_DATABASE=poweron_app -DB_APP_USER=gzxxmcrdhn -DB_APP_PASSWORD_SECRET = PROD_ENC:Z0FBQUFBQm8xSU5pVmtwYWZQakdWZnJPamVlRWJPa0tnc3daSVVHejVrQ0x1VFZZbHhVSkk0S2tFWl92T2NwWURBMU9UbFROMHZ2TkNKZFlEWjhJZDZ0bnFndC1oYjhNRW1VLWpEYnlDNEJwcGVKckpUVlp6YTg9 -DB_APP_PORT=5432 - -# PostgreSQL Storage (new) -DB_CHAT_HOST=gateway-prod-server.postgres.database.azure.com -DB_CHAT_DATABASE=poweron_chat -DB_CHAT_USER=gzxxmcrdhn -DB_CHAT_PASSWORD_SECRET = PROD_ENC:Z0FBQUFBQm8xSU5pZVZnTzBPTDY1Q3c2U1pDV0lxbXhoWnlYSXRDWVhIeGJwSkdNMzMxR2h5a1FRN00xcWtYUE4ySGpqRllSaGM5SmRZZk9Bd2trVDJNZDdWcEFIbTJtel91MHpsazlTQnRsV2docGdBc0RVeEU9 -DB_CHAT_PORT=5432 - -# PostgreSQL Storage (new) -DB_MANAGEMENT_HOST=gateway-prod-server.postgres.database.azure.com -DB_MANAGEMENT_DATABASE=poweron_management -DB_MANAGEMENT_USER=gzxxmcrdhn -DB_MANAGEMENT_PASSWORD_SECRET = PROD_ENC:Z0FBQUFBQm8xSU5pQXdaRnVEQUx2MmU5ck9XZzNfaGVoRXlYMlVjSVM5dWNTekhmR2VYNkd6WVhELUlkLWdFWWRWQ1JJLWZ4WUNwclZVRlg3ZHBCS0xwM1laNklTaEs1czFDRTMxYlV2TWNueEJlTHFyNEt4aVk9 -DB_MANAGEMENT_PORT=5432 - -# Security Configuration -APP_JWT_KEY_SECRET = PROD_ENC:Z0FBQUFBQm8xSU5pY3JfX1R3cEJhTjAzZGx2amtRSE4yVzZhMmY3a3FHam9BdzBxVWd5R0FRSW1KbmNGS3JDMktKTWptZm4wYmZZZTVDQkh3NVlxSW1MZEdiVWdORng4dm0xV08wZDh0YlBNQTdEbmlnVWduMzNWY1RPX1BqaGtnOTc2ZWNBTnNnd1AtaTNRUExpRThVdzNmdVFHM2hkTjFjcW0ya2szMWNaT3VDeDhXMlJ1NDM4PQ== -APP_TOKEN_EXPIRY=300 - -# CORS Configuration 
-APP_ALLOWED_ORIGINS=http://localhost:8080,https://playground.poweron-center.net,https://playground-int.poweron-center.net,http://localhost:5176,https://nyla.poweron-center.net,https://nyla-int.poweron-center.net
-
-# Logging configuration
-APP_LOGGING_LOG_LEVEL = DEBUG
-APP_LOGGING_LOG_DIR = /home/site/wwwroot/
-APP_LOGGING_FORMAT = %(asctime)s - %(levelname)s - %(name)s - %(message)s
-APP_LOGGING_DATE_FORMAT = %Y-%m-%d %H:%M:%S
-APP_LOGGING_CONSOLE_ENABLED = True
-APP_LOGGING_FILE_ENABLED = True
-APP_LOGGING_ROTATION_SIZE = 10485760
-APP_LOGGING_BACKUP_COUNT = 5
-
-# Service Redirects
-Service_MSFT_REDIRECT_URI = https://gateway-prod.poweron-center.net/api/msft/auth/callback
-Service_GOOGLE_REDIRECT_URI = https://gateway-prod.poweron-center.net/api/google/auth/callback
-
-# OpenAI configuration
-Connector_AiOpenai_API_URL = https://api.openai.com/v1/chat/completions
-Connector_AiOpenai_API_SECRET = PROD_ENC:Z0FBQUFBQm8xSU5pU05XM2hMaExPMnpYeFpwRVhyYl9JZmRITmlmRDlWOUJSSWE4NTFLZUptSkJhNlEycHBLZmh3WFA2ZmU5VmxHZks1UUNVOUZnckZNdXZ2MTY2dFg1Nl8yWDRrcTRlT0tHYkhyRGZINTEzU25iYVFRMzJGeUZIdlc4LU9GbmpQYmtmU3lJT2VVZ1UzLVd3R25ZQ092SUVnPT0=
-Connector_AiOpenai_MODEL_NAME = gpt-4o
-Connector_AiOpenai_TEMPERATURE = 0.2
-Connector_AiOpenai_MAX_TOKENS = 2000
-
-# Anthropic configuration
-Connector_AiAnthropic_API_URL = https://api.anthropic.com/v1/messages
-Connector_AiAnthropic_API_SECRET = PROD_ENC:Z0FBQUFBQm8xSU5pNTA1RkZ3UllCOXVsNVZzbkw2Rkl1TWxCZ0wwWEVXUm9ReUhBcVl1cGFUdW9FRVh4elVxR0x3NVRxZkc4SkxHVFdzSU1YNG5Rb0FqSHJhdElwWm1iLWdubTVDcUl3UkVjVHNoU0xLa0ZTSFlfTlJUVXg4cVVwUWdlVDBTSFU5SnBzS0ZnVjlQcmtiNzV2UTNMck1IakZ0OWlubUtlWDZnMk4yX2JsZ1U4Wm1yT29fM2d2NVBNOWNBbWtTRWNyQ2tZNjhwSVF6bG5SU3dTenR2MzA3Z19NUT09
-Connector_AiAnthropic_MODEL_NAME = claude-3-5-sonnet-20241022
-Connector_AiAnthropic_TEMPERATURE = 0.2
-Connector_AiAnthropic_MAX_TOKENS = 2000
-
-# Perplexity AI configuration
-Connector_AiPerplexity_API_URL = https://api.perplexity.ai/chat/completions
-Connector_AiPerplexity_API_SECRET = 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
-Connector_AiPerplexity_MODEL_NAME = sonar
-Connector_AiPerplexity_TEMPERATURE = 0.2
-Connector_AiPerplexity_MAX_TOKENS = 2000
-
-# Agent Mail configuration
-Service_MSFT_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
-Service_MSFT_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQm8xSU5pVEhHdlZHU3FNMmhuRGVwaGc3YzIxSjlZNzBCQjlOV2pSYVNXb0t1ZnVwQzZsQzY4cHMtVlZtNF85OEVaV1BMTzdXMmpzaGZpaG1DalJ0bkNPMHA5ZUcwZjNDdGk1TFdxYTJSZnVrVmhhZ2VRUEZxbjJOOGFhWk9EYlY3dmRVTnI=
-Service_MSFT_TENANT_ID = common
-
-# Google Service configuration
-Service_GOOGLE_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
-Service_GOOGLE_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQm8xSU5pV2JEV0lNUXhwa1VTUGh2RWcyYnJHSFQyTmdBOEhwRkJWc3MwOFZlcHJGUmlGOVVFbG1XalNyUXVuaExESy1xeFNIQlRiSFVIWTB6Rm1fNFg0OHZZSkF4ZlBIcFZDMjZHcFRERXJ0WlVFclhHa29Za1BqWGxsM05NZGFRc1BLZnE=
-
-# Tavily Web Search configuration
-Connector_WebTavily_API_KEY_SECRET = PROD_ENC:Z0FBQUFBQm8xSU5pMjhJNS1CZFJubUlkN3ZrTUoxR0Y1QzJFWEJSMk0wQkI0UndqOW1UelVieWhGaTVBcHoxRXo1VjRzVVRROHFIeHMyS3Q5cDZCeUlEMzE1ZlhVTmNveFk5VmFQMm80NTRyVW1TZHVsR3dUN0RtMnd4LW1VWlpqOXJPeXZBTmg4OEM=
-
-# Google Cloud Speech Services configuration
-Connector_GoogleSpeech_API_KEY_SECRET = 
PROD_ENC:Z0FBQUFBQm8xSU5pNjlJdmFMeERXUUQzR0duRUY4cGRZRzdwQlpnVFAzSzQ5cHZNRnVUZ0xWd3dQMHR3QjVsdF92NmdUQlJGRk1RcG1RYWZzcE9RbEhjQmR5Yk5Ud3ZKTW5jbmpEVGJ2ZkxVeVJpcUxaT2lNREFXaks5WHg5aVlHcXlUZldMdnZGYklHWjlJOWJ6Wm5RSkNmdm5feENjS1E0QUVXTTE5SW5sNFBEeTJ1RjRmVm9SQUNIYmF2U1U2dklsbTVlWFpCcHMwTFF1SUg5NmNfcWhQRFlpeWt0U19HMXNuUHd2RFdrVl9XdUFaY0hWdVBPYWlybU1CdGlCN1A0RzZBbi1IUVJ1TWMxTE9Ea09sTURhcDFZb1JIUW1zUFJybW15MDcxOUtfVXA2N0xwMnFrczA1YTJaN05pRHhOYWNzMjVmUHdhbVdlemF3TEIzN0pJaVo3bGJBMXJnZmNYTXVJVDdmYkRXWTlBT2F2NmN4eTlteUI1SlJTOXc2WWFWUTBCZTJBVHRLVDhEVjBFeHE0Nmk1YkxYd3N3RXgtVUdGdlZFSmk4dHM0QjFmbktsQTctbmJMT0MtMDlKS1pUR0pELXBxckhULUUycjlBZmVJQjFrM0xEUm50U2ZabExtVjZ1WWZ1WnlobUZIOVlndjNydUZfczJUWVVRZURTd1lYazllaER4VU10cXUyVS1ZNG9Ha2hnbTAzOEpGMklFSWpWeVV5eFB2UlVWYmJJakZnOVM2R2lJSXRSM3VzVEZZNUVpNmVjRzdXRUJsT2hzcjhZWERFeGV5c1dFQVM3dkhGY2Q3ckNBRDZCcVdhZnZkdzM3QVNpODZYWE81TEIyZGUycldkSVRvbm5hR3Jib2UzOEtXdUpHQ2FyWDQtMDdQbC1ycEdfUzdXd0U2dHFIVjhoRDJ0YkNsWUpva1dzOGNPdXRpZjVwUldtT3FVN3RrZUhTN3JfX1M3LU9PaXZELWkzRmtMbjgxZGZ6ZjVJNW9RZW1nM2hqUXo4Z2I5Z2tSVTVMdUNLblRxOGQ1Y3F4SGZIbWo4YkFBV3FIbjB6LUxGNHdsQWgxQUM4bzVrblBObFFfVWNaQ3QwejQ1eGFlSXVIcXlyVEZEdzVKNV9pd2o4RW1UVjlqb3VMWnF0V1JTcWF1R0RjdUNjM2lLUHRqZDl2WWtXUnhmbVdxeHA3REFHTkdkMjM4LTllajBWQnd3RHlFSVdiUThfQnduOVFJdmR6OUVGN1lOYjBqclhadHozX21kRzlUT2EtWVBkYWFRSjRGdW80dmlEUTVrVjhWbjJYNGtCeGNtNzRHQXJsRlZyWjBYdHltVDM2MV9IT0RFT2dLLTVBREtsS09HdUxrODRLcEQ1TmRoVDh6WmgybGc5MzgtbmJSYThQd3FFaUcxbmg3eE95RkJVX2hHM20wT1k2c21qd24wSkFWNGROaklQeHZrc21PdTVsdHVxR0pxd3Ztb1NQVHEtd25URHRNa1pqa3BLdVdkTnNFeDNManJST0dOb1RWM2hqekxFTlFSZkd6TlZBY1VQT1NFOVlDQzlPQWVlVXQ4MW0wdGkzd0Myam1lSWE2aEtVVTVNc3N3dENpa1BWRl9ZQ3daYllONWRmRUF0THpleFRmdWRqTFM2aldmLUFuZzFGdkFQNHR6d21SdzRGQ0Q4cU8yV0xGUTVUY01TZlYxSzZ4cmtfUGZvVDhmYmNBX1pibTVTcl9lenJoME9KSnBucUxPRU1PRXBmLWFENEgwRWZOU0RvRDlvQk9ueVp0dXJrUVgtQUk5VldVbV9MS19PYmlua3liWl80Z2hMcFRnTXBkZDA3enIxRWFzaU56TEZKa0hPQUtNY0dCY1pnQ2V3Zml6ZFczWFBESUlLd3BSVEs5ZXlGLUpINDRsd1NBVjBkR1dvbE8wLWZBeEhFQ0hvY3E5UGJsTDdteGdSRjBIZTRobXpsd29PMmhKQkxXY3Znd2FMdWtZU1VkQlVRZXlSZ3FaVnNqcXpwR3N3SktOTDA3aUZIcE9TR1VDcXdaTDhQX2E5VDlwckoyX0xlNmFQcnoydEkwc0s1S08yaVlsM0pwYktUVWl3LU5hQzF2UVZNSm9ZR3QyQWdrUXB2a25QNzhkVEFOYmZ0b1BmTXRCMmVQZTAtYzdOeUlBYlNINlZNZW1nUTFfSV92UlJiWGt6Qms1c1hBc3kzZkVRMzEwNVJDOS1JeVg4YWtVeUJyOTZPQ0FnSUs1Z25sMlY0S1V1c0dIWEpuX2pMQmZ4Z29SY1U0bVZscXNWcjJwRy1UZEFYSXBzQURGblRTelBybU5BeDF6N3hZLXZwSHBkMmlzbHZWN2JkU3hRcE0zQ0hna3QwYWlJX3hBdGcxUHdGRE55cndUNHRvbXU5VTRMRmZDRjhvXzIwajI1Y0RCcmR2OV94cS1XYkNwalNHS2lObHlkNGZBbklycnZMSlJYVnlfakRXb1ZfWUo2MGxzYUNIektYeENGTkUzMUJXRE9WRHRrY2o5UFJHckZza2RQbjNPUkstbG9GZG4yNmxKeEdtbHo4WDZFc0lvT01wZkxuN29ycXl3X1hTN1prRGdvWG9hRFYwNzBwVVpuMW0wQlZYbGZxZjFQUHp2XzBQT3Fqa3lzejVKZmJDMG0wRzhqWV9HY1dxaXB2VFNQUzV2LUJSOXRFRUllak83cUI3RGUtYVBJakF1YUVOV0otT1BxUHJqS0NLdFVHc0tsT2RGcWd6UTU4Yi1kc0JZS1VPT1NXSlc3TDM5ZDVEZlRDOURZU1hMT0YxZ25ndVBUaG1VcGsxWFZSS1RxT1ZZTU1vclZjVU5iYmZMd0VBTXlvdTE0YjdoclZ6ZnNKMmE2Yy1ORmNCMnJNX3dwcVJSN2RSd2d6aENLRXQyTjhkcDlLTFVZMHBydFowNTJoZm1mVHNRVHI1YjhTNnl1Vll4dFZhenZfa0dybk9KYVh6LUluSUo0djUzRFNEdzBoVGt5UU9tMlg5UnBLbk9WaEhoU2txY2tUSXJmemlmNEExb3Q1blI5bE9adHluWVI3NXZQNUtXdmpra05aNy15dTBXdlVqcXhteFVqSXFxNnlQR2FGeVNONkx3NVpQUk1FNk5yTUY4T1hQV1FCdm9PYzdFTGl4QXZkODltSlprbGJ6cWREcEM1VlNwN3V5aWdWYXNkekk4X3U0cjJjZ1k2X190cmNnMlpMQVlLdExxM3pFNkZudVFKci1CalE1U3kzdmotQ01LV0ZzWnp0VUxRblhkdlN6VG1MWHNQdGlrNmF4RnFtd0c3UXNqZFVRZTRFMGl1NFU5T2k3VEpjZXA1U052VkJtdUhDWEpTaDRGQnM0SDQwY2IxdDVNbUtELTQ0R0s0OHpfTHdFOHZ0VmRMTC1FUVpPSkJ4QXRWNnl5MURUdjVyUk53emRwbDBxUnloUmlheXhKY3RBUG1mX3JxM2w0VlZvcE40b2ROeG15NS01RFlvUHdoYllLNVhCZUNEd0dwQnFCLVdZU0RhVEFzR2gxTVpub3FGRnl4VDNiSVZrTn
pMQUlxeGJGQzh5WlNZR2NKbklHRVRTaVJ2REduN0hXaGo5MHFGb1FOa0U5TUFwQ09zOXVWMnRRNVlJWmZpaTUxLWFIeWR0UEFtaVNDX1k5Q1p3Y2V4ckVXQVBRYzV1eGwwMWd0SE15WUxiYzUyLTUzTGlyTUhZUDFlRTFjcFpieWQwU0pxRWJXSE53Nkd5aHp5T28wZVd6Z1phLTQ4TmgxU3hvNHpySzExUk5WZlFFS3VpOXNHMDdZU0gzSGxYUlU4WmgwNUlPdlhQcUI0cGtITmQ4SlByczN0THUxNHc0a21vUEp6S1hLNnFRNmFfdlpmUWpJQ1VNYXVEOW1abzlsd2RoRG5pVXRVbjBKV2RFTGFEa3ZYTHByOTJjalc1b3hTWkFmS2RPdVlTUTVkRkpSTnZsMWtnYWZEUm1SR3lBemdON2xiN3pkZlNfX2NSYU5wWHNybHh4V0lnNHJjQ2NON1hiRHMycUdmNC1kay13bUE0OTBPN0xmNDA1NlQxVmRySEJvM1VUN2Y2Sl9KX2pZVHRPWEdfR2RYNUoxY01Va3pXb2VBd3lZb3BSXzU5NVJfWlhEYXFSVDJrUnFHWG42RVZJUVQ2RlJWUEkyQnRnREI3eHNiRERiQ3FUczJsRTBDZ3pUUGZPcjExZUFKc21QUWxVYVBmV2hPZXRGd3lJX3ZTczhCVG1jWFVwanhIZHlyTTdiR2c5cTBVSXBRV1U4ZExtWWdub1pTSHU0cU5aYWJVWmExbXI0MjE3WUVnPT0= - -# Feature SyncDelta JIRA configuration -Feature_SyncDelta_JIRA_DELTA_TOKEN_SECRET = PROD_ENC:Z0FBQUFBQm8xSU5pTDhnTVNzRUhScU8wYnZsZk52bHFkSWxLc18xQmtCeC1HbnNwTzVBbXRNTmQzRjZYaGE2MVlCNGtnWDk1T2I5VXVKNHpKU1VRbXEyN2tRWUJnU2ltZE5qZ3lmNEF6Z1hMTTEwZkk2NUNBYjhmVTJEcWpRUW9HNEVpSGFWdjBWQXQ3eUtHUTFJS3U5QWpaeno0RFNhMUxnPT0= diff --git a/env_prod.20251013_140140.backup b/env_prod.20251013_140140.backup deleted file mode 100644 index a0583019..00000000 --- a/env_prod.20251013_140140.backup +++ /dev/null @@ -1,94 +0,0 @@ -# Production Environment Configuration - -# System Configuration -APP_ENV_TYPE = prod -APP_ENV_LABEL = Production Instance -APP_API_URL = https://gateway.poweron-center.net -APP_KEY_SYSVAR = CONFIG_KEY -APP_INIT_PASS_ADMIN_SECRET = PROD_ENC:Z0FBQUFBQm8xSU5pSXoyVEVwNDZ6cmthQTROUkxGUjh1UWF2UU5zaWRuX3p2aHJCVFo2NEstR0RqdnQ5clZmeVliRlhHZGFHTlhZV2dzMmRPZFVEemVlSHd5VHR3cmpNUXRaRlhZSFZ6d1dsX2Y5Zl9lOXdYdEU9 -APP_INIT_PASS_EVENT_SECRET = PROD_ENC:Z0FBQUFBQm8xSU5peGNMWExjWGZxQ2VndXVOSUVGcWhQTWd0N3d0blU3bGJvNjgzNVVNNktCQnZlTEtVckV5RUtQMjMwRTBkdmxEMlZwX0k1M1hlOFFNY3hjaWsyd2JmRGl2UWxfSXEwenVnQ3NmaTlxckp2VXM9 - -# PostgreSQL Storage (new) -DB_APP_HOST=gateway-prod-server.postgres.database.azure.com -DB_APP_DATABASE=poweron_app -DB_APP_USER=gzxxmcrdhn -DB_APP_PASSWORD_SECRET = PROD_ENC:Z0FBQUFBQm8xSU5pVmtwYWZQakdWZnJPamVlRWJPa0tnc3daSVVHejVrQ0x1VFZZbHhVSkk0S2tFWl92T2NwWURBMU9UbFROMHZ2TkNKZFlEWjhJZDZ0bnFndC1oYjhNRW1VLWpEYnlDNEJwcGVKckpUVlp6YTg9 -DB_APP_PORT=5432 - -# PostgreSQL Storage (new) -DB_CHAT_HOST=gateway-prod-server.postgres.database.azure.com -DB_CHAT_DATABASE=poweron_chat -DB_CHAT_USER=gzxxmcrdhn -DB_CHAT_PASSWORD_SECRET = PROD_ENC:Z0FBQUFBQm8xSU5pZVZnTzBPTDY1Q3c2U1pDV0lxbXhoWnlYSXRDWVhIeGJwSkdNMzMxR2h5a1FRN00xcWtYUE4ySGpqRllSaGM5SmRZZk9Bd2trVDJNZDdWcEFIbTJtel91MHpsazlTQnRsV2docGdBc0RVeEU9 -DB_CHAT_PORT=5432 - -# PostgreSQL Storage (new) -DB_MANAGEMENT_HOST=gateway-prod-server.postgres.database.azure.com -DB_MANAGEMENT_DATABASE=poweron_management -DB_MANAGEMENT_USER=gzxxmcrdhn -DB_MANAGEMENT_PASSWORD_SECRET = PROD_ENC:Z0FBQUFBQm8xSU5pQXdaRnVEQUx2MmU5ck9XZzNfaGVoRXlYMlVjSVM5dWNTekhmR2VYNkd6WVhELUlkLWdFWWRWQ1JJLWZ4WUNwclZVRlg3ZHBCS0xwM1laNklTaEs1czFDRTMxYlV2TWNueEJlTHFyNEt4aVk9 -DB_MANAGEMENT_PORT=5432 - -# Security Configuration -APP_JWT_KEY_SECRET = PROD_ENC:Z0FBQUFBQm8xSU5pY3JfX1R3cEJhTjAzZGx2amtRSE4yVzZhMmY3a3FHam9BdzBxVWd5R0FRSW1KbmNGS3JDMktKTWptZm4wYmZZZTVDQkh3NVlxSW1MZEdiVWdORng4dm0xV08wZDh0YlBNQTdEbmlnVWduMzNWY1RPX1BqaGtnOTc2ZWNBTnNnd1AtaTNRUExpRThVdzNmdVFHM2hkTjFjcW0ya2szMWNaT3VDeDhXMlJ1NDM4PQ== -APP_TOKEN_EXPIRY=300 - -# CORS Configuration 
-APP_ALLOWED_ORIGINS=http://localhost:8080,https://playground.poweron-center.net,https://playground-int.poweron-center.net,http://localhost:5176,https://nyla.poweron-center.net,https://nyla-int.poweron-center.net
-
-# Logging configuration
-APP_LOGGING_LOG_LEVEL = DEBUG
-APP_LOGGING_LOG_DIR = /home/site/wwwroot/
-APP_LOGGING_FORMAT = %(asctime)s - %(levelname)s - %(name)s - %(message)s
-APP_LOGGING_DATE_FORMAT = %Y-%m-%d %H:%M:%S
-APP_LOGGING_CONSOLE_ENABLED = True
-APP_LOGGING_FILE_ENABLED = True
-APP_LOGGING_ROTATION_SIZE = 10485760
-APP_LOGGING_BACKUP_COUNT = 5
-
-# Service Redirects
-Service_MSFT_REDIRECT_URI = https://gateway-prod.poweron-center.net/api/msft/auth/callback
-Service_GOOGLE_REDIRECT_URI = https://gateway-prod.poweron-center.net/api/google/auth/callback
-
-# OpenAI configuration
-Connector_AiOpenai_API_URL = https://api.openai.com/v1/chat/completions
-Connector_AiOpenai_API_SECRET = PROD_ENC:Z0FBQUFBQm8xSU5pU05XM2hMaExPMnpYeFpwRVhyYl9JZmRITmlmRDlWOUJSSWE4NTFLZUptSkJhNlEycHBLZmh3WFA2ZmU5VmxHZks1UUNVOUZnckZNdXZ2MTY2dFg1Nl8yWDRrcTRlT0tHYkhyRGZINTEzU25iYVFRMzJGeUZIdlc4LU9GbmpQYmtmU3lJT2VVZ1UzLVd3R25ZQ092SUVnPT0=
-Connector_AiOpenai_MODEL_NAME = gpt-4o
-Connector_AiOpenai_TEMPERATURE = 0.2
-Connector_AiOpenai_MAX_TOKENS = 2000
-
-# Anthropic configuration
-Connector_AiAnthropic_API_URL = https://api.anthropic.com/v1/messages
-Connector_AiAnthropic_API_SECRET = PROD_ENC:Z0FBQUFBQm8xSU5pNTA1RkZ3UllCOXVsNVZzbkw2Rkl1TWxCZ0wwWEVXUm9ReUhBcVl1cGFUdW9FRVh4elVxR0x3NVRxZkc4SkxHVFdzSU1YNG5Rb0FqSHJhdElwWm1iLWdubTVDcUl3UkVjVHNoU0xLa0ZTSFlfTlJUVXg4cVVwUWdlVDBTSFU5SnBzS0ZnVjlQcmtiNzV2UTNMck1IakZ0OWlubUtlWDZnMk4yX2JsZ1U4Wm1yT29fM2d2NVBNOWNBbWtTRWNyQ2tZNjhwSVF6bG5SU3dTenR2MzA3Z19NUT09
-Connector_AiAnthropic_MODEL_NAME = claude-3-5-sonnet-20241022
-Connector_AiAnthropic_TEMPERATURE = 0.2
-Connector_AiAnthropic_MAX_TOKENS = 2000
-
-# Perplexity AI configuration
-Connector_AiPerplexity_API_URL = https://api.perplexity.ai/chat/completions
-Connector_AiPerplexity_API_SECRET = PROD_ENC:Z0FBQUFBQm82Mzk2Q1FGRkJEUkI4LXlQbHYzT2RkdVJEcmM4WGdZTWpJTEhoeUF1NW5LUVpJdDBYN3k1WFN4a2FQSWJSQmd0U0xJbzZDTmFFN05FcXl0Z3V1OEpsZjYydV94TXVjVjVXRTRYSWdLMkd5XzZIbFV6emRCZHpuOUpQeThadE5xcDNDVGV1RHJrUEN0c1BBYXctZFNWcFRuVXhRPT0=
-Connector_AiPerplexity_MODEL_NAME = sonar
-Connector_AiPerplexity_TEMPERATURE = 0.2
-Connector_AiPerplexity_MAX_TOKENS = 2000
-
-# Agent Mail configuration
-Service_MSFT_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
-Service_MSFT_CLIENT_SECRET = KDH8Q~H2OCtdvYy5yx6HOCYEbdnJCq90G21vTcPw
-Service_MSFT_TENANT_ID = common
-
-# Google Service configuration
-Service_GOOGLE_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
-Service_GOOGLE_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQm8xSU5pV2JEV0lNUXhwa1VTUGh2RWcyYnJHSFQyTmdBOEhwRkJWc3MwOFZlcHJGUmlGOVVFbG1XalNyUXVuaExESy1xeFNIQlRiSFVIWTB6Rm1fNFg0OHZZSkF4ZlBIcFZDMjZHcFRERXJ0WlVFclhHa29Za1BqWGxsM05NZGFRc1BLZnE=
-
-# Tavily Web Search configuration
-Connector_WebTavily_API_KEY_SECRET = PROD_ENC:Z0FBQUFBQm8xSU5pMjhJNS1CZFJubUlkN3ZrTUoxR0Y1QzJFWEJSMk0wQkI0UndqOW1UelVieWhGaTVBcHoxRXo1VjRzVVRROHFIeHMyS3Q5cDZCeUlEMzE1ZlhVTmNveFk5VmFQMm80NTRyVW1TZHVsR3dUN0RtMnd4LW1VWlpqOXJPeXZBTmg4OEM=
-
-# Google Cloud Speech Services configuration
-Connector_GoogleSpeech_API_KEY_SECRET = 
PROD_ENC:Z0FBQUFBQm8xSU5pNjlJdmFMeERXUUQzR0duRUY4cGRZRzdwQlpnVFAzSzQ5cHZNRnVUZ0xWd3dQMHR3QjVsdF92NmdUQlJGRk1RcG1RYWZzcE9RbEhjQmR5Yk5Ud3ZKTW5jbmpEVGJ2ZkxVeVJpcUxaT2lNREFXaks5WHg5aVlHcXlUZldMdnZGYklHWjlJOWJ6Wm5RSkNmdm5feENjS1E0QUVXTTE5SW5sNFBEeTJ1RjRmVm9SQUNIYmF2U1U2dklsbTVlWFpCcHMwTFF1SUg5NmNfcWhQRFlpeWt0U19HMXNuUHd2RFdrVl9XdUFaY0hWdVBPYWlybU1CdGlCN1A0RzZBbi1IUVJ1TWMxTE9Ea09sTURhcDFZb1JIUW1zUFJybW15MDcxOUtfVXA2N0xwMnFrczA1YTJaN05pRHhOYWNzMjVmUHdhbVdlemF3TEIzN0pJaVo3bGJBMXJnZmNYTXVJVDdmYkRXWTlBT2F2NmN4eTlteUI1SlJTOXc2WWFWUTBCZTJBVHRLVDhEVjBFeHE0Nmk1YkxYd3N3RXgtVUdGdlZFSmk4dHM0QjFmbktsQTctbmJMT0MtMDlKS1pUR0pELXBxckhULUUycjlBZmVJQjFrM0xEUm50U2ZabExtVjZ1WWZ1WnlobUZIOVlndjNydUZfczJUWVVRZURTd1lYazllaER4VU10cXUyVS1ZNG9Ha2hnbTAzOEpGMklFSWpWeVV5eFB2UlVWYmJJakZnOVM2R2lJSXRSM3VzVEZZNUVpNmVjRzdXRUJsT2hzcjhZWERFeGV5c1dFQVM3dkhGY2Q3ckNBRDZCcVdhZnZkdzM3QVNpODZYWE81TEIyZGUycldkSVRvbm5hR3Jib2UzOEtXdUpHQ2FyWDQtMDdQbC1ycEdfUzdXd0U2dHFIVjhoRDJ0YkNsWUpva1dzOGNPdXRpZjVwUldtT3FVN3RrZUhTN3JfX1M3LU9PaXZELWkzRmtMbjgxZGZ6ZjVJNW9RZW1nM2hqUXo4Z2I5Z2tSVTVMdUNLblRxOGQ1Y3F4SGZIbWo4YkFBV3FIbjB6LUxGNHdsQWgxQUM4bzVrblBObFFfVWNaQ3QwejQ1eGFlSXVIcXlyVEZEdzVKNV9pd2o4RW1UVjlqb3VMWnF0V1JTcWF1R0RjdUNjM2lLUHRqZDl2WWtXUnhmbVdxeHA3REFHTkdkMjM4LTllajBWQnd3RHlFSVdiUThfQnduOVFJdmR6OUVGN1lOYjBqclhadHozX21kRzlUT2EtWVBkYWFRSjRGdW80dmlEUTVrVjhWbjJYNGtCeGNtNzRHQXJsRlZyWjBYdHltVDM2MV9IT0RFT2dLLTVBREtsS09HdUxrODRLcEQ1TmRoVDh6WmgybGc5MzgtbmJSYThQd3FFaUcxbmg3eE95RkJVX2hHM20wT1k2c21qd24wSkFWNGROaklQeHZrc21PdTVsdHVxR0pxd3Ztb1NQVHEtd25URHRNa1pqa3BLdVdkTnNFeDNManJST0dOb1RWM2hqekxFTlFSZkd6TlZBY1VQT1NFOVlDQzlPQWVlVXQ4MW0wdGkzd0Myam1lSWE2aEtVVTVNc3N3dENpa1BWRl9ZQ3daYllONWRmRUF0THpleFRmdWRqTFM2aldmLUFuZzFGdkFQNHR6d21SdzRGQ0Q4cU8yV0xGUTVUY01TZlYxSzZ4cmtfUGZvVDhmYmNBX1pibTVTcl9lenJoME9KSnBucUxPRU1PRXBmLWFENEgwRWZOU0RvRDlvQk9ueVp0dXJrUVgtQUk5VldVbV9MS19PYmlua3liWl80Z2hMcFRnTXBkZDA3enIxRWFzaU56TEZKa0hPQUtNY0dCY1pnQ2V3Zml6ZFczWFBESUlLd3BSVEs5ZXlGLUpINDRsd1NBVjBkR1dvbE8wLWZBeEhFQ0hvY3E5UGJsTDdteGdSRjBIZTRobXpsd29PMmhKQkxXY3Znd2FMdWtZU1VkQlVRZXlSZ3FaVnNqcXpwR3N3SktOTDA3aUZIcE9TR1VDcXdaTDhQX2E5VDlwckoyX0xlNmFQcnoydEkwc0s1S08yaVlsM0pwYktUVWl3LU5hQzF2UVZNSm9ZR3QyQWdrUXB2a25QNzhkVEFOYmZ0b1BmTXRCMmVQZTAtYzdOeUlBYlNINlZNZW1nUTFfSV92UlJiWGt6Qms1c1hBc3kzZkVRMzEwNVJDOS1JeVg4YWtVeUJyOTZPQ0FnSUs1Z25sMlY0S1V1c0dIWEpuX2pMQmZ4Z29SY1U0bVZscXNWcjJwRy1UZEFYSXBzQURGblRTelBybU5BeDF6N3hZLXZwSHBkMmlzbHZWN2JkU3hRcE0zQ0hna3QwYWlJX3hBdGcxUHdGRE55cndUNHRvbXU5VTRMRmZDRjhvXzIwajI1Y0RCcmR2OV94cS1XYkNwalNHS2lObHlkNGZBbklycnZMSlJYVnlfakRXb1ZfWUo2MGxzYUNIektYeENGTkUzMUJXRE9WRHRrY2o5UFJHckZza2RQbjNPUkstbG9GZG4yNmxKeEdtbHo4WDZFc0lvT01wZkxuN29ycXl3X1hTN1prRGdvWG9hRFYwNzBwVVpuMW0wQlZYbGZxZjFQUHp2XzBQT3Fqa3lzejVKZmJDMG0wRzhqWV9HY1dxaXB2VFNQUzV2LUJSOXRFRUllak83cUI3RGUtYVBJakF1YUVOV0otT1BxUHJqS0NLdFVHc0tsT2RGcWd6UTU4Yi1kc0JZS1VPT1NXSlc3TDM5ZDVEZlRDOURZU1hMT0YxZ25ndVBUaG1VcGsxWFZSS1RxT1ZZTU1vclZjVU5iYmZMd0VBTXlvdTE0YjdoclZ6ZnNKMmE2Yy1ORmNCMnJNX3dwcVJSN2RSd2d6aENLRXQyTjhkcDlLTFVZMHBydFowNTJoZm1mVHNRVHI1YjhTNnl1Vll4dFZhenZfa0dybk9KYVh6LUluSUo0djUzRFNEdzBoVGt5UU9tMlg5UnBLbk9WaEhoU2txY2tUSXJmemlmNEExb3Q1blI5bE9adHluWVI3NXZQNUtXdmpra05aNy15dTBXdlVqcXhteFVqSXFxNnlQR2FGeVNONkx3NVpQUk1FNk5yTUY4T1hQV1FCdm9PYzdFTGl4QXZkODltSlprbGJ6cWREcEM1VlNwN3V5aWdWYXNkekk4X3U0cjJjZ1k2X190cmNnMlpMQVlLdExxM3pFNkZudVFKci1CalE1U3kzdmotQ01LV0ZzWnp0VUxRblhkdlN6VG1MWHNQdGlrNmF4RnFtd0c3UXNqZFVRZTRFMGl1NFU5T2k3VEpjZXA1U052VkJtdUhDWEpTaDRGQnM0SDQwY2IxdDVNbUtELTQ0R0s0OHpfTHdFOHZ0VmRMTC1FUVpPSkJ4QXRWNnl5MURUdjVyUk53emRwbDBxUnloUmlheXhKY3RBUG1mX3JxM2w0VlZvcE40b2ROeG15NS01RFlvUHdoYllLNVhCZUNEd0dwQnFCLVdZU0RhVEFzR2gxTVpub3FGRnl4VDNiSVZrTn
pMQUlxeGJGQzh5WlNZR2NKbklHRVRTaVJ2REduN0hXaGo5MHFGb1FOa0U5TUFwQ09zOXVWMnRRNVlJWmZpaTUxLWFIeWR0UEFtaVNDX1k5Q1p3Y2V4ckVXQVBRYzV1eGwwMWd0SE15WUxiYzUyLTUzTGlyTUhZUDFlRTFjcFpieWQwU0pxRWJXSE53Nkd5aHp5T28wZVd6Z1phLTQ4TmgxU3hvNHpySzExUk5WZlFFS3VpOXNHMDdZU0gzSGxYUlU4WmgwNUlPdlhQcUI0cGtITmQ4SlByczN0THUxNHc0a21vUEp6S1hLNnFRNmFfdlpmUWpJQ1VNYXVEOW1abzlsd2RoRG5pVXRVbjBKV2RFTGFEa3ZYTHByOTJjalc1b3hTWkFmS2RPdVlTUTVkRkpSTnZsMWtnYWZEUm1SR3lBemdON2xiN3pkZlNfX2NSYU5wWHNybHh4V0lnNHJjQ2NON1hiRHMycUdmNC1kay13bUE0OTBPN0xmNDA1NlQxVmRySEJvM1VUN2Y2Sl9KX2pZVHRPWEdfR2RYNUoxY01Va3pXb2VBd3lZb3BSXzU5NVJfWlhEYXFSVDJrUnFHWG42RVZJUVQ2RlJWUEkyQnRnREI3eHNiRERiQ3FUczJsRTBDZ3pUUGZPcjExZUFKc21QUWxVYVBmV2hPZXRGd3lJX3ZTczhCVG1jWFVwanhIZHlyTTdiR2c5cTBVSXBRV1U4ZExtWWdub1pTSHU0cU5aYWJVWmExbXI0MjE3WUVnPT0= - -# Feature SyncDelta JIRA configuration -Feature_SyncDelta_JIRA_DELTA_TOKEN_SECRET = PROD_ENC:Z0FBQUFBQm8xSU5pTDhnTVNzRUhScU8wYnZsZk52bHFkSWxLc18xQmtCeC1HbnNwTzVBbXRNTmQzRjZYaGE2MVlCNGtnWDk1T2I5VXVKNHpKU1VRbXEyN2tRWUJnU2ltZE5qZ3lmNEF6Z1hMTTEwZkk2NUNBYjhmVTJEcWpRUW9HNEVpSGFWdjBWQXQ3eUtHUTFJS3U5QWpaeno0RFNhMUxnPT0= - -# Debug Configuration -APP_DEBUG_CHAT_WORKFLOW_ENABLED = FALSE -APP_DEBUG_CHAT_WORKFLOW_DIR = ./test-chat \ No newline at end of file diff --git a/modules/services/serviceAi/mainServiceAi.py b/modules/services/serviceAi/mainServiceAi.py index 8f2964e2..ed5e318a 100644 --- a/modules/services/serviceAi/mainServiceAi.py +++ b/modules/services/serviceAi/mainServiceAi.py @@ -15,16 +15,29 @@ from modules.datamodels.datamodelWeb import ( ) from modules.interfaces.interfaceAiObjects import AiObjects from modules.shared.configuration import APP_CONFIG +from modules.services.serviceAi.subCoreAi import SubCoreAi +from modules.services.serviceAi.subDocumentProcessing import SubDocumentProcessing +from modules.services.serviceAi.subWebResearch import SubWebResearch +from modules.services.serviceAi.subDocumentGeneration import SubDocumentGeneration +from modules.services.serviceAi.subUtilities import SubUtilities logger = logging.getLogger(__name__) - -# Model registry is now provided by interfaces via AiModels - - class AiService: - """Centralized AI service orchestrating documents, model selection, failover, and web operations. + """Lightweight AI service orchestrator that delegates to specialized sub-modules. + + Manager delegates to specialized sub-modules: + - SubCoreAi: Core AI operations (readImage, generateImage, callAi, planning, text calls) + - SubDocumentProcessing: Document chunking, processing, and merging logic + - SubWebResearch: Web research and crawling functionality + - SubDocumentGeneration: Single-file and multi-file document generation + - SubUtilities: Helper functions, text processing, and debugging utilities + + The main service acts as a coordinator: + 1. Manages lazy initialization of sub-modules + 2. Delegates operations to appropriate sub-modules + 3. 
Maintains the same public API for backward compatibility """ def __init__(self, serviceCenter=None) -> None: @@ -37,6 +50,11 @@ class AiService: # Only depend on interfaces self.aiObjects = None # Will be initialized in create() self._extractionService = None # Lazy initialization + self._coreAi = None # Lazy initialization + self._documentProcessor = None # Lazy initialization + self._webResearch = None # Lazy initialization + self._documentGenerator = None # Lazy initialization + self._utilities = None # Lazy initialization @property def extractionService(self): @@ -46,6 +64,46 @@ class AiService: self._extractionService = ExtractionService(self.services) return self._extractionService + @property + def coreAi(self): + """Lazy initialization of core AI service.""" + if self._coreAi is None: + logger.info("Lazy initializing SubCoreAi...") + self._coreAi = SubCoreAi(self.services, self.aiObjects) + return self._coreAi + + @property + def documentProcessor(self): + """Lazy initialization of document processing service.""" + if self._documentProcessor is None: + logger.info("Lazy initializing SubDocumentProcessing...") + self._documentProcessor = SubDocumentProcessing(self.services, self.aiObjects) + return self._documentProcessor + + @property + def webResearchService(self): + """Lazy initialization of web research service.""" + if self._webResearch is None: + logger.info("Lazy initializing SubWebResearch...") + self._webResearch = SubWebResearch(self.services, self.aiObjects) + return self._webResearch + + @property + def documentGenerator(self): + """Lazy initialization of document generation service.""" + if self._documentGenerator is None: + logger.info("Lazy initializing SubDocumentGeneration...") + self._documentGenerator = SubDocumentGeneration(self.services, self.aiObjects, self.documentProcessor) + return self._documentGenerator + + @property + def utilities(self): + """Lazy initialization of utilities service.""" + if self._utilities is None: + logger.info("Lazy initializing SubUtilities...") + self._utilities = SubUtilities(self.services) + return self._utilities + async def _ensureAiObjectsInitialized(self): """Ensure aiObjects is initialized.""" if self.aiObjects is None: @@ -72,45 +130,8 @@ class AiService: options: Optional[AiCallOptions] = None, ) -> str: """Call AI for image analysis using interface.callImage().""" - try: - # Check if imageData is valid - if not imageData: - error_msg = "No image data provided" - self.services.utils.debugLogToFile(f"Error in AI image analysis: {error_msg}", "AI_SERVICE") - logger.error(f"Error in AI image analysis: {error_msg}") - return f"Error: {error_msg}" - - self.services.utils.debugLogToFile(f"readImage called with prompt, imageData type: {type(imageData)}, length: {len(imageData) if imageData else 0}, mimeType: {mimeType}", "AI_SERVICE") - logger.info(f"readImage called with prompt, imageData type: {type(imageData)}, length: {len(imageData) if imageData else 0}, mimeType: {mimeType}") - - # Always use IMAGE_ANALYSIS operation type for image processing - if options is None: - options = AiCallOptions(operationType=OperationType.IMAGE_ANALYSIS) - else: - # Override the operation type to ensure image analysis - options.operationType = OperationType.IMAGE_ANALYSIS - - self.services.utils.debugLogToFile(f"Calling aiObjects.callImage with operationType: {options.operationType}", "AI_SERVICE") - logger.info(f"Calling aiObjects.callImage with operationType: {options.operationType}") - result = await self.aiObjects.callImage(prompt, imageData, 
mimeType, options) - - # Debug the result - self.services.utils.debugLogToFile(f"Raw AI result type: {type(result)}, value: {repr(result)}", "AI_SERVICE") - - # Check if result is valid - if not result or (isinstance(result, str) and not result.strip()): - error_msg = f"No response from AI image analysis (result: {repr(result)})" - self.services.utils.debugLogToFile(f"Error in AI image analysis: {error_msg}", "AI_SERVICE") - logger.error(f"Error in AI image analysis: {error_msg}") - return f"Error: {error_msg}" - - self.services.utils.debugLogToFile(f"callImage returned: {result[:200]}..." if len(result) > 200 else result, "AI_SERVICE") - logger.info(f"callImage returned: {result[:200]}..." if len(result) > 200 else result) - return result - except Exception as e: - self.services.utils.debugLogToFile(f"Error in AI image analysis: {str(e)}", "AI_SERVICE") - logger.error(f"Error in AI image analysis: {str(e)}") - return f"Error: {str(e)}" + await self._ensureAiObjectsInitialized() + return await self.coreAi.readImage(prompt, imageData, mimeType, options) # AI Image Generation async def generateImage( @@ -122,1220 +143,16 @@ class AiService: options: Optional[AiCallOptions] = None, ) -> Dict[str, Any]: """Generate an image using AI using interface.generateImage().""" - try: - return await self.aiObjects.generateImage(prompt, size, quality, style, options) - except Exception as e: - logger.error(f"Error in AI image generation: {str(e)}") - return {"success": False, "error": str(e)} + await self._ensureAiObjectsInitialized() + return await self.coreAi.generateImage(prompt, size, quality, style, options) - # Web Research - Using interface functions + # Web Research async def webResearch(self, request: WebResearchRequest) -> WebResearchActionResult: """Perform web research using interface functions.""" - try: - logger.info(f"WEB RESEARCH STARTED") - logger.info(f"User Query: {request.user_prompt}") - logger.info(f"Max Results: {request.max_results}, Max Pages: {request.options.max_pages}") - - # Global URL index to track all processed URLs across the entire research session - global_processed_urls = set() - - # Step 1: Find relevant websites - either provided URLs or AI-determined main URLs - logger.info(f"=== STEP 1: INITIAL MAIN URLS LIST ===") - - if request.urls: - # Use provided URLs as initial main URLs - websites = request.urls - logger.info(f"Using provided URLs ({len(websites)}):") - for i, url in enumerate(websites, 1): - logger.info(f" {i}. {url}") - else: - # Use AI to determine main URLs based on user's intention - logger.info(f"AI analyzing user intent: '{request.user_prompt}'") - - # Use AI to generate optimized Tavily search query and search parameters - query_optimizer_prompt = f"""You are a search query optimizer. - - USER QUERY: {request.user_prompt} - - Your task: Create a search query and parameters for the USER QUERY given. - - RULES: - 1. The search query MUST be related to the user query above - 2. Extract key terms from the user query - 3. Determine appropriate country/language based on the query context - 4. 
Keep search query short (2-6 words) - - Return ONLY this JSON format: - {{ - "user_prompt": "search query based on user query above", - "country": "Full English country name (ISO-3166; map codes via pycountry/i18n-iso-countries)", - "language": "language_code_or_null", - "topic": "general|news|academic_or_null", - "time_range": "d|w|m|y_or_null", - "selection_strategy": "single|multiple|specific_page", - "selection_criteria": "what URLs to prioritize", - "expected_url_patterns": ["pattern1", "pattern2"], - "estimated_result_count": number - }}""" - - # Get AI response for query optimization - ai_request = AiCallRequest( - prompt=query_optimizer_prompt, - options=AiCallOptions() - ) - ai_response_obj = await self.aiObjects.call(ai_request) - ai_response = ai_response_obj.content - logger.debug(f"AI query optimizer response: {ai_response}") - - # Parse AI response to extract search query - import json - try: - # Clean the response by removing markdown code blocks - cleaned_response = ai_response.strip() - if cleaned_response.startswith('```json'): - cleaned_response = cleaned_response[7:] # Remove ```json - if cleaned_response.endswith('```'): - cleaned_response = cleaned_response[:-3] # Remove ``` - cleaned_response = cleaned_response.strip() - - query_data = json.loads(cleaned_response) - search_query = query_data.get("user_prompt", request.user_prompt) - ai_country = query_data.get("country") - ai_language = query_data.get("language") - ai_topic = query_data.get("topic") - ai_time_range = query_data.get("time_range") - selection_strategy = query_data.get("selection_strategy", "multiple") - selection_criteria = query_data.get("selection_criteria", "relevant URLs") - expected_patterns = query_data.get("expected_url_patterns", []) - estimated_count = query_data.get("estimated_result_count", request.max_results) - - logger.info(f"AI optimized search query: '{search_query}'") - logger.info(f"Selection strategy: {selection_strategy}") - logger.info(f"Selection criteria: {selection_criteria}") - logger.info(f"Expected URL patterns: {expected_patterns}") - logger.info(f"Estimated result count: {estimated_count}") - - except json.JSONDecodeError: - logger.warning("Failed to parse AI response as JSON, using original query") - search_query = request.user_prompt - ai_country = None - ai_language = None - ai_topic = None - ai_time_range = None - selection_strategy = "multiple" - - # Perform the web search with AI-determined parameters - search_kwargs = { - "query": search_query, - "max_results": request.max_results, - "search_depth": request.options.search_depth, - "auto_parameters": False # Use explicit parameters - } - - # Add parameters only if they have valid values - def _normalizeCountry(c: Optional[str]) -> Optional[str]: - if not c: - return None - s = str(c).strip() - if not s or s.lower() in ['null', 'none', 'undefined']: - return None - # Map common codes to full English names when easy to do without extra deps - mapping = { - 'ch': 'Switzerland', 'che': 'Switzerland', - 'de': 'Germany', 'ger': 'Germany', 'deu': 'Germany', - 'at': 'Austria', 'aut': 'Austria', - 'us': 'United States', 'usa': 'United States', 'uni ted states': 'United States', - 'uk': 'United Kingdom', 'gb': 'United Kingdom', 'gbr': 'United Kingdom' - } - key = s.lower() - if key in mapping: - return mapping[key] - # If looks like full name, capitalize first letter only (Tavily accepts English names) - return s - - norm_ai_country = _normalizeCountry(ai_country) - norm_req_country = _normalizeCountry(request.options.country) 
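# The inline mapping in _normalizeCountry above covers only a handful of common
# codes; the optimizer prompt itself suggests pycountry / i18n-iso-countries for
# the general case. A minimal sketch assuming the pycountry dependency
# (normalizeCountryFull is an illustrative name, not part of this codebase):
import pycountry

def normalizeCountryFull(value):
    """Map 'ch', 'CHE' or 'Switzerland' to the full English name Tavily expects."""
    if not value or str(value).strip().lower() in ("null", "none", "undefined"):
        return None
    text = str(value).strip()
    try:
        # lookup() matches alpha-2/alpha-3 codes and official country names
        return pycountry.countries.lookup(text).name
    except LookupError:
        return text  # assume the caller already passed a full English name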
- if norm_ai_country: - search_kwargs["country"] = norm_ai_country - elif norm_req_country: - search_kwargs["country"] = norm_req_country - - if ai_language and ai_language not in ['null', '', 'none', 'undefined']: - search_kwargs["language"] = ai_language - elif request.options.language and request.options.language not in ['null', '', 'none', 'undefined']: - search_kwargs["language"] = request.options.language - - if ai_topic and ai_topic in ['general', 'news', 'academic']: - search_kwargs["topic"] = ai_topic - elif request.options.topic and request.options.topic in ['general', 'news', 'academic']: - search_kwargs["topic"] = request.options.topic - - if ai_time_range and ai_time_range in ['d', 'w', 'm', 'y']: - search_kwargs["time_range"] = ai_time_range - elif request.options.time_range and request.options.time_range in ['d', 'w', 'm', 'y']: - search_kwargs["time_range"] = request.options.time_range - - # Constrain by expected domains if provided by AI - try: - include_domains = [] - for p in expected_patterns or []: - if not isinstance(p, str): - continue - # Extract bare domain from pattern or URL - import re - m = re.search(r"(?:https?://)?([^/\s]+)", p.strip()) - if m: - domain = m.group(1).lower() - # strip leading www. - if domain.startswith('www.'): - domain = domain[4:] - include_domains.append(domain) - # Deduplicate - if include_domains: - seen = set() - uniq = [] - for d in include_domains: - if d not in seen: - seen.add(d) - uniq.append(d) - search_kwargs["include_domains"] = uniq - except Exception: - pass - - # Log the parameters being used - logger.info(f"Search parameters: country={search_kwargs.get('country', 'not_set')}, language={search_kwargs.get('language', 'not_set')}, topic={search_kwargs.get('topic', 'not_set')}, time_range={search_kwargs.get('time_range', 'not_set')}, include_domains={search_kwargs.get('include_domains', [])}") - - search_results = await self.aiObjects.search_websites(**search_kwargs) - - logger.debug(f"Web search returned {len(search_results)} results:") - for i, result in enumerate(search_results, 1): - logger.debug(f" {i}. {result.url} - {result.title}") - - # Deduplicate while preserving order - seen = set() - search_urls = [] - for r in search_results: - u = str(r.url) - if u not in seen: - seen.add(u) - search_urls.append(u) - - logger.info(f"After initial deduplication: {len(search_urls)} unique URLs from {len(search_results)} search results") - - if not search_urls: - logger.error("No relevant websites found") - return WebResearchActionResult(success=False, error="No relevant websites found") - - # Now use AI to determine the main URLs based on user's intention - logger.info(f"AI selecting main URLs from {len(search_urls)} search results based on user intent") - - # Create a prompt for AI to identify main URLs based on user's intention - ai_prompt = f""" - Select the most relevant URLs from these search results: - - {chr(10).join([f"{i+1}. {url}" for i, url in enumerate(search_urls)])} - - Return only the URLs that are most relevant for the user's query. - One URL per line. 
- """ - # Create AI call request - ai_request = AiCallRequest( - prompt=ai_prompt, - options=AiCallOptions() - ) - ai_response_obj = await self.aiObjects.call(ai_request) - ai_response = ai_response_obj.content - logger.debug(f"AI response for main URL selection: {ai_response}") - - # Parse AI response to extract URLs - websites = [] - for line in ai_response.strip().split('\n'): - line = line.strip() - if line and ('http://' in line or 'https://' in line): - # Extract URL from the line - for word in line.split(): - if word.startswith('http://') or word.startswith('https://'): - websites.append(word.rstrip('.,;')) - break - - if not websites: - logger.warning("AI did not identify any main URLs, using first few search results") - websites = search_urls[:3] # Fallback to first 3 search results - - # Deduplicate while preserving order - seen = set() - unique_websites = [] - for url in websites: - if url not in seen: - seen.add(url) - unique_websites.append(url) - - websites = unique_websites - logger.info(f"After AI selection deduplication: {len(websites)} unique URLs from {len(websites)} AI-selected URLs") - - logger.info(f"AI selected {len(websites)} main URLs (after deduplication):") - for i, url in enumerate(websites, 1): - logger.info(f" {i}. {url}") - - # Step 2: Smart website selection using AI interface - logger.info(f"=== STEP 2: FILTERED URL LIST BY USER PROMPT'S INTENTION ===") - logger.info(f"AI analyzing {len(websites)} URLs for relevance to: '{request.user_prompt}'") - - selectedWebsites, aiResponse = await self.aiObjects.selectRelevantWebsites(websites, request.user_prompt) - - logger.debug(f"AI Response: {aiResponse}") - logger.debug(f"AI selected {len(selectedWebsites)} most relevant URLs:") - for i, url in enumerate(selectedWebsites, 1): - logger.debug(f" {i}. {url}") - - # Show which were filtered out - filtered_out = [url for url in websites if url not in selectedWebsites] - if filtered_out: - logger.debug(f"Filtered out {len(filtered_out)} less relevant URLs:") - for i, url in enumerate(filtered_out, 1): - logger.debug(f" {i}. 
{url}") - - # Step 3+4+5: Recursive crawling with configurable depth - # Get configuration parameters - max_depth = int(APP_CONFIG.get("Web_Research_MAX_DEPTH", "2")) - max_links_per_domain = int(APP_CONFIG.get("Web_Research_MAX_LINKS_PER_DOMAIN", "4")) - crawl_timeout_minutes = int(APP_CONFIG.get("Web_Research_CRAWL_TIMEOUT_MINUTES", "10")) - crawl_timeout_seconds = crawl_timeout_minutes * 60 - - # Use the configured max_depth or the request's pages_search_depth, whichever is smaller - effective_depth = min(max_depth, request.options.pages_search_depth) - - logger.info(f"=== STEP 3+4+5: RECURSIVE CRAWLING (DEPTH {effective_depth}) ===") - logger.info(f"Starting recursive crawl of {len(selectedWebsites)} main websites...") - logger.info(f"Search depth: {effective_depth} levels (max configured: {max_depth})") - logger.info(f"Max links per domain: {max_links_per_domain}") - logger.info(f"Crawl timeout: {crawl_timeout_minutes} minutes") - - # Use recursive crawling with URL index to avoid duplicates - import asyncio - try: - allContent = await asyncio.wait_for( - self.aiObjects.crawlRecursively( - urls=selectedWebsites, - max_depth=effective_depth, - extract_depth=request.options.extract_depth, - max_per_domain=max_links_per_domain, - global_processed_urls=global_processed_urls - ), - timeout=crawl_timeout_seconds - ) - logger.info(f"Crawling completed within timeout: {len(allContent)} pages crawled") - except asyncio.TimeoutError: - logger.warning(f"Crawling timed out after {crawl_timeout_minutes} minutes, using partial results") - # crawlRecursively now handles timeouts gracefully and returns partial results - # Try to get the partial results that were collected - allContent = {} - - if not allContent: - logger.error("Could not extract content from any websites") - return WebResearchActionResult(success=False, error="Could not extract content from any websites") - - logger.info(f"=== WEB RESEARCH COMPLETED ===") - logger.info(f"Successfully crawled {len(allContent)} URLs total") - logger.info(f"Crawl depth: {effective_depth} levels") - - # Create simple result with raw content - sources = [WebSearchResultItem(title=url, url=url) for url in selectedWebsites] - - # Get all additional links (all URLs except main ones) - additional_links = [url for url in allContent.keys() if url not in selectedWebsites] - - # Combine all content into a single result - combinedContent = "" - for url, content in allContent.items(): - combinedContent += f"\n\n=== {url} ===\n{content}\n" - - documentData = WebResearchDocumentData( - user_prompt=request.user_prompt, - websites_analyzed=len(allContent), - additional_links_found=len(additional_links), - analysis_result=combinedContent, # Raw content, no analysis - sources=sources, - additional_links=additional_links, - individual_content=allContent, # Individual URL -> content mapping - debug_info={ - "crawl_depth": effective_depth, - "max_configured_depth": max_depth, - "max_links_per_domain": max_links_per_domain, - "crawl_timeout_minutes": crawl_timeout_minutes, - "total_urls_crawled": len(allContent), - "main_urls": len(selectedWebsites), - "additional_urls": len(additional_links) - } - ) - - document = WebResearchActionDocument( - documentName=f"web_research_{request.user_prompt[:50]}.json", - documentData=documentData, - mimeType="application/json" - ) - - return WebResearchActionResult( - success=True, - documents=[document], - resultLabel="web_research_results" - ) - - except Exception as e: - logger.error(f"Error in web research: {str(e)}") - return 
WebResearchActionResult(success=False, error=str(e)) - - def _calculateMaxContextBytes(self, options: Optional[AiCallOptions]) -> int: - """Calculate maximum context bytes based on model capabilities and options.""" - if options and options.maxContextBytes: - return options.maxContextBytes - - # Default model capabilities (this should be enhanced with actual model registry) - defaultMaxTokens = 4000 - safetyMargin = options.safetyMargin if options else 0.1 - - # Calculate bytes (4 chars per token estimation) - maxContextBytes = int(defaultMaxTokens * (1 - safetyMargin) * 4) - - return maxContextBytes - - async def _processDocumentsPerChunk( - self, - documents: List[ChatDocument], - prompt: str, - options: Optional[AiCallOptions] = None - ) -> str: - """ - Process documents with per-chunk AI calls and merge results. - FIXED: Now preserves chunk relationships and document structure. - - Args: - documents: List of ChatDocument objects to process - prompt: AI prompt for processing - options: AI call options - - Returns: - Merged AI results as string with preserved document structure - """ - if not documents: - return "" - - # Get model capabilities for size calculation - model_capabilities = self._getModelCapabilitiesForContent(prompt, documents, options) - - # Build extraction options for chunking with intelligent merging - extractionOptions: Dict[str, Any] = { - "prompt": prompt, - "operationType": options.operationType if options else "general", - "processDocumentsIndividually": True, # Process each document separately - "maxSize": model_capabilities["maxContextBytes"], - "chunkAllowed": True, - "textChunkSize": model_capabilities["textChunkSize"], - "imageChunkSize": model_capabilities["imageChunkSize"], - "imageMaxPixels": 1024 * 1024, - "imageQuality": 85, - "mergeStrategy": { - "useIntelligentMerging": True, # Enable intelligent token-aware merging - "modelCapabilities": model_capabilities, - "prompt": prompt, - "groupBy": "typeGroup", - "orderBy": "id", - "mergeType": "concatenate" - }, - } - - logger.debug(f"Per-chunk extraction options: prompt length={len(extractionOptions.get('prompt', ''))} chars, operationType={extractionOptions.get('operationType')}") - - try: - # Extract content with chunking - extractionResult = self.extractionService.extractContent(documents, extractionOptions) - - if not isinstance(extractionResult, list): - return "[Error: No extraction results]" - - # FIXED: Process chunks with proper mapping - chunkResults = await self._processChunksWithMapping(extractionResult, prompt, options) - - # FIXED: Merge with preserved chunk relationships - mergedContent = self._mergeChunkResults(chunkResults, options) - - return mergedContent - - except Exception as e: - logger.error(f"Error in per-chunk processing: {str(e)}") - return f"[Error in per-chunk processing: {str(e)}]" - - async def _processDocumentsPerChunkJson( - self, - documents: List[ChatDocument], - prompt: str, - options: Optional[AiCallOptions] = None - ) -> Dict[str, Any]: - """ - Process documents with per-chunk AI calls and merge results in JSON mode. - Returns structured JSON document instead of text. 
- """ - if not documents: - return {"metadata": {"title": "Empty Document"}, "sections": []} - - # Get model capabilities for size calculation - model_capabilities = self._getModelCapabilitiesForContent(prompt, documents, options) - - # Build extraction options for chunking with intelligent merging - extractionOptions: Dict[str, Any] = { - "prompt": prompt, - "operationType": options.operationType if options else "general", - "processDocumentsIndividually": True, # Process each document separately - "maxSize": model_capabilities["maxContextBytes"], - "chunkAllowed": True, - "textChunkSize": model_capabilities["textChunkSize"], - "imageChunkSize": model_capabilities["imageChunkSize"], - "imageMaxPixels": 1024 * 1024, - "imageQuality": 85, - "mergeStrategy": { - "useIntelligentMerging": True, # Enable intelligent token-aware merging - "modelCapabilities": model_capabilities, - "prompt": prompt, - "groupBy": "typeGroup", - "orderBy": "id", - "mergeType": "concatenate" - }, - } - - logger.debug(f"Per-chunk extraction options (JSON mode): prompt length={len(extractionOptions.get('prompt', ''))} chars, operationType={extractionOptions.get('operationType')}") - - try: - # Extract content with chunking - extractionResult = self.extractionService.extractContent(documents, extractionOptions) - - if not isinstance(extractionResult, list): - return {"metadata": {"title": "Error Document"}, "sections": []} - - # Process chunks with proper mapping - chunkResults = await self._processChunksWithMapping(extractionResult, prompt, options, generate_json=True) - - # Merge with JSON mode - mergedJsonDocument = self._mergeChunkResultsJson(chunkResults, options) - - return mergedJsonDocument - - except Exception as e: - logger.error(f"Error in per-chunk processing (JSON mode): {str(e)}") - return {"metadata": {"title": "Error Document"}, "sections": []} - - async def _processChunksWithMapping( - self, - extractionResult: List[ContentExtracted], - prompt: str, - options: Optional[AiCallOptions] = None, - generate_json: bool = False - ) -> List[ChunkResult]: - """Process chunks with proper mapping to preserve relationships.""" - from modules.datamodels.datamodelExtraction import ChunkResult - import asyncio - import time - - # Collect all chunks that need processing with proper indexing - chunks_to_process = [] - chunk_index = 0 - - for ec in extractionResult: - # Get document MIME type from metadata - document_mime_type = None - for part in ec.parts: - if part.metadata and 'documentMimeType' in part.metadata: - document_mime_type = part.metadata['documentMimeType'] - break - - for part in ec.parts: - if part.typeGroup in ("text", "table", "structure", "image", "container", "binary"): - # Skip empty container chunks (they're just metadata containers) - if part.typeGroup == "container" and (not part.data or len(part.data.strip()) == 0): - logger.debug(f"Skipping empty container chunk: mimeType={part.mimeType}, data_length={len(part.data) if part.data else 0}") - continue - - chunks_to_process.append({ - 'part': part, - 'chunk_index': chunk_index, - 'document_id': ec.id, - 'document_mime_type': document_mime_type - }) - chunk_index += 1 - - logger.info(f"Processing {len(chunks_to_process)} chunks with proper mapping") - - # Process chunks in parallel with proper mapping - async def process_single_chunk(chunk_info: Dict) -> ChunkResult: - part = chunk_info['part'] - chunk_index = chunk_info['chunk_index'] - document_id = chunk_info['document_id'] - document_mime_type = chunk_info.get('document_mime_type', 
part.mimeType) - - start_time = time.time() - - try: - # FIXED: Check MIME type first, then fallback to typeGroup - is_image = ( - (document_mime_type and document_mime_type.startswith('image/')) or - (part.mimeType and part.mimeType.startswith('image/')) or - (part.typeGroup == "image") - ) - - # Debug logging - self.services.utils.debugLogToFile(f"Chunk {chunk_index}: document_mime_type={document_mime_type}, part.mimeType={part.mimeType}, part.typeGroup={part.typeGroup}, is_image={is_image}", "AI_SERVICE") - logger.info(f"Chunk {chunk_index}: document_mime_type={document_mime_type}, part.mimeType={part.mimeType}, part.typeGroup={part.typeGroup}, is_image={is_image}") - - if is_image: - # Use the same extraction prompt for image analysis (contains table JSON format) - self.services.utils.debugLogToFile(f"Processing image chunk {chunk_index}: mimeType={part.mimeType}, data_length={len(part.data) if part.data else 0}", "AI_SERVICE") - - # Check if image data is available - if not part.data: - error_msg = f"No image data available for chunk {chunk_index}" - logger.warning(error_msg) - ai_result = f"Error: {error_msg}" - else: - try: - ai_result = await self.readImage( - prompt=prompt, - imageData=part.data, - mimeType=part.mimeType, - options=options - ) - - self.services.utils.debugLogToFile(f"Image analysis result for chunk {chunk_index}: length={len(ai_result) if ai_result else 0}, preview={ai_result[:200] if ai_result else 'None'}...", "AI_SERVICE") - - # Check if result is empty or None - if not ai_result or not ai_result.strip(): - logger.warning(f"Image chunk {chunk_index} returned empty response from AI") - ai_result = "No content detected in image" - - except Exception as e: - logger.error(f"Error processing image chunk {chunk_index}: {str(e)}") - ai_result = f"Error analyzing image: {str(e)}" - - # If generating JSON, clean image analysis result - if generate_json: - try: - import json - import re - - # Clean the response - remove markdown code blocks if present - cleaned_result = ai_result.strip() - - # Remove various markdown patterns - if cleaned_result.startswith('```json'): - cleaned_result = re.sub(r'^```json\s*', '', cleaned_result) - cleaned_result = re.sub(r'\s*```$', '', cleaned_result) - elif cleaned_result.startswith('```'): - cleaned_result = re.sub(r'^```\s*', '', cleaned_result) - cleaned_result = re.sub(r'\s*```$', '', cleaned_result) - - # Remove any leading/trailing text that's not JSON - # Look for the first { and last } to extract JSON - first_brace = cleaned_result.find('{') - last_brace = cleaned_result.rfind('}') - - if first_brace != -1 and last_brace != -1 and last_brace > first_brace: - cleaned_result = cleaned_result[first_brace:last_brace + 1] - - # Additional cleaning for common AI response issues - cleaned_result = cleaned_result.strip() - - # Validate JSON - json.loads(cleaned_result) - ai_result = cleaned_result # Use cleaned version - self.services.utils.debugLogToFile(f"Image chunk {chunk_index} JSON validation successful", "AI_SERVICE") - - except json.JSONDecodeError as e: - logger.warning(f"Image chunk {chunk_index} returned invalid JSON: {str(e)}") - logger.warning(f"Raw response was: '{ai_result[:500]}...'") - - # Create fallback JSON with the actual response content (not the error message) - # Use the original AI response content, not the error message - fallback_content = ai_result if ai_result and ai_result.strip() else "No content detected" - - self.services.utils.debugLogToFile(f"IMAGE FALLBACK CONTENT PREVIEW: 
'{fallback_content[:200]}...'", "AI_SERVICE") - - ai_result = json.dumps({ - "metadata": {"title": f"Image Analysis - Chunk {chunk_index}"}, - "sections": [{ - "id": f"image_section_{chunk_index}", - "type": "paragraph", - "data": {"text": fallback_content} - }] - }) - self.services.utils.debugLogToFile(f"Created fallback JSON for image chunk {chunk_index} with actual content", "AI_SERVICE") - elif part.typeGroup in ("container", "binary"): - # Handle ALL container and binary content generically - let AI process any document type - self.services.utils.debugLogToFile(f"DEBUG: Chunk {chunk_index}: typeGroup={part.typeGroup}, mimeType={part.mimeType}, data_length={len(part.data) if part.data else 0}", "AI_SERVICE") - - # Skip empty container chunks (they're just metadata containers) - if part.typeGroup == "container" and (not part.data or len(part.data.strip()) == 0): - self.services.utils.debugLogToFile(f"DEBUG: Skipping empty container - mimeType={part.mimeType}, data_length={len(part.data) if part.data else 0}", "AI_SERVICE") - logger.info(f"Chunk {chunk_index}: Skipping empty container - mimeType={part.mimeType}, data_length={len(part.data) if part.data else 0}") - # Skip processing this chunk - pass - elif part.mimeType and part.data and len(part.data.strip()) > 0: - # Process any document container as text content - request_options = options if options is not None else AiCallOptions() - request_options.operationType = OperationType.GENERAL - self.services.utils.debugLogToFile(f"EXTRACTION CONTAINER CHUNK {chunk_index}: Processing {part.mimeType} container as text with generate_json={generate_json}", "AI_SERVICE") - logger.info(f"Chunk {chunk_index}: Processing {part.mimeType} container as text with generate_json={generate_json}") - - # Log extraction prompt and context - self.services.utils.debugLogToFile(f"EXTRACTION PROMPT: {prompt}", "AI_SERVICE") - self.services.utils.debugLogToFile(f"EXTRACTION CONTEXT LENGTH: {len(part.data) if part.data else 0} characters", "AI_SERVICE") - - request = AiCallRequest( - prompt=prompt, - context=part.data, - options=request_options - ) - response = await self.aiObjects.call(request) - ai_result = response.content - - # Log extraction response - self.services.utils.debugLogToFile(f"EXTRACTION RESPONSE LENGTH: {len(ai_result) if ai_result else 0} characters", "AI_SERVICE") - - # Save full extraction prompt and response to debug file - only if debug enabled - debug_enabled = self.services.utils.configGet("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False) - if debug_enabled: - try: - import os - from datetime import datetime, UTC - ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S") - debug_root = "./test-chat/ai" - os.makedirs(debug_root, exist_ok=True) - with open(os.path.join(debug_root, f"{ts}_extraction_container_chunk_{chunk_index}.txt"), "w", encoding="utf-8") as f: - f.write(f"EXTRACTION PROMPT:\n{prompt}\n\n") - f.write(f"EXTRACTION CONTEXT:\n{part.data if part.data else 'No context'}\n\n") - f.write(f"EXTRACTION RESPONSE:\n{ai_result if ai_result else 'No response'}\n") - except Exception: - pass - - # If generating JSON, validate the response - if generate_json: - try: - import json - import re - - # Clean the response - remove markdown code blocks if present - cleaned_result = ai_result.strip() - - # Remove various markdown patterns - if cleaned_result.startswith('```json'): - cleaned_result = re.sub(r'^```json\s*', '', cleaned_result) - cleaned_result = re.sub(r'\s*```$', '', cleaned_result) - elif cleaned_result.startswith('```'): - cleaned_result = 
re.sub(r'^```\s*', '', cleaned_result) - cleaned_result = re.sub(r'\s*```$', '', cleaned_result) - - # Remove any leading/trailing text that's not JSON - # Look for the first { and last } to extract JSON - first_brace = cleaned_result.find('{') - last_brace = cleaned_result.rfind('}') - - if first_brace != -1 and last_brace != -1 and last_brace > first_brace: - cleaned_result = cleaned_result[first_brace:last_brace + 1] - - # Additional cleaning for common AI response issues - cleaned_result = cleaned_result.strip() - - # Validate JSON - json.loads(cleaned_result) - ai_result = cleaned_result # Use cleaned version - - except json.JSONDecodeError as e: - logger.warning(f"Container chunk {chunk_index} ({part.mimeType}) returned invalid JSON: {str(e)}") - logger.warning(f"Raw response was: '{ai_result[:500]}...'") - - # Create fallback JSON with the actual response content (not the error message) - # Use the original AI response content, not the error message - fallback_content = ai_result if ai_result and ai_result.strip() else "No content detected" - - self.services.utils.debugLogToFile(f"FALLBACK CONTENT PREVIEW: '{fallback_content[:200]}...'", "AI_SERVICE") - - ai_result = json.dumps({ - "metadata": {"title": f"Document Analysis - Chunk {chunk_index}"}, - "sections": [{ - "id": f"analysis_section_{chunk_index}", - "type": "paragraph", - "data": {"text": fallback_content} - }] - }) - self.services.utils.debugLogToFile(f"Created fallback JSON for container chunk {chunk_index} with actual content", "AI_SERVICE") - else: - # Skip empty or invalid container/binary content - don't create a result - self.services.utils.debugLogToFile(f"DEBUG: Chunk {chunk_index}: Skipping empty container - mimeType={part.mimeType}, data_length={len(part.data) if part.data else 0}", "AI_SERVICE") - # Return None to indicate this chunk should be completely skipped - return None - else: - # Ensure options is not None and set correct operation type for text - request_options = options if options is not None else AiCallOptions() - # FIXED: Set operation type to general for text processing - request_options.operationType = OperationType.GENERAL - self.services.utils.debugLogToFile(f"EXTRACTION CHUNK {chunk_index}: Calling aiObjects.call with operationType={request_options.operationType}, generate_json={generate_json}", "AI_SERVICE") - logger.info(f"Chunk {chunk_index}: Calling aiObjects.call with operationType={request_options.operationType}, generate_json={generate_json}") - - # Log extraction context length - self.services.utils.debugLogToFile(f"EXTRACTION CONTEXT LENGTH: {len(part.data) if part.data else 0} characters", "AI_SERVICE") - - # Debug: Log the actual prompt being sent to AI - logger.debug(f"AI PROMPT PREVIEW: {prompt[:300]}...") - logger.debug(f"AI CONTEXT PREVIEW: {part.data[:200] if part.data else 'None'}...") - - request = AiCallRequest( - prompt=prompt, - context=part.data, - options=request_options - ) - response = await self.aiObjects.call(request) - - # Debug: Log what AI actually returned - logger.debug(f"AI RESPONSE PREVIEW: {response.content[:300] if response.content else 'None'}...") - ai_result = response.content - - # Log extraction response length - self.services.utils.debugLogToFile(f"EXTRACTION RESPONSE LENGTH: {len(ai_result) if ai_result else 0} characters", "AI_SERVICE") - - # Save extraction response to debug file (without verbose prompt) - only if debug enabled - debug_enabled = self.services.utils.configGet("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False) - if debug_enabled: - try: - 
import os - from datetime import datetime, UTC - ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S") - debug_root = "./test-chat/ai" - os.makedirs(debug_root, exist_ok=True) - with open(os.path.join(debug_root, f"{ts}_extraction_chunk_{chunk_index}.txt"), "w", encoding="utf-8") as f: - f.write(f"EXTRACTION RESPONSE:\n{ai_result if ai_result else 'No response'}\n") - except Exception: - pass - - # If generating JSON, validate the response - if generate_json: - try: - import json - import re - - # Clean the response - remove markdown code blocks and extra formatting - cleaned_result = ai_result.strip() - - # Remove any markdown code block markers (```json, ```, etc.) - cleaned_result = re.sub(r'^```(?:json)?\s*', '', cleaned_result, flags=re.MULTILINE) - cleaned_result = re.sub(r'\s*```\s*$', '', cleaned_result, flags=re.MULTILINE) - - # Remove any remaining ``` markers anywhere in the text - cleaned_result = re.sub(r'```', '', cleaned_result) - - # Try to extract JSON from the response if it's embedded in other text - json_match = re.search(r'\{.*\}', cleaned_result, re.DOTALL) - if json_match: - cleaned_result = json_match.group(0) - - # Validate JSON - json.loads(cleaned_result) - ai_result = cleaned_result # Use cleaned version - - except json.JSONDecodeError as e: - logger.warning(f"Chunk {chunk_index} returned invalid JSON: {str(e)}") - # Create fallback JSON - ai_result = json.dumps({ - "metadata": {"title": "Error Section"}, - "sections": [{ - "id": f"error_section_{chunk_index}", - "type": "paragraph", - "data": {"text": f"Error parsing JSON: {str(e)}"} - }] - }) - - processing_time = time.time() - start_time - - logger.info(f"Chunk {chunk_index} processed: {len(ai_result)} chars in {processing_time:.2f}s") - - return ChunkResult( - originalChunk=part, - aiResult=ai_result, - chunkIndex=chunk_index, - documentId=document_id, - processingTime=processing_time, - metadata={ - "success": True, - "chunkSize": len(part.data) if part.data else 0, - "resultSize": len(ai_result), - "typeGroup": part.typeGroup - } - ) - - except Exception as e: - processing_time = time.time() - start_time - logger.warning(f"Error processing chunk {chunk_index}: {str(e)}") - - return ChunkResult( - originalChunk=part, - aiResult=f"[Error processing chunk: {str(e)}]", - chunkIndex=chunk_index, - documentId=document_id, - processingTime=processing_time, - metadata={ - "success": False, - "error": str(e), - "chunkSize": len(part.data) if part.data else 0, - "typeGroup": part.typeGroup - } - ) - - # Process chunks with concurrency control - max_concurrent = 5 # Default concurrency - if options and hasattr(options, 'maxConcurrentChunks'): - max_concurrent = options.maxConcurrentChunks - elif options and hasattr(options, 'maxParallelChunks'): - max_concurrent = options.maxParallelChunks - - logger.info(f"Processing {len(chunks_to_process)} chunks with max concurrency: {max_concurrent}") - self.services.utils.debugLogToFile(f"DEBUG: Chunks to process: {len(chunks_to_process)}", "AI_SERVICE") - for i, chunk_info in enumerate(chunks_to_process): - self.services.utils.debugLogToFile(f"DEBUG: Chunk {i}: typeGroup={chunk_info['part'].typeGroup}, mimeType={chunk_info['part'].mimeType}, data_length={len(chunk_info['part'].data) if chunk_info['part'].data else 0}", "AI_SERVICE") - - # Create semaphore for concurrency control - semaphore = asyncio.Semaphore(max_concurrent) - - async def process_with_semaphore(chunk_info): - async with semaphore: - return await process_single_chunk(chunk_info) - - # Process all chunks in parallel 
with concurrency control - tasks = [process_with_semaphore(chunk_info) for chunk_info in chunks_to_process] - self.services.utils.debugLogToFile(f"DEBUG: Created {len(tasks)} tasks for parallel processing", "AI_SERVICE") - chunk_results = await asyncio.gather(*tasks, return_exceptions=True) - self.services.utils.debugLogToFile(f"DEBUG: Got {len(chunk_results)} results from parallel processing", "AI_SERVICE") - - # Handle any exceptions in the gather itself - processed_results = [] - for i, result in enumerate(chunk_results): - if isinstance(result, Exception): - # Create error ChunkResult - chunk_info = chunks_to_process[i] - processed_results.append(ChunkResult( - originalChunk=chunk_info['part'], - aiResult=f"[Error in parallel processing: {str(result)}]", - chunkIndex=chunk_info['chunk_index'], - documentId=chunk_info['document_id'], - processingTime=0.0, - metadata={"success": False, "error": str(result)} - )) - elif result is not None: - # Only add non-None results (skip empty containers) - processed_results.append(result) - - logger.info(f"Completed processing {len(processed_results)} chunks") - return processed_results - - def _mergeChunkResults( - self, - chunkResults: List[ChunkResult], - options: Optional[AiCallOptions] = None - ) -> str: - """Merge chunk results while preserving document structure and chunk order.""" - - if not chunkResults: - return "" - - # Get merging configuration from options - chunk_separator = "\n\n---\n\n" - include_document_headers = True - include_chunk_metadata = False - - if options: - if hasattr(options, 'chunkSeparator'): - chunk_separator = options.chunkSeparator - elif hasattr(options, 'mergeStrategy') and options.mergeStrategy: - chunk_separator = options.mergeStrategy.get("chunkSeparator", "\n\n---\n\n") - - # Check for enhanced options - if hasattr(options, 'preserveChunkMetadata'): - include_chunk_metadata = options.preserveChunkMetadata - - # Group chunk results by document - results_by_document = {} - for chunk_result in chunkResults: - doc_id = chunk_result.documentId - if doc_id not in results_by_document: - results_by_document[doc_id] = [] - results_by_document[doc_id].append(chunk_result) - - # Sort chunks within each document by chunk index - for doc_id in results_by_document: - results_by_document[doc_id].sort(key=lambda x: x.chunkIndex) - - # Merge results for each document - merged_documents = [] - - for doc_id, doc_chunks in results_by_document.items(): - # Build document header if enabled - doc_header = "" - if include_document_headers: - doc_header = f"\n\n=== DOCUMENT: {doc_id} ===\n\n" - - # Merge chunks for this document - doc_content = "" - for i, chunk_result in enumerate(doc_chunks): - # Add chunk separator (except for first chunk) - if i > 0: - doc_content += chunk_separator - - # Add chunk content with optional metadata - chunk_metadata = chunk_result.metadata - if chunk_metadata.get("success", False): - chunk_content = chunk_result.aiResult - - # Add chunk metadata if enabled - if include_chunk_metadata: - chunk_info = f"[Chunk {chunk_result.chunkIndex} - {chunk_metadata.get('typeGroup', 'unknown')} - {chunk_metadata.get('chunkSize', 0)} chars]" - chunk_content = f"{chunk_info}\n{chunk_content}" - - doc_content += chunk_content - else: - # Handle error chunks - error_msg = f"[ERROR in chunk {chunk_result.chunkIndex}: {chunk_metadata.get('error', 'Unknown error')}]" - doc_content += error_msg - - merged_documents.append(doc_header + doc_content) - - # Join all documents - final_result = "\n\n".join(merged_documents) - - 
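# The semaphore-plus-gather pattern used above is the core of the parallel
# chunk processing. A self-contained sketch of the same technique
# (gather_bounded and worker are illustrative names, not from this codebase):
import asyncio

async def gather_bounded(items, worker, max_concurrent=5):
    """Run worker(item) for each item with at most max_concurrent in flight."""
    semaphore = asyncio.Semaphore(max_concurrent)

    async def guarded(item):
        async with semaphore:
            return await worker(item)

    # return_exceptions=True means one failed chunk cannot abort the whole batch;
    # callers must check each returned value for Exception instances afterwards.
    return await asyncio.gather(*(guarded(item) for item in items), return_exceptions=True)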
logger.info(f"Merged {len(chunkResults)} chunks from {len(results_by_document)} documents") - return final_result.strip() - - def _mergeChunkResultsClean( - self, - chunkResults: List[ChunkResult], - options: Optional[AiCallOptions] = None - ) -> str: - """Merge chunk results in CLEAN mode - no debug metadata or document headers.""" - - if not chunkResults: - return "" - - # Get merging configuration from options - chunk_separator = "\n\n" - include_document_headers = False # CLEAN MODE: No document headers - include_chunk_metadata = False # CLEAN MODE: No chunk metadata - - if options: - if hasattr(options, 'chunkSeparator'): - chunk_separator = options.chunkSeparator - elif hasattr(options, 'mergeStrategy') and options.mergeStrategy: - chunk_separator = options.mergeStrategy.get("chunkSeparator", "\n\n") - - # Group chunk results by document - results_by_document = {} - for chunk_result in chunkResults: - doc_id = chunk_result.documentId - if doc_id not in results_by_document: - results_by_document[doc_id] = [] - results_by_document[doc_id].append(chunk_result) - - # Sort chunks within each document by chunk index - for doc_id in results_by_document: - results_by_document[doc_id].sort(key=lambda x: x.chunkIndex) - - # Merge results for each document in CLEAN mode - merged_documents = [] - - for doc_id, doc_chunks in results_by_document.items(): - # CLEAN MODE: No document headers - doc_header = "" - - # Merge chunks for this document - doc_content = "" - for i, chunk_result in enumerate(doc_chunks): - # Add chunk separator (except for first chunk) - if i > 0: - doc_content += chunk_separator - - # Add chunk content without metadata - chunk_metadata = chunk_result.metadata - if chunk_metadata.get("success", False): - chunk_content = chunk_result.aiResult - - # CLEAN MODE: Skip container/binary chunks entirely - if chunk_content.startswith("[Skipped ") and "content:" in chunk_content: - continue # Skip container/binary chunks in clean mode - - # CLEAN MODE: Skip empty or whitespace-only chunks - if not chunk_content.strip(): - continue # Skip empty chunks in clean mode - - # CLEAN MODE: No chunk metadata - doc_content += chunk_content - else: - # Handle error chunks silently in clean mode - continue - - merged_documents.append(doc_header + doc_content) - - # Join all documents - final_result = "\n\n".join(merged_documents) - - def _mergeChunkResultsJson( - self, - chunkResults: List[ChunkResult], - options: Optional[AiCallOptions] = None - ) -> Dict[str, Any]: - """Merge chunk results in JSON mode - returns structured JSON document.""" - import json - - if not chunkResults: - return {"metadata": {"title": "Empty Document"}, "sections": []} - - # Group chunk results by document - results_by_document = {} - for chunk_result in chunkResults: - doc_id = chunk_result.documentId - if doc_id not in results_by_document: - results_by_document[doc_id] = [] - results_by_document[doc_id].append(chunk_result) - - # Sort chunks within each document by chunk index - for doc_id in results_by_document: - results_by_document[doc_id].sort(key=lambda x: x.chunkIndex) - - # Merge JSON results for each document - all_documents = [] - all_sections = [] - document_titles = [] - combined_metadata = {"title": "Merged Document", "splitStrategy": "by_section"} - - for doc_id, doc_chunks in results_by_document.items(): - # Process each chunk's JSON result - for chunk_result in doc_chunks: - chunk_metadata = chunk_result.metadata - if chunk_metadata.get("success", False): - try: - # Parse JSON from AI result - 
chunk_json = json.loads(chunk_result.aiResult) - - # Check if this is a multi-file response (has "documents" key) - if isinstance(chunk_json, dict) and "documents" in chunk_json: - # This is a multi-file response - merge all documents - logger.debug(f"Processing multi-file response from chunk {chunk_result.chunkIndex} with {len(chunk_json['documents'])} documents") - - # Add all documents from this chunk - for doc in chunk_json["documents"]: - # Add chunk context to document - doc["metadata"] = doc.get("metadata", {}) - doc["metadata"]["source_chunk"] = chunk_result.chunkIndex - doc["metadata"]["source_document"] = doc_id - all_documents.append(doc) - - # Update combined metadata - if "metadata" in chunk_json: - combined_metadata.update(chunk_json["metadata"]) - - # Extract sections from single-file response (fallback) - elif isinstance(chunk_json, dict) and "sections" in chunk_json: - for section in chunk_json["sections"]: - # Add document context to section - section["metadata"] = section.get("metadata", {}) - section["metadata"]["source_document"] = doc_id - section["metadata"]["chunk_index"] = chunk_result.chunkIndex - all_sections.append(section) - - # Extract document title - if isinstance(chunk_json, dict) and "metadata" in chunk_json: - title = chunk_json["metadata"].get("title", "") - if title and title not in document_titles: - document_titles.append(title) - - except json.JSONDecodeError as e: - logger.warning(f"Failed to parse JSON from chunk {chunk_result.chunkIndex}: {str(e)}") - # Create a fallback section for invalid JSON - fallback_section = { - "id": f"error_section_{chunk_result.chunkIndex}", - "title": "Error Section", - "content_type": "paragraph", - "elements": [{ - "text": f"Error parsing chunk {chunk_result.chunkIndex}: {str(e)}" - }], - "order": chunk_result.chunkIndex, - "metadata": { - "source_document": doc_id, - "chunk_index": chunk_result.chunkIndex, - "error": str(e) - } - } - all_sections.append(fallback_section) - else: - # Handle error chunks - error_section = { - "id": f"error_section_{chunk_result.chunkIndex}", - "title": "Error Section", - "content_type": "paragraph", - "elements": [{ - "text": f"Error in chunk {chunk_result.chunkIndex}: {chunk_metadata.get('error', 'Unknown error')}" - }], - "order": chunk_result.chunkIndex, - "metadata": { - "source_document": doc_id, - "chunk_index": chunk_result.chunkIndex, - "error": chunk_metadata.get('error', 'Unknown error') - } - } - all_sections.append(error_section) - - # Sort sections by order - all_sections.sort(key=lambda x: x.get("order", 0)) - - # If we have merged documents from multi-file responses, return them - if all_documents: - logger.info(f"Merged {len(all_documents)} documents from {len(chunkResults)} chunks") - return { - "metadata": combined_metadata, - "documents": all_documents - } - - # Otherwise, create merged document with sections (single-file fallback) - merged_document = { - "metadata": { - "title": document_titles[0] if document_titles else "Merged Document", - "source_documents": list(results_by_document.keys()), - "extraction_method": "ai_json_extraction", - "version": "1.0" - }, - "sections": all_sections, - "summary": f"Merged document from {len(results_by_document)} source documents", - "tags": ["merged", "ai_generated"] - } - - logger.info(f"Merged {len(chunkResults)} chunks from {len(results_by_document)} documents (JSON mode)") - return merged_document - - async def _compressContent(self, content: str, targetSize: int, contentType: str) -> str: - if 
len(content.encode("utf-8")) <= targetSize: - return content - - try: - compressionPrompt = f""" - Komprimiere den folgenden {contentType} auf maximal {targetSize} Zeichen, - behalte aber alle wichtigen Informationen bei: - - {content} - - Gib nur den komprimierten Inhalt zurück, ohne zusätzliche Erklärungen. - """ - - # Service must not call connectors directly; use simple truncation fallback here - data = content.encode("utf-8") - return data[:targetSize].decode("utf-8", errors="ignore") + "... [truncated]" - except Exception as e: - logger.warning(f"AI compression failed, using truncation: {str(e)}") - return content[:targetSize] + "... [truncated]" - - # ===== DYNAMIC GENERIC AI CALLS IMPLEMENTATION ===== + await self._ensureAiObjectsInitialized() + return await self.webResearchService.webResearch(request) + # Master AI Call (process user prompt with optional unlimited count of input documents delivering one or many output documents, no size limitations) async def callAi( self, prompt: str, @@ -1362,1058 +179,13 @@ class AiService: Raises: Exception: If all available models fail """ - # Ensure aiObjects is initialized await self._ensureAiObjectsInitialized() - if options is None: - options = AiCallOptions() + # Get document processor and generator + documentProcessor = self.documentProcessor + documentGenerator = self.documentGenerator - # Normalize placeholders from List[PromptPlaceholder] - placeholders_dict: Dict[str, str] = {} - placeholders_meta: Dict[str, bool] = {} - if placeholders: - placeholders_dict = {p.label: p.content for p in placeholders} - placeholders_meta = {p.label: bool(getattr(p, 'summaryAllowed', False)) for p in placeholders} - - # Auto-determine call type based on documents and operation type - call_type = self._determineCallType(documents, options.operationType) - options.callType = call_type - - try: - # Build the full prompt that will be sent to AI - if placeholders: - full_prompt = prompt - for p in placeholders: - placeholder = f"{{{{KEY:{p.label}}}}}" - full_prompt = full_prompt.replace(placeholder, p.content) - else: - full_prompt = prompt - - self._writeAiResponseDebug( - label='ai_prompt_debug', - content=full_prompt, - partIndex=1, - modelName=None, - continuation=False - ) - except Exception: - pass - - # Handle document generation with specific output format - if outputFormat: - result = await self._callAiWithDocumentGeneration(prompt, documents, options, outputFormat, title) - # Log AI response for debugging - try: - if isinstance(result, dict) and 'content' in result: - self._writeAiResponseDebug( - label='ai_document_generation', - content=result['content'], - partIndex=1, - modelName=None, # Document generation doesn't return model info - continuation=False - ) - except Exception: - pass - return result - - if call_type == "planning": - result = await self._callAiPlanning(prompt, placeholders_dict, placeholders_meta, options) - # Log AI response for debugging - try: - self._writeAiResponseDebug( - label='ai_planning', - content=result or "", - partIndex=1, - modelName=None, # Planning doesn't return model info - continuation=False - ) - except Exception: - pass - return result - else: - # Set processDocumentsIndividually from the legacy parameter if not set in options - if options.processDocumentsIndividually is None and documents: - options.processDocumentsIndividually = False # Default to batch processing - - # For text calls, we need to build the full prompt with placeholders here - # since _callAiText doesn't handle placeholders directly - 
if placeholders_dict: - full_prompt = self._buildPromptWithPlaceholders(prompt, placeholders_dict) - else: - full_prompt = prompt - - result = await self._callAiText(full_prompt, documents, options) - # Log AI response for debugging (additional logging for text calls) - try: - self._writeAiResponseDebug( - label='ai_text_main', - content=result or "", - partIndex=1, - modelName=None, # Text calls already log internally - continuation=False - ) - except Exception: - pass - return result - - def _determineCallType(self, documents: Optional[List[ChatDocument]], operation_type: str) -> str: - """ - Determine call type based on documents and operation type. - - Criteria: no documents AND operationType is "generate_plan" -> planning - All other cases -> text - """ - has_documents = documents is not None and len(documents) > 0 - is_planning_operation = operation_type == OperationType.GENERATE_PLAN - - if not has_documents and is_planning_operation: - return "planning" - else: - return "text" - - async def _callAiPlanning( - self, - prompt: str, - placeholders: Optional[Dict[str, str]], - placeholdersMeta: Optional[Dict[str, bool]], - options: AiCallOptions - ) -> str: - """ - Handle planning calls with placeholder system and selective summarization. - """ - # Ensure aiObjects is initialized - await self._ensureAiObjectsInitialized() - - # Build full prompt with placeholders; if too large, summarize summaryAllowed placeholders proportionally - effective_placeholders = placeholders or {} - full_prompt = self._buildPromptWithPlaceholders(prompt, effective_placeholders) - - if options.compressPrompt and placeholdersMeta: - # Determine model capacity - try: - caps = self._getModelCapabilitiesForContent(full_prompt, None, options) - max_bytes = caps.get("maxContextBytes", len(full_prompt.encode("utf-8"))) - except Exception: - max_bytes = len(full_prompt.encode("utf-8")) - - current_bytes = len(full_prompt.encode("utf-8")) - if current_bytes > max_bytes: - # Compute total bytes contributed by allowed placeholders (approximate by content length) - allowed_labels = [l for l, allow in placeholdersMeta.items() if allow] - allowed_sizes = {l: len((effective_placeholders.get(l) or "").encode("utf-8")) for l in allowed_labels} - total_allowed = sum(allowed_sizes.values()) - - overage = current_bytes - max_bytes - if total_allowed > 0 and overage > 0: - # Target total for allowed after reduction - target_allowed = max(total_allowed - overage, 0) - # Global ratio to apply across allowed placeholders - ratio = target_allowed / total_allowed if total_allowed > 0 else 1.0 - ratio = max(0.0, min(1.0, ratio)) - - reduced: Dict[str, str] = {} - for label, content in effective_placeholders.items(): - if label in allowed_labels and isinstance(content, str) and len(content) > 0: - old_len = len(content) - # Reduce by proportional ratio on characters (fallback if empty) - reduction_factor = ratio if old_len > 0 else 1.0 - reduced[label] = self._reduceText(content, reduction_factor) - else: - reduced[label] = content - - effective_placeholders = reduced - full_prompt = self._buildPromptWithPlaceholders(prompt, effective_placeholders) - - # If still slightly over, perform a second-pass fine adjustment with updated ratio - current_bytes = len(full_prompt.encode("utf-8")) - if current_bytes > max_bytes and total_allowed > 0: - overage2 = current_bytes - max_bytes - # Recompute allowed sizes after first reduction - allowed_sizes2 = {l: len((effective_placeholders.get(l) or "").encode("utf-8")) for l in allowed_labels} - 
total_allowed2 = sum(allowed_sizes2.values()) - if total_allowed2 > 0 and overage2 > 0: - target_allowed2 = max(total_allowed2 - overage2, 0) - ratio2 = target_allowed2 / total_allowed2 - ratio2 = max(0.0, min(1.0, ratio2)) - reduced2: Dict[str, str] = {} - for label, content in effective_placeholders.items(): - if label in allowed_labels and isinstance(content, str) and len(content) > 0: - old_len = len(content) - reduction_factor = ratio2 if old_len > 0 else 1.0 - reduced2[label] = self._reduceText(content, reduction_factor) - else: - reduced2[label] = content - effective_placeholders = reduced2 - full_prompt = self._buildPromptWithPlaceholders(prompt, effective_placeholders) - - - # Make AI call using AiObjects (let it handle model selection) - request = AiCallRequest( - prompt=full_prompt, - context="", # Context is already included in the prompt - options=options + return await self.coreAi.callAi( + prompt, documents, placeholders, options, outputFormat, title, + documentProcessor, documentGenerator ) - response = await self.aiObjects.call(request) - try: - logger.debug(f"AI model selected (planning): {getattr(response, 'modelName', 'unknown')}") - except Exception: - pass - return response.content - - async def _callAiText( - self, - prompt: str, - documents: Optional[List[ChatDocument]], - options: AiCallOptions - ) -> str: - """ - Handle text calls with document processing through ExtractionService. - UNIFIED PROCESSING: Always use per-chunk processing for consistency. - """ - # Ensure aiObjects is initialized - await self._ensureAiObjectsInitialized() - - # UNIFIED PROCESSING: Always use per-chunk processing for consistency - # This ensures MIME-type checking, chunk mapping, and parallel processing - return await self._processDocumentsPerChunk(documents, prompt, options) - - async def _callAiDirect( - self, - prompt: str, - documents: Optional[List[ChatDocument]], - options: AiCallOptions - ) -> Dict[str, Any]: - """ - Call AI directly with prompt and documents for JSON output. - Used for multi-file generation - uses the existing generation pipeline. - """ - # Use the existing generation pipeline that already works - # This ensures proper document processing and content extraction - logger.info(f"Using existing generation pipeline for {len(documents) if documents else 0} documents") - - # Process documents with JSON merging using the existing pipeline - result = await self._processDocumentsPerChunkJson(documents, prompt, options) - - # Convert single-file result to multi-file format if needed - if "sections" in result and "documents" not in result: - logger.info("Converting single-file result to multi-file format") - # This is a single-file result, convert it to multi-file format - return { - "metadata": result.get("metadata", {"title": "Converted Document"}), - "documents": [{ - "id": "doc_1", - "title": result.get("metadata", {}).get("title", "Document"), - "filename": "document.txt", - "sections": result.get("sections", []) - }] - } - - return result - - async def _processDocumentsPerChunkJsonWithPrompt( - self, - documents: List[ChatDocument], - custom_prompt: str, - options: Optional[AiCallOptions] = None - ) -> Dict[str, Any]: - """ - Process documents with per-chunk AI calls and merge results in JSON mode. - Uses a custom prompt instead of the default extraction prompt. 
- """ - if not documents: - return {"metadata": {"title": "Empty Document"}, "sections": []} - - # Get model capabilities for size calculation - model_capabilities = self._getModelCapabilitiesForContent(custom_prompt, documents, options) - - # Build extraction options for chunking with intelligent merging - extractionOptions: Dict[str, Any] = { - "prompt": custom_prompt, # Use the custom prompt instead of default - "operationType": options.operationType if options else "general", - "processDocumentsIndividually": True, # Process each document separately - "maxSize": model_capabilities["maxContextBytes"], - "chunkAllowed": True, - "textChunkSize": model_capabilities["textChunkSize"], - "imageChunkSize": model_capabilities["imageChunkSize"], - "imageMaxPixels": 1024 * 1024, - "imageQuality": 85, - "mergeStrategy": { - "useIntelligentMerging": True, # Enable intelligent token-aware merging - "modelCapabilities": model_capabilities, - "prompt": custom_prompt, # Use the custom prompt - "groupBy": "typeGroup", - "orderBy": "id", - "mergeType": "concatenate" - }, - } - - logger.debug(f"Per-chunk extraction options (JSON mode): prompt length={len(extractionOptions.get('prompt', ''))} chars, operationType={extractionOptions.get('operationType')}") - - try: - # Extract content with chunking - extractionResult = self.extractionService.extractContent(documents, extractionOptions) - - if not isinstance(extractionResult, list): - return {"metadata": {"title": "Error Document"}, "sections": []} - - # Process chunks with proper mapping - logger.info(f"Processing {len(extractionResult)} chunks with custom prompt") - logger.debug(f"Custom prompt preview: {custom_prompt[:200]}...") - - # Debug: Show what content is being processed (before filtering) - for i, ec in enumerate(extractionResult): - if hasattr(ec, 'parts'): - for j, part in enumerate(ec.parts): - if not (hasattr(part, 'data') and part.data): - # Check if this is an empty container chunk (which is expected) - part_type = getattr(part, 'typeGroup', None) - part_mime = getattr(part, 'mimeType', '') - - is_empty_container = ( - part_type == "container" and - part_mime and - 'document' in part_mime.lower() - ) - - if not is_empty_container: - logger.warning(f"Part {j} has no data - typeGroup='{part_type}', mimeType='{part_mime}'") - - chunkResults = await self._processChunksWithMapping(extractionResult, custom_prompt, options, generate_json=True) - - # Debug: Show what chunks were actually processed (after filtering) - logger.info(f"After filtering: {len(chunkResults)} chunks will be processed") - - # Merge with JSON mode - mergedJsonDocument = self._mergeChunkResultsJson(chunkResults, options) - - # Debug: Show what the AI actually returned - logger.info(f"AI returned document with keys: {list(mergedJsonDocument.keys())}") - if 'documents' in mergedJsonDocument: - logger.info(f"Number of documents: {len(mergedJsonDocument['documents'])}") - elif 'sections' in mergedJsonDocument: - logger.info(f"Number of sections: {len(mergedJsonDocument['sections'])}") - - return mergedJsonDocument - - except Exception as e: - logger.error(f"Error in per-chunk JSON processing: {str(e)}") - return {"metadata": {"title": "Error Document"}, "sections": []} - - async def _callAiJson( - self, - prompt: str, - documents: Optional[List[ChatDocument]], - options: AiCallOptions - ) -> Dict[str, Any]: - """ - Handle AI calls with document processing for JSON output. - Returns structured JSON document instead of text. 
- """ - # Ensure aiObjects is initialized - await self._ensureAiObjectsInitialized() - - # Process documents with JSON merging - return await self._processDocumentsPerChunkJson(documents, prompt, options) - - - - def _getModelCapabilitiesForContent(self, prompt: str, documents: Optional[List[ChatDocument]], options: AiCallOptions) -> Dict[str, int]: - """ - Get model capabilities for content processing, including appropriate size limits for chunking. - """ - # Estimate total content size - prompt_size = len(prompt.encode('utf-8')) - document_size = 0 - if documents: - # Rough estimate of document content size - for doc in documents: - document_size += doc.fileSize or 0 - - total_size = prompt_size + document_size - - # Use AiObjects to select the best model for this content size - # We'll simulate the model selection by checking available models - from modules.interfaces.interfaceAiObjects import aiModels - - # Find the best model for this content size and operation - best_model = None - best_context_length = 0 - - for model_name, model_info in aiModels.items(): - context_length = model_info.get("contextLength", 0) - - # Skip models with no context length or too small for content - if context_length == 0: - continue - - # Check if model supports the operation type - capabilities = model_info.get("capabilities", []) - if options.operationType == OperationType.IMAGE_ANALYSIS and "image_analysis" not in capabilities: - continue - elif options.operationType == OperationType.IMAGE_GENERATION and "image_generation" not in capabilities: - continue - elif options.operationType == OperationType.WEB_RESEARCH and "web_search" not in capabilities: - continue - elif "text_generation" not in capabilities: - continue - - # Prefer models that can handle the content without chunking, but allow chunking if needed - if context_length >= total_size * 0.8: # 80% of content size - if context_length > best_context_length: - best_model = model_info - best_context_length = context_length - elif best_model is None: # Fallback to largest available model - if context_length > best_context_length: - best_model = model_info - best_context_length = context_length - - # Fallback to a reasonable default if no model found - if best_model is None: - best_model = { - "contextLength": 128000, # GPT-4o default - "llmName": "gpt-4o" - } - - # Calculate appropriate sizes - # Convert tokens to bytes (rough estimate: 1 token ≈ 4 characters) - context_length_bytes = int(best_model["contextLength"] * 4) - max_context_bytes = int(context_length_bytes * 0.9) # 90% of context length - text_chunk_size = int(max_context_bytes * 0.7) # 70% of max context for text chunks - image_chunk_size = int(max_context_bytes * 0.8) # 80% of max context for image chunks - - logger.debug(f"Selected model: {best_model.get('llmName', 'unknown')} with context length: {best_model['contextLength']}") - logger.debug(f"Content size: {total_size} bytes, Max context: {max_context_bytes} bytes") - logger.debug(f"Text chunk size: {text_chunk_size} bytes, Image chunk size: {image_chunk_size} bytes") - - return { - "maxContextBytes": max_context_bytes, - "textChunkSize": text_chunk_size, - "imageChunkSize": image_chunk_size - } - - def _getModelsForOperation(self, operation_type: str, options: AiCallOptions) -> List[ModelCapabilities]: - """ - Get models capable of handling the specific operation with capability filtering. 
- """ - # Use the actual AI objects model selection instead of hardcoded default - if hasattr(self, 'aiObjects') and self.aiObjects: - # Let AiObjects handle the model selection - return [] - else: - # Fallback to default model if AiObjects not available - default_model = ModelCapabilities( - name="default", - maxTokens=4000, - capabilities=["text", "reasoning"] if operation_type == "planning" else ["text"], - costPerToken=0.001, - processingTime=1.0, - isAvailable=True - ) - return [default_model] - - def _buildPromptWithPlaceholders(self, prompt: str, placeholders: Optional[Dict[str, str]]) -> str: - """ - Build full prompt by replacing placeholders with their content. - Uses the new {{KEY:placeholder}} format. - """ - if not placeholders: - return prompt - - full_prompt = prompt - for placeholder, content in placeholders.items(): - # Replace both old format {{placeholder}} and new format {{KEY:placeholder}} - full_prompt = full_prompt.replace(f"{{{{{placeholder}}}}}", content) - full_prompt = full_prompt.replace(f"{{{{KEY:{placeholder}}}}}", content) - - return full_prompt - - def _writeTraceLog(self, contextText: str, data: Any) -> None: - """Write raw data to the central trace log file without truncation.""" - try: - import os - import json - from datetime import datetime, UTC - # Only write if logger is in debug mode - if logger.level > logging.DEBUG: - return - # Get log directory from configuration via service center if possible - logDir = None - try: - logDir = self.services.utils.configGet("APP_LOGGING_LOG_DIR", "./") - except Exception: - pass - if not logDir: - logDir = "./" - if not os.path.isabs(logDir): - # Make it relative to gateway directory - gatewayDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) - logDir = os.path.join(gatewayDir, logDir) - os.makedirs(logDir, exist_ok=True) - traceFile = os.path.join(logDir, "log_trace.log") - timestamp = datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S.%f")[:-3] - traceEntry = f"[{timestamp}] {contextText}\n" + ("=" * 80) + "\n" - if data is None: - traceEntry += "No data provided\n" - else: - # Prefer exact text; if dict/list, pretty print JSON - try: - if isinstance(data, (dict, list)): - traceEntry += f"JSON Data:\n{json.dumps(data, indent=2, ensure_ascii=False)}\n" - else: - text = str(data) - traceEntry += f"Text Data:\n{text}\n" - except Exception: - traceEntry += f"Data (fallback): {str(data)}\n" - traceEntry += ("=" * 80) + "\n\n" - with open(traceFile, "a", encoding="utf-8") as f: - f.write(traceEntry) - except Exception: - # Swallow to avoid recursive logging issues - pass - - def _writeAiResponseDebug(self, label: str, content: str, partIndex: int = 1, modelName: str = None, continuation: bool = None) -> None: - """Persist raw AI response parts for debugging under test-chat/ai - only if debug enabled.""" - try: - # Check if debug logging is enabled - debug_enabled = self.services.utils.configGet("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False) - if not debug_enabled: - return - - import os - from datetime import datetime, UTC - # Base dir: gateway/test-chat/ai (go up 4 levels from this file) - # .../gateway/modules/services/serviceAi/mainServiceAi.py -> up to gateway root - gatewayDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) - outDir = os.path.join(gatewayDir, 'test-chat', 'ai') - os.makedirs(outDir, exist_ok=True) - ts = datetime.now(UTC).strftime('%Y%m%d-%H%M%S-%f')[:-3] - suffix = [] - if partIndex is not None: - 
suffix.append(f"part{partIndex}") - if continuation is not None: - suffix.append(f"cont_{str(continuation).lower()}") - if modelName: - safeModel = ''.join(c if c.isalnum() or c in ('-', '_') else '-' for c in modelName) - suffix.append(safeModel) - suffixStr = ('_' + '_'.join(suffix)) if suffix else '' - fname = f"{ts}_{label}{suffixStr}.txt" - fpath = os.path.join(outDir, fname) - with open(fpath, 'w', encoding='utf-8') as f: - f.write(content or '') - except Exception: - # Do not raise; best-effort debug write - pass - - def _exceedsTokenLimit(self, text: str, model: ModelCapabilities, safety_margin: float) -> bool: - """ - Check if text exceeds model token limit with safety margin. - """ - # Simple character-based estimation (4 chars per token) - estimated_tokens = len(text) // 4 - max_tokens = int(model.maxTokens * (1 - safety_margin)) - return estimated_tokens > max_tokens - - def _reducePlanningPrompt( - self, - full_prompt: str, - placeholders: Optional[Dict[str, str]], - model: ModelCapabilities, - options: AiCallOptions - ) -> str: - """ - Reduce planning prompt size by summarizing placeholders while preserving prompt structure. - """ - if not placeholders: - return self._reduceText(full_prompt, 0.7) - - # Reduce placeholders while preserving prompt - reduced_placeholders = {} - for placeholder, content in placeholders.items(): - if len(content) > 1000: # Only reduce long content - reduction_factor = 0.7 - reduced_content = self._reduceText(content, reduction_factor) - reduced_placeholders[placeholder] = reduced_content - else: - reduced_placeholders[placeholder] = content - - return self._buildPromptWithPlaceholders(full_prompt, reduced_placeholders) - - def _reduceTextPrompt( - self, - prompt: str, - context: str, - model: ModelCapabilities, - options: AiCallOptions - ) -> str: - """ - Reduce text prompt size using typeGroup-aware chunking and merging. - """ - max_size = int(model.maxTokens * (1 - options.safetyMargin)) - - if options.compressPrompt: - # Reduce both prompt and context - target_size = max_size - current_size = len(prompt) + len(context) - reduction_factor = (target_size * 0.7) / current_size - - if reduction_factor < 1.0: - prompt = self._reduceText(prompt, reduction_factor) - context = self._reduceText(context, reduction_factor) - else: - # Only reduce context, preserve prompt integrity - max_context_size = max_size - len(prompt) - if len(context) > max_context_size: - reduction_factor = max_context_size / len(context) - context = self._reduceText(context, reduction_factor) - - return prompt + "\n\n" + context if context else prompt - - def _extractTextFromContentParts(self, extracted_content) -> str: - """ - Extract text content from ExtractionService ContentPart objects. - """ - if not extracted_content or not hasattr(extracted_content, 'parts'): - return "" - - text_parts = [] - for part in extracted_content.parts: - if hasattr(part, 'typeGroup') and part.typeGroup in ['text', 'table', 'structure']: - if hasattr(part, 'data') and part.data: - text_parts.append(part.data) - - return "\n\n".join(text_parts) - - def _reduceText(self, text: str, reduction_factor: float) -> str: - """ - Reduce text size by the specified factor. - """ - if reduction_factor >= 1.0: - return text - - target_length = int(len(text) * reduction_factor) - return text[:target_length] + "... 
[reduced]" - - async def _analyzePromptIntent(self, prompt: str, ai_service=None) -> Dict[str, Any]: - """Use AI to analyze user prompt and determine processing requirements.""" - if not ai_service: - return {"is_multi_file": False, "strategy": "single", "criteria": None} - - try: - analysis_prompt = f""" -Analyze this user request and determine if it requires multiple file output or single file output. - -User request: "{prompt}" - -Respond with JSON only in this exact format: -{{ - "is_multi_file": true/false, - "strategy": "single|per_entity|by_section|by_criteria|custom", - "criteria": "description of how to split content", - "file_naming_pattern": "suggested pattern for filenames", - "reasoning": "brief explanation of the analysis" -}} - -Consider: -- Does the user want separate files for different entities (customers, products, etc.)? -- Does the user want to split content into multiple documents? -- What would be the most logical way to organize the content? -- What language is the request in? (analyze in the original language) - -Return only the JSON response. -""" - - from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType - request_options = AiCallOptions() - request_options.operationType = OperationType.GENERAL - - request = AiCallRequest(prompt=analysis_prompt, context="", options=request_options) - response = await ai_service.aiObjects.call(request) - - if response and response.content: - import json - import re - - # Extract JSON from response - result = response.content.strip() - json_match = re.search(r'\{.*\}', result, re.DOTALL) - if json_match: - result = json_match.group(0) - - analysis = json.loads(result) - return analysis - else: - return {"is_multi_file": False, "strategy": "single", "criteria": None} - - except Exception as e: - logger.warning(f"AI prompt analysis failed: {str(e)}, defaulting to single file") - return {"is_multi_file": False, "strategy": "single", "criteria": None} - - def _validateResponseStructure(self, response: Dict[str, Any], prompt_analysis: Dict[str, Any]) -> bool: - """Validate that AI response matches the expected structure.""" - try: - if not isinstance(response, dict): - logger.warning(f"Response validation failed: Response is not a dict, got {type(response)}") - return False - - # Check for multi-file structure - if prompt_analysis.get("is_multi_file", False): - has_documents = "documents" in response - is_documents_list = isinstance(response.get("documents"), list) - logger.info(f"Multi-file validation: has_documents={has_documents}, is_documents_list={is_documents_list}") - if has_documents and is_documents_list: - logger.info(f"Multi-file validation passed: {len(response['documents'])} documents found") - else: - logger.warning(f"Multi-file validation failed: documents key present={has_documents}, documents is list={is_documents_list}") - logger.warning(f"Available keys: {list(response.keys())}") - return has_documents and is_documents_list - else: - has_sections = "sections" in response - is_sections_list = isinstance(response.get("sections"), list) - logger.info(f"Single-file validation: has_sections={has_sections}, is_sections_list={is_sections_list}") - return has_sections and is_sections_list - except Exception as e: - logger.warning(f"Response validation failed with exception: {str(e)}") - return False - - async def _callAiWithDocumentGeneration( - self, - prompt: str, - documents: Optional[List[ChatDocument]], - options: AiCallOptions, - outputFormat: str, - title: Optional[str] - ) -> Dict[str, 
Any]: - """ - Handle AI calls with document generation in specific output format. - Now supports both single-file and multi-file generation. - - Args: - prompt: The main prompt for the AI call - documents: Optional list of documents to process - options: AI call configuration options - outputFormat: Target output format (html, pdf, docx, txt, md, json, csv, xlsx) - title: Optional title for generated documents - - Returns: - Dict with generated documents and metadata - """ - try: - # Use AI to analyze prompt intent - prompt_analysis = await self._analyzePromptIntent(prompt, self) - logger.info(f"Prompt analysis result: {prompt_analysis}") - - if prompt_analysis.get("is_multi_file", False): - return await self._callAiWithMultiFileGeneration( - prompt, documents, options, outputFormat, title, prompt_analysis - ) - else: - return await self._callAiWithSingleFileGeneration( - prompt, documents, options, outputFormat, title - ) - - except Exception as e: - logger.error(f"Error in document generation: {str(e)}") - return { - "success": False, - "error": str(e), - "content": "", - "rendered_content": "", - "mime_type": "text/plain", - "filename": f"error_{outputFormat}", - "format": outputFormat, - "title": title or "Error", - "documents": [] - } - - async def _callAiWithSingleFileGeneration( - self, - prompt: str, - documents: Optional[List[ChatDocument]], - options: AiCallOptions, - outputFormat: str, - title: Optional[str] - ) -> Dict[str, Any]: - """Handle single-file document generation (existing functionality).""" - try: - # Get format-specific extraction prompt from generation service - from modules.services.serviceGeneration.mainServiceGeneration import GenerationService - generation_service = GenerationService(self.services) - - # Use default title if not provided - if not title: - title = "AI Generated Document" - - # Get format-specific extraction prompt - extractionPrompt = await generation_service.getExtractionPrompt( - outputFormat=outputFormat, - userPrompt=prompt, - title=title, - aiService=self - ) - - # Process documents with format-specific prompt using JSON mode - # This ensures structured JSON output instead of text - aiResponseJson = await self._callAiJson(extractionPrompt, documents, options) - - # Validate JSON response - if not isinstance(aiResponseJson, dict) or "sections" not in aiResponseJson: - raise Exception("AI response is not valid JSON document structure") - - # Generate filename from document metadata - parsedFilename = None - try: - if aiResponseJson.get("metadata", {}).get("title"): - title = aiResponseJson["metadata"]["title"] - # Clean title for filename - import re - parsed = re.sub(r"[^a-zA-Z0-9._-]", "-", title) - parsed = re.sub(r"-+", "-", parsed).strip('-') - if parsed: - parsedFilename = f"{parsed}.{outputFormat}" - except Exception: - parsedFilename = None - - # Render the JSON content to the specified format - renderedContent, mimeType = await generation_service.renderReport( - extractedContent=aiResponseJson, - outputFormat=outputFormat, - title=title, - userPrompt=prompt, - aiService=self - ) - - # Generate meaningful filename (use AI-provided if valid, else fallback) - from datetime import datetime, UTC - timestamp = datetime.now(UTC).strftime("%Y%m%d-%H%M%S") - if parsedFilename and parsedFilename.lower().endswith(f".{outputFormat.lower()}"): - filename = parsedFilename - else: - safeTitle = ''.join(c if c.isalnum() else '-' for c in (title or 'document')).strip('-') - filename = f"{safeTitle or 'document'}-{timestamp}.{outputFormat}" - - # 
Return structured result with document information - return { - "success": True, - "content": aiResponseJson, # Structured JSON document - "rendered_content": renderedContent, # Formatted content - "mime_type": mimeType, - "filename": filename, - "format": outputFormat, - "title": title, - "documents": [{ - "documentName": filename, - "documentData": renderedContent, - "mimeType": mimeType - }], - "is_multi_file": False - } - - except Exception as e: - logger.error(f"Error in single-file document generation: {str(e)}") - raise - - async def _callAiWithMultiFileGeneration( - self, - prompt: str, - documents: Optional[List[ChatDocument]], - options: AiCallOptions, - outputFormat: str, - title: Optional[str], - prompt_analysis: Dict[str, Any] - ) -> Dict[str, Any]: - """Handle multi-file document generation using AI analysis.""" - try: - # Get multi-file extraction prompt based on AI analysis - from modules.services.serviceGeneration.mainServiceGeneration import GenerationService - generation_service = GenerationService(self.services) - - # Use default title if not provided - if not title: - title = "AI Generated Documents" - - # Get adaptive extraction prompt - extraction_prompt = await generation_service.getAdaptiveExtractionPrompt( - outputFormat=outputFormat, - userPrompt=prompt, - title=title, - promptAnalysis=prompt_analysis, - aiService=self - ) - - logger.info(f"Adaptive extraction prompt length: {len(extraction_prompt)} characters") - logger.debug(f"Adaptive extraction prompt preview: {extraction_prompt[:500]}...") - - # Process with adaptive JSON schema - use the existing pipeline but with adaptive prompt - logger.info(f"Using adaptive prompt with existing pipeline: {len(extraction_prompt)} chars") - logger.debug(f"Processing documents: {len(documents) if documents else 0} documents") - - # Use the existing pipeline but replace the prompt with our adaptive one - # This ensures proper document processing while using the multi-file prompt - ai_response = await self._processDocumentsPerChunkJsonWithPrompt(documents, extraction_prompt, options) - - logger.info(f"AI response type: {type(ai_response)}") - logger.info(f"AI response keys: {list(ai_response.keys()) if isinstance(ai_response, dict) else 'Not a dict'}") - logger.debug(f"AI response preview: {str(ai_response)[:500]}...") - - # Validate response structure - if not self._validateResponseStructure(ai_response, prompt_analysis): - # Fallback to single-file if multi-file fails - logger.warning(f"Multi-file processing failed - Invalid response structure. 
Expected multi-file but got: {list(ai_response.keys()) if isinstance(ai_response, dict) else type(ai_response)}") - logger.warning(f"Prompt analysis: {prompt_analysis}") - logger.warning("Falling back to single-file generation") - return await self._callAiWithSingleFileGeneration( - prompt, documents, options, outputFormat, title - ) - - # Process multiple documents - generated_documents = [] - for i, doc_data in enumerate(ai_response.get("documents", [])): - # Transform AI-generated sections to renderer-compatible format - transformed_sections = [] - for section in doc_data.get("sections", []): - # Convert AI format to renderer format - transformed_section = { - "id": section.get("id", f"section_{len(transformed_sections) + 1}"), - "type": section.get("content_type", "paragraph"), - "data": { - "text": "", - "elements": section.get("elements", []) - }, - "order": section.get("order", len(transformed_sections) + 1) - } - - # Extract text from elements for simple text-based sections - if section.get("content_type") in ["paragraph", "heading"]: - text_parts = [] - for element in section.get("elements", []): - if "text" in element: - text_parts.append(element["text"]) - transformed_section["data"]["text"] = "\n".join(text_parts) - - transformed_sections.append(transformed_section) - - # Create complete document structure for rendering - complete_document = { - "metadata": { - "title": doc_data["title"], - "source_document": "multi_file_generation", - "document_id": doc_data.get("id", f"doc_{i+1}"), - "filename": doc_data.get("filename", f"document_{i+1}"), - "split_strategy": prompt_analysis.get("strategy", "custom") - }, - "sections": transformed_sections, - "summary": f"Generated document: {doc_data['title']}", - "tags": ["multi_file", "ai_generated"] - } - - rendered_content, mime_type = await generation_service.renderReport( - extractedContent=complete_document, - outputFormat=outputFormat, - title=doc_data["title"], - userPrompt=prompt, - aiService=self - ) - - # Generate proper filename with correct extension - base_filename = doc_data.get("filename", f"document_{i+1}") - # Remove any existing extension and add the correct one - if '.' 
in base_filename: - base_filename = base_filename.rsplit('.', 1)[0] - - # Add proper extension based on output format - if outputFormat.lower() == "docx": - filename = f"{base_filename}.docx" - elif outputFormat.lower() == "pdf": - filename = f"{base_filename}.pdf" - elif outputFormat.lower() == "html": - filename = f"{base_filename}.html" - else: - filename = f"{base_filename}.{outputFormat}" - - generated_documents.append({ - "documentName": filename, - "documentData": rendered_content, - "mimeType": mime_type - }) - - # Save debug files for multi-file generation - only if debug enabled - debug_enabled = self.services.utils.configGet("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False) - if debug_enabled: - try: - import os - from datetime import datetime, UTC - ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S") - debug_root = "./test-chat/ai" - debug_dir = os.path.join(debug_root, f"multifile_output_{ts}") - os.makedirs(debug_dir, exist_ok=True) - - # Save metadata - with open(os.path.join(debug_dir, "metadata.txt"), "w", encoding="utf-8") as f: - f.write(f"title: {title}\n") - f.write(f"format: {outputFormat}\n") - f.write(f"documents_count: {len(generated_documents)}\n") - f.write(f"split_strategy: {prompt_analysis.get('strategy', 'custom')}\n") - f.write(f"prompt_analysis: {prompt_analysis}\n") - - # Save each generated document - for i, doc in enumerate(generated_documents): - doc_filename = doc["documentName"] - doc_data = doc["documentData"] - doc_mime = doc["mimeType"] - - # Determine file extension - if outputFormat.lower() == "docx": - file_ext = ".docx" - elif outputFormat.lower() == "pdf": - file_ext = ".pdf" - elif outputFormat.lower() == "html": - file_ext = ".html" - else: - file_ext = f".{outputFormat}" - - # Save the rendered document - output_path = os.path.join(debug_dir, f"document_{i+1}_{doc_filename}") - - if file_ext in ['.md', '.txt', '.html', '.json', '.csv']: - # Text-based formats - with open(output_path, 'w', encoding='utf-8') as f: - f.write(doc_data) - else: - # Binary formats - decode from base64 if needed - try: - import base64 - doc_bytes = base64.b64decode(doc_data) - with open(output_path, 'wb') as f: - f.write(doc_bytes) - except Exception: - # If not base64, save as text - with open(output_path, 'w', encoding='utf-8') as f: - f.write(doc_data) - - logger.info(f"💾 Debug: Saved multi-file document {i+1}: {output_path}") - - logger.info(f"💾 Debug: Multi-file output saved to: {debug_dir}") - - except Exception as e: - logger.warning(f"Failed to save multi-file debug output: {e}") - - return { - "success": True, - "content": ai_response, - "rendered_content": None, # Not applicable for multi-file - "mime_type": None, # Not applicable for multi-file - "filename": None, # Not applicable for multi-file - "format": outputFormat, - "title": title, - "documents": generated_documents, - "is_multi_file": True, - "split_strategy": prompt_analysis.get("strategy", "custom") - } - - except Exception as e: - logger.error(f"Error in multi-file document generation: {str(e)}") - # Fallback to single-file - return await self._callAiWithSingleFileGeneration( - prompt, documents, options, outputFormat, title - ) - diff --git a/modules/services/serviceAi/subCoreAi.py b/modules/services/serviceAi/subCoreAi.py new file mode 100644 index 00000000..4cd13f8a --- /dev/null +++ b/modules/services/serviceAi/subCoreAi.py @@ -0,0 +1,596 @@ +import logging +from typing import Dict, Any, List, Optional, Tuple, Union +from modules.datamodels.datamodelChat import PromptPlaceholder, ChatDocument +from 
modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, ModelCapabilities, OperationType, Priority
+from modules.interfaces.interfaceAiObjects import AiObjects
+
+logger = logging.getLogger(__name__)
+
+
+class SubCoreAi:
+    """Core AI operations including image analysis, text generation, and planning calls."""
+
+    def __init__(self, services, aiObjects):
+        """Initialize core AI operations.
+
+        Args:
+            services: Service center instance for accessing other services
+            aiObjects: Initialized AiObjects instance
+        """
+        self.services = services
+        self.aiObjects = aiObjects
+
+    # AI Processing Call
+    async def callAi(
+        self,
+        prompt: str,
+        documents: Optional[List[ChatDocument]] = None,
+        placeholders: Optional[List[PromptPlaceholder]] = None,
+        options: Optional[AiCallOptions] = None,
+        outputFormat: Optional[str] = None,
+        title: Optional[str] = None,
+        documentProcessor=None,
+        documentGenerator=None
+    ) -> Union[str, Dict[str, Any]]:
+        """
+        Unified AI call interface that automatically routes to the appropriate handler.
+
+        Args:
+            prompt: The main prompt for the AI call
+            documents: Optional list of documents to process
+            placeholders: Optional list of placeholder replacements for planning calls
+            options: AI call configuration options
+            outputFormat: Optional output format (html, pdf, docx, txt, md, json, csv, xlsx) for document generation
+            title: Optional title for generated documents
+            documentProcessor: Document processing service instance
+            documentGenerator: Document generation service instance
+
+        Returns:
+            AI response as string, or dict with documents if outputFormat is specified
+
+        Raises:
+            Exception: If all available models fail
+        """
+        if options is None:
+            options = AiCallOptions()
+
+        # Normalize placeholders from List[PromptPlaceholder]
+        placeholders_dict: Dict[str, str] = {}
+        placeholders_meta: Dict[str, bool] = {}
+        if placeholders:
+            placeholders_dict = {p.label: p.content for p in placeholders}
+            placeholders_meta = {p.label: bool(getattr(p, 'summaryAllowed', False)) for p in placeholders}
+
+        # Auto-determine call type based on documents and operation type
+        call_type = self._determineCallType(documents, options.operationType)
+        options.callType = call_type
+
+        try:
+            # Build the full prompt that will be sent to AI
+            if placeholders:
+                full_prompt = prompt
+                for p in placeholders:
+                    placeholder = f"{{{{KEY:{p.label}}}}}"
+                    full_prompt = full_prompt.replace(placeholder, p.content)
+            else:
+                full_prompt = prompt
+
+            self._writeAiResponseDebug(
+                label='ai_prompt_debug',
+                content=full_prompt,
+                partIndex=1,
+                modelName=None,
+                continuation=False
+            )
+        except Exception:
+            pass
+
+        # Handle document generation with specific output format
+        if outputFormat and documentGenerator:
+            result = await documentGenerator.callAiWithDocumentGeneration(prompt, documents, options, outputFormat, title)
+            # Log AI response for debugging
+            try:
+                if isinstance(result, dict) and 'content' in result:
+                    self._writeAiResponseDebug(
+                        label='ai_document_generation',
+                        content=str(result['content']),  # content may be a dict here; stringify so the debug write doesn't fail
+                        partIndex=1,
+                        modelName=None,  # Document generation doesn't return model info
+                        continuation=False
+                    )
+            except Exception:
+                pass
+            return result
+
+        if call_type == "planning":
+            result = await self._callAiPlanning(prompt, placeholders_dict, placeholders_meta, options)
+            # Log AI response for debugging
+            try:
+                self._writeAiResponseDebug(
+                    label='ai_planning',
+                    content=result or "",
+                    partIndex=1,
+                    modelName=None,  # Planning doesn't return model info
+                    continuation=False
+                )
+            except
Exception: + pass + return result + else: + # Set processDocumentsIndividually from the legacy parameter if not set in options + if options.processDocumentsIndividually is None and documents: + options.processDocumentsIndividually = False # Default to batch processing + + # For text calls, we need to build the full prompt with placeholders here + # since _callAiText doesn't handle placeholders directly + if placeholders_dict: + full_prompt = self._buildPromptWithPlaceholders(prompt, placeholders_dict) + else: + full_prompt = prompt + + if documentProcessor: + result = await documentProcessor.callAiText(full_prompt, documents, options) + else: + # Fallback to direct AI call if no document processor available + request = AiCallRequest( + prompt=full_prompt, + context="", + options=options + ) + response = await self.aiObjects.call(request) + result = response.content + + # Log AI response for debugging (additional logging for text calls) + try: + self._writeAiResponseDebug( + label='ai_text_main', + content=result or "", + partIndex=1, + modelName=None, # Text calls already log internally + continuation=False + ) + except Exception: + pass + return result + + # AI Image Analysis + async def readImage( + self, + prompt: str, + imageData: Union[str, bytes], + mimeType: str = None, + options: Optional[AiCallOptions] = None, + ) -> str: + """Call AI for image analysis using interface.callImage().""" + try: + # Check if imageData is valid + if not imageData: + error_msg = "No image data provided" + self.services.utils.debugLogToFile(f"Error in AI image analysis: {error_msg}", "AI_SERVICE") + logger.error(f"Error in AI image analysis: {error_msg}") + return f"Error: {error_msg}" + + self.services.utils.debugLogToFile(f"readImage called with prompt, imageData type: {type(imageData)}, length: {len(imageData) if imageData else 0}, mimeType: {mimeType}", "AI_SERVICE") + logger.info(f"readImage called with prompt, imageData type: {type(imageData)}, length: {len(imageData) if imageData else 0}, mimeType: {mimeType}") + + # Always use IMAGE_ANALYSIS operation type for image processing + if options is None: + options = AiCallOptions(operationType=OperationType.IMAGE_ANALYSIS) + else: + # Override the operation type to ensure image analysis + options.operationType = OperationType.IMAGE_ANALYSIS + + self.services.utils.debugLogToFile(f"Calling aiObjects.callImage with operationType: {options.operationType}", "AI_SERVICE") + logger.info(f"Calling aiObjects.callImage with operationType: {options.operationType}") + result = await self.aiObjects.callImage(prompt, imageData, mimeType, options) + + # Debug the result + self.services.utils.debugLogToFile(f"Raw AI result type: {type(result)}, value: {repr(result)}", "AI_SERVICE") + + # Check if result is valid + if not result or (isinstance(result, str) and not result.strip()): + error_msg = f"No response from AI image analysis (result: {repr(result)})" + self.services.utils.debugLogToFile(f"Error in AI image analysis: {error_msg}", "AI_SERVICE") + logger.error(f"Error in AI image analysis: {error_msg}") + return f"Error: {error_msg}" + + self.services.utils.debugLogToFile(f"callImage returned: {result[:200]}..." if len(result) > 200 else result, "AI_SERVICE") + logger.info(f"callImage returned: {result[:200]}..." 
if len(result) > 200 else result) + return result + except Exception as e: + self.services.utils.debugLogToFile(f"Error in AI image analysis: {str(e)}", "AI_SERVICE") + logger.error(f"Error in AI image analysis: {str(e)}") + return f"Error: {str(e)}" + + # AI Image Generation + async def generateImage( + self, + prompt: str, + size: str = "1024x1024", + quality: str = "standard", + style: str = "vivid", + options: Optional[AiCallOptions] = None, + ) -> Dict[str, Any]: + """Generate an image using AI using interface.generateImage().""" + try: + return await self.aiObjects.generateImage(prompt, size, quality, style, options) + except Exception as e: + logger.error(f"Error in AI image generation: {str(e)}") + return {"success": False, "error": str(e)} + + def _determineCallType(self, documents: Optional[List[ChatDocument]], operation_type: str) -> str: + """ + Determine call type based on documents and operation type. + + Criteria: no documents AND operationType is "generate_plan" -> planning + All other cases -> text + """ + has_documents = documents is not None and len(documents) > 0 + is_planning_operation = operation_type == OperationType.GENERATE_PLAN + + if not has_documents and is_planning_operation: + return "planning" + else: + return "text" + + async def _callAiPlanning( + self, + prompt: str, + placeholders: Optional[Dict[str, str]], + placeholdersMeta: Optional[Dict[str, bool]], + options: AiCallOptions + ) -> str: + """ + Handle planning calls with placeholder system and selective summarization. + """ + # Build full prompt with placeholders; if too large, summarize summaryAllowed placeholders proportionally + effective_placeholders = placeholders or {} + full_prompt = self._buildPromptWithPlaceholders(prompt, effective_placeholders) + + if options.compressPrompt and placeholdersMeta: + # Determine model capacity + try: + caps = self._getModelCapabilitiesForContent(full_prompt, None, options) + max_bytes = caps.get("maxContextBytes", len(full_prompt.encode("utf-8"))) + except Exception: + max_bytes = len(full_prompt.encode("utf-8")) + + current_bytes = len(full_prompt.encode("utf-8")) + if current_bytes > max_bytes: + # Compute total bytes contributed by allowed placeholders (approximate by content length) + allowed_labels = [l for l, allow in placeholdersMeta.items() if allow] + allowed_sizes = {l: len((effective_placeholders.get(l) or "").encode("utf-8")) for l in allowed_labels} + total_allowed = sum(allowed_sizes.values()) + + overage = current_bytes - max_bytes + if total_allowed > 0 and overage > 0: + # Target total for allowed after reduction + target_allowed = max(total_allowed - overage, 0) + # Global ratio to apply across allowed placeholders + ratio = target_allowed / total_allowed if total_allowed > 0 else 1.0 + ratio = max(0.0, min(1.0, ratio)) + + reduced: Dict[str, str] = {} + for label, content in effective_placeholders.items(): + if label in allowed_labels and isinstance(content, str) and len(content) > 0: + old_len = len(content) + # Reduce by proportional ratio on characters (fallback if empty) + reduction_factor = ratio if old_len > 0 else 1.0 + reduced[label] = self._reduceText(content, reduction_factor) + else: + reduced[label] = content + + effective_placeholders = reduced + full_prompt = self._buildPromptWithPlaceholders(prompt, effective_placeholders) + + # If still slightly over, perform a second-pass fine adjustment with updated ratio + current_bytes = len(full_prompt.encode("utf-8")) + if current_bytes > max_bytes and total_allowed > 0: + overage2 = 
current_bytes - max_bytes + # Recompute allowed sizes after first reduction + allowed_sizes2 = {l: len((effective_placeholders.get(l) or "").encode("utf-8")) for l in allowed_labels} + total_allowed2 = sum(allowed_sizes2.values()) + if total_allowed2 > 0 and overage2 > 0: + target_allowed2 = max(total_allowed2 - overage2, 0) + ratio2 = target_allowed2 / total_allowed2 + ratio2 = max(0.0, min(1.0, ratio2)) + reduced2: Dict[str, str] = {} + for label, content in effective_placeholders.items(): + if label in allowed_labels and isinstance(content, str) and len(content) > 0: + old_len = len(content) + reduction_factor = ratio2 if old_len > 0 else 1.0 + reduced2[label] = self._reduceText(content, reduction_factor) + else: + reduced2[label] = content + effective_placeholders = reduced2 + full_prompt = self._buildPromptWithPlaceholders(prompt, effective_placeholders) + + + # Make AI call using AiObjects (let it handle model selection) + request = AiCallRequest( + prompt=full_prompt, + context="", # Context is already included in the prompt + options=options + ) + response = await self.aiObjects.call(request) + try: + logger.debug(f"AI model selected (planning): {getattr(response, 'modelName', 'unknown')}") + except Exception: + pass + return response.content + + async def _callAiDirect( + self, + prompt: str, + documents: Optional[List[ChatDocument]], + options: AiCallOptions, + documentProcessor=None + ) -> Dict[str, Any]: + """ + Call AI directly with prompt and documents for JSON output. + Used for multi-file generation - uses the existing generation pipeline. + """ + # Use the existing generation pipeline that already works + # This ensures proper document processing and content extraction + logger.info(f"Using existing generation pipeline for {len(documents) if documents else 0} documents") + + if documentProcessor: + # Process documents with JSON merging using the existing pipeline + result = await documentProcessor.processDocumentsPerChunkJson(documents, prompt, options) + else: + # Fallback to simple AI call + request = AiCallRequest( + prompt=prompt, + context="", + options=options + ) + response = await self.aiObjects.call(request) + result = {"metadata": {"title": "AI Response"}, "sections": [{"id": "section_1", "type": "paragraph", "data": {"text": response.content}}]} + + # Convert single-file result to multi-file format if needed + if "sections" in result and "documents" not in result: + logger.info("Converting single-file result to multi-file format") + # This is a single-file result, convert it to multi-file format + return { + "metadata": result.get("metadata", {"title": "Converted Document"}), + "documents": [{ + "id": "doc_1", + "title": result.get("metadata", {}).get("title", "Document"), + "filename": "document.txt", + "sections": result.get("sections", []) + }] + } + + return result + + def _getModelCapabilitiesForContent(self, prompt: str, documents: Optional[List[ChatDocument]], options: AiCallOptions) -> Dict[str, int]: + """ + Get model capabilities for content processing, including appropriate size limits for chunking. 
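+
+        Returns a dict with "maxContextBytes", "textChunkSize" and "imageChunkSize",
+        all in bytes (token counts are converted with the rough heuristic of
+        1 token ≈ 4 bytes).
+
+        Example (illustrative sketch only; the exact numbers depend on the
+        model that gets selected, shown here for a 128k-token context):
+            caps = self._getModelCapabilitiesForContent(prompt, docs, options)
+            # caps ≈ {"maxContextBytes": 460800, "textChunkSize": 322560,
+            #         "imageChunkSize": 368640}
+            if len(prompt.encode("utf-8")) > caps["textChunkSize"]:
+                ...  # chunk the content before dispatching to the model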
+ """ + # Estimate total content size + prompt_size = len(prompt.encode('utf-8')) + document_size = 0 + if documents: + # Rough estimate of document content size + for doc in documents: + document_size += doc.fileSize or 0 + + total_size = prompt_size + document_size + + # Use AiObjects to select the best model for this content size + # We'll simulate the model selection by checking available models + from modules.interfaces.interfaceAiObjects import aiModels + + # Find the best model for this content size and operation + best_model = None + best_context_length = 0 + + for model_name, model_info in aiModels.items(): + context_length = model_info.get("contextLength", 0) + + # Skip models with no context length or too small for content + if context_length == 0: + continue + + # Check if model supports the operation type + capabilities = model_info.get("capabilities", []) + if options.operationType == OperationType.IMAGE_ANALYSIS and "image_analysis" not in capabilities: + continue + elif options.operationType == OperationType.IMAGE_GENERATION and "image_generation" not in capabilities: + continue + elif options.operationType == OperationType.WEB_RESEARCH and "web_search" not in capabilities: + continue + elif "text_generation" not in capabilities: + continue + + # Prefer models that can handle the content without chunking, but allow chunking if needed + if context_length >= total_size * 0.8: # 80% of content size + if context_length > best_context_length: + best_model = model_info + best_context_length = context_length + elif best_model is None: # Fallback to largest available model + if context_length > best_context_length: + best_model = model_info + best_context_length = context_length + + # Fallback to a reasonable default if no model found + if best_model is None: + best_model = { + "contextLength": 128000, # GPT-4o default + "llmName": "gpt-4o" + } + + # Calculate appropriate sizes + # Convert tokens to bytes (rough estimate: 1 token ≈ 4 characters) + context_length_bytes = int(best_model["contextLength"] * 4) + max_context_bytes = int(context_length_bytes * 0.9) # 90% of context length + text_chunk_size = int(max_context_bytes * 0.7) # 70% of max context for text chunks + image_chunk_size = int(max_context_bytes * 0.8) # 80% of max context for image chunks + + logger.debug(f"Selected model: {best_model.get('llmName', 'unknown')} with context length: {best_model['contextLength']}") + logger.debug(f"Content size: {total_size} bytes, Max context: {max_context_bytes} bytes") + logger.debug(f"Text chunk size: {text_chunk_size} bytes, Image chunk size: {image_chunk_size} bytes") + + return { + "maxContextBytes": max_context_bytes, + "textChunkSize": text_chunk_size, + "imageChunkSize": image_chunk_size + } + + def _getModelsForOperation(self, operation_type: str, options: AiCallOptions) -> List[ModelCapabilities]: + """ + Get models capable of handling the specific operation with capability filtering. 
+ """ + # Use the actual AI objects model selection instead of hardcoded default + if hasattr(self, 'aiObjects') and self.aiObjects: + # Let AiObjects handle the model selection + return [] + else: + # Fallback to default model if AiObjects not available + default_model = ModelCapabilities( + name="default", + maxTokens=4000, + capabilities=["text", "reasoning"] if operation_type == "planning" else ["text"], + costPerToken=0.001, + processingTime=1.0, + isAvailable=True + ) + return [default_model] + + def _buildPromptWithPlaceholders(self, prompt: str, placeholders: Optional[Dict[str, str]]) -> str: + """ + Build full prompt by replacing placeholders with their content. + Uses the new {{KEY:placeholder}} format. + """ + if not placeholders: + return prompt + + full_prompt = prompt + for placeholder, content in placeholders.items(): + # Replace both old format {{placeholder}} and new format {{KEY:placeholder}} + full_prompt = full_prompt.replace(f"{{{{{placeholder}}}}}", content) + full_prompt = full_prompt.replace(f"{{{{KEY:{placeholder}}}}}", content) + + return full_prompt + + def _writeAiResponseDebug(self, label: str, content: str, partIndex: int = 1, modelName: str = None, continuation: bool = None) -> None: + """Persist raw AI response parts for debugging under test-chat/ai - only if debug enabled.""" + try: + # Check if debug logging is enabled + debug_enabled = self.services.utils.configGet("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False) + if not debug_enabled: + return + + import os + from datetime import datetime, UTC + # Base dir: gateway/test-chat/ai (go up 4 levels from this file) + # .../gateway/modules/services/serviceAi/subCoreAi.py -> up to gateway root + gatewayDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) + outDir = os.path.join(gatewayDir, 'test-chat', 'ai') + os.makedirs(outDir, exist_ok=True) + ts = datetime.now(UTC).strftime('%Y%m%d-%H%M%S-%f')[:-3] + suffix = [] + if partIndex is not None: + suffix.append(f"part{partIndex}") + if continuation is not None: + suffix.append(f"cont_{str(continuation).lower()}") + if modelName: + safeModel = ''.join(c if c.isalnum() or c in ('-', '_') else '-' for c in modelName) + suffix.append(safeModel) + suffixStr = ('_' + '_'.join(suffix)) if suffix else '' + fname = f"{ts}_{label}{suffixStr}.txt" + fpath = os.path.join(outDir, fname) + with open(fpath, 'w', encoding='utf-8') as f: + f.write(content or '') + except Exception: + # Do not raise; best-effort debug write + pass + + def _exceedsTokenLimit(self, text: str, model: ModelCapabilities, safety_margin: float) -> bool: + """ + Check if text exceeds model token limit with safety margin. + """ + # Simple character-based estimation (4 chars per token) + estimated_tokens = len(text) // 4 + max_tokens = int(model.maxTokens * (1 - safety_margin)) + return estimated_tokens > max_tokens + + def _reducePlanningPrompt( + self, + full_prompt: str, + placeholders: Optional[Dict[str, str]], + model: ModelCapabilities, + options: AiCallOptions + ) -> str: + """ + Reduce planning prompt size by summarizing placeholders while preserving prompt structure. 
+ """ + if not placeholders: + return self._reduceText(full_prompt, 0.7) + + # Reduce placeholders while preserving prompt + reduced_placeholders = {} + for placeholder, content in placeholders.items(): + if len(content) > 1000: # Only reduce long content + reduction_factor = 0.7 + reduced_content = self._reduceText(content, reduction_factor) + reduced_placeholders[placeholder] = reduced_content + else: + reduced_placeholders[placeholder] = content + + return self._buildPromptWithPlaceholders(full_prompt, reduced_placeholders) + + def _reduceTextPrompt( + self, + prompt: str, + context: str, + model: ModelCapabilities, + options: AiCallOptions + ) -> str: + """ + Reduce text prompt size using typeGroup-aware chunking and merging. + """ + max_size = int(model.maxTokens * (1 - options.safetyMargin)) + + if options.compressPrompt: + # Reduce both prompt and context + target_size = max_size + current_size = len(prompt) + len(context) + reduction_factor = (target_size * 0.7) / current_size + + if reduction_factor < 1.0: + prompt = self._reduceText(prompt, reduction_factor) + context = self._reduceText(context, reduction_factor) + else: + # Only reduce context, preserve prompt integrity + max_context_size = max_size - len(prompt) + if len(context) > max_context_size: + reduction_factor = max_context_size / len(context) + context = self._reduceText(context, reduction_factor) + + return prompt + "\n\n" + context if context else prompt + + def _extractTextFromContentParts(self, extracted_content) -> str: + """ + Extract text content from ExtractionService ContentPart objects. + """ + if not extracted_content or not hasattr(extracted_content, 'parts'): + return "" + + text_parts = [] + for part in extracted_content.parts: + if hasattr(part, 'typeGroup') and part.typeGroup in ['text', 'table', 'structure']: + if hasattr(part, 'data') and part.data: + text_parts.append(part.data) + + return "\n\n".join(text_parts) + + def _reduceText(self, text: str, reduction_factor: float) -> str: + """ + Reduce text size by the specified factor. + """ + if reduction_factor >= 1.0: + return text + + target_length = int(len(text) * reduction_factor) + return text[:target_length] + "... [reduced]" diff --git a/modules/services/serviceAi/subDocumentGeneration.py b/modules/services/serviceAi/subDocumentGeneration.py new file mode 100644 index 00000000..750616e4 --- /dev/null +++ b/modules/services/serviceAi/subDocumentGeneration.py @@ -0,0 +1,459 @@ +import logging +from typing import Dict, Any, List, Optional, Tuple, Union +from modules.datamodels.datamodelChat import ChatDocument +from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType + +logger = logging.getLogger(__name__) + + +class SubDocumentGeneration: + """Document generation operations including single-file and multi-file generation.""" + + def __init__(self, services, aiObjects, documentProcessor): + """Initialize document generation service. + + Args: + services: Service center instance for accessing other services + aiObjects: Initialized AiObjects instance + documentProcessor: Document processing service instance + """ + self.services = services + self.aiObjects = aiObjects + self.documentProcessor = documentProcessor + + async def callAiWithDocumentGeneration( + self, + prompt: str, + documents: Optional[List[ChatDocument]], + options: AiCallOptions, + outputFormat: str, + title: Optional[str] + ) -> Dict[str, Any]: + """ + Handle AI calls with document generation in specific output format. 
+ Now supports both single-file and multi-file generation. + + Args: + prompt: The main prompt for the AI call + documents: Optional list of documents to process + options: AI call configuration options + outputFormat: Target output format (html, pdf, docx, txt, md, json, csv, xlsx) + title: Optional title for generated documents + + Returns: + Dict with generated documents and metadata + """ + try: + # Use AI to analyze prompt intent + prompt_analysis = await self._analyzePromptIntent(prompt, self) + logger.info(f"Prompt analysis result: {prompt_analysis}") + + if prompt_analysis.get("is_multi_file", False): + return await self._callAiWithMultiFileGeneration( + prompt, documents, options, outputFormat, title, prompt_analysis + ) + else: + return await self._callAiWithSingleFileGeneration( + prompt, documents, options, outputFormat, title + ) + + except Exception as e: + logger.error(f"Error in document generation: {str(e)}") + return { + "success": False, + "error": str(e), + "content": "", + "rendered_content": "", + "mime_type": "text/plain", + "filename": f"error_{outputFormat}", + "format": outputFormat, + "title": title or "Error", + "documents": [] + } + + async def _callAiWithSingleFileGeneration( + self, + prompt: str, + documents: Optional[List[ChatDocument]], + options: AiCallOptions, + outputFormat: str, + title: Optional[str] + ) -> Dict[str, Any]: + """Handle single-file document generation (existing functionality).""" + try: + # Get format-specific extraction prompt from generation service + from modules.services.serviceGeneration.mainServiceGeneration import GenerationService + generation_service = GenerationService(self.services) + + # Use default title if not provided + if not title: + title = "AI Generated Document" + + # Get format-specific extraction prompt + extractionPrompt = await generation_service.getExtractionPrompt( + outputFormat=outputFormat, + userPrompt=prompt, + title=title, + aiService=self + ) + + # Process documents with format-specific prompt using JSON mode + # This ensures structured JSON output instead of text + aiResponseJson = await self._callAiJson(extractionPrompt, documents, options) + + # Validate JSON response + if not isinstance(aiResponseJson, dict) or "sections" not in aiResponseJson: + raise Exception("AI response is not valid JSON document structure") + + # Generate filename from document metadata + parsedFilename = None + try: + if aiResponseJson.get("metadata", {}).get("title"): + title = aiResponseJson["metadata"]["title"] + # Clean title for filename + import re + parsed = re.sub(r"[^a-zA-Z0-9._-]", "-", title) + parsed = re.sub(r"-+", "-", parsed).strip('-') + if parsed: + parsedFilename = f"{parsed}.{outputFormat}" + except Exception: + parsedFilename = None + + # Render the JSON content to the specified format + renderedContent, mimeType = await generation_service.renderReport( + extractedContent=aiResponseJson, + outputFormat=outputFormat, + title=title, + userPrompt=prompt, + aiService=self + ) + + # Generate meaningful filename (use AI-provided if valid, else fallback) + from datetime import datetime, UTC + timestamp = datetime.now(UTC).strftime("%Y%m%d-%H%M%S") + if parsedFilename and parsedFilename.lower().endswith(f".{outputFormat.lower()}"): + filename = parsedFilename + else: + safeTitle = ''.join(c if c.isalnum() else '-' for c in (title or 'document')).strip('-') + filename = f"{safeTitle or 'document'}-{timestamp}.{outputFormat}" + + # Return structured result with document information + return { + "success": True, + 
"content": aiResponseJson, # Structured JSON document + "rendered_content": renderedContent, # Formatted content + "mime_type": mimeType, + "filename": filename, + "format": outputFormat, + "title": title, + "documents": [{ + "documentName": filename, + "documentData": renderedContent, + "mimeType": mimeType + }], + "is_multi_file": False + } + + except Exception as e: + logger.error(f"Error in single-file document generation: {str(e)}") + raise + + async def _callAiWithMultiFileGeneration( + self, + prompt: str, + documents: Optional[List[ChatDocument]], + options: AiCallOptions, + outputFormat: str, + title: Optional[str], + prompt_analysis: Dict[str, Any] + ) -> Dict[str, Any]: + """Handle multi-file document generation using AI analysis.""" + try: + # Get multi-file extraction prompt based on AI analysis + from modules.services.serviceGeneration.mainServiceGeneration import GenerationService + generation_service = GenerationService(self.services) + + # Use default title if not provided + if not title: + title = "AI Generated Documents" + + # Get adaptive extraction prompt + extraction_prompt = await generation_service.getAdaptiveExtractionPrompt( + outputFormat=outputFormat, + userPrompt=prompt, + title=title, + promptAnalysis=prompt_analysis, + aiService=self + ) + + logger.info(f"Adaptive extraction prompt length: {len(extraction_prompt)} characters") + logger.debug(f"Adaptive extraction prompt preview: {extraction_prompt[:500]}...") + + # Process with adaptive JSON schema - use the existing pipeline but with adaptive prompt + logger.info(f"Using adaptive prompt with existing pipeline: {len(extraction_prompt)} chars") + logger.debug(f"Processing documents: {len(documents) if documents else 0} documents") + + # Use the existing pipeline but replace the prompt with our adaptive one + # This ensures proper document processing while using the multi-file prompt + ai_response = await self.documentProcessor.processDocumentsPerChunkJsonWithPrompt(documents, extraction_prompt, options) + + logger.info(f"AI response type: {type(ai_response)}") + logger.info(f"AI response keys: {list(ai_response.keys()) if isinstance(ai_response, dict) else 'Not a dict'}") + logger.debug(f"AI response preview: {str(ai_response)[:500]}...") + + # Validate response structure + if not self._validateResponseStructure(ai_response, prompt_analysis): + # Fallback to single-file if multi-file fails + logger.warning(f"Multi-file processing failed - Invalid response structure. 
Expected multi-file but got: {list(ai_response.keys()) if isinstance(ai_response, dict) else type(ai_response)}") + logger.warning(f"Prompt analysis: {prompt_analysis}") + logger.warning("Falling back to single-file generation") + return await self._callAiWithSingleFileGeneration( + prompt, documents, options, outputFormat, title + ) + + # Process multiple documents + generated_documents = [] + for i, doc_data in enumerate(ai_response.get("documents", [])): + # Transform AI-generated sections to renderer-compatible format + transformed_sections = [] + for section in doc_data.get("sections", []): + # Convert AI format to renderer format + transformed_section = { + "id": section.get("id", f"section_{len(transformed_sections) + 1}"), + "type": section.get("content_type", "paragraph"), + "data": { + "text": "", + "elements": section.get("elements", []) + }, + "order": section.get("order", len(transformed_sections) + 1) + } + + # Extract text from elements for simple text-based sections + if section.get("content_type") in ["paragraph", "heading"]: + text_parts = [] + for element in section.get("elements", []): + if "text" in element: + text_parts.append(element["text"]) + transformed_section["data"]["text"] = "\n".join(text_parts) + + transformed_sections.append(transformed_section) + + # Create complete document structure for rendering + complete_document = { + "metadata": { + "title": doc_data["title"], + "source_document": "multi_file_generation", + "document_id": doc_data.get("id", f"doc_{i+1}"), + "filename": doc_data.get("filename", f"document_{i+1}"), + "split_strategy": prompt_analysis.get("strategy", "custom") + }, + "sections": transformed_sections, + "summary": f"Generated document: {doc_data['title']}", + "tags": ["multi_file", "ai_generated"] + } + + rendered_content, mime_type = await generation_service.renderReport( + extractedContent=complete_document, + outputFormat=outputFormat, + title=doc_data["title"], + userPrompt=prompt, + aiService=self + ) + + # Generate proper filename with correct extension + base_filename = doc_data.get("filename", f"document_{i+1}") + # Remove any existing extension and add the correct one + if '.' 
in base_filename: + base_filename = base_filename.rsplit('.', 1)[0] + + # Add proper extension based on output format + if outputFormat.lower() == "docx": + filename = f"{base_filename}.docx" + elif outputFormat.lower() == "pdf": + filename = f"{base_filename}.pdf" + elif outputFormat.lower() == "html": + filename = f"{base_filename}.html" + else: + filename = f"{base_filename}.{outputFormat}" + + generated_documents.append({ + "documentName": filename, + "documentData": rendered_content, + "mimeType": mime_type + }) + + # Save debug files for multi-file generation - only if debug enabled + debug_enabled = self.services.utils.configGet("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False) + if debug_enabled: + try: + import os + from datetime import datetime, UTC + ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S") + debug_root = "./test-chat/ai" + debug_dir = os.path.join(debug_root, f"multifile_output_{ts}") + os.makedirs(debug_dir, exist_ok=True) + + # Save metadata + with open(os.path.join(debug_dir, "metadata.txt"), "w", encoding="utf-8") as f: + f.write(f"title: {title}\n") + f.write(f"format: {outputFormat}\n") + f.write(f"documents_count: {len(generated_documents)}\n") + f.write(f"split_strategy: {prompt_analysis.get('strategy', 'custom')}\n") + f.write(f"prompt_analysis: {prompt_analysis}\n") + + # Save each generated document + for i, doc in enumerate(generated_documents): + doc_filename = doc["documentName"] + doc_data = doc["documentData"] + doc_mime = doc["mimeType"] + + # Determine file extension + if outputFormat.lower() == "docx": + file_ext = ".docx" + elif outputFormat.lower() == "pdf": + file_ext = ".pdf" + elif outputFormat.lower() == "html": + file_ext = ".html" + else: + file_ext = f".{outputFormat}" + + # Save the rendered document + output_path = os.path.join(debug_dir, f"document_{i+1}_{doc_filename}") + + if file_ext in ['.md', '.txt', '.html', '.json', '.csv']: + # Text-based formats + with open(output_path, 'w', encoding='utf-8') as f: + f.write(doc_data) + else: + # Binary formats - decode from base64 if needed + try: + import base64 + doc_bytes = base64.b64decode(doc_data) + with open(output_path, 'wb') as f: + f.write(doc_bytes) + except Exception: + # If not base64, save as text + with open(output_path, 'w', encoding='utf-8') as f: + f.write(doc_data) + + logger.info(f"💾 Debug: Saved multi-file document {i+1}: {output_path}") + + logger.info(f"💾 Debug: Multi-file output saved to: {debug_dir}") + + except Exception as e: + logger.warning(f"Failed to save multi-file debug output: {e}") + + return { + "success": True, + "content": ai_response, + "rendered_content": None, # Not applicable for multi-file + "mime_type": None, # Not applicable for multi-file + "filename": None, # Not applicable for multi-file + "format": outputFormat, + "title": title, + "documents": generated_documents, + "is_multi_file": True, + "split_strategy": prompt_analysis.get("strategy", "custom") + } + + except Exception as e: + logger.error(f"Error in multi-file document generation: {str(e)}") + # Fallback to single-file + return await self._callAiWithSingleFileGeneration( + prompt, documents, options, outputFormat, title + ) + + async def _callAiJson( + self, + prompt: str, + documents: Optional[List[ChatDocument]], + options: AiCallOptions + ) -> Dict[str, Any]: + """ + Handle AI calls with document processing for JSON output. + Returns structured JSON document instead of text. 
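+
+        Usage sketch (hypothetical inputs):
+
+            doc = await self._callAiJson(extractionPrompt, documents, options)
+            # doc resembles {"metadata": {"title": ...}, "sections": [...]}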
+ """ + # Process documents with JSON merging + return await self.documentProcessor.processDocumentsPerChunkJson(documents, prompt, options) + + async def _analyzePromptIntent(self, prompt: str, ai_service=None) -> Dict[str, Any]: + """Use AI to analyze user prompt and determine processing requirements.""" + if not ai_service: + return {"is_multi_file": False, "strategy": "single", "criteria": None} + + try: + analysis_prompt = f""" +Analyze this user request and determine if it requires multiple file output or single file output. + +User request: "{prompt}" + +Respond with JSON only in this exact format: +{{ + "is_multi_file": true/false, + "strategy": "single|per_entity|by_section|by_criteria|custom", + "criteria": "description of how to split content", + "file_naming_pattern": "suggested pattern for filenames", + "reasoning": "brief explanation of the analysis" +}} + +Consider: +- Does the user want separate files for different entities (customers, products, etc.)? +- Does the user want to split content into multiple documents? +- What would be the most logical way to organize the content? +- What language is the request in? (analyze in the original language) + +Return only the JSON response. +""" + + from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType + request_options = AiCallOptions() + request_options.operationType = OperationType.GENERAL + + request = AiCallRequest(prompt=analysis_prompt, context="", options=request_options) + response = await ai_service.aiObjects.call(request) + + if response and response.content: + import json + import re + + # Extract JSON from response + result = response.content.strip() + json_match = re.search(r'\{.*\}', result, re.DOTALL) + if json_match: + result = json_match.group(0) + + analysis = json.loads(result) + return analysis + else: + return {"is_multi_file": False, "strategy": "single", "criteria": None} + + except Exception as e: + logger.warning(f"AI prompt analysis failed: {str(e)}, defaulting to single file") + return {"is_multi_file": False, "strategy": "single", "criteria": None} + + def _validateResponseStructure(self, response: Dict[str, Any], prompt_analysis: Dict[str, Any]) -> bool: + """Validate that AI response matches the expected structure.""" + try: + if not isinstance(response, dict): + logger.warning(f"Response validation failed: Response is not a dict, got {type(response)}") + return False + + # Check for multi-file structure + if prompt_analysis.get("is_multi_file", False): + has_documents = "documents" in response + is_documents_list = isinstance(response.get("documents"), list) + logger.info(f"Multi-file validation: has_documents={has_documents}, is_documents_list={is_documents_list}") + if has_documents and is_documents_list: + logger.info(f"Multi-file validation passed: {len(response['documents'])} documents found") + else: + logger.warning(f"Multi-file validation failed: documents key present={has_documents}, documents is list={is_documents_list}") + logger.warning(f"Available keys: {list(response.keys())}") + return has_documents and is_documents_list + else: + has_sections = "sections" in response + is_sections_list = isinstance(response.get("sections"), list) + logger.info(f"Single-file validation: has_sections={has_sections}, is_sections_list={is_sections_list}") + return has_sections and is_sections_list + except Exception as e: + logger.warning(f"Response validation failed with exception: {str(e)}") + return False diff --git a/modules/services/serviceAi/subDocumentProcessing.py 
b/modules/services/serviceAi/subDocumentProcessing.py new file mode 100644 index 00000000..e9e087d2 --- /dev/null +++ b/modules/services/serviceAi/subDocumentProcessing.py @@ -0,0 +1,1042 @@ +import logging +from typing import Dict, Any, List, Optional, Tuple, Union +from modules.datamodels.datamodelChat import ChatDocument +from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, ModelCapabilities, OperationType, Priority +from modules.datamodels.datamodelExtraction import ChunkResult, ContentExtracted +from modules.services.serviceExtraction.mainServiceExtraction import ExtractionService + +logger = logging.getLogger(__name__) + + +class SubDocumentProcessing: + """Document processing operations including chunking, processing, and merging.""" + + def __init__(self, services, aiObjects): + """Initialize document processing service. + + Args: + services: Service center instance for accessing other services + aiObjects: Initialized AiObjects instance + """ + self.services = services + self.aiObjects = aiObjects + self._extractionService = None + + @property + def extractionService(self): + """Lazy initialization of extraction service.""" + if self._extractionService is None: + logger.info("Lazy initializing ExtractionService...") + self._extractionService = ExtractionService(self.services) + return self._extractionService + + def _calculateMaxContextBytes(self, options: Optional[AiCallOptions]) -> int: + """Calculate maximum context bytes based on model capabilities and options.""" + if options and options.maxContextBytes: + return options.maxContextBytes + + # Default model capabilities (this should be enhanced with actual model registry) + defaultMaxTokens = 4000 + safetyMargin = options.safetyMargin if options else 0.1 + + # Calculate bytes (4 chars per token estimation) + maxContextBytes = int(defaultMaxTokens * (1 - safetyMargin) * 4) + + return maxContextBytes + + async def processDocumentsPerChunk( + self, + documents: List[ChatDocument], + prompt: str, + options: Optional[AiCallOptions] = None + ) -> str: + """ + Process documents with per-chunk AI calls and merge results. + FIXED: Now preserves chunk relationships and document structure. 
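+
+        Usage sketch (hypothetical caller):
+
+            merged = await processor.processDocumentsPerChunk(docs, prompt, options)
+            # merged is plain text with "=== DOCUMENT: <id> ===" headers per source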
+ + Args: + documents: List of ChatDocument objects to process + prompt: AI prompt for processing + options: AI call options + + Returns: + Merged AI results as string with preserved document structure + """ + if not documents: + return "" + + # Get model capabilities for size calculation + model_capabilities = self._getModelCapabilitiesForContent(prompt, documents, options) + + # Build extraction options for chunking with intelligent merging + extractionOptions: Dict[str, Any] = { + "prompt": prompt, + "operationType": options.operationType if options else "general", + "processDocumentsIndividually": True, # Process each document separately + "maxSize": model_capabilities["maxContextBytes"], + "chunkAllowed": True, + "textChunkSize": model_capabilities["textChunkSize"], + "imageChunkSize": model_capabilities["imageChunkSize"], + "imageMaxPixels": 1024 * 1024, + "imageQuality": 85, + "mergeStrategy": { + "useIntelligentMerging": True, # Enable intelligent token-aware merging + "modelCapabilities": model_capabilities, + "prompt": prompt, + "groupBy": "typeGroup", + "orderBy": "id", + "mergeType": "concatenate" + }, + } + + logger.debug(f"Per-chunk extraction options: prompt length={len(extractionOptions.get('prompt', ''))} chars, operationType={extractionOptions.get('operationType')}") + + try: + # Extract content with chunking + extractionResult = self.extractionService.extractContent(documents, extractionOptions) + + if not isinstance(extractionResult, list): + return "[Error: No extraction results]" + + # FIXED: Process chunks with proper mapping + chunkResults = await self._processChunksWithMapping(extractionResult, prompt, options) + + # FIXED: Merge with preserved chunk relationships + mergedContent = self._mergeChunkResults(chunkResults, options) + + return mergedContent + + except Exception as e: + logger.error(f"Error in per-chunk processing: {str(e)}") + return f"[Error in per-chunk processing: {str(e)}]" + + async def processDocumentsPerChunkJson( + self, + documents: List[ChatDocument], + prompt: str, + options: Optional[AiCallOptions] = None + ) -> Dict[str, Any]: + """ + Process documents with per-chunk AI calls and merge results in JSON mode. + Returns structured JSON document instead of text. 
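+
+        Illustrative return shape (assuming well-formed chunk responses):
+
+            {
+                "metadata": {"title": "...", "source_documents": [...]},
+                "sections": [{"id": "...", "order": 1, ...}]
+            }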
+ """ + if not documents: + return {"metadata": {"title": "Empty Document"}, "sections": []} + + # Get model capabilities for size calculation + model_capabilities = self._getModelCapabilitiesForContent(prompt, documents, options) + + # Build extraction options for chunking with intelligent merging + extractionOptions: Dict[str, Any] = { + "prompt": prompt, + "operationType": options.operationType if options else "general", + "processDocumentsIndividually": True, # Process each document separately + "maxSize": model_capabilities["maxContextBytes"], + "chunkAllowed": True, + "textChunkSize": model_capabilities["textChunkSize"], + "imageChunkSize": model_capabilities["imageChunkSize"], + "imageMaxPixels": 1024 * 1024, + "imageQuality": 85, + "mergeStrategy": { + "useIntelligentMerging": True, # Enable intelligent token-aware merging + "modelCapabilities": model_capabilities, + "prompt": prompt, + "groupBy": "typeGroup", + "orderBy": "id", + "mergeType": "concatenate" + }, + } + + logger.debug(f"Per-chunk extraction options (JSON mode): prompt length={len(extractionOptions.get('prompt', ''))} chars, operationType={extractionOptions.get('operationType')}") + + try: + # Extract content with chunking + extractionResult = self.extractionService.extractContent(documents, extractionOptions) + + if not isinstance(extractionResult, list): + return {"metadata": {"title": "Error Document"}, "sections": []} + + # Process chunks with proper mapping + chunkResults = await self._processChunksWithMapping(extractionResult, prompt, options, generate_json=True) + + # Merge with JSON mode + mergedJsonDocument = self._mergeChunkResultsJson(chunkResults, options) + + return mergedJsonDocument + + except Exception as e: + logger.error(f"Error in per-chunk processing (JSON mode): {str(e)}") + return {"metadata": {"title": "Error Document"}, "sections": []} + + async def processDocumentsPerChunkJsonWithPrompt( + self, + documents: List[ChatDocument], + custom_prompt: str, + options: Optional[AiCallOptions] = None + ) -> Dict[str, Any]: + """ + Process documents with per-chunk AI calls and merge results in JSON mode. + Uses a custom prompt instead of the default extraction prompt. 
+ """ + if not documents: + return {"metadata": {"title": "Empty Document"}, "sections": []} + + # Get model capabilities for size calculation + model_capabilities = self._getModelCapabilitiesForContent(custom_prompt, documents, options) + + # Build extraction options for chunking with intelligent merging + extractionOptions: Dict[str, Any] = { + "prompt": custom_prompt, # Use the custom prompt instead of default + "operationType": options.operationType if options else "general", + "processDocumentsIndividually": True, # Process each document separately + "maxSize": model_capabilities["maxContextBytes"], + "chunkAllowed": True, + "textChunkSize": model_capabilities["textChunkSize"], + "imageChunkSize": model_capabilities["imageChunkSize"], + "imageMaxPixels": 1024 * 1024, + "imageQuality": 85, + "mergeStrategy": { + "useIntelligentMerging": True, # Enable intelligent token-aware merging + "modelCapabilities": model_capabilities, + "prompt": custom_prompt, # Use the custom prompt + "groupBy": "typeGroup", + "orderBy": "id", + "mergeType": "concatenate" + }, + } + + logger.debug(f"Per-chunk extraction options (JSON mode): prompt length={len(extractionOptions.get('prompt', ''))} chars, operationType={extractionOptions.get('operationType')}") + + try: + # Extract content with chunking + extractionResult = self.extractionService.extractContent(documents, extractionOptions) + + if not isinstance(extractionResult, list): + return {"metadata": {"title": "Error Document"}, "sections": []} + + # Process chunks with proper mapping + logger.info(f"Processing {len(extractionResult)} chunks with custom prompt") + logger.debug(f"Custom prompt preview: {custom_prompt[:200]}...") + + # Debug: Show what content is being processed (before filtering) + for i, ec in enumerate(extractionResult): + if hasattr(ec, 'parts'): + for j, part in enumerate(ec.parts): + if not (hasattr(part, 'data') and part.data): + # Check if this is an empty container chunk (which is expected) + part_type = getattr(part, 'typeGroup', None) + part_mime = getattr(part, 'mimeType', '') + + is_empty_container = ( + part_type == "container" and + part_mime and + 'document' in part_mime.lower() + ) + + if not is_empty_container: + logger.warning(f"Part {j} has no data - typeGroup='{part_type}', mimeType='{part_mime}'") + + chunkResults = await self._processChunksWithMapping(extractionResult, custom_prompt, options, generate_json=True) + + # Debug: Show what chunks were actually processed (after filtering) + logger.info(f"After filtering: {len(chunkResults)} chunks will be processed") + + # Merge with JSON mode + mergedJsonDocument = self._mergeChunkResultsJson(chunkResults, options) + + # Debug: Show what the AI actually returned + logger.info(f"AI returned document with keys: {list(mergedJsonDocument.keys())}") + if 'documents' in mergedJsonDocument: + logger.info(f"Number of documents: {len(mergedJsonDocument['documents'])}") + elif 'sections' in mergedJsonDocument: + logger.info(f"Number of sections: {len(mergedJsonDocument['sections'])}") + + return mergedJsonDocument + + except Exception as e: + logger.error(f"Error in per-chunk JSON processing: {str(e)}") + return {"metadata": {"title": "Error Document"}, "sections": []} + + async def callAiText( + self, + prompt: str, + documents: Optional[List[ChatDocument]], + options: AiCallOptions + ) -> str: + """ + Handle text calls with document processing through ExtractionService. + UNIFIED PROCESSING: Always use per-chunk processing for consistency. 
+ """ + # UNIFIED PROCESSING: Always use per-chunk processing for consistency + # This ensures MIME-type checking, chunk mapping, and parallel processing + return await self.processDocumentsPerChunk(documents, prompt, options) + + async def _processChunksWithMapping( + self, + extractionResult: List[ContentExtracted], + prompt: str, + options: Optional[AiCallOptions] = None, + generate_json: bool = False + ) -> List[ChunkResult]: + """Process chunks with proper mapping to preserve relationships.""" + from modules.datamodels.datamodelExtraction import ChunkResult + import asyncio + import time + + # Collect all chunks that need processing with proper indexing + chunks_to_process = [] + chunk_index = 0 + + for ec in extractionResult: + # Get document MIME type from metadata + document_mime_type = None + for part in ec.parts: + if part.metadata and 'documentMimeType' in part.metadata: + document_mime_type = part.metadata['documentMimeType'] + break + + for part in ec.parts: + if part.typeGroup in ("text", "table", "structure", "image", "container", "binary"): + # Skip empty container chunks (they're just metadata containers) + if part.typeGroup == "container" and (not part.data or len(part.data.strip()) == 0): + logger.debug(f"Skipping empty container chunk: mimeType={part.mimeType}, data_length={len(part.data) if part.data else 0}") + continue + + chunks_to_process.append({ + 'part': part, + 'chunk_index': chunk_index, + 'document_id': ec.id, + 'document_mime_type': document_mime_type + }) + chunk_index += 1 + + logger.info(f"Processing {len(chunks_to_process)} chunks with proper mapping") + + # Process chunks in parallel with proper mapping + async def process_single_chunk(chunk_info: Dict) -> ChunkResult: + part = chunk_info['part'] + chunk_index = chunk_info['chunk_index'] + document_id = chunk_info['document_id'] + document_mime_type = chunk_info.get('document_mime_type', part.mimeType) + + start_time = time.time() + + try: + # FIXED: Check MIME type first, then fallback to typeGroup + is_image = ( + (document_mime_type and document_mime_type.startswith('image/')) or + (part.mimeType and part.mimeType.startswith('image/')) or + (part.typeGroup == "image") + ) + + # Debug logging + self.services.utils.debugLogToFile(f"Chunk {chunk_index}: document_mime_type={document_mime_type}, part.mimeType={part.mimeType}, part.typeGroup={part.typeGroup}, is_image={is_image}", "AI_SERVICE") + logger.info(f"Chunk {chunk_index}: document_mime_type={document_mime_type}, part.mimeType={part.mimeType}, part.typeGroup={part.typeGroup}, is_image={is_image}") + + if is_image: + # Use the same extraction prompt for image analysis (contains table JSON format) + self.services.utils.debugLogToFile(f"Processing image chunk {chunk_index}: mimeType={part.mimeType}, data_length={len(part.data) if part.data else 0}", "AI_SERVICE") + + # Check if image data is available + if not part.data: + error_msg = f"No image data available for chunk {chunk_index}" + logger.warning(error_msg) + ai_result = f"Error: {error_msg}" + else: + try: + # Import here to avoid circular imports + from modules.services.serviceAi.subCoreAi import SubCoreAi + core_ai = SubCoreAi(self.services, self.aiObjects) + + ai_result = await core_ai.readImage( + prompt=prompt, + imageData=part.data, + mimeType=part.mimeType, + options=options + ) + + self.services.utils.debugLogToFile(f"Image analysis result for chunk {chunk_index}: length={len(ai_result) if ai_result else 0}, preview={ai_result[:200] if ai_result else 'None'}...", "AI_SERVICE") + + # 
Check if result is empty or None + if not ai_result or not ai_result.strip(): + logger.warning(f"Image chunk {chunk_index} returned empty response from AI") + ai_result = "No content detected in image" + + except Exception as e: + logger.error(f"Error processing image chunk {chunk_index}: {str(e)}") + ai_result = f"Error analyzing image: {str(e)}" + + # If generating JSON, clean image analysis result + if generate_json: + try: + import json + import re + + # Clean the response - remove markdown code blocks if present + cleaned_result = ai_result.strip() + + # Remove various markdown patterns + if cleaned_result.startswith('```json'): + cleaned_result = re.sub(r'^```json\s*', '', cleaned_result) + cleaned_result = re.sub(r'\s*```$', '', cleaned_result) + elif cleaned_result.startswith('```'): + cleaned_result = re.sub(r'^```\s*', '', cleaned_result) + cleaned_result = re.sub(r'\s*```$', '', cleaned_result) + + # Remove any leading/trailing text that's not JSON + # Look for the first { and last } to extract JSON + first_brace = cleaned_result.find('{') + last_brace = cleaned_result.rfind('}') + + if first_brace != -1 and last_brace != -1 and last_brace > first_brace: + cleaned_result = cleaned_result[first_brace:last_brace + 1] + + # Additional cleaning for common AI response issues + cleaned_result = cleaned_result.strip() + + # Validate JSON + json.loads(cleaned_result) + ai_result = cleaned_result # Use cleaned version + self.services.utils.debugLogToFile(f"Image chunk {chunk_index} JSON validation successful", "AI_SERVICE") + + except json.JSONDecodeError as e: + logger.warning(f"Image chunk {chunk_index} returned invalid JSON: {str(e)}") + logger.warning(f"Raw response was: '{ai_result[:500]}...'") + + # Create fallback JSON with the actual response content (not the error message) + # Use the original AI response content, not the error message + fallback_content = ai_result if ai_result and ai_result.strip() else "No content detected" + + self.services.utils.debugLogToFile(f"IMAGE FALLBACK CONTENT PREVIEW: '{fallback_content[:200]}...'", "AI_SERVICE") + + ai_result = json.dumps({ + "metadata": {"title": f"Image Analysis - Chunk {chunk_index}"}, + "sections": [{ + "id": f"image_section_{chunk_index}", + "type": "paragraph", + "data": {"text": fallback_content} + }] + }) + self.services.utils.debugLogToFile(f"Created fallback JSON for image chunk {chunk_index} with actual content", "AI_SERVICE") + elif part.typeGroup in ("container", "binary"): + # Handle ALL container and binary content generically - let AI process any document type + self.services.utils.debugLogToFile(f"DEBUG: Chunk {chunk_index}: typeGroup={part.typeGroup}, mimeType={part.mimeType}, data_length={len(part.data) if part.data else 0}", "AI_SERVICE") + + # Skip empty container chunks (they're just metadata containers) + if part.typeGroup == "container" and (not part.data or len(part.data.strip()) == 0): + self.services.utils.debugLogToFile(f"DEBUG: Skipping empty container - mimeType={part.mimeType}, data_length={len(part.data) if part.data else 0}", "AI_SERVICE") + logger.info(f"Chunk {chunk_index}: Skipping empty container - mimeType={part.mimeType}, data_length={len(part.data) if part.data else 0}") + # Skip processing this chunk + pass + elif part.mimeType and part.data and len(part.data.strip()) > 0: + # Process any document container as text content + request_options = options if options is not None else AiCallOptions() + request_options.operationType = OperationType.GENERAL + 
self.services.utils.debugLogToFile(f"EXTRACTION CONTAINER CHUNK {chunk_index}: Processing {part.mimeType} container as text with generate_json={generate_json}", "AI_SERVICE") + logger.info(f"Chunk {chunk_index}: Processing {part.mimeType} container as text with generate_json={generate_json}") + + # Log extraction prompt and context + self.services.utils.debugLogToFile(f"EXTRACTION PROMPT: {prompt}", "AI_SERVICE") + self.services.utils.debugLogToFile(f"EXTRACTION CONTEXT LENGTH: {len(part.data) if part.data else 0} characters", "AI_SERVICE") + + request = AiCallRequest( + prompt=prompt, + context=part.data, + options=request_options + ) + response = await self.aiObjects.call(request) + ai_result = response.content + + # Log extraction response + self.services.utils.debugLogToFile(f"EXTRACTION RESPONSE LENGTH: {len(ai_result) if ai_result else 0} characters", "AI_SERVICE") + + # Save full extraction prompt and response to debug file - only if debug enabled + debug_enabled = self.services.utils.configGet("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False) + if debug_enabled: + try: + import os + from datetime import datetime, UTC + ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S") + debug_root = "./test-chat/ai" + os.makedirs(debug_root, exist_ok=True) + with open(os.path.join(debug_root, f"{ts}_extraction_container_chunk_{chunk_index}.txt"), "w", encoding="utf-8") as f: + f.write(f"EXTRACTION PROMPT:\n{prompt}\n\n") + f.write(f"EXTRACTION CONTEXT:\n{part.data if part.data else 'No context'}\n\n") + f.write(f"EXTRACTION RESPONSE:\n{ai_result if ai_result else 'No response'}\n") + except Exception: + pass + + # If generating JSON, validate the response + if generate_json: + try: + import json + import re + + # Clean the response - remove markdown code blocks if present + cleaned_result = ai_result.strip() + + # Remove various markdown patterns + if cleaned_result.startswith('```json'): + cleaned_result = re.sub(r'^```json\s*', '', cleaned_result) + cleaned_result = re.sub(r'\s*```$', '', cleaned_result) + elif cleaned_result.startswith('```'): + cleaned_result = re.sub(r'^```\s*', '', cleaned_result) + cleaned_result = re.sub(r'\s*```$', '', cleaned_result) + + # Remove any leading/trailing text that's not JSON + # Look for the first { and last } to extract JSON + first_brace = cleaned_result.find('{') + last_brace = cleaned_result.rfind('}') + + if first_brace != -1 and last_brace != -1 and last_brace > first_brace: + cleaned_result = cleaned_result[first_brace:last_brace + 1] + + # Additional cleaning for common AI response issues + cleaned_result = cleaned_result.strip() + + # Validate JSON + json.loads(cleaned_result) + ai_result = cleaned_result # Use cleaned version + + except json.JSONDecodeError as e: + logger.warning(f"Container chunk {chunk_index} ({part.mimeType}) returned invalid JSON: {str(e)}") + logger.warning(f"Raw response was: '{ai_result[:500]}...'") + + # Create fallback JSON with the actual response content (not the error message) + # Use the original AI response content, not the error message + fallback_content = ai_result if ai_result and ai_result.strip() else "No content detected" + + self.services.utils.debugLogToFile(f"FALLBACK CONTENT PREVIEW: '{fallback_content[:200]}...'", "AI_SERVICE") + + ai_result = json.dumps({ + "metadata": {"title": f"Document Analysis - Chunk {chunk_index}"}, + "sections": [{ + "id": f"analysis_section_{chunk_index}", + "type": "paragraph", + "data": {"text": fallback_content} + }] + }) + self.services.utils.debugLogToFile(f"Created fallback JSON for 
container chunk {chunk_index} with actual content", "AI_SERVICE") + else: + # Skip empty or invalid container/binary content - don't create a result + self.services.utils.debugLogToFile(f"DEBUG: Chunk {chunk_index}: Skipping empty container - mimeType={part.mimeType}, data_length={len(part.data) if part.data else 0}", "AI_SERVICE") + # Return None to indicate this chunk should be completely skipped + return None + else: + # Ensure options is not None and set correct operation type for text + request_options = options if options is not None else AiCallOptions() + # FIXED: Set operation type to general for text processing + request_options.operationType = OperationType.GENERAL + self.services.utils.debugLogToFile(f"EXTRACTION CHUNK {chunk_index}: Calling aiObjects.call with operationType={request_options.operationType}, generate_json={generate_json}", "AI_SERVICE") + logger.info(f"Chunk {chunk_index}: Calling aiObjects.call with operationType={request_options.operationType}, generate_json={generate_json}") + + # Log extraction context length + self.services.utils.debugLogToFile(f"EXTRACTION CONTEXT LENGTH: {len(part.data) if part.data else 0} characters", "AI_SERVICE") + + # Debug: Log the actual prompt being sent to AI + logger.debug(f"AI PROMPT PREVIEW: {prompt[:300]}...") + logger.debug(f"AI CONTEXT PREVIEW: {part.data[:200] if part.data else 'None'}...") + + request = AiCallRequest( + prompt=prompt, + context=part.data, + options=request_options + ) + response = await self.aiObjects.call(request) + + # Debug: Log what AI actually returned + logger.debug(f"AI RESPONSE PREVIEW: {response.content[:300] if response.content else 'None'}...") + ai_result = response.content + + # Log extraction response length + self.services.utils.debugLogToFile(f"EXTRACTION RESPONSE LENGTH: {len(ai_result) if ai_result else 0} characters", "AI_SERVICE") + + # Save extraction response to debug file (without verbose prompt) - only if debug enabled + debug_enabled = self.services.utils.configGet("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False) + if debug_enabled: + try: + import os + from datetime import datetime, UTC + ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S") + debug_root = "./test-chat/ai" + os.makedirs(debug_root, exist_ok=True) + with open(os.path.join(debug_root, f"{ts}_extraction_chunk_{chunk_index}.txt"), "w", encoding="utf-8") as f: + f.write(f"EXTRACTION RESPONSE:\n{ai_result if ai_result else 'No response'}\n") + except Exception: + pass + + # If generating JSON, validate the response + if generate_json: + try: + import json + import re + + # Clean the response - remove markdown code blocks and extra formatting + cleaned_result = ai_result.strip() + + # Remove any markdown code block markers (```json, ```, etc.) 
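+                                # re.MULTILINE lets the ^/$ anchors match at every line
+                                # break, so fences that appear mid-response are stripped
+                                # as well as those wrapping the whole reply.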
+ cleaned_result = re.sub(r'^```(?:json)?\s*', '', cleaned_result, flags=re.MULTILINE) + cleaned_result = re.sub(r'\s*```\s*$', '', cleaned_result, flags=re.MULTILINE) + + # Remove any remaining ``` markers anywhere in the text + cleaned_result = re.sub(r'```', '', cleaned_result) + + # Try to extract JSON from the response if it's embedded in other text + json_match = re.search(r'\{.*\}', cleaned_result, re.DOTALL) + if json_match: + cleaned_result = json_match.group(0) + + # Validate JSON + json.loads(cleaned_result) + ai_result = cleaned_result # Use cleaned version + + except json.JSONDecodeError as e: + logger.warning(f"Chunk {chunk_index} returned invalid JSON: {str(e)}") + # Create fallback JSON + ai_result = json.dumps({ + "metadata": {"title": "Error Section"}, + "sections": [{ + "id": f"error_section_{chunk_index}", + "type": "paragraph", + "data": {"text": f"Error parsing JSON: {str(e)}"} + }] + }) + + processing_time = time.time() - start_time + + logger.info(f"Chunk {chunk_index} processed: {len(ai_result)} chars in {processing_time:.2f}s") + + return ChunkResult( + originalChunk=part, + aiResult=ai_result, + chunkIndex=chunk_index, + documentId=document_id, + processingTime=processing_time, + metadata={ + "success": True, + "chunkSize": len(part.data) if part.data else 0, + "resultSize": len(ai_result), + "typeGroup": part.typeGroup + } + ) + + except Exception as e: + processing_time = time.time() - start_time + logger.warning(f"Error processing chunk {chunk_index}: {str(e)}") + + return ChunkResult( + originalChunk=part, + aiResult=f"[Error processing chunk: {str(e)}]", + chunkIndex=chunk_index, + documentId=document_id, + processingTime=processing_time, + metadata={ + "success": False, + "error": str(e), + "chunkSize": len(part.data) if part.data else 0, + "typeGroup": part.typeGroup + } + ) + + # Process chunks with concurrency control + max_concurrent = 5 # Default concurrency + if options and hasattr(options, 'maxConcurrentChunks'): + max_concurrent = options.maxConcurrentChunks + elif options and hasattr(options, 'maxParallelChunks'): + max_concurrent = options.maxParallelChunks + + logger.info(f"Processing {len(chunks_to_process)} chunks with max concurrency: {max_concurrent}") + self.services.utils.debugLogToFile(f"DEBUG: Chunks to process: {len(chunks_to_process)}", "AI_SERVICE") + for i, chunk_info in enumerate(chunks_to_process): + self.services.utils.debugLogToFile(f"DEBUG: Chunk {i}: typeGroup={chunk_info['part'].typeGroup}, mimeType={chunk_info['part'].mimeType}, data_length={len(chunk_info['part'].data) if chunk_info['part'].data else 0}", "AI_SERVICE") + + # Create semaphore for concurrency control + semaphore = asyncio.Semaphore(max_concurrent) + + async def process_with_semaphore(chunk_info): + async with semaphore: + return await process_single_chunk(chunk_info) + + # Process all chunks in parallel with concurrency control + tasks = [process_with_semaphore(chunk_info) for chunk_info in chunks_to_process] + self.services.utils.debugLogToFile(f"DEBUG: Created {len(tasks)} tasks for parallel processing", "AI_SERVICE") + chunk_results = await asyncio.gather(*tasks, return_exceptions=True) + self.services.utils.debugLogToFile(f"DEBUG: Got {len(chunk_results)} results from parallel processing", "AI_SERVICE") + + # Handle any exceptions in the gather itself + processed_results = [] + for i, result in enumerate(chunk_results): + if isinstance(result, Exception): + # Create error ChunkResult + chunk_info = chunks_to_process[i] + 
processed_results.append(ChunkResult( + originalChunk=chunk_info['part'], + aiResult=f"[Error in parallel processing: {str(result)}]", + chunkIndex=chunk_info['chunk_index'], + documentId=chunk_info['document_id'], + processingTime=0.0, + metadata={"success": False, "error": str(result)} + )) + elif result is not None: + # Only add non-None results (skip empty containers) + processed_results.append(result) + + logger.info(f"Completed processing {len(processed_results)} chunks") + return processed_results + + def _mergeChunkResults( + self, + chunkResults: List[ChunkResult], + options: Optional[AiCallOptions] = None + ) -> str: + """Merge chunk results while preserving document structure and chunk order.""" + + if not chunkResults: + return "" + + # Get merging configuration from options + chunk_separator = "\n\n---\n\n" + include_document_headers = True + include_chunk_metadata = False + + if options: + if hasattr(options, 'chunkSeparator'): + chunk_separator = options.chunkSeparator + elif hasattr(options, 'mergeStrategy') and options.mergeStrategy: + chunk_separator = options.mergeStrategy.get("chunkSeparator", "\n\n---\n\n") + + # Check for enhanced options + if hasattr(options, 'preserveChunkMetadata'): + include_chunk_metadata = options.preserveChunkMetadata + + # Group chunk results by document + results_by_document = {} + for chunk_result in chunkResults: + doc_id = chunk_result.documentId + if doc_id not in results_by_document: + results_by_document[doc_id] = [] + results_by_document[doc_id].append(chunk_result) + + # Sort chunks within each document by chunk index + for doc_id in results_by_document: + results_by_document[doc_id].sort(key=lambda x: x.chunkIndex) + + # Merge results for each document + merged_documents = [] + + for doc_id, doc_chunks in results_by_document.items(): + # Build document header if enabled + doc_header = "" + if include_document_headers: + doc_header = f"\n\n=== DOCUMENT: {doc_id} ===\n\n" + + # Merge chunks for this document + doc_content = "" + for i, chunk_result in enumerate(doc_chunks): + # Add chunk separator (except for first chunk) + if i > 0: + doc_content += chunk_separator + + # Add chunk content with optional metadata + chunk_metadata = chunk_result.metadata + if chunk_metadata.get("success", False): + chunk_content = chunk_result.aiResult + + # Add chunk metadata if enabled + if include_chunk_metadata: + chunk_info = f"[Chunk {chunk_result.chunkIndex} - {chunk_metadata.get('typeGroup', 'unknown')} - {chunk_metadata.get('chunkSize', 0)} chars]" + chunk_content = f"{chunk_info}\n{chunk_content}" + + doc_content += chunk_content + else: + # Handle error chunks + error_msg = f"[ERROR in chunk {chunk_result.chunkIndex}: {chunk_metadata.get('error', 'Unknown error')}]" + doc_content += error_msg + + merged_documents.append(doc_header + doc_content) + + # Join all documents + final_result = "\n\n".join(merged_documents) + + logger.info(f"Merged {len(chunkResults)} chunks from {len(results_by_document)} documents") + return final_result.strip() + + def _mergeChunkResultsClean( + self, + chunkResults: List[ChunkResult], + options: Optional[AiCallOptions] = None + ) -> str: + """Merge chunk results in CLEAN mode - no debug metadata or document headers.""" + + if not chunkResults: + return "" + + # Get merging configuration from options + chunk_separator = "\n\n" + include_document_headers = False # CLEAN MODE: No document headers + include_chunk_metadata = False # CLEAN MODE: No chunk metadata + + if options: + if hasattr(options, 'chunkSeparator'): 
+ chunk_separator = options.chunkSeparator + elif hasattr(options, 'mergeStrategy') and options.mergeStrategy: + chunk_separator = options.mergeStrategy.get("chunkSeparator", "\n\n") + + # Group chunk results by document + results_by_document = {} + for chunk_result in chunkResults: + doc_id = chunk_result.documentId + if doc_id not in results_by_document: + results_by_document[doc_id] = [] + results_by_document[doc_id].append(chunk_result) + + # Sort chunks within each document by chunk index + for doc_id in results_by_document: + results_by_document[doc_id].sort(key=lambda x: x.chunkIndex) + + # Merge results for each document in CLEAN mode + merged_documents = [] + + for doc_id, doc_chunks in results_by_document.items(): + # CLEAN MODE: No document headers + doc_header = "" + + # Merge chunks for this document + doc_content = "" + for i, chunk_result in enumerate(doc_chunks): + # Add chunk separator (except for first chunk) + if i > 0: + doc_content += chunk_separator + + # Add chunk content without metadata + chunk_metadata = chunk_result.metadata + if chunk_metadata.get("success", False): + chunk_content = chunk_result.aiResult + + # CLEAN MODE: Skip container/binary chunks entirely + if chunk_content.startswith("[Skipped ") and "content:" in chunk_content: + continue # Skip container/binary chunks in clean mode + + # CLEAN MODE: Skip empty or whitespace-only chunks + if not chunk_content.strip(): + continue # Skip empty chunks in clean mode + + # CLEAN MODE: No chunk metadata + doc_content += chunk_content + else: + # Handle error chunks silently in clean mode + continue + + merged_documents.append(doc_header + doc_content) + + # Join all documents + final_result = "\n\n".join(merged_documents) + + return final_result.strip() + + def _mergeChunkResultsJson( + self, + chunkResults: List[ChunkResult], + options: Optional[AiCallOptions] = None + ) -> Dict[str, Any]: + """Merge chunk results in JSON mode - returns structured JSON document.""" + import json + + if not chunkResults: + return {"metadata": {"title": "Empty Document"}, "sections": []} + + # Group chunk results by document + results_by_document = {} + for chunk_result in chunkResults: + doc_id = chunk_result.documentId + if doc_id not in results_by_document: + results_by_document[doc_id] = [] + results_by_document[doc_id].append(chunk_result) + + # Sort chunks within each document by chunk index + for doc_id in results_by_document: + results_by_document[doc_id].sort(key=lambda x: x.chunkIndex) + + # Merge JSON results for each document + all_documents = [] + all_sections = [] + document_titles = [] + combined_metadata = {"title": "Merged Document", "splitStrategy": "by_section"} + + for doc_id, doc_chunks in results_by_document.items(): + # Process each chunk's JSON result + for chunk_result in doc_chunks: + chunk_metadata = chunk_result.metadata + if chunk_metadata.get("success", False): + try: + # Parse JSON from AI result + chunk_json = json.loads(chunk_result.aiResult) + + # Check if this is a multi-file response (has "documents" key) + if isinstance(chunk_json, dict) and "documents" in chunk_json: + # This is a multi-file response - merge all documents + logger.debug(f"Processing multi-file response from chunk {chunk_result.chunkIndex} with {len(chunk_json['documents'])} documents") + + # Add all documents from this chunk + for doc in chunk_json["documents"]: + # Add chunk context to document + doc["metadata"] = doc.get("metadata", {}) + doc["metadata"]["source_chunk"] = chunk_result.chunkIndex + 
doc["metadata"]["source_document"] = doc_id + all_documents.append(doc) + + # Update combined metadata + if "metadata" in chunk_json: + combined_metadata.update(chunk_json["metadata"]) + + # Extract sections from single-file response (fallback) + elif isinstance(chunk_json, dict) and "sections" in chunk_json: + for section in chunk_json["sections"]: + # Add document context to section + section["metadata"] = section.get("metadata", {}) + section["metadata"]["source_document"] = doc_id + section["metadata"]["chunk_index"] = chunk_result.chunkIndex + all_sections.append(section) + + # Extract document title + if isinstance(chunk_json, dict) and "metadata" in chunk_json: + title = chunk_json["metadata"].get("title", "") + if title and title not in document_titles: + document_titles.append(title) + + except json.JSONDecodeError as e: + logger.warning(f"Failed to parse JSON from chunk {chunk_result.chunkIndex}: {str(e)}") + # Create a fallback section for invalid JSON + fallback_section = { + "id": f"error_section_{chunk_result.chunkIndex}", + "title": "Error Section", + "content_type": "paragraph", + "elements": [{ + "text": f"Error parsing chunk {chunk_result.chunkIndex}: {str(e)}" + }], + "order": chunk_result.chunkIndex, + "metadata": { + "source_document": doc_id, + "chunk_index": chunk_result.chunkIndex, + "error": str(e) + } + } + all_sections.append(fallback_section) + else: + # Handle error chunks + error_section = { + "id": f"error_section_{chunk_result.chunkIndex}", + "title": "Error Section", + "content_type": "paragraph", + "elements": [{ + "text": f"Error in chunk {chunk_result.chunkIndex}: {chunk_metadata.get('error', 'Unknown error')}" + }], + "order": chunk_result.chunkIndex, + "metadata": { + "source_document": doc_id, + "chunk_index": chunk_result.chunkIndex, + "error": chunk_metadata.get('error', 'Unknown error') + } + } + all_sections.append(error_section) + + # Sort sections by order + all_sections.sort(key=lambda x: x.get("order", 0)) + + # If we have merged documents from multi-file responses, return them + if all_documents: + logger.info(f"Merged {len(all_documents)} documents from {len(chunkResults)} chunks") + return { + "metadata": combined_metadata, + "documents": all_documents + } + + # Otherwise, create merged document with sections (single-file fallback) + merged_document = { + "metadata": { + "title": document_titles[0] if document_titles else "Merged Document", + "source_documents": list(results_by_document.keys()), + "extraction_method": "ai_json_extraction", + "version": "1.0" + }, + "sections": all_sections, + "summary": f"Merged document from {len(results_by_document)} source documents", + "tags": ["merged", "ai_generated"] + } + + logger.info(f"Merged {len(chunkResults)} chunks from {len(results_by_document)} documents (JSON mode)") + return merged_document + + def _getModelCapabilitiesForContent(self, prompt: str, documents: Optional[List[ChatDocument]], options: AiCallOptions) -> Dict[str, int]: + """ + Get model capabilities for content processing, including appropriate size limits for chunking. 
+ """ + # Estimate total content size + prompt_size = len(prompt.encode('utf-8')) + document_size = 0 + if documents: + # Rough estimate of document content size + for doc in documents: + document_size += doc.fileSize or 0 + + total_size = prompt_size + document_size + + # Use AiObjects to select the best model for this content size + # We'll simulate the model selection by checking available models + from modules.interfaces.interfaceAiObjects import aiModels + + # Find the best model for this content size and operation + best_model = None + best_context_length = 0 + + for model_name, model_info in aiModels.items(): + context_length = model_info.get("contextLength", 0) + + # Skip models with no context length or too small for content + if context_length == 0: + continue + + # Check if model supports the operation type + capabilities = model_info.get("capabilities", []) + if options.operationType == OperationType.IMAGE_ANALYSIS and "image_analysis" not in capabilities: + continue + elif options.operationType == OperationType.IMAGE_GENERATION and "image_generation" not in capabilities: + continue + elif options.operationType == OperationType.WEB_RESEARCH and "web_search" not in capabilities: + continue + elif "text_generation" not in capabilities: + continue + + # Prefer models that can handle the content without chunking, but allow chunking if needed + if context_length >= total_size * 0.8: # 80% of content size + if context_length > best_context_length: + best_model = model_info + best_context_length = context_length + elif best_model is None: # Fallback to largest available model + if context_length > best_context_length: + best_model = model_info + best_context_length = context_length + + # Fallback to a reasonable default if no model found + if best_model is None: + best_model = { + "contextLength": 128000, # GPT-4o default + "llmName": "gpt-4o" + } + + # Calculate appropriate sizes + # Convert tokens to bytes (rough estimate: 1 token ≈ 4 characters) + context_length_bytes = int(best_model["contextLength"] * 4) + max_context_bytes = int(context_length_bytes * 0.9) # 90% of context length + text_chunk_size = int(max_context_bytes * 0.7) # 70% of max context for text chunks + image_chunk_size = int(max_context_bytes * 0.8) # 80% of max context for image chunks + + logger.debug(f"Selected model: {best_model.get('llmName', 'unknown')} with context length: {best_model['contextLength']}") + logger.debug(f"Content size: {total_size} bytes, Max context: {max_context_bytes} bytes") + logger.debug(f"Text chunk size: {text_chunk_size} bytes, Image chunk size: {image_chunk_size} bytes") + + return { + "maxContextBytes": max_context_bytes, + "textChunkSize": text_chunk_size, + "imageChunkSize": image_chunk_size + } diff --git a/modules/services/serviceAi/subUtilities.py b/modules/services/serviceAi/subUtilities.py new file mode 100644 index 00000000..0f5bcc4d --- /dev/null +++ b/modules/services/serviceAi/subUtilities.py @@ -0,0 +1,316 @@ +import logging +from typing import Dict, Any, List, Optional, Tuple, Union +from modules.datamodels.datamodelAi import ModelCapabilities, AiCallOptions + +logger = logging.getLogger(__name__) + + +class SubUtilities: + """Utility functions for text processing, debugging, and helper operations.""" + + def __init__(self, services): + """Initialize utilities service. 
+ + Args: + services: Service center instance for accessing other services + """ + self.services = services + + def _writeTraceLog(self, contextText: str, data: Any) -> None: + """Write raw data to the central trace log file without truncation.""" + try: + import os + import json + from datetime import datetime, UTC + # Only write if logger is in debug mode + if logger.level > logging.DEBUG: + return + # Get log directory from configuration via service center if possible + logDir = None + try: + logDir = self.services.utils.configGet("APP_LOGGING_LOG_DIR", "./") + except Exception: + pass + if not logDir: + logDir = "./" + if not os.path.isabs(logDir): + # Make it relative to gateway directory + gatewayDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) + logDir = os.path.join(gatewayDir, logDir) + os.makedirs(logDir, exist_ok=True) + traceFile = os.path.join(logDir, "log_trace.log") + timestamp = datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S.%f")[:-3] + traceEntry = f"[{timestamp}] {contextText}\n" + ("=" * 80) + "\n" + if data is None: + traceEntry += "No data provided\n" + else: + # Prefer exact text; if dict/list, pretty print JSON + try: + if isinstance(data, (dict, list)): + traceEntry += f"JSON Data:\n{json.dumps(data, indent=2, ensure_ascii=False)}\n" + else: + text = str(data) + traceEntry += f"Text Data:\n{text}\n" + except Exception: + traceEntry += f"Data (fallback): {str(data)}\n" + traceEntry += ("=" * 80) + "\n\n" + with open(traceFile, "a", encoding="utf-8") as f: + f.write(traceEntry) + except Exception: + # Swallow to avoid recursive logging issues + pass + + def _writeAiResponseDebug(self, label: str, content: str, partIndex: int = 1, modelName: str = None, continuation: bool = None) -> None: + """Persist raw AI response parts for debugging under test-chat/ai - only if debug enabled.""" + try: + # Check if debug logging is enabled + debug_enabled = self.services.utils.configGet("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False) + if not debug_enabled: + return + + import os + from datetime import datetime, UTC + # Base dir: gateway/test-chat/ai (go up 4 levels from this file) + # .../gateway/modules/services/serviceAi/subUtilities.py -> up to gateway root + gatewayDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) + outDir = os.path.join(gatewayDir, 'test-chat', 'ai') + os.makedirs(outDir, exist_ok=True) + ts = datetime.now(UTC).strftime('%Y%m%d-%H%M%S-%f')[:-3] + suffix = [] + if partIndex is not None: + suffix.append(f"part{partIndex}") + if continuation is not None: + suffix.append(f"cont_{str(continuation).lower()}") + if modelName: + safeModel = ''.join(c if c.isalnum() or c in ('-', '_') else '-' for c in modelName) + suffix.append(safeModel) + suffixStr = ('_' + '_'.join(suffix)) if suffix else '' + fname = f"{ts}_{label}{suffixStr}.txt" + fpath = os.path.join(outDir, fname) + with open(fpath, 'w', encoding='utf-8') as f: + f.write(content or '') + except Exception: + # Do not raise; best-effort debug write + pass + + def _exceedsTokenLimit(self, text: str, model: ModelCapabilities, safety_margin: float) -> bool: + """ + Check if text exceeds model token limit with safety margin. 
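+
+        Worked example (estimation only): 20,000 chars ≈ 5,000 tokens; with
+        model.maxTokens=4000 and safety_margin=0.1 the effective limit is
+        3,600 tokens, so the check returns True.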
+ """ + # Simple character-based estimation (4 chars per token) + estimated_tokens = len(text) // 4 + max_tokens = int(model.maxTokens * (1 - safety_margin)) + return estimated_tokens > max_tokens + + def _reduceText(self, text: str, reduction_factor: float) -> str: + """ + Reduce text size by the specified factor. + """ + if reduction_factor >= 1.0: + return text + + target_length = int(len(text) * reduction_factor) + return text[:target_length] + "... [reduced]" + + def _extractTextFromContentParts(self, extracted_content) -> str: + """ + Extract text content from ExtractionService ContentPart objects. + """ + if not extracted_content or not hasattr(extracted_content, 'parts'): + return "" + + text_parts = [] + for part in extracted_content.parts: + if hasattr(part, 'typeGroup') and part.typeGroup in ['text', 'table', 'structure']: + if hasattr(part, 'data') and part.data: + text_parts.append(part.data) + + return "\n\n".join(text_parts) + + def _buildPromptWithPlaceholders(self, prompt: str, placeholders: Optional[Dict[str, str]]) -> str: + """ + Build full prompt by replacing placeholders with their content. + Uses the new {{KEY:placeholder}} format. + """ + if not placeholders: + return prompt + + full_prompt = prompt + for placeholder, content in placeholders.items(): + # Replace both old format {{placeholder}} and new format {{KEY:placeholder}} + full_prompt = full_prompt.replace(f"{{{{{placeholder}}}}}", content) + full_prompt = full_prompt.replace(f"{{{{KEY:{placeholder}}}}}", content) + + return full_prompt + + def _reducePlanningPrompt( + self, + full_prompt: str, + placeholders: Optional[Dict[str, str]], + model: ModelCapabilities, + options: AiCallOptions + ) -> str: + """ + Reduce planning prompt size by summarizing placeholders while preserving prompt structure. + """ + if not placeholders: + return self._reduceText(full_prompt, 0.7) + + # Reduce placeholders while preserving prompt + reduced_placeholders = {} + for placeholder, content in placeholders.items(): + if len(content) > 1000: # Only reduce long content + reduction_factor = 0.7 + reduced_content = self._reduceText(content, reduction_factor) + reduced_placeholders[placeholder] = reduced_content + else: + reduced_placeholders[placeholder] = content + + return self._buildPromptWithPlaceholders(full_prompt, reduced_placeholders) + + def _reduceTextPrompt( + self, + prompt: str, + context: str, + model: ModelCapabilities, + options: AiCallOptions + ) -> str: + """ + Reduce text prompt size using typeGroup-aware chunking and merging. 
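+
+        The current implementation approximates this by proportionally
+        truncating the prompt and/or context to fit the model budget
+        (see _reduceText).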
+ """ + max_size = int(model.maxTokens * (1 - options.safetyMargin)) + + if options.compressPrompt: + # Reduce both prompt and context + target_size = max_size + current_size = len(prompt) + len(context) + reduction_factor = (target_size * 0.7) / current_size + + if reduction_factor < 1.0: + prompt = self._reduceText(prompt, reduction_factor) + context = self._reduceText(context, reduction_factor) + else: + # Only reduce context, preserve prompt integrity + max_context_size = max_size - len(prompt) + if len(context) > max_context_size: + reduction_factor = max_context_size / len(context) + context = self._reduceText(context, reduction_factor) + + return prompt + "\n\n" + context if context else prompt + + async def _compressContent(self, content: str, targetSize: int, contentType: str) -> str: + """Compress content to target size.""" + if len(content.encode("utf-8")) <= targetSize: + return content + + try: + compressionPrompt = f""" + Komprimiere den folgenden {contentType} auf maximal {targetSize} Zeichen, + behalte aber alle wichtigen Informationen bei: + + {content} + + Gib nur den komprimierten Inhalt zurück, ohne zusätzliche Erklärungen. + """ + + # Service must not call connectors directly; use simple truncation fallback here + data = content.encode("utf-8") + return data[:targetSize].decode("utf-8", errors="ignore") + "... [truncated]" + except Exception as e: + logger.warning(f"AI compression failed, using truncation: {str(e)}") + return content[:targetSize] + "... [truncated]" + + def _getModelCapabilitiesForContent(self, prompt: str, documents: Optional[List], options: AiCallOptions) -> Dict[str, int]: + """ + Get model capabilities for content processing, including appropriate size limits for chunking. + """ + # Estimate total content size + prompt_size = len(prompt.encode('utf-8')) + document_size = 0 + if documents: + # Rough estimate of document content size + for doc in documents: + document_size += getattr(doc, 'fileSize', 0) or 0 + + total_size = prompt_size + document_size + + # Use AiObjects to select the best model for this content size + # We'll simulate the model selection by checking available models + from modules.interfaces.interfaceAiObjects import aiModels + + # Find the best model for this content size and operation + best_model = None + best_context_length = 0 + + for model_name, model_info in aiModels.items(): + context_length = model_info.get("contextLength", 0) + + # Skip models with no context length or too small for content + if context_length == 0: + continue + + # Check if model supports the operation type + capabilities = model_info.get("capabilities", []) + from modules.datamodels.datamodelAi import OperationType + if options.operationType == OperationType.IMAGE_ANALYSIS and "image_analysis" not in capabilities: + continue + elif options.operationType == OperationType.IMAGE_GENERATION and "image_generation" not in capabilities: + continue + elif options.operationType == OperationType.WEB_RESEARCH and "web_search" not in capabilities: + continue + elif "text_generation" not in capabilities: + continue + + # Prefer models that can handle the content without chunking, but allow chunking if needed + if context_length >= total_size * 0.8: # 80% of content size + if context_length > best_context_length: + best_model = model_info + best_context_length = context_length + elif best_model is None: # Fallback to largest available model + if context_length > best_context_length: + best_model = model_info + best_context_length = context_length + + # Fallback to a 
reasonable default if no model found + if best_model is None: + best_model = { + "contextLength": 128000, # GPT-4o default + "llmName": "gpt-4o" + } + + # Calculate appropriate sizes + # Convert tokens to bytes (rough estimate: 1 token ≈ 4 characters) + context_length_bytes = int(best_model["contextLength"] * 4) + max_context_bytes = int(context_length_bytes * 0.9) # 90% of context length + text_chunk_size = int(max_context_bytes * 0.7) # 70% of max context for text chunks + image_chunk_size = int(max_context_bytes * 0.8) # 80% of max context for image chunks + + logger.debug(f"Selected model: {best_model.get('llmName', 'unknown')} with context length: {best_model['contextLength']}") + logger.debug(f"Content size: {total_size} bytes, Max context: {max_context_bytes} bytes") + logger.debug(f"Text chunk size: {text_chunk_size} bytes, Image chunk size: {image_chunk_size} bytes") + + return { + "maxContextBytes": max_context_bytes, + "textChunkSize": text_chunk_size, + "imageChunkSize": image_chunk_size + } + + def _getModelsForOperation(self, operation_type: str, options: AiCallOptions) -> List[ModelCapabilities]: + """ + Get models capable of handling the specific operation with capability filtering. + """ + # Use the actual AI objects model selection instead of hardcoded default + if hasattr(self, 'aiObjects') and self.aiObjects: + # Let AiObjects handle the model selection + return [] + else: + # Fallback to default model if AiObjects not available + default_model = ModelCapabilities( + name="default", + maxTokens=4000, + capabilities=["text", "reasoning"] if operation_type == "planning" else ["text"], + costPerToken=0.001, + processingTime=1.0, + isAvailable=True + ) + return [default_model] diff --git a/modules/services/serviceAi/subWebResearch.py b/modules/services/serviceAi/subWebResearch.py new file mode 100644 index 00000000..953324aa --- /dev/null +++ b/modules/services/serviceAi/subWebResearch.py @@ -0,0 +1,384 @@ +import logging +from typing import Dict, Any, List, Optional, Tuple, Union +from modules.datamodels.datamodelWeb import ( + WebResearchRequest, + WebResearchActionResult, + WebResearchDocumentData, + WebResearchActionDocument, + WebSearchResultItem, +) +from modules.interfaces.interfaceAiObjects import AiObjects +from modules.shared.configuration import APP_CONFIG + +logger = logging.getLogger(__name__) + + +class SubWebResearch: + """Web research operations including search, crawling, and analysis.""" + + def __init__(self, services, aiObjects): + """Initialize web research service. + + Args: + services: Service center instance for accessing other services + aiObjects: Initialized AiObjects instance + """ + self.services = services + self.aiObjects = aiObjects + + async def webResearch(self, request: WebResearchRequest) -> WebResearchActionResult: + """Perform web research using interface functions.""" + try: + logger.info(f"WEB RESEARCH STARTED") + logger.info(f"User Query: {request.user_prompt}") + logger.info(f"Max Results: {request.max_results}, Max Pages: {request.options.max_pages}") + + # Global URL index to track all processed URLs across the entire research session + global_processed_urls = set() + + # Step 1: Find relevant websites - either provided URLs or AI-determined main URLs + logger.info(f"=== STEP 1: INITIAL MAIN URLS LIST ===") + + if request.urls: + # Use provided URLs as initial main URLs + websites = request.urls + logger.info(f"Using provided URLs ({len(websites)}):") + for i, url in enumerate(websites, 1): + logger.info(f" {i}. 
{url}")
+            else:
+                # Use AI to determine main URLs based on user's intention
+                logger.info(f"AI analyzing user intent: '{request.user_prompt}'")
+
+                # Use AI to generate optimized Tavily search query and search parameters
+                query_optimizer_prompt = f"""You are a search query optimizer.
+
+                USER QUERY: {request.user_prompt}
+
+                Your task: Create a search query and parameters for the USER QUERY given.
+
+                RULES:
+                1. The search query MUST be related to the user query above
+                2. Extract key terms from the user query
+                3. Determine appropriate country/language based on the query context
+                4. Keep search query short (2-6 words)
+
+                Return ONLY this JSON format:
+                {{
+                    "user_prompt": "search query based on user query above",
+                    "country": "Full English country name (ISO-3166; map codes via pycountry/i18n-iso-countries)",
+                    "language": "language_code_or_null",
+                    "topic": "general|news|academic_or_null",
+                    "time_range": "d|w|m|y_or_null",
+                    "selection_strategy": "single|multiple|specific_page",
+                    "selection_criteria": "what URLs to prioritize",
+                    "expected_url_patterns": ["pattern1", "pattern2"],
+                    "estimated_result_count": number
+                }}"""
+
+                # Get AI response for query optimization
+                from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions
+                ai_request = AiCallRequest(
+                    prompt=query_optimizer_prompt,
+                    options=AiCallOptions()
+                )
+                ai_response_obj = await self.aiObjects.call(ai_request)
+                ai_response = ai_response_obj.content
+                logger.debug(f"AI query optimizer response: {ai_response}")
+
+                # Parse AI response to extract search query
+                import json
+                try:
+                    # Clean the response by removing markdown code blocks
+                    cleaned_response = ai_response.strip()
+                    if cleaned_response.startswith('```json'):
+                        cleaned_response = cleaned_response[7:]  # Remove ```json
+                    if cleaned_response.endswith('```'):
+                        cleaned_response = cleaned_response[:-3]  # Remove ```
+                    cleaned_response = cleaned_response.strip()
+
+                    query_data = json.loads(cleaned_response)
+                    search_query = query_data.get("user_prompt", request.user_prompt)
+                    ai_country = query_data.get("country")
+                    ai_language = query_data.get("language")
+                    ai_topic = query_data.get("topic")
+                    ai_time_range = query_data.get("time_range")
+                    selection_strategy = query_data.get("selection_strategy", "multiple")
+                    selection_criteria = query_data.get("selection_criteria", "relevant URLs")
+                    expected_patterns = query_data.get("expected_url_patterns", [])
+                    estimated_count = query_data.get("estimated_result_count", request.max_results)
+
+                    logger.info(f"AI optimized search query: '{search_query}'")
+                    logger.info(f"Selection strategy: {selection_strategy}")
+                    logger.info(f"Selection criteria: {selection_criteria}")
+                    logger.info(f"Expected URL patterns: {expected_patterns}")
+                    logger.info(f"Estimated result count: {estimated_count}")
+
+                except json.JSONDecodeError:
+                    logger.warning("Failed to parse AI response as JSON, using original query")
+                    search_query = request.user_prompt
+                    ai_country = None
+                    ai_language = None
+                    ai_topic = None
+                    ai_time_range = None
+                    selection_strategy = "multiple"
+                    # Also default the remaining fields so the include_domains
+                    # handling below cannot fail with a NameError
+                    selection_criteria = "relevant URLs"
+                    expected_patterns = []
+                    estimated_count = request.max_results
+
+                # Perform the web search with AI-determined parameters
+                search_kwargs = {
+                    "query": search_query,
+                    "max_results": request.max_results,
+                    "search_depth": request.options.search_depth,
+                    "auto_parameters": False  # Use explicit parameters
+                }
+
+                # Add parameters only if they have valid values
+                def _normalizeCountry(c: Optional[str]) -> Optional[str]:
+                    if not c:
+                        return None
+                    s = str(c).strip()
+                    if not s or s.lower() in ['null', 'none', 'undefined']:
+                        return None
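+                    # Tavily's country parameter expects full English country
+                    # names, so a few common ISO codes are mapped inline below
+                    # to avoid an extra dependency.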
+                    # Map common codes to full English names when easy to do without extra deps
+                    mapping = {
+                        'ch': 'Switzerland', 'che': 'Switzerland',
+                        'de': 'Germany', 'ger': 'Germany', 'deu': 'Germany',
+                        'at': 'Austria', 'aut': 'Austria',
+                        'us': 'United States', 'usa': 'United States', 'united states': 'United States',
+                        'uk': 'United Kingdom', 'gb': 'United Kingdom', 'gbr': 'United Kingdom'
+                    }
+                    key = s.lower()
+                    if key in mapping:
+                        return mapping[key]
+                    # Otherwise assume it is already a full English name and pass it through
+                    return s
+
+                norm_ai_country = _normalizeCountry(ai_country)
+                norm_req_country = _normalizeCountry(request.options.country)
+                if norm_ai_country:
+                    search_kwargs["country"] = norm_ai_country
+                elif norm_req_country:
+                    search_kwargs["country"] = norm_req_country
+
+                if ai_language and ai_language not in ['null', '', 'none', 'undefined']:
+                    search_kwargs["language"] = ai_language
+                elif request.options.language and request.options.language not in ['null', '', 'none', 'undefined']:
+                    search_kwargs["language"] = request.options.language
+
+                if ai_topic and ai_topic in ['general', 'news', 'academic']:
+                    search_kwargs["topic"] = ai_topic
+                elif request.options.topic and request.options.topic in ['general', 'news', 'academic']:
+                    search_kwargs["topic"] = request.options.topic
+
+                if ai_time_range and ai_time_range in ['d', 'w', 'm', 'y']:
+                    search_kwargs["time_range"] = ai_time_range
+                elif request.options.time_range and request.options.time_range in ['d', 'w', 'm', 'y']:
+                    search_kwargs["time_range"] = request.options.time_range
+
+                # Constrain by expected domains if provided by AI
+                try:
+                    include_domains = []
+                    for p in expected_patterns or []:
+                        if not isinstance(p, str):
+                            continue
+                        # Extract bare domain from pattern or URL
+                        import re
+                        m = re.search(r"(?:https?://)?([^/\s]+)", p.strip())
+                        if m:
+                            domain = m.group(1).lower()
+                            # strip leading www.
+                            if domain.startswith('www.'):
+                                domain = domain[4:]
+                            include_domains.append(domain)
+                    # Deduplicate
+                    if include_domains:
+                        seen = set()
+                        uniq = []
+                        for d in include_domains:
+                            if d not in seen:
+                                seen.add(d)
+                                uniq.append(d)
+                        search_kwargs["include_domains"] = uniq
+                except Exception:
+                    pass
+
+                # Log the parameters being used
+                logger.info(f"Search parameters: country={search_kwargs.get('country', 'not_set')}, language={search_kwargs.get('language', 'not_set')}, topic={search_kwargs.get('topic', 'not_set')}, time_range={search_kwargs.get('time_range', 'not_set')}, include_domains={search_kwargs.get('include_domains', [])}")
+
+                search_results = await self.aiObjects.search_websites(**search_kwargs)
+
+                logger.debug(f"Web search returned {len(search_results)} results:")
+                for i, result in enumerate(search_results, 1):
+                    logger.debug(f"  {i}. {result.url} - {result.title}")
+
+                # Deduplicate while preserving order
+                seen = set()
+                search_urls = []
+                for r in search_results:
+                    u = str(r.url)
+                    if u not in seen:
+                        seen.add(u)
+                        search_urls.append(u)
+
+                logger.info(f"After initial deduplication: {len(search_urls)} unique URLs from {len(search_results)} search results")
+
+                if not search_urls:
+                    logger.error("No relevant websites found")
+                    return WebResearchActionResult(success=False, error="No relevant websites found")
+
+                # Now use AI to determine the main URLs based on user's intention
+                logger.info(f"AI selecting main URLs from {len(search_urls)} search results based on user intent")
+
+                # Create a prompt for AI to identify main URLs based on user's intention
+                ai_prompt = f"""
+                Select the most relevant URLs from these search results:
+
+                {chr(10).join([f"{i+1}. {url}" for i, url in enumerate(search_urls)])}
+
+                Return only the URLs that are most relevant for the user's query.
+                One URL per line.
+                """
+                # Create AI call request
+                ai_request = AiCallRequest(
+                    prompt=ai_prompt,
+                    options=AiCallOptions()
+                )
+                ai_response_obj = await self.aiObjects.call(ai_request)
+                ai_response = ai_response_obj.content
+                logger.debug(f"AI response for main URL selection: {ai_response}")
+
+                # Parse AI response to extract URLs
+                websites = []
+                for line in ai_response.strip().split('\n'):
+                    line = line.strip()
+                    if line and ('http://' in line or 'https://' in line):
+                        # Extract URL from the line
+                        for word in line.split():
+                            if word.startswith('http://') or word.startswith('https://'):
+                                websites.append(word.rstrip('.,;'))
+                                break
+
+                if not websites:
+                    logger.warning("AI did not identify any main URLs, using first few search results")
+                    websites = search_urls[:3]  # Fallback to first 3 search results
+
+            # Deduplicate while preserving order
+            ai_selected_count = len(websites)
+            seen = set()
+            unique_websites = []
+            for url in websites:
+                if url not in seen:
+                    seen.add(url)
+                    unique_websites.append(url)
+
+            websites = unique_websites
+            logger.info(f"After AI selection deduplication: {len(websites)} unique URLs from {ai_selected_count} AI-selected URLs")
+
+            logger.info(f"AI selected {len(websites)} main URLs (after deduplication):")
+            for i, url in enumerate(websites, 1):
+                logger.info(f"  {i}. {url}")
+
+            # Step 2: Smart website selection using AI interface
+            logger.info(f"=== STEP 2: FILTERED URL LIST BY USER PROMPT'S INTENTION ===")
+            logger.info(f"AI analyzing {len(websites)} URLs for relevance to: '{request.user_prompt}'")
+
+            selectedWebsites, aiResponse = await self.aiObjects.selectRelevantWebsites(websites, request.user_prompt)
+
+            logger.debug(f"AI Response: {aiResponse}")
+            logger.debug(f"AI selected {len(selectedWebsites)} most relevant URLs:")
+            for i, url in enumerate(selectedWebsites, 1):
+                logger.debug(f"  {i}. {url}")
+
+            # Show which were filtered out
+            filtered_out = [url for url in websites if url not in selectedWebsites]
+            if filtered_out:
+                logger.debug(f"Filtered out {len(filtered_out)} less relevant URLs:")
+                for i, url in enumerate(filtered_out, 1):
+                    logger.debug(f"  {i}. {url}")
+
+            # Step 3+4+5: Recursive crawling with configurable depth
+            # Get configuration parameters
+            max_depth = int(APP_CONFIG.get("Web_Research_MAX_DEPTH", "2"))
+            max_links_per_domain = int(APP_CONFIG.get("Web_Research_MAX_LINKS_PER_DOMAIN", "4"))
+            crawl_timeout_minutes = int(APP_CONFIG.get("Web_Research_CRAWL_TIMEOUT_MINUTES", "10"))
+            crawl_timeout_seconds = crawl_timeout_minutes * 60
+
+            # Use the configured max_depth or the request's pages_search_depth, whichever is smaller
+            effective_depth = min(max_depth, request.options.pages_search_depth)
+
+            logger.info(f"=== STEP 3+4+5: RECURSIVE CRAWLING (DEPTH {effective_depth}) ===")
+            logger.info(f"Starting recursive crawl of {len(selectedWebsites)} main websites...")
+            logger.info(f"Search depth: {effective_depth} levels (max configured: {max_depth})")
+            logger.info(f"Max links per domain: {max_links_per_domain}")
+            logger.info(f"Crawl timeout: {crawl_timeout_minutes} minutes")
+
+            # Use recursive crawling with URL index to avoid duplicates
+            import asyncio
+            try:
+                allContent = await asyncio.wait_for(
+                    self.aiObjects.crawlRecursively(
+                        urls=selectedWebsites,
+                        max_depth=effective_depth,
+                        extract_depth=request.options.extract_depth,
+                        max_per_domain=max_links_per_domain,
+                        global_processed_urls=global_processed_urls
+                    ),
+                    timeout=crawl_timeout_seconds
+                )
+                logger.info(f"Crawling completed within timeout: {len(allContent)} pages crawled")
+            except asyncio.TimeoutError:
+                logger.warning(f"Crawling timed out after {crawl_timeout_minutes} minutes")
+                # asyncio.wait_for cancels the crawl task on timeout, so any
+                # partial results are lost here; fall back to empty content
+                allContent = {}
+
+            if not allContent:
+                logger.error("Could not extract content from any websites")
+                return WebResearchActionResult(success=False, error="Could not extract content from any websites")
+
+            logger.info(f"=== WEB RESEARCH COMPLETED ===")
+            logger.info(f"Successfully crawled {len(allContent)} URLs total")
+            logger.info(f"Crawl depth: {effective_depth} levels")
+
+            # Create simple result with raw content
+            sources = [WebSearchResultItem(title=url, url=url) for url in selectedWebsites]
+
+            # Get all additional links (all URLs except main ones)
+            additional_links = [url for url in allContent.keys() if url not in selectedWebsites]
+
+            # Combine all content into a single result
+            combinedContent = ""
+            for url, content in allContent.items():
+                combinedContent += f"\n\n=== {url} ===\n{content}\n"
+
+            documentData = WebResearchDocumentData(
+                user_prompt=request.user_prompt,
+                websites_analyzed=len(allContent),
+                additional_links_found=len(additional_links),
+                analysis_result=combinedContent,  # Raw content, no analysis
+                sources=sources,
+                additional_links=additional_links,
+                individual_content=allContent,  # Individual URL -> content mapping
+                debug_info={
+                    "crawl_depth": effective_depth,
+                    "max_configured_depth": max_depth,
+                    "max_links_per_domain": max_links_per_domain,
+                    "crawl_timeout_minutes": crawl_timeout_minutes,
+                    "total_urls_crawled": len(allContent),
+                    "main_urls": len(selectedWebsites),
+                    "additional_urls": len(additional_links)
+                }
+            )
+
+            document = WebResearchActionDocument(
+                documentName=f"web_research_{request.user_prompt[:50]}.json",
+                documentData=documentData,
+                mimeType="application/json"
+            )
+
+            return WebResearchActionResult(
+                success=True,
+                documents=[document],
+                resultLabel="web_research_results"
+            )
+
+        except Exception as e:
+            logger.error(f"Error in web research: {str(e)}")
+            return
WebResearchActionResult(success=False, error=str(e)) diff --git a/test_extractor_formats.py b/test_extractor_formats.py deleted file mode 100644 index 201622ff..00000000 --- a/test_extractor_formats.py +++ /dev/null @@ -1,117 +0,0 @@ -#!/usr/bin/env python3 -""" -Test script to demonstrate enhanced extractor format support. -Shows all supported file extensions and MIME types for each extractor. -""" - -import sys -import os -from pathlib import Path - -# Add the gateway module to the path -sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'modules')) - -from modules.services.serviceExtraction.subRegistry import ExtractorRegistry - -def test_extractor_formats(): - """Test and display all supported formats from extractors.""" - print("🔍 Testing Plug-and-Play Extractor System") - print("=" * 60) - - # Create registry - registry = ExtractorRegistry() - - # Get all supported formats - formats = registry.getAllSupportedFormats() - - print("\n📋 Supported File Extensions by Extractor:") - print("-" * 50) - for extractor_name, extensions in formats["extensions"].items(): - if extensions: - print(f" {extractor_name:20} → {', '.join(extensions)}") - else: - print(f" {extractor_name:20} → (all extensions - fallback)") - - print("\n📋 Supported MIME Types by Extractor:") - print("-" * 50) - for extractor_name, mime_types in formats["mime_types"].items(): - if mime_types: - print(f" {extractor_name:20} → {', '.join(mime_types)}") - else: - print(f" {extractor_name:20} → (all MIME types - fallback)") - - # Test individual extractors - print("\n🧪 Testing Individual Extractors:") - print("-" * 50) - - # Get all registered extractors - for key, extractor in registry._map.items(): - if hasattr(extractor, 'getSupportedExtensions') and hasattr(extractor, 'getSupportedMimeTypes'): - extensions = extractor.getSupportedExtensions() - mime_types = extractor.getSupportedMimeTypes() - print(f"\n {extractor.__class__.__name__}:") - print(f" Extensions: {extensions}") - print(f" MIME Types: {mime_types}") - - # Test detection with various file types - print("\n🔬 Testing File Detection:") - print("-" * 50) - - test_files = [ - # Document formats - ("document.pdf", "application/pdf"), - ("spreadsheet.xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"), - ("presentation.pptx", "application/vnd.openxmlformats-officedocument.presentationml.presentation"), - ("document.docx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document"), - - # Text and code files - ("readme.txt", "text/plain"), - ("readme.md", "text/markdown"), - ("app.log", "text/plain"), - ("Main.java", "text/x-java-source"), - ("script.js", "text/javascript"), - ("component.tsx", "text/typescript"), - ("main.py", "text/x-python"), - ("config.yaml", "text/x-yaml"), - ("package.json", "application/json"), - ("data.csv", "text/csv"), - ("config.xml", "application/xml"), - ("webpage.html", "text/html"), - ("styles.css", "text/css"), - ("script.sh", "text/x-sh"), - ("Dockerfile", "text/plain"), - (".gitignore", "text/plain"), - ("app.config", "text/plain"), - ("database.sql", "text/x-sql"), - ("schema.ddl", "application/sql"), - - # Images - ("image.png", "image/png"), - ("photo.jpg", "image/jpeg"), - - # Unknown - ("unknown.xyz", "application/octet-stream") - ] - - for filename, mime_type in test_files: - extractor = registry.resolve(mime_type, filename) - if extractor: - print(f" {filename:25} ({mime_type:50}) → {extractor.__class__.__name__}") - else: - print(f" {filename:25} ({mime_type:50}) → No extractor 
found") - - print("\n✅ Plug-and-Play extractor system test completed!") - print("\nKey improvements:") - print(" • 🔌 TRUE PLUG-AND-PLAY: Just add extractor file, it auto-registers!") - print(" • 📋 No more manual registration of file types") - print(" • 🔍 Auto-discovery scans extractors directory") - print(" • 📝 Each extractor declares its own supported formats") - print(" • 🚀 Easy to add new file types - just create new extractor") - print(" • 🧹 Clean, maintainable code with no redundancy") - print("\nTo add a new file type:") - print(" 1. Create extractorXyz.py in extractors/ directory") - print(" 2. Implement Extractor interface with getSupportedExtensions()") - print(" 3. That's it! No registry changes needed!") - -if __name__ == "__main__": - test_extractor_formats() diff --git a/test_image_processing.py b/test_image_processing.py deleted file mode 100644 index ae993083..00000000 --- a/test_image_processing.py +++ /dev/null @@ -1,83 +0,0 @@ -""" -Simple test to verify image processing works correctly. -""" - -import asyncio -import sys -import os -import base64 -import logging - -# Add the gateway module to the path -sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'modules')) - -from modules.datamodels.datamodelAi import AiCallOptions, OperationType -from modules.services.serviceAi.mainServiceAi import AiService - -# Set up logging -logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') -logger = logging.getLogger(__name__) - -async def test_image_processing(): - """Test image processing with a simple base64 image.""" - print("🧪 Testing image processing...") - logger.info("🧪 Testing image processing...") - - try: - print("🔧 Initializing AI service...") - logger.info("🔧 Initializing AI service...") - - # Initialize AI service - ai_service = await AiService.create() - print("✅ AI service initialized successfully") - logger.info("✅ AI service initialized successfully") - - # Create a simple test image (1x1 pixel PNG in base64) - test_image_base64 = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChwGA60e6kgAAAABJRU5ErkJggg==" - print(f"📸 Test image base64 length: {len(test_image_base64)}") - logger.info(f"📸 Test image base64 length: {len(test_image_base64)}") - - # Test the readImage method directly - print("📸 Testing readImage method...") - logger.info("📸 Testing readImage method...") - - result = await ai_service.readImage( - prompt="What do you see in this image?", - imageData=test_image_base64, - mimeType="image/png" - ) - - print(f"✅ Image processing result: {result}") - logger.info(f"✅ Image processing result: {result}") - - return True - - except Exception as e: - print(f"❌ Image processing test failed: {str(e)}") - logger.error(f"❌ Image processing test failed: {str(e)}") - import traceback - traceback.print_exc() - logger.error(f"Traceback: {traceback.format_exc()}") - return False - -async def main(): - """Main function to run the image processing test.""" - print("🎯 Starting Image Processing Test") - print("=" * 60) - logger.info("🎯 Starting Image Processing Test") - logger.info("=" * 60) - - success = await test_image_processing() - - if success: - print("🎉 Image processing test completed successfully!") - logger.info("🎉 Image processing test completed successfully!") - else: - print("❌ Image processing test failed!") - logger.error("❌ Image processing test failed!") - - print("=" * 60) - logger.info("=" * 60) - -if __name__ == "__main__": - asyncio.run(main()) diff --git 
a/test_multifile_processing.py b/test_multifile_processing.py deleted file mode 100644 index 737127bf..00000000 --- a/test_multifile_processing.py +++ /dev/null @@ -1,263 +0,0 @@ -#!/usr/bin/env python3 -""" -Test script for multi-file processing implementation. -This script tests the new multi-file functionality without breaking existing single-file processing. -""" - -import asyncio -import json -import logging -from typing import Dict, Any, List - -# Setup logging -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - -async def test_multi_file_detection(): - """Test AI-powered multi-file detection.""" - print("=== Testing Multi-File Detection ===") - - # Mock AI service for testing - class MockAiService: - async def call(self, request): - class MockResponse: - def __init__(self, content): - self.content = content - return MockResponse('{"is_multi_file": true, "strategy": "per_entity", "criteria": "customer_id", "file_naming_pattern": "{customer_name}_data.docx", "reasoning": "User wants separate files for each customer"}') - - class MockAiObjects: - def __init__(self): - self.call = MockAiService().call - - # Import the AI service - try: - from modules.services.serviceAi.mainServiceAi import AiService - - # Create mock service center - class MockServiceCenter: - def __init__(self): - self.utils = MockUtils() - - class MockUtils: - def debugLogToFile(self, message, category): - print(f"[{category}] {message}") - - # Create AI service instance - ai_service = AiService(MockServiceCenter()) - ai_service.aiObjects = MockAiObjects() - - # Test prompts - test_prompts = [ - "Create one file for each customer in the document", - "Split the data into separate files by category", - "Generate individual files for each product", - "Create a single report with all data", - "Erstelle eine Datei für jeden Kunden", # German - "Créer un fichier par section" # French - ] - - for prompt in test_prompts: - print(f"\nTesting prompt: '{prompt}'") - try: - analysis = await ai_service._analyzePromptIntent(prompt, ai_service) - print(f" Analysis: {analysis}") - - if analysis.get("is_multi_file"): - print(f" ✓ Detected as multi-file with strategy: {analysis.get('strategy')}") - else: - print(f" ✓ Detected as single-file") - - except Exception as e: - print(f" ✗ Error: {str(e)}") - - print("\n=== Multi-File Detection Test Complete ===") - return True - - except ImportError as e: - print(f"Import error: {e}") - print("Make sure you're running from the gateway directory") - return False - except Exception as e: - print(f"Error during testing: {e}") - return False - -async def test_json_schema_validation(): - """Test JSON schema validation for both single and multi-file.""" - print("\n=== Testing JSON Schema Validation ===") - - try: - from modules.services.serviceGeneration.subJsonSchema import ( - get_document_subJsonSchema, - get_multi_document_subJsonSchema, - get_adaptive_json_schema, - validate_json_document - ) - - # Test single document schema - single_doc_schema = get_document_subJsonSchema() - print(f"✓ Single document schema loaded: {len(single_doc_schema)} properties") - - # Test multi-document schema - multi_doc_schema = get_multi_document_subJsonSchema() - print(f"✓ Multi-document schema loaded: {len(multi_doc_schema)} properties") - - # Test adaptive schema selection - single_analysis = {"is_multi_file": False} - multi_analysis = {"is_multi_file": True} - - single_schema = get_adaptive_json_schema(single_analysis) - multi_schema = get_adaptive_json_schema(multi_analysis) - - 
print(f"✓ Adaptive schema selection working") - print(f" Single-file schema type: {single_schema.get('type', 'unknown')}") - print(f" Multi-file schema type: {multi_schema.get('type', 'unknown')}") - - # Test validation with sample data - single_doc_data = { - "metadata": {"title": "Test Document"}, - "sections": [ - { - "id": "section_1", - "content_type": "paragraph", - "elements": [{"text": "Test content"}], - "order": 1 - } - ] - } - - multi_doc_data = { - "metadata": { - "title": "Test Documents", - "splitStrategy": "per_entity" - }, - "documents": [ - { - "id": "doc_1", - "title": "Document 1", - "filename": "doc1.docx", - "sections": [ - { - "id": "section_1", - "content_type": "paragraph", - "elements": [{"text": "Content 1"}], - "order": 1 - } - ] - } - ] - } - - single_valid = validate_json_document(single_doc_data) - multi_valid = validate_json_document(multi_doc_data) - - print(f"✓ Single document validation: {'PASS' if single_valid else 'FAIL'}") - print(f"✓ Multi-document validation: {'PASS' if multi_valid else 'FAIL'}") - - print("\n=== JSON Schema Validation Test Complete ===") - return True - - except ImportError as e: - print(f"Import error: {e}") - return False - except Exception as e: - print(f"Error during schema testing: {e}") - return False - -async def test_prompt_builder(): - """Test adaptive prompt building.""" - print("\n=== Testing Prompt Builder ===") - - try: - from modules.services.serviceGeneration.subPromptBuilder import ( - buildAdaptiveExtractionPrompt, - buildGenericExtractionPrompt - ) - - # Mock services - class MockServices: - def __init__(self): - self.utils = MockUtils() - - class MockUtils: - def debugLogToFile(self, message, category): - print(f"[{category}] {message}") - - services = MockServices() - - # Test adaptive prompt building - prompt_analysis = { - "is_multi_file": True, - "strategy": "per_entity", - "criteria": "customer_id", - "file_naming_pattern": "{customer_name}_data.docx" - } - - adaptive_prompt = await buildAdaptiveExtractionPrompt( - outputFormat="docx", - userPrompt="Create one file for each customer", - title="Customer Data", - promptAnalysis=prompt_analysis, - aiService=None, - services=services - ) - - print(f"✓ Adaptive prompt generated: {len(adaptive_prompt)} characters") - print(f" Contains multi-file instructions: {'documents' in adaptive_prompt}") - - # Test generic prompt building - generic_prompt = await buildGenericExtractionPrompt( - outputFormat="docx", - userPrompt="Create a single report", - title="Report", - aiService=None, - services=services - ) - - print(f"✓ Generic prompt generated: {len(generic_prompt)} characters") - print(f" Contains single-file instructions: {'sections' in generic_prompt}") - - print("\n=== Prompt Builder Test Complete ===") - return True - - except ImportError as e: - print(f"Import error: {e}") - return False - except Exception as e: - print(f"Error during prompt builder testing: {e}") - return False - -async def main(): - """Run all tests.""" - print("Starting Multi-File Processing Tests...") - print("=" * 50) - - tests = [ - test_multi_file_detection, - test_json_schema_validation, - test_prompt_builder - ] - - results = [] - for test in tests: - try: - result = await test() - results.append(result) - except Exception as e: - print(f"Test failed with exception: {e}") - results.append(False) - - print("\n" + "=" * 50) - print("Test Results Summary:") - print(f" Tests run: {len(tests)}") - print(f" Passed: {sum(results)}") - print(f" Failed: {len(tests) - sum(results)}") - - if 
all(results): - print("\n🎉 All tests passed! Multi-file processing is ready.") - else: - print("\n⚠️ Some tests failed. Check the implementation.") - - return all(results) - -if __name__ == "__main__": - asyncio.run(main())