From 82eb4f62b9fe545620d828489a6c1e4cf919af7d Mon Sep 17 00:00:00 2001
From: ValueOn AG
Date: Tue, 14 Oct 2025 01:13:25 +0200
Subject: [PATCH] Refactor serviceAi into focused sub-modules; remove env
 backup and standalone test files - ready for UI workflow testing
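
Splits mainServiceAi.py into five focused sub-modules (subCoreAi,
subDocumentGeneration, subDocumentProcessing, subUtilities,
subWebResearch), trimming it by roughly 2,300 lines per the diffstat
below, and removes stale env backups and standalone test files. As a
minimal sketch only - the re-export pattern here is an illustrative
assumption, not code taken from this patch - the slimmed-down
mainServiceAi.py is expected to keep existing imports working along
these lines:

    # mainServiceAi.py - illustrative facade sketch. The sub-module
    # names come from this patch; the re-export pattern is assumed.
    from modules.services.serviceAi import (
        subCoreAi,
        subDocumentGeneration,
        subDocumentProcessing,
        subUtilities,
        subWebResearch,
    )

    # Callers that previously reached into the monolith keep working,
    # e.g. mainServiceAi.subWebResearch or the names listed below.
    __all__ = [
        "subCoreAi",
        "subDocumentGeneration",
        "subDocumentProcessing",
        "subUtilities",
        "subWebResearch",
    ]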
---
env_dev.20251012_121418.backup | 90 -
env_dev.20251013_140140.backup | 94 -
env_int.20251012_121418.backup | 90 -
env_int.20251013_140140.backup | 94 -
env_prod.20251012_121418.backup | 90 -
env_prod.20251013_140140.backup | 94 -
modules/services/serviceAi/mainServiceAi.py | 2382 +----------------
modules/services/serviceAi/subCoreAi.py | 596 +++++
.../serviceAi/subDocumentGeneration.py | 459 ++++
.../serviceAi/subDocumentProcessing.py | 1042 +++++++
modules/services/serviceAi/subUtilities.py | 316 +++
modules/services/serviceAi/subWebResearch.py | 384 +++
test_extractor_formats.py | 117 -
test_image_processing.py | 83 -
test_multifile_processing.py | 263 --
15 files changed, 2874 insertions(+), 3320 deletions(-)
delete mode 100644 env_dev.20251012_121418.backup
delete mode 100644 env_dev.20251013_140140.backup
delete mode 100644 env_int.20251012_121418.backup
delete mode 100644 env_int.20251013_140140.backup
delete mode 100644 env_prod.20251012_121418.backup
delete mode 100644 env_prod.20251013_140140.backup
create mode 100644 modules/services/serviceAi/subCoreAi.py
create mode 100644 modules/services/serviceAi/subDocumentGeneration.py
create mode 100644 modules/services/serviceAi/subDocumentProcessing.py
create mode 100644 modules/services/serviceAi/subUtilities.py
create mode 100644 modules/services/serviceAi/subWebResearch.py
delete mode 100644 test_extractor_formats.py
delete mode 100644 test_image_processing.py
delete mode 100644 test_multifile_processing.py
diff --git a/env_dev.20251012_121418.backup b/env_dev.20251012_121418.backup
deleted file mode 100644
index 9ebbb93b..00000000
--- a/env_dev.20251012_121418.backup
+++ /dev/null
@@ -1,90 +0,0 @@
-# Development Environment Configuration
-
-# System Configuration
-APP_ENV_TYPE = dev
-APP_ENV_LABEL = Development Instance Patrick
-APP_API_URL = http://localhost:8000
-APP_KEY_SYSVAR = D:/Athi/Local/Web/poweron/local/key.txt
-APP_INIT_PASS_ADMIN_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEeFFtRGtQeVUtcjlrU3dab1ZxUm9WSks0MlJVYUtERFlqUElHemZrOGNENk1tcmJNX3Vxc01UMDhlNU40VzZZRVBpUGNmT3podzZrOGhOeEJIUEt4eVlSWG5UYXA3d09DVXlLT21Kb1JYSUU9
-APP_INIT_PASS_EVENT_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpERzZjNm56WGVBdjJTeG5Udjd6OGQwUVotYXUzQjJ1YVNyVXVBa3NZVml3ODU0MVNkZjhWWmJwNUFkc19BcHlHMTU1Q3BRcHU0cDBoZkFlR2l6UEZQU3d2U3MtMDh5UDZteGFoQ0EyMUE1ckE9
-
-# PostgreSQL Storage (new)
-DB_APP_HOST=localhost
-DB_APP_DATABASE=poweron_app
-DB_APP_USER=poweron_dev
-DB_APP_PASSWORD_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEcUIxNEFfQ2xnS0RrSC1KNnUxTlVvTGZoMHgzaEI4Z3NlVzVROTVLak5Ubi1vaEZubFZaMTFKMGd6MXAxekN2d2NvMy1hRjg2UVhybktlcFA5anZ1WjFlQmZhcXdwaGhWdzRDc3ExeUhzWTg9
-DB_APP_PORT=5432
-
-# PostgreSQL Storage (new)
-DB_CHAT_HOST=localhost
-DB_CHAT_DATABASE=poweron_chat
-DB_CHAT_USER=poweron_dev
-DB_CHAT_PASSWORD_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpERFNzNVhoalpCR0QxYXAwdEpXWXVVOTdZdWtqWW5FNXFGcFl2amNYLWYwYl9STXltRlFxLWNzVWlMVnNYdXk0RklnRExFT0FaQjg2aGswNnhhSGhCN29KN2VEb2FlUV9NTlV3b0tLelplSVU9
-DB_CHAT_PORT=5432
-
-# PostgreSQL Storage (new)
-DB_MANAGEMENT_HOST=localhost
-DB_MANAGEMENT_DATABASE=poweron_management
-DB_MANAGEMENT_USER=poweron_dev
-DB_MANAGEMENT_PASSWORD_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEUldqSTVpUnFqdGhITDYzT3RScGlMYVdTMmZhOXdudDRCc3dhdllOd3l6MS1vWHY2MjVsTUF1Sk9saEJOSk9ONUlBZjQwb2c2T1gtWWJhcXFzVVVXd01xc0U0b0lJX0JyVDRxaDhNS01JcWs9
-DB_MANAGEMENT_PORT=5432
-
-# Security Configuration
-APP_JWT_KEY_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpERjlrSktmZHVuQnJ1VVJDdndLaUcxZGJsT2ZlUFRlcFdOZ001RnlzM2FhLWhRV2tjWWFhaWQwQ3hkcUFvbThMcndxSjFpYTdfRV9OZGhTcksxbXFTZWg5MDZvOHpCVXBHcDJYaHlJM0tyNWRZckZsVHpQcmxTZHJoZUs1M3lfU2ljRnJaTmNSQ0w0X085OXI0QW80M2xfQnJqZmZ6VEh3TUltX0xzeE42SGtZPQ==
-APP_TOKEN_EXPIRY=300
-
-# CORS Configuration
-APP_ALLOWED_ORIGINS=http://localhost:8080,https://playground.poweron-center.net
-
-# Logging configuration
-APP_LOGGING_LOG_LEVEL = DEBUG
-APP_LOGGING_LOG_DIR = D:/Athi/Local/Web/poweron/local/logs
-APP_LOGGING_FORMAT = %(asctime)s - %(levelname)s - %(name)s - %(message)s
-APP_LOGGING_DATE_FORMAT = %Y-%m-%d %H:%M:%S
-APP_LOGGING_CONSOLE_ENABLED = True
-APP_LOGGING_FILE_ENABLED = True
-APP_LOGGING_ROTATION_SIZE = 10485760
-APP_LOGGING_BACKUP_COUNT = 5
-
-# Service Redirects
-Service_MSFT_REDIRECT_URI = http://localhost:8000/api/msft/auth/callback
-Service_GOOGLE_REDIRECT_URI = http://localhost:8000/api/google/auth/callback
-
-# OpenAI configuration
-Connector_AiOpenai_API_URL = https://api.openai.com/v1/chat/completions
-Connector_AiOpenai_API_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEajBuZmtYTVdqLTBpQm9KZ2pCXzRCV3VhZzlYTEhKb1FqWXNrV3lyb25uZUN1WVVQUEY3dGYtejludV9MNGlKeVREanZGOGloV09mY2ttQ3k5SjBFOGFac2ZQTkNKNUZWVnRINVQyeWhsR2wyYnVrRDNzV2NqSHB0ajQ4UWtGeGZtbmR0Q3VvS0hDZlphVmpSc2Z6RG5nPT0=
-Connector_AiOpenai_MODEL_NAME = gpt-4o
-Connector_AiOpenai_TEMPERATURE = 0.2
-Connector_AiOpenai_MAX_TOKENS = 2000
-
-# Anthropic configuration
-Connector_AiAnthropic_API_URL = https://api.anthropic.com/v1/messages
-Connector_AiAnthropic_API_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpENmFBWG16STFQUVZxNzZZRzRLYTA4X3lRanF1VkF4cU45OExNMzlsQmdISGFxTUxud1dXODBKcFhMVG9KNjdWVnlTTFFROVc3NDlsdlNHLUJXeG41NDBHaXhHR0VHVWl5UW9RNkVWbmlhakRKVW5pM0R4VHk0LUw0TV9LdkljNHdBLXJua21NQkl2b3l4UkVkMGN1YjBrMmJEeWtMay1jbmxrYWJNbUV0aktCXzU1djR2d2RSQXZORTNwcG92ZUVvVGMtQzQzTTVncEZTRGRtZUFIZWQ0dz09
-Connector_AiAnthropic_MODEL_NAME = claude-3-5-sonnet-20241022
-Connector_AiAnthropic_TEMPERATURE = 0.2
-Connector_AiAnthropic_MAX_TOKENS = 2000
-
-# Perplexity AI configuration
-Connector_AiPerplexity_API_URL = https://api.perplexity.ai/chat/completions
-Connector_AiPerplexity_API_SECRET = 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
-Connector_AiPerplexity_MODEL_NAME = sonar
-Connector_AiPerplexity_TEMPERATURE = 0.2
-Connector_AiPerplexity_MAX_TOKENS = 2000
-
-# Agent Mail configuration
-Service_MSFT_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
-Service_MSFT_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEQk4xYnpmbnItUEU3dHU4eHB5dzVYay1WT012RTRLUWJDTlBILVY5dC1FX3VMNjZmLThrbDRFNWFSNGprY3RRTlpYNGlubVBpNnY3MjNJcGtzVk9PMzRacl9LUlM2RU5vTVVZWHJvaUhWSHVfc1pNR0pfQmI5SEprOG5KdlB1QnQ=
-Service_MSFT_TENANT_ID = common
-
-# Google Service configuration
-Service_GOOGLE_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
-Service_GOOGLE_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpETDJhbGVQMHlFQzNPVFI1ZzBMa3pNMGlQUHhaQm10eVl1bFlSeTBybzlTOWE2MURXQ0hkRlo0NlNGbHQxWEl1OVkxQnVKYlhhOXR1cUF4T3k0WDdscktkY1oyYllRTmdDTWpfbUdwWGtSd1JvNlYxeTBJdEtaaS1vYnItcW0yaFM=
-
-# Tavily Web Search configuration
-Connector_WebTavily_API_KEY_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEQTdnUHMwd2pIaXNtMmtCTFREd0pyQXRKb1F5eGtHSnkyOGZiUnlBOFc0b3Vzcndrc3ViRm1nMDJIOEZKYWxqdWNkZGh5N0Z4R0JlQmxXSG5pVnJUR2VYckZhMWNMZ1FNeXJ3enJLVlpiblhOZTNleUg3ZzZyUzRZanFSeDlVMkI=
-
-# Google Cloud Speech Services configuration
-Connector_GoogleSpeech_API_KEY_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpETk5FWWM3Q0JKMzhIYTlyMkhuNjA4NlF4dk82U2NScHhTVGY3UG83NkhfX3RrcWVtWWcyLXRjU1dTT21zWEl6YWRMMUFndXpsUnJOeHh3QThsNDZKRXROTzdXRUdsT0JZajZJNVlfb0gtMXkwWm9DOERPVnpjU0pyUEZfOGJsUnprT3ltMVVhalUyUm9hMUFtZEtHUnJqOGZ4dEZjZm5SWVVTckVCWnY1UkdVSHVmUlgwbnAyc0xDQW84R3ViSko5OHVCVWZRUVNiaG1pVFB6X3EwS0FPd2dUYjhiSmRjcXh2WEZiXzI4SFZqT21tbDduUWRyVWdFZXpmcVM5ZDR0VWtzZnF5UER6cGwwS2JlLV9CSTZ0Z0IyQ1h0YW9TcmhRTXZEckp4bWhmTkt6UTNYMk4zVkpnbUJmaDIxZnoyR2dWTEYwTUFEV0w2eUdUUGpoZk9XRkt4RVF1Z1NPdUpBeTcyWV9PY1Ffd2s0ZEdVekxGekhoeEl4TmNqaXYtbUJuSVdycFducERWdWtZajZnX011Q2w4eE9VMTBqQ1ZxRmdScWhXY1E3WWhzX1JZcHhxam9FbDVPN3Q1MWtrMUZuTUg3LVFQVHp1T1hpQWNDMzEzekVJWk9ybl91YUVjSkFob1VaMi1ONEtuMnRSOEg1S3QybUMwbVZDejItajBLTjM2Zy1hNzZQMW5LLVVDVGdFWm5BZUxNeEFnUkZzU3dxV0lCUlc0LWo4b05GczVpOGZSV2ZxbFBwUml6OU5tYjdnTks3Y3hrVEZVTHlmc1NPdFh4WE5pWldEZklOQUxBbjBpMTlkX3FFQVJ6c2NSZGdzTThycE92VW82enZKamhiRGFnU25aZGlHZHhZd2lUUmhuTVptNjhoWVlJQkxIOEkzbzJNMjZCZFJyM25tdXBnQ2ZWaHV3b2p6UWJpdk9xUEhBc1dyTlNmeF9wbm5yYUhHV01UZnVXWDFlNzBkdXlWUWhvcmJpSmljbmE3LUpUZEg4VzRwZ2JVSjdYUm1sODViQXVxUzdGTmZFbVpiN2V1YW5XV3U4b2VRWmxldGVGVHZsSldoekhVLU9wZ2V0cGZIYkNqM2pXVGctQVAyUm4xTHhpd1VVLXFhcnVEV21Rby1hbTlqTl84TjVveHdYTExUVkhHQ0ltaTB2WXJnY1NQVE5PbWg3ejgySElYc1JSTlQ3NDlFUWR6STZVUjVqaXFRN200NF9LY1ljQ0R2UldlWUtKY1NQVnJ4QXRyYTBGSWVuenhyM0Z0cWtndTd1eG8xRzY5a2dNZ1hkQm5MV3BHVzA2N1QwUkd6WlRGYTZQOUhnVWQ2S0Y5U0s1dXFNVXh5Q2pLWVUxSUQ2MlR1ak52NmRIZ2hlYTk1SGZGWS1RV3hWVU9rR3d1Rk9MLS11REZXbzhqMHpsSm1HYW1jMUNLT29YOHZsRWNaLTVvOFpmT3l3MHVwaERTT0dNLWFjcGRYZ25qT2szTkVFUnRFR3JWYS1aNXFIRnMyalozTlQzNFF2NXJLVHVPVF9zdTF6ZjlkbzJ4RFc2ZENmNFFxZDZzTzhfMUl0bW96V0lPZkh1dXFYZlEteFBlSG84Si1FNS1TTi1OMkFnX2pOYW8xY3MxMVJnVC02MDUyaXZfMEVHWDQtVlRpcENmV0h3V0dCWEFRS2prQXdNRlQ5dnRFVHU0Q1dNTmh0SlBCaU55bFMydWM1TTFFLW96ODBnV3dNZHFZTWZhRURYSHlrdzF3RlRuWDBoQUhSOUJWemtRM3pxcDJFbGJoaTJ3ZktRTlJxbXltaHBoZXVJVDlxS3cxNWo2c0ZBV0NzaUstRWdsMW1xLXFkanZGYUFiU0tSLXFQa0tkcDFoMV9kak41ZjQ0R214UmtOR1ZBanRuemY3Mmw1SkZ5aDZodGIzT3N2aV85MW9kcld6c0g0ZDgtTWo3b3Y3VjJCRnR2U2tMVm9rUXNVRnVHbzZXVTZ6RmI2RkNmajBfMWVnODVFbnpkT0oyci15czJHU0p1cUowTGZJMzVnd3hIRjQyTVhKOGRkcFRKdVpyQ3Yzd01Jb1lSajFmV0paeEV0cjk1SmpmdWpDVFJMUmMtUFctOGhaTmlKQXNRVlVUNlhJemxudHZCR056SVlBb3NOTEYxRTRLaFlVd2d3TWtxVlB6ZEtQLTkxOGMyY3N0a2pYRFUweDBNaGhja2xSSklPOUZla1dKTWRNbG8tUGdSNEV5cW90OWlOZFlIUExBd3U2b2hyS1owbXVMM3p0Qm41cUtzWUxYNzB1N3JpUTNBSGdsT0NuamNTb1lIbXR4MG1sakNPVkxBUXRLVE1xX0YxWDhOcERIY1lTQVFqS01CaXZKNllFaXlIR0JsM1pKMmV1OUo3TGI1WkRaVnYxUTl1LTM0SU1qN1V1b0RCT0x0VHNLTmNLZnk1S0MxYnBBcm03WnVua0xqaEhGUzhOU253ZkppRzdudXBSVlMxeFVOSWxtZ1o2RVBSQUhEUEFuQ1hxSVZMME4yWUtaU3VyRGo3RkUyRUNjT0pNcE1BdE1ZRzdXVl8ydUtXZjdMdHdEVW4teHUtTi1HSGliLUxud21TX0NtcGVkRFBHNkZ1WTlNczR4OUJfUVluc1BoV09oWS1scUdsNnB5d1U5M1huX3k4QzAyNldtb2hybktYN2xKZ1NTNWFsaWwzV3pCRVhkaGR5eTNlV1d6ZzFfaFZTT0E4UjRpQ3pKdEZxUlJ6UFZXM3laUndyWEk2NlBXLUpoajVhZzVwQXpWVzUtVjVNZFBwdWdQa3AxZC1KdGdqNnhibjN4dmFYb2cxcEVwc1g5R09zRUdINUZtOE5QRjVUU0dpZy1QVl9odnFtVDNuWFZLSURtMXlSMlhRNTBWSVFJbEdOOWpfVWV0SmdRWDdlUXZZWE8xRUxDN1I0aEN6MHYwNzM1cmpJS0ZpMnBYWkxfb3FsbEV1VnlqWGxqdVJ6SHlwSjAzRlMycTBaQ295NXNnZERpUnJQcjhrUUd3bkI4bDVzRmxQblhkaFJPTTdISnVUQmhET3BOMTM4bjVvUEc2VmZhb2lrR1FyTUl2RWNEeGg0U0dsNnV6eU5zOUxiNDY5SXBxR0hBS00wOTgyWTFnWkQyaEtLVUloT3ZxZGh0RWVGRmJzenFsaUtfZENQM0JzdkVVeTdXR3hUSmJST1NBMUI1NkVFWncwNW5JZVVLX1p1RXdqVnFfQWpvQ08yQjZhN1NkTkpTSnUxOVRXZXE0WFEtZWxhZW1NNXYtQ2sya0VGLURmS01lMkctNVY3c2ZhN0ZGRFgwWHlabTFkeS1hcUZ1dDZ3cnpPQ3hha2IzVE11M0pqbklmU0diczBqTFBNZC1QZGp6VzNTSnJVSjJoWkJUQjVORG4tYUJmMEJtSUNUdVpEaGt6OTM3TjFOdVhXUHItZjRtZ25nU3NhZC1sVTVXNTRDTmxZbnlfeHNsdkpuMXhUYnE1MnpVQ0ZOclRWM1M4eHdXTzRXbFRZZVQtTS1iRVdXVWZMSGotcWg3MUxUYTFnSEEtanBCRHlZRUNIdGdpUFhsYjdYUndCZnRITzhMZVJ1dHFoVlVNb0duVjlxd0U4OGRuQVV3MG90R0hiYW5MWkxWVklzbWFRNzBfSUNrdzc5bVdtTXg0dExEYnRCaDI3c1I4TWFwLXZKR0wxSjRZYjZIV3ZqZjNqTWhFT0RGSDVMc1A1UzY2bDBiMGFSUy1fNVRQRzRJWDVydUpqb1ZfSHNVbldVeUN2YlAxSW5WVDdxVzJ1WHpLeUdmb0xWMDNHN05oQzY3YnhvUUdhS2xaOHNidkVvbTZtSHFlblhOYmwyR3NQdVJDRUdxREhWdF9ZcXhwUWxHc2hyLW5vUGhIUVhJNUNhY0hFU0ptVnI0TFVhZDE1TFBBUEstSkRoZWJ5MHJhUmZrR1ZrRlFtRGpxS1pOMmFMQjBsdjluY3FiYUU4eGJVVXlZVEpuNWdHVVhJMGtwaTdZR2NDbXd2eHpOQ09SeTV6N1BaVUpsR1pQVDBZcElJUUt6VnVpQmxSYnE4Y1BCWV9IRWdVV0p3enBGVHItdnBGN3NyNWFBWmkySnByWThsbDliSlExQmp3LVlBaDIyZXp6UnR6cU9rTzJmTDBlSVpON0tiWllMdm1oME1zTFl2S2ZYYllhQlY2VHNZRGtHUDY4U1lIVExLZTU4VzZxSTZrZHl1ZTBDc0g4SjI4WGYyZHV1bm9wQ3R2Z09ld1ZmUkN5alJGeHZKSHl1bWhQVXpNMzdjblpLcUhfSm02Qlh5S1FVN3lIcHl0NnlRPT0=
-
-# Feature SyncDelta JIRA configuration
-Feature_SyncDelta_JIRA_DELTA_TOKEN_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEbm0yRUJ6VUJKbUwyRW5kMnRaNW4wM2YxMkJUTXVXZUdmdVRCaUZIVHU2TTV2RWZLRmUtZkcwZE4yRUNlNDQ0aUJWYjNfdVg5YjV5c2JwMHhoUUYxZWdkeS11bXR0eGxRLWRVaVU3cUVQZWJlNDRtY1lWUDdqeDVFSlpXS0VFX21WajlRS3lHQjc0bS11akkybWV3QUFlR2hNWUNYLUdiRjZuN2dQODdDSExXWG1Dd2ZGclI2aUhlSWhETVZuY3hYdnhkb2c2LU1JTFBvWFpTNmZtMkNVOTZTejJwbDI2eGE0OS1xUlIwQnlCSmFxRFNCeVJNVzlOMDhTR1VUamx4RDRyV3p6Tk9qVHBrWWdySUM3TVRaYjd3N0JHMFhpdzFhZTNDLTFkRVQ2RVE4U19COXRhRWtNc0NVOHRqUS1CRDFpZ19xQmtFLU9YSDU3TXBZQXpVcld3PT0=
diff --git a/env_dev.20251013_140140.backup b/env_dev.20251013_140140.backup
deleted file mode 100644
index f8913497..00000000
--- a/env_dev.20251013_140140.backup
+++ /dev/null
@@ -1,94 +0,0 @@
-# Development Environment Configuration
-
-# System Configuration
-APP_ENV_TYPE = dev
-APP_ENV_LABEL = Development Instance Patrick
-APP_API_URL = http://localhost:8000
-APP_KEY_SYSVAR = D:/Athi/Local/Web/poweron/local/key.txt
-APP_INIT_PASS_ADMIN_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEeFFtRGtQeVUtcjlrU3dab1ZxUm9WSks0MlJVYUtERFlqUElHemZrOGNENk1tcmJNX3Vxc01UMDhlNU40VzZZRVBpUGNmT3podzZrOGhOeEJIUEt4eVlSWG5UYXA3d09DVXlLT21Kb1JYSUU9
-APP_INIT_PASS_EVENT_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpERzZjNm56WGVBdjJTeG5Udjd6OGQwUVotYXUzQjJ1YVNyVXVBa3NZVml3ODU0MVNkZjhWWmJwNUFkc19BcHlHMTU1Q3BRcHU0cDBoZkFlR2l6UEZQU3d2U3MtMDh5UDZteGFoQ0EyMUE1ckE9
-
-# PostgreSQL Storage (new)
-DB_APP_HOST=localhost
-DB_APP_DATABASE=poweron_app
-DB_APP_USER=poweron_dev
-DB_APP_PASSWORD_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEcUIxNEFfQ2xnS0RrSC1KNnUxTlVvTGZoMHgzaEI4Z3NlVzVROTVLak5Ubi1vaEZubFZaMTFKMGd6MXAxekN2d2NvMy1hRjg2UVhybktlcFA5anZ1WjFlQmZhcXdwaGhWdzRDc3ExeUhzWTg9
-DB_APP_PORT=5432
-
-# PostgreSQL Storage (new)
-DB_CHAT_HOST=localhost
-DB_CHAT_DATABASE=poweron_chat
-DB_CHAT_USER=poweron_dev
-DB_CHAT_PASSWORD_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpERFNzNVhoalpCR0QxYXAwdEpXWXVVOTdZdWtqWW5FNXFGcFl2amNYLWYwYl9STXltRlFxLWNzVWlMVnNYdXk0RklnRExFT0FaQjg2aGswNnhhSGhCN29KN2VEb2FlUV9NTlV3b0tLelplSVU9
-DB_CHAT_PORT=5432
-
-# PostgreSQL Storage (new)
-DB_MANAGEMENT_HOST=localhost
-DB_MANAGEMENT_DATABASE=poweron_management
-DB_MANAGEMENT_USER=poweron_dev
-DB_MANAGEMENT_PASSWORD_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEUldqSTVpUnFqdGhITDYzT3RScGlMYVdTMmZhOXdudDRCc3dhdllOd3l6MS1vWHY2MjVsTUF1Sk9saEJOSk9ONUlBZjQwb2c2T1gtWWJhcXFzVVVXd01xc0U0b0lJX0JyVDRxaDhNS01JcWs9
-DB_MANAGEMENT_PORT=5432
-
-# Security Configuration
-APP_JWT_KEY_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpERjlrSktmZHVuQnJ1VVJDdndLaUcxZGJsT2ZlUFRlcFdOZ001RnlzM2FhLWhRV2tjWWFhaWQwQ3hkcUFvbThMcndxSjFpYTdfRV9OZGhTcksxbXFTZWg5MDZvOHpCVXBHcDJYaHlJM0tyNWRZckZsVHpQcmxTZHJoZUs1M3lfU2ljRnJaTmNSQ0w0X085OXI0QW80M2xfQnJqZmZ6VEh3TUltX0xzeE42SGtZPQ==
-APP_TOKEN_EXPIRY=300
-
-# CORS Configuration
-APP_ALLOWED_ORIGINS=http://localhost:8080,https://playground.poweron-center.net
-
-# Logging configuration
-APP_LOGGING_LOG_LEVEL = DEBUG
-APP_LOGGING_LOG_DIR = D:/Athi/Local/Web/poweron/local/logs
-APP_LOGGING_FORMAT = %(asctime)s - %(levelname)s - %(name)s - %(message)s
-APP_LOGGING_DATE_FORMAT = %Y-%m-%d %H:%M:%S
-APP_LOGGING_CONSOLE_ENABLED = True
-APP_LOGGING_FILE_ENABLED = True
-APP_LOGGING_ROTATION_SIZE = 10485760
-APP_LOGGING_BACKUP_COUNT = 5
-
-# Service Redirects
-Service_MSFT_REDIRECT_URI = http://localhost:8000/api/msft/auth/callback
-Service_GOOGLE_REDIRECT_URI = http://localhost:8000/api/google/auth/callback
-
-# OpenAI configuration
-Connector_AiOpenai_API_URL = https://api.openai.com/v1/chat/completions
-Connector_AiOpenai_API_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEajBuZmtYTVdqLTBpQm9KZ2pCXzRCV3VhZzlYTEhKb1FqWXNrV3lyb25uZUN1WVVQUEY3dGYtejludV9MNGlKeVREanZGOGloV09mY2ttQ3k5SjBFOGFac2ZQTkNKNUZWVnRINVQyeWhsR2wyYnVrRDNzV2NqSHB0ajQ4UWtGeGZtbmR0Q3VvS0hDZlphVmpSc2Z6RG5nPT0=
-Connector_AiOpenai_MODEL_NAME = gpt-4o
-Connector_AiOpenai_TEMPERATURE = 0.2
-Connector_AiOpenai_MAX_TOKENS = 2000
-
-# Anthropic configuration
-Connector_AiAnthropic_API_URL = https://api.anthropic.com/v1/messages
-Connector_AiAnthropic_API_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpENmFBWG16STFQUVZxNzZZRzRLYTA4X3lRanF1VkF4cU45OExNMzlsQmdISGFxTUxud1dXODBKcFhMVG9KNjdWVnlTTFFROVc3NDlsdlNHLUJXeG41NDBHaXhHR0VHVWl5UW9RNkVWbmlhakRKVW5pM0R4VHk0LUw0TV9LdkljNHdBLXJua21NQkl2b3l4UkVkMGN1YjBrMmJEeWtMay1jbmxrYWJNbUV0aktCXzU1djR2d2RSQXZORTNwcG92ZUVvVGMtQzQzTTVncEZTRGRtZUFIZWQ0dz09
-Connector_AiAnthropic_MODEL_NAME = claude-3-5-sonnet-20241022
-Connector_AiAnthropic_TEMPERATURE = 0.2
-Connector_AiAnthropic_MAX_TOKENS = 2000
-
-# Perplexity AI configuration
-Connector_AiPerplexity_API_URL = https://api.perplexity.ai/chat/completions
-Connector_AiPerplexity_API_SECRET = DEV_ENC:Z0FBQUFBQm82Mzk2Q1MwZ0dNcUVBcUtuRDJIcTZkMXVvYnpjM3JEMzJiT1NKSHljX282ZDIyZTJYc09VSTdVNXAtOWU2UXp5S193NTk5dHJsWlFjRjhWektFOG1DVGY4ZUhHTXMzS0RPN1lNcF9nSlVWbW5BZ1hkZDVTejl6bVZNRFVvX29xamJidWRFMmtjQmkyRUQ2RUh6UTN1aWNPSUJBPT0=
-Connector_AiPerplexity_MODEL_NAME = sonar
-Connector_AiPerplexity_TEMPERATURE = 0.2
-Connector_AiPerplexity_MAX_TOKENS = 2000
-
-# Agent Mail configuration
-Service_MSFT_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
-Service_MSFT_CLIENT_SECRET = KDH8Q~H2OCtdvYy5yx6HOCYEbdnJCq90G21vTcPw
-Service_MSFT_TENANT_ID = common
-
-# Google Service configuration
-Service_GOOGLE_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
-Service_GOOGLE_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpETDJhbGVQMHlFQzNPVFI1ZzBMa3pNMGlQUHhaQm10eVl1bFlSeTBybzlTOWE2MURXQ0hkRlo0NlNGbHQxWEl1OVkxQnVKYlhhOXR1cUF4T3k0WDdscktkY1oyYllRTmdDTWpfbUdwWGtSd1JvNlYxeTBJdEtaaS1vYnItcW0yaFM=
-
-# Tavily Web Search configuration
-Connector_WebTavily_API_KEY_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEQTdnUHMwd2pIaXNtMmtCTFREd0pyQXRKb1F5eGtHSnkyOGZiUnlBOFc0b3Vzcndrc3ViRm1nMDJIOEZKYWxqdWNkZGh5N0Z4R0JlQmxXSG5pVnJUR2VYckZhMWNMZ1FNeXJ3enJLVlpiblhOZTNleUg3ZzZyUzRZanFSeDlVMkI=
-
-# Google Cloud Speech Services configuration
-Connector_GoogleSpeech_API_KEY_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpETk5FWWM3Q0JKMzhIYTlyMkhuNjA4NlF4dk82U2NScHhTVGY3UG83NkhfX3RrcWVtWWcyLXRjU1dTT21zWEl6YWRMMUFndXpsUnJOeHh3QThsNDZKRXROTzdXRUdsT0JZajZJNVlfb0gtMXkwWm9DOERPVnpjU0pyUEZfOGJsUnprT3ltMVVhalUyUm9hMUFtZEtHUnJqOGZ4dEZjZm5SWVVTckVCWnY1UkdVSHVmUlgwbnAyc0xDQW84R3ViSko5OHVCVWZRUVNiaG1pVFB6X3EwS0FPd2dUYjhiSmRjcXh2WEZiXzI4SFZqT21tbDduUWRyVWdFZXpmcVM5ZDR0VWtzZnF5UER6cGwwS2JlLV9CSTZ0Z0IyQ1h0YW9TcmhRTXZEckp4bWhmTkt6UTNYMk4zVkpnbUJmaDIxZnoyR2dWTEYwTUFEV0w2eUdUUGpoZk9XRkt4RVF1Z1NPdUpBeTcyWV9PY1Ffd2s0ZEdVekxGekhoeEl4TmNqaXYtbUJuSVdycFducERWdWtZajZnX011Q2w4eE9VMTBqQ1ZxRmdScWhXY1E3WWhzX1JZcHhxam9FbDVPN3Q1MWtrMUZuTUg3LVFQVHp1T1hpQWNDMzEzekVJWk9ybl91YUVjSkFob1VaMi1ONEtuMnRSOEg1S3QybUMwbVZDejItajBLTjM2Zy1hNzZQMW5LLVVDVGdFWm5BZUxNeEFnUkZzU3dxV0lCUlc0LWo4b05GczVpOGZSV2ZxbFBwUml6OU5tYjdnTks3Y3hrVEZVTHlmc1NPdFh4WE5pWldEZklOQUxBbjBpMTlkX3FFQVJ6c2NSZGdzTThycE92VW82enZKamhiRGFnU25aZGlHZHhZd2lUUmhuTVptNjhoWVlJQkxIOEkzbzJNMjZCZFJyM25tdXBnQ2ZWaHV3b2p6UWJpdk9xUEhBc1dyTlNmeF9wbm5yYUhHV01UZnVXWDFlNzBkdXlWUWhvcmJpSmljbmE3LUpUZEg4VzRwZ2JVSjdYUm1sODViQXVxUzdGTmZFbVpiN2V1YW5XV3U4b2VRWmxldGVGVHZsSldoekhVLU9wZ2V0cGZIYkNqM2pXVGctQVAyUm4xTHhpd1VVLXFhcnVEV21Rby1hbTlqTl84TjVveHdYTExUVkhHQ0ltaTB2WXJnY1NQVE5PbWg3ejgySElYc1JSTlQ3NDlFUWR6STZVUjVqaXFRN200NF9LY1ljQ0R2UldlWUtKY1NQVnJ4QXRyYTBGSWVuenhyM0Z0cWtndTd1eG8xRzY5a2dNZ1hkQm5MV3BHVzA2N1QwUkd6WlRGYTZQOUhnVWQ2S0Y5U0s1dXFNVXh5Q2pLWVUxSUQ2MlR1ak52NmRIZ2hlYTk1SGZGWS1RV3hWVU9rR3d1Rk9MLS11REZXbzhqMHpsSm1HYW1jMUNLT29YOHZsRWNaLTVvOFpmT3l3MHVwaERTT0dNLWFjcGRYZ25qT2szTkVFUnRFR3JWYS1aNXFIRnMyalozTlQzNFF2NXJLVHVPVF9zdTF6ZjlkbzJ4RFc2ZENmNFFxZDZzTzhfMUl0bW96V0lPZkh1dXFYZlEteFBlSG84Si1FNS1TTi1OMkFnX2pOYW8xY3MxMVJnVC02MDUyaXZfMEVHWDQtVlRpcENmV0h3V0dCWEFRS2prQXdNRlQ5dnRFVHU0Q1dNTmh0SlBCaU55bFMydWM1TTFFLW96ODBnV3dNZHFZTWZhRURYSHlrdzF3RlRuWDBoQUhSOUJWemtRM3pxcDJFbGJoaTJ3ZktRTlJxbXltaHBoZXVJVDlxS3cxNWo2c0ZBV0NzaUstRWdsMW1xLXFkanZGYUFiU0tSLXFQa0tkcDFoMV9kak41ZjQ0R214UmtOR1ZBanRuemY3Mmw1SkZ5aDZodGIzT3N2aV85MW9kcld6c0g0ZDgtTWo3b3Y3VjJCRnR2U2tMVm9rUXNVRnVHbzZXVTZ6RmI2RkNmajBfMWVnODVFbnpkT0oyci15czJHU0p1cUowTGZJMzVnd3hIRjQyTVhKOGRkcFRKdVpyQ3Yzd01Jb1lSajFmV0paeEV0cjk1SmpmdWpDVFJMUmMtUFctOGhaTmlKQXNRVlVUNlhJemxudHZCR056SVlBb3NOTEYxRTRLaFlVd2d3TWtxVlB6ZEtQLTkxOGMyY3N0a2pYRFUweDBNaGhja2xSSklPOUZla1dKTWRNbG8tUGdSNEV5cW90OWlOZFlIUExBd3U2b2hyS1owbXVMM3p0Qm41cUtzWUxYNzB1N3JpUTNBSGdsT0NuamNTb1lIbXR4MG1sakNPVkxBUXRLVE1xX0YxWDhOcERIY1lTQVFqS01CaXZKNllFaXlIR0JsM1pKMmV1OUo3TGI1WkRaVnYxUTl1LTM0SU1qN1V1b0RCT0x0VHNLTmNLZnk1S0MxYnBBcm03WnVua0xqaEhGUzhOU253ZkppRzdudXBSVlMxeFVOSWxtZ1o2RVBSQUhEUEFuQ1hxSVZMME4yWUtaU3VyRGo3RkUyRUNjT0pNcE1BdE1ZRzdXVl8ydUtXZjdMdHdEVW4teHUtTi1HSGliLUxud21TX0NtcGVkRFBHNkZ1WTlNczR4OUJfUVluc1BoV09oWS1scUdsNnB5d1U5M1huX3k4QzAyNldtb2hybktYN2xKZ1NTNWFsaWwzV3pCRVhkaGR5eTNlV1d6ZzFfaFZTT0E4UjRpQ3pKdEZxUlJ6UFZXM3laUndyWEk2NlBXLUpoajVhZzVwQXpWVzUtVjVNZFBwdWdQa3AxZC1KdGdqNnhibjN4dmFYb2cxcEVwc1g5R09zRUdINUZtOE5QRjVUU0dpZy1QVl9odnFtVDNuWFZLSURtMXlSMlhRNTBWSVFJbEdOOWpfVWV0SmdRWDdlUXZZWE8xRUxDN1I0aEN6MHYwNzM1cmpJS0ZpMnBYWkxfb3FsbEV1VnlqWGxqdVJ6SHlwSjAzRlMycTBaQ295NXNnZERpUnJQcjhrUUd3bkI4bDVzRmxQblhkaFJPTTdISnVUQmhET3BOMTM4bjVvUEc2VmZhb2lrR1FyTUl2RWNEeGg0U0dsNnV6eU5zOUxiNDY5SXBxR0hBS00wOTgyWTFnWkQyaEtLVUloT3ZxZGh0RWVGRmJzenFsaUtfZENQM0JzdkVVeTdXR3hUSmJST1NBMUI1NkVFWncwNW5JZVVLX1p1RXdqVnFfQWpvQ08yQjZhN1NkTkpTSnUxOVRXZXE0WFEtZWxhZW1NNXYtQ2sya0VGLURmS01lMkctNVY3c2ZhN0ZGRFgwWHlabTFkeS1hcUZ1dDZ3cnpPQ3hha2IzVE11M0pqbklmU0diczBqTFBNZC1QZGp6VzNTSnJVSjJoWkJUQjVORG4tYUJmMEJtSUNUdVpEaGt6OTM3TjFOdVhXUHItZjRtZ25nU3NhZC1sVTVXNTRDTmxZbnlfeHNsdkpuMXhUYnE1MnpVQ0ZOclRWM1M4eHdXTzRXbFRZZVQtTS1iRVdXVWZMSGotcWg3MUxUYTFnSEEtanBCRHlZRUNIdGdpUFhsYjdYUndCZnRITzhMZVJ1dHFoVlVNb0duVjlxd0U4OGRuQVV3MG90R0hiYW5MWkxWVklzbWFRNzBfSUNrdzc5bVdtTXg0dExEYnRCaDI3c1I4TWFwLXZKR0wxSjRZYjZIV3ZqZjNqTWhFT0RGSDVMc1A1UzY2bDBiMGFSUy1fNVRQRzRJWDVydUpqb1ZfSHNVbldVeUN2YlAxSW5WVDdxVzJ1WHpLeUdmb0xWMDNHN05oQzY3YnhvUUdhS2xaOHNidkVvbTZtSHFlblhOYmwyR3NQdVJDRUdxREhWdF9ZcXhwUWxHc2hyLW5vUGhIUVhJNUNhY0hFU0ptVnI0TFVhZDE1TFBBUEstSkRoZWJ5MHJhUmZrR1ZrRlFtRGpxS1pOMmFMQjBsdjluY3FiYUU4eGJVVXlZVEpuNWdHVVhJMGtwaTdZR2NDbXd2eHpOQ09SeTV6N1BaVUpsR1pQVDBZcElJUUt6VnVpQmxSYnE4Y1BCWV9IRWdVV0p3enBGVHItdnBGN3NyNWFBWmkySnByWThsbDliSlExQmp3LVlBaDIyZXp6UnR6cU9rTzJmTDBlSVpON0tiWllMdm1oME1zTFl2S2ZYYllhQlY2VHNZRGtHUDY4U1lIVExLZTU4VzZxSTZrZHl1ZTBDc0g4SjI4WGYyZHV1bm9wQ3R2Z09ld1ZmUkN5alJGeHZKSHl1bWhQVXpNMzdjblpLcUhfSm02Qlh5S1FVN3lIcHl0NnlRPT0=
-
-# Feature SyncDelta JIRA configuration
-Feature_SyncDelta_JIRA_DELTA_TOKEN_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEbm0yRUJ6VUJKbUwyRW5kMnRaNW4wM2YxMkJUTXVXZUdmdVRCaUZIVHU2TTV2RWZLRmUtZkcwZE4yRUNlNDQ0aUJWYjNfdVg5YjV5c2JwMHhoUUYxZWdkeS11bXR0eGxRLWRVaVU3cUVQZWJlNDRtY1lWUDdqeDVFSlpXS0VFX21WajlRS3lHQjc0bS11akkybWV3QUFlR2hNWUNYLUdiRjZuN2dQODdDSExXWG1Dd2ZGclI2aUhlSWhETVZuY3hYdnhkb2c2LU1JTFBvWFpTNmZtMkNVOTZTejJwbDI2eGE0OS1xUlIwQnlCSmFxRFNCeVJNVzlOMDhTR1VUamx4RDRyV3p6Tk9qVHBrWWdySUM3TVRaYjd3N0JHMFhpdzFhZTNDLTFkRVQ2RVE4U19COXRhRWtNc0NVOHRqUS1CRDFpZ19xQmtFLU9YSDU3TXBZQXpVcld3PT0=
-
-# Debug Configuration
-APP_DEBUG_CHAT_WORKFLOW_ENABLED = True
-APP_DEBUG_CHAT_WORKFLOW_DIR = ./test-chat
\ No newline at end of file
diff --git a/env_int.20251012_121418.backup b/env_int.20251012_121418.backup
deleted file mode 100644
index 4a0f3e39..00000000
--- a/env_int.20251012_121418.backup
+++ /dev/null
@@ -1,90 +0,0 @@
-# Integration Environment Configuration
-
-# System Configuration
-APP_ENV_TYPE = int
-APP_ENV_LABEL = Integration Instance
-APP_API_URL = https://gateway-int.poweron-center.net
-APP_KEY_SYSVAR = CONFIG_KEY
-APP_INIT_PASS_ADMIN_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjWm41MWZ4TUZGaVlrX3pWZWNwakJsY3Facm0wLVZDd1VKeTFoZEVZQnItcEdUUnVJS1NXeDBpM2xKbGRsYmxOSmRhc29PZjJSU2txQjdLbUVrTTE1NEJjUXBHbV9NOVJWZUR3QlJkQnJvTEU9
-APP_INIT_PASS_EVENT_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjdmtrakgxa0djekZVNGtTZV8wM2I5UUpCZllveVBMWXROYk5yS3BiV3JEelJSM09VYTRONHpnY3VtMGxDRk5JTEZSRFhtcDZ0RVRmZ1RicTFhb3c5dVZRQ1o4SmlkLVpPTW5MMTU2eTQ0Vkk9
-
-# PostgreSQL Storage (new)
-DB_APP_HOST=gateway-int-server.postgres.database.azure.com
-DB_APP_DATABASE=poweron_app
-DB_APP_USER=heeshkdlby
-DB_APP_PASSWORD_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjb2dka2pnN0tUbW1EU0w1Rk1jNERKQ0Z1U3JkVDhuZWZDM0g5M0kwVDE5VHdubkZna3gtZVAxTnl4MDdrR1c1ZXJ3ejJHYkZvcGUwbHJaajBGOWJob0EzRXVHc0JnZkJyNGhHZTZHOXBxd2c9
-DB_APP_PORT=5432
-
-# PostgreSQL Storage (new)
-DB_CHAT_HOST=gateway-int-server.postgres.database.azure.com
-DB_CHAT_DATABASE=poweron_chat
-DB_CHAT_USER=heeshkdlby
-DB_CHAT_PASSWORD_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjczYzOUtTa21MMGJVTUQ5UmFfdWc3YlhCbWZOeXFaNEE1QzdJV3BLVjhnalBkLVVCMm5BZzdxdlFXQXc2RHYzLWtPSFZkZE1iWG9rQ1NkVWlpRnF5TURVbnl1cm9iYXlSMGYxd1BGYVc0VDA9
-DB_CHAT_PORT=5432
-
-# PostgreSQL Storage (new)
-DB_MANAGEMENT_HOST=gateway-int-server.postgres.database.azure.com
-DB_MANAGEMENT_DATABASE=poweron_management
-DB_MANAGEMENT_USER=heeshkdlby
-DB_MANAGEMENT_PASSWORD_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjTnJKNlJMNmEwQ0Y5dVNrR3pkZk9SQXVvLTRTNW9lQ1g3TTE5cFhBNTd5UENqWW9qdWd3NWNseWhnUHJveDJyd1Z3X1czS3VuZnAwZHBXYVNQWlZsRy12ME42NndEVlR5X3ZPdFBNNmhLYm89
-DB_MANAGEMENT_PORT=5432
-
-# Security Configuration
-APP_JWT_KEY_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjNUctb2RwU25iR3ZnanBOdHZhWUtIajZ1RnZzTEp4aDR0MktWRjNoeVBrY1Npd1R0VE9YVHp3M2w1cXRzbUxNaU82QUJvaDNFeVQyN05KblRWblBvbWtoT0VXbkNBbDQ5OHhwSUFnaDZGRG10Vmgtdm1YUkRsYUhFMzRVZURmSFlDTFIzVWg4MXNueDZyMGc5aVpFdWRxY3dkTExGM093ZTVUZVl5LUhGWnlRPQ==
-APP_TOKEN_EXPIRY=300
-
-# CORS Configuration
-APP_ALLOWED_ORIGINS=http://localhost:8080,https://playground.poweron-center.net,https://playground-int.poweron-center.net,http://localhost:5176,https://nyla.poweron-center.net, https://nyla-int.poweron-center.net
-
-# Logging configuration
-APP_LOGGING_LOG_LEVEL = DEBUG
-APP_LOGGING_LOG_DIR = /home/site/wwwroot/
-APP_LOGGING_FORMAT = %(asctime)s - %(levelname)s - %(name)s - %(message)s
-APP_LOGGING_DATE_FORMAT = %Y-%m-%d %H:%M:%S
-APP_LOGGING_CONSOLE_ENABLED = True
-APP_LOGGING_FILE_ENABLED = True
-APP_LOGGING_ROTATION_SIZE = 10485760
-APP_LOGGING_BACKUP_COUNT = 5
-
-# Service Redirects
-Service_MSFT_REDIRECT_URI = https://gateway-int.poweron-center.net/api/msft/auth/callback
-Service_GOOGLE_REDIRECT_URI = https://gateway-int.poweron-center.net/api/google/auth/callback
-
-# OpenAI configuration
-Connector_AiOpenai_API_URL = https://api.openai.com/v1/chat/completions
-Connector_AiOpenai_API_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjSDBNYkptSkQxTUotYVVpZVNZc0dxNGNwSEtkOEE0T3RZWjROTEhSRlRXdlZmQUxxZ0w3Y0xOV2JNV19LNF9yTUZiU1pUNG15U2VDUDdSVlI4VlpnR3JXVFFtcXBaTEZiaUtSclVFd0lCZG1rWVhra1dfWTVQOTBEYUU0MjByYVNEMTFmeXNOcmpUT216MmJKdlVPeW5nPT0=
-Connector_AiOpenai_MODEL_NAME = gpt-4o
-Connector_AiOpenai_TEMPERATURE = 0.2
-Connector_AiOpenai_MAX_TOKENS = 2000
-
-# Anthropic configuration
-Connector_AiAnthropic_API_URL = https://api.anthropic.com/v1/messages
-Connector_AiAnthropic_API_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjT1ZlRWVJdVZMT3ljSFJDcFdxRFBRVkZhS204NnN5RDBlQ0tpenhTM0FFVktuWW9mWHNwRWx2dHB0eDBSZ0JFQnZKWlp6c01pVGREWHd1eGpERnU0Q2xhaks1clQ1ZXVsdnd2ZzhpNXNQS1BhY3FjSkdkVEhHalNaRGR4emhpakZncnpDQUVxOHVXQzVUWmtQc0FsYmFwTF9TSG5FOUFtWk5Ick1NcHFvY2s1T1c2WXlRUFFJZnh6TWhuaVpMYmppcDR0QUx0a0R6RXlwbGRYb1R4dzJkUT09
-Connector_AiAnthropic_MODEL_NAME = claude-3-5-sonnet-20241022
-Connector_AiAnthropic_TEMPERATURE = 0.2
-Connector_AiAnthropic_MAX_TOKENS = 2000
-
-# Perplexity AI configuration
-Connector_AiPerplexity_API_URL = https://api.perplexity.ai/chat/completions
-Connector_AiPerplexity_API_SECRET = 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
-Connector_AiPerplexity_MODEL_NAME = sonar
-Connector_AiPerplexity_TEMPERATURE = 0.2
-Connector_AiPerplexity_MAX_TOKENS = 2000
-
-# Agent Mail configuration
-Service_MSFT_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
-Service_MSFT_CLIENT_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjNzB2M3ZjaE1SVE9ON2FKam9yVURxcHl1Ym5VNVUtS0MyWUpNVXVlaWpWS2U3VVd3em9vQl9lcnVYay03bS04YjNBbDZZNTB4eUtjT3ppQjJjY3dOT0FNLW9LeDhIUU5iaTNqNURUWE5La3kzaHNGcU9yNVI0YjhWZTZRRFktcTk=
-Service_MSFT_TENANT_ID = common
-
-# Google Service configuration
-Service_GOOGLE_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
-Service_GOOGLE_CLIENT_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjNThGeVRNd3hacThtRnE0bzlDa0JPUWQyaEd6QjlFckdsMGZjRlRfUks2bXV3aDdVRTF3LVRlZVY5WjVzSXV4ZGNnX002RDl3dkNYdGFzZkxVUW01My1wTHRCanVCLUozZEx4TlduQlB5MnpvNTR2SGlvbFl1YkhzTEtsSi1SOEo=
-
-# Tavily Web Search configuration
-Connector_WebTavily_API_KEY_SECRET = INT_ENC:Z0FBQUFBQm8xSVRkdkJMTDY0akhXNzZDWHVYSEt1cDZoOWEzSktneHZEV2JndTNmWlNSMV9KbFNIZmQzeVlrNE5qUEIwcUlBSGM1a0hOZ3J6djIyOVhnZzI3M1dIUkdicl9FVXF3RGktMmlEYmhnaHJfWTdGUkktSXVUSGdQMC1vSEV6VE8zR2F1SVk=
-
-# Google Cloud Speech Services configuration
-Connector_GoogleSpeech_API_KEY_SECRET = INT_ENC:Z0FBQUFBQm8xSVRkNmVXZ1pWcHcydTF2MXF0ZGJoWHBydF85bTczTktiaEJ3Wk1vMW1mZVhDSG1yd0ZxR2ZuSGJTX0N3MWptWXFJTkNTWjh1SUVVTXI4UDVzcGdLMkU5SHJ2TUpkRlRoRWdnSldtYjNTQkh4UDJHY2xmdTdZQ1ZiMTZZcGZxS3RzaHdjV3dtVkZUcEpJcWx0b2xuQVR6ZmpoVFZPY1hNMTV2SnhDaC1IZEh4UUpLTy1ILXA4RG1zamJTbUJ4X0t2M2NkdzJPbEJxSmFpRzV3WC0wZThoVzlxcmpHZ3ZkLVlVY3REZk1vV19WQ05BOWN6cnJ4MWNYYnNiQ0FQSUVnUlpfM3BhMnlsVlZUOG5wM3pzM1lSN1UzWlZKUXRLczlHbjI1LTFvSUJ4SlVXMy1BNk43bE5Hb0RfTTVlWk9oZnFIaVg0SW5pbm9EcXRTTzU1RFlYY3dTcnpKWWNyNjN5T1BGZ0FmX253cEFncmhvZVRuM05KYzhkOEhFMFJsc2NBSEwzZVZ1R0JMOGxsekVwUE55alZaRXFrdzNWWVNGWXNmbnhKeWhQSFo2VXBTUlRPeHdvdVdncEFuOWgydEtsSUFneUN6cGVaTnBSdjNCdVJseGJFdmlMc203UFhLVlYyTENkaGg2dVN6Z2xwT1ZmTmN5bVZGUkM3ZWcyVkt2ckFUVVd3WFFwYnJjNVRobEh2SkVJbXRwUUpEOFJKQ1NUc0Q4NHNqUFhPSDh5cTV6MEcwSDEwRUJCQ2JiTTJlOE5nd3pMMkJaQ1dVYjMwZVVWWnlETmp2dkZ3aXEtQ29WNkxZTFkzYUkxdTlQUU1OTnhWWU12YU9MVnJQa1d2ZjRtUlhneTNubEMxTmp1eUNPOThSMlB3Y1F0T2tCdFNsNFlKalZPV25yR2QycVBUb096RmZ1V0FTaGsxLV9FWDBmenBIOXpMdGpLcUc0TWRoY2hlMFhYTzlET1ZRekw0ZHNwUVBQdVJBX2h6Q2ZzWVZJWTNybTJiekp3WmhmWF9SUFBXQzlqUjctcVlHWWVMZWVQallzR0JGTVF0WmtnWlg1aTM1bFprNVExZXY5dnNvWF93UjhwbkJ3RzNXaVJ2d2RRU3JJVlBvaVh4eTlBRUtqWkJia3dJQVVBV2Nqdm9FUTRUVW1TaHp2ZUwxT0N2ZndxQ2Nka1RYWXF0LWxIWFE0dTFQcVhncFFPM0hFdUUtYlFnemx3WkF4bjA1aDFULUdrZlVZbEJtRGRCdjJyVkdJSXozd0I0dF9zbWhOeHFqRDA4T1NVaWR5cjBwSVgwbllPU294NjZGTnM1bFhIdGpNQUxFOENWd3FCbGpSRFRmRXotQnU0N2lCVEU5RGF6Qi10S2U2NGdadDlrRjZtVE5oZkw5ZWFjXzhCTmxXQzNFTFgxRXVYY3J3YkxnbnlBSm9PY3h4MlM1NVFQbVNDRW5Ld1dvNWMxSmdoTXJuaE1pT2VFeXYwWXBHZ29MZDVlN2lwUUNIeGNCVVdQVi1rRXdJMWFncUlPTXR0MmZVQ1l0d09mZTdzWGFBWUJMUFd3b0RSOU8zeER2UWpNdzAxS0ZJWnB5S3FJdU9wUDJnTTNwMWw3VFVqVXQ3ZGZnU1RkUktkc0NhUHJ0SGFxZ0lVWDEzYjNtU2JfMGNWM1Y0dHlCTzNESEdENC1jUWF5MVppRzR1QlBNSUJySjFfRi1ENHEwcmJ4S3hQUFpXVHA0TG9DZWdoUlo5WnNSM1lCZm1KbEs2ak1yUUU4Wk9JcVJGUkJwc0NvUkMyTjhoTWxtZmVQeDREZVRKZkhYN2duLVNTeGZzdFdBVnhEandJSXB5QjM0azF0ckI3Tk1wSzFhNGVOUVRrNjU0cG9JQ29pN09xOFkwR1lMTlktaGp4TktxdTVtTnNEcldsV2pEZm5nQWpJc2hxY0hjQnVSWUR5VVdaUXBHWUloTzFZUC1oNzJ4UjZ1dnpLcDJxWEZtQlNIMWkzZ0hXWXdKeC1iLXdZWVJhcU04VFlpMU5pd2ZIdTdCdkVWVFVBdmJuRk16bEFFQTh4alBrcTV2RzliT2hGdTVPOXlRMjFuZktiRTZIamQ1VFVqS0hRTXhxcU1mdkgyQ1NjQmZfcjl4c3NJd0RIeDVMZUFBbHJqdEJxWWl3aWdGUEQxR3ZnMkNGdVB4RUxkZi1xOVlFQXh1NjRfbkFEaEJ5TVZlUGFrWVhSTVRPeGxqNlJDTHNsRWRrei1pYjhnUmZrb3BvWkQ2QXBzYjFHNXZoWU1LSExhLWtlYlJTZlJmYUM5Y1Rhb1pkMVYyWTByM3NTS0VXMG1ybm1BTVN2QXRYaXZqX2dKSkZrajZSS2cyVlNOQnd5Y29zMlVyaWlNbTJEb3FuUFFtbWNTNVpZTktUenFZSl91cVFXZjRkQUZyYmtPczU2S1RKQ19ONGFOTHlwX2hOOEE1UHZEVjhnT0xxRjMxTEE4SHhRbmlmTkZwVXJBdlJDbU5oZS05SzI4QVhEWDZaN2ZiSlFwUGRXSnB5TE9MZV9ia3pYcmZVa1dicG5FMHRXUFZXMWJQVDAwOEdDQzJmZEl0ZDhUOEFpZXZWWXl5Q2xwSmFienNCMldlb2NKb2ZRYV9KbUdHRzNUcjU1VUFhMzk1a2J6dDVuNTl6NTdpM0hGa3k0UWVtbF9pdDVsQVp2cndDLUU5dnNYOF9CLS0ySXhBSFdCSnpqV010bllBb3U0cEZZYVF5R2tSNFM5NlRhdS1fb1NqbDBKMkw0V2N0VEZhNExtQlR3ckZ3cVlCeHVXdXJ6X0s4cEtsaG5rVUxCN2RRbHQxTmcyVFBqYUxyOHJzeFBXVUJaRHpXbUoxdHZzMFBzQk1UTUFvX1pGNFNMNDFvZWdTdEUtMUNKMXNIeVlvQk1CeEdpZVdmN0tsSDVZZHJXSGt5c2o2MHdwSTZIMVBhRzM1eU43Q2FtcVNidExxczNJeUx5U2RuUG5EeHpCTlg2SV9WNk1ET3BRNXFuc0pNWlVvZUYtY21oRGtJSmwxQ09QbHBUV3BuS3B5NE9RVkhfellqZjJUQ0diSV94QlhQWmdaaC1TRWxsMUVWSXB0aE1McFZDZDNwQUVKZ2t5cXRTXzlRZVJwN0pZSnJSV21XMlh0TzFRVEl0c2I4QjBxOGRCYkNxek04a011X1lrb2poQ3h2LUhKTGJiUlhneHp5QWFBcE5nMElkNTVzM3JGOWtUQ19wNVBTaVVHUHFDNFJnNXJaWDNBSkMwbi1WbTdtSnFySkhNQl9ZQjZrR2xDcXhTRExhMmNHcGlyWjR3ZU9SSjRZd1l4ZjVPeHNiYk53SW5SYnZPTzNkd1lnZmFseV9tQ3BxM3lNYVBHT0J0elJnMTByZ3VHemxta0tVQzZZRllmQ2VLZ1ZCNDhUUTc3LWNCZXBMekFwWW1fQkQ1NktzNGFMYUdYTU0xbXprY1FONUNlUHNMY3h2NFJMMmhNa3VNdzF4TVFWQk9odnJUMjFJMVd3Z2N6Sms5aEM2SWlWZFViZ0JWTEpUWWM5NmIzOS1oQmRqdkt1NUUycFlVcUxERUZGbnZqTUxIYnJmMDBHZDEzbnJsWEEzSUo3UmNPUDg1dnRUU1FzcWtjTWZwUG9zM0JTY3RqMDdST2UxcXFTM0d0bGkwdFhnMk5LaUlxNWx3V1pLaVlLUFJXZzBzVl9Ia1V1OHdYUEFWOU50UndycGtCdzM0Q0NQamp2VTNqbFBLaGhsbUk5dUI5MjU5OHVySk1oY0drUWtXUloyVVRvOWJmbUVYRzFVeWNQczh2NXJCeVppRlZiWDNJaDhOSmRmX2lURTNVS3NXQXFZT1QtUmdvMWJoVWYxU3lqUUJhbzEyX3I3TXhwbm9wc1FoQ1ZUTlNBRjMyQTBTY2tzbHZ3RFUtTjVxQ0o1QXRTVks2WENwMGZCRGstNU1jN3FhUFJCQThyaFhhMVRsbnlSRXNGRmt3Yk01X21ldmV3bTItWm1JaGpZQWZROEFtT1d1UUtPQlhYVVFqT2NxLUxQenJHX3JfMEdscDRiMXcyZ1ZmU3NFMzVoelZJaDlvT0ZoRGQ2bmtlM0M5ZHlCd2ZMbnRZRkZUWHVBUEx4czNfTmtMckh5eXZrZFBzOEItOGRYOEhsMzBhZ0xlOWFjZzgteVBsdnpPT1pYdUxnbFNXYnhKaVB6QUxVdUJCOFpvU2x2c1FHZV94MDBOVWJhYkxISkswc0U5UmdPWFJLXzZNYklHTjN1QzRKaldKdEVHb0pOU284N3c2LXZGMGVleEZ5NGZ6OGV1dm1tM0J0aTQ3VFlNOEJrdEh3PT0=
-
-# Feature SyncDelta JIRA configuration
-Feature_SyncDelta_JIRA_DELTA_TOKEN_SECRET = INT_ENC:Z0FBQUFBQm8xSVRkTUNsWm4wX0p6eXFDZmJ4dFdHNEs1MV9MUzdrb3RzeC1jVWVYZ0REWHRyZkFiaGZLcUQtTXFBZzZkNzRmQ0gxbEhGbUNlVVFfR1JEQTc0aldkZkgyWnBOcjdlUlZxR0tDTEdKRExULXAyUEtsVmNTMkRKU1BJNnFiM0hlMXo4YndMcHlRMExtZDQ3Zm9vNFhMcEZCcHpBPT0=
diff --git a/env_int.20251013_140140.backup b/env_int.20251013_140140.backup
deleted file mode 100644
index 28188ffb..00000000
--- a/env_int.20251013_140140.backup
+++ /dev/null
@@ -1,94 +0,0 @@
-# Integration Environment Configuration
-
-# System Configuration
-APP_ENV_TYPE = int
-APP_ENV_LABEL = Integration Instance
-APP_API_URL = https://gateway-int.poweron-center.net
-APP_KEY_SYSVAR = CONFIG_KEY
-APP_INIT_PASS_ADMIN_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjWm41MWZ4TUZGaVlrX3pWZWNwakJsY3Facm0wLVZDd1VKeTFoZEVZQnItcEdUUnVJS1NXeDBpM2xKbGRsYmxOSmRhc29PZjJSU2txQjdLbUVrTTE1NEJjUXBHbV9NOVJWZUR3QlJkQnJvTEU9
-APP_INIT_PASS_EVENT_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjdmtrakgxa0djekZVNGtTZV8wM2I5UUpCZllveVBMWXROYk5yS3BiV3JEelJSM09VYTRONHpnY3VtMGxDRk5JTEZSRFhtcDZ0RVRmZ1RicTFhb3c5dVZRQ1o4SmlkLVpPTW5MMTU2eTQ0Vkk9
-
-# PostgreSQL Storage (new)
-DB_APP_HOST=gateway-int-server.postgres.database.azure.com
-DB_APP_DATABASE=poweron_app
-DB_APP_USER=heeshkdlby
-DB_APP_PASSWORD_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjb2dka2pnN0tUbW1EU0w1Rk1jNERKQ0Z1U3JkVDhuZWZDM0g5M0kwVDE5VHdubkZna3gtZVAxTnl4MDdrR1c1ZXJ3ejJHYkZvcGUwbHJaajBGOWJob0EzRXVHc0JnZkJyNGhHZTZHOXBxd2c9
-DB_APP_PORT=5432
-
-# PostgreSQL Storage (new)
-DB_CHAT_HOST=gateway-int-server.postgres.database.azure.com
-DB_CHAT_DATABASE=poweron_chat
-DB_CHAT_USER=heeshkdlby
-DB_CHAT_PASSWORD_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjczYzOUtTa21MMGJVTUQ5UmFfdWc3YlhCbWZOeXFaNEE1QzdJV3BLVjhnalBkLVVCMm5BZzdxdlFXQXc2RHYzLWtPSFZkZE1iWG9rQ1NkVWlpRnF5TURVbnl1cm9iYXlSMGYxd1BGYVc0VDA9
-DB_CHAT_PORT=5432
-
-# PostgreSQL Storage (new)
-DB_MANAGEMENT_HOST=gateway-int-server.postgres.database.azure.com
-DB_MANAGEMENT_DATABASE=poweron_management
-DB_MANAGEMENT_USER=heeshkdlby
-DB_MANAGEMENT_PASSWORD_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjTnJKNlJMNmEwQ0Y5dVNrR3pkZk9SQXVvLTRTNW9lQ1g3TTE5cFhBNTd5UENqWW9qdWd3NWNseWhnUHJveDJyd1Z3X1czS3VuZnAwZHBXYVNQWlZsRy12ME42NndEVlR5X3ZPdFBNNmhLYm89
-DB_MANAGEMENT_PORT=5432
-
-# Security Configuration
-APP_JWT_KEY_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjNUctb2RwU25iR3ZnanBOdHZhWUtIajZ1RnZzTEp4aDR0MktWRjNoeVBrY1Npd1R0VE9YVHp3M2w1cXRzbUxNaU82QUJvaDNFeVQyN05KblRWblBvbWtoT0VXbkNBbDQ5OHhwSUFnaDZGRG10Vmgtdm1YUkRsYUhFMzRVZURmSFlDTFIzVWg4MXNueDZyMGc5aVpFdWRxY3dkTExGM093ZTVUZVl5LUhGWnlRPQ==
-APP_TOKEN_EXPIRY=300
-
-# CORS Configuration
-APP_ALLOWED_ORIGINS=http://localhost:8080,https://playground.poweron-center.net,https://playground-int.poweron-center.net,http://localhost:5176,https://nyla.poweron-center.net, https://nyla-int.poweron-center.net
-
-# Logging configuration
-APP_LOGGING_LOG_LEVEL = DEBUG
-APP_LOGGING_LOG_DIR = /home/site/wwwroot/
-APP_LOGGING_FORMAT = %(asctime)s - %(levelname)s - %(name)s - %(message)s
-APP_LOGGING_DATE_FORMAT = %Y-%m-%d %H:%M:%S
-APP_LOGGING_CONSOLE_ENABLED = True
-APP_LOGGING_FILE_ENABLED = True
-APP_LOGGING_ROTATION_SIZE = 10485760
-APP_LOGGING_BACKUP_COUNT = 5
-
-# Service Redirects
-Service_MSFT_REDIRECT_URI = https://gateway-int.poweron-center.net/api/msft/auth/callback
-Service_GOOGLE_REDIRECT_URI = https://gateway-int.poweron-center.net/api/google/auth/callback
-
-# OpenAI configuration
-Connector_AiOpenai_API_URL = https://api.openai.com/v1/chat/completions
-Connector_AiOpenai_API_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjSDBNYkptSkQxTUotYVVpZVNZc0dxNGNwSEtkOEE0T3RZWjROTEhSRlRXdlZmQUxxZ0w3Y0xOV2JNV19LNF9yTUZiU1pUNG15U2VDUDdSVlI4VlpnR3JXVFFtcXBaTEZiaUtSclVFd0lCZG1rWVhra1dfWTVQOTBEYUU0MjByYVNEMTFmeXNOcmpUT216MmJKdlVPeW5nPT0=
-Connector_AiOpenai_MODEL_NAME = gpt-4o
-Connector_AiOpenai_TEMPERATURE = 0.2
-Connector_AiOpenai_MAX_TOKENS = 2000
-
-# Anthropic configuration
-Connector_AiAnthropic_API_URL = https://api.anthropic.com/v1/messages
-Connector_AiAnthropic_API_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjT1ZlRWVJdVZMT3ljSFJDcFdxRFBRVkZhS204NnN5RDBlQ0tpenhTM0FFVktuWW9mWHNwRWx2dHB0eDBSZ0JFQnZKWlp6c01pVGREWHd1eGpERnU0Q2xhaks1clQ1ZXVsdnd2ZzhpNXNQS1BhY3FjSkdkVEhHalNaRGR4emhpakZncnpDQUVxOHVXQzVUWmtQc0FsYmFwTF9TSG5FOUFtWk5Ick1NcHFvY2s1T1c2WXlRUFFJZnh6TWhuaVpMYmppcDR0QUx0a0R6RXlwbGRYb1R4dzJkUT09
-Connector_AiAnthropic_MODEL_NAME = claude-3-5-sonnet-20241022
-Connector_AiAnthropic_TEMPERATURE = 0.2
-Connector_AiAnthropic_MAX_TOKENS = 2000
-
-# Perplexity AI configuration
-Connector_AiPerplexity_API_URL = https://api.perplexity.ai/chat/completions
-Connector_AiPerplexity_API_SECRET = INT_ENC:Z0FBQUFBQm82Mzk2UWZJdUFhSW8yc3RKc0tKRXphd0xWMkZOVlFpSGZ4SGhFWnk0cTF5VjlKQVZjdS1QSWdkS0pUSWw4OFU5MjUxdTVQel9aeWVIZTZ5TXRuVmFkZG0zWEdTOGdHMHpsTzI0TGlWYURKU1Q0VVpKTlhxUk5FTmN6SUJScDZ3ZldIaUJZcWpaQVRiSEpyQm9tRTNDWk9KTnZBPT0=
-Connector_AiPerplexity_MODEL_NAME = sonar
-Connector_AiPerplexity_TEMPERATURE = 0.2
-Connector_AiPerplexity_MAX_TOKENS = 2000
-
-# Agent Mail configuration
-Service_MSFT_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
-Service_MSFT_CLIENT_SECRET = KDH8Q~H2OCtdvYy5yx6HOCYEbdnJCq90G21vTcPw
-Service_MSFT_TENANT_ID = common
-
-# Google Service configuration
-Service_GOOGLE_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
-Service_GOOGLE_CLIENT_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjNThGeVRNd3hacThtRnE0bzlDa0JPUWQyaEd6QjlFckdsMGZjRlRfUks2bXV3aDdVRTF3LVRlZVY5WjVzSXV4ZGNnX002RDl3dkNYdGFzZkxVUW01My1wTHRCanVCLUozZEx4TlduQlB5MnpvNTR2SGlvbFl1YkhzTEtsSi1SOEo=
-
-# Tavily Web Search configuration
-Connector_WebTavily_API_KEY_SECRET = INT_ENC:Z0FBQUFBQm8xSVRkdkJMTDY0akhXNzZDWHVYSEt1cDZoOWEzSktneHZEV2JndTNmWlNSMV9KbFNIZmQzeVlrNE5qUEIwcUlBSGM1a0hOZ3J6djIyOVhnZzI3M1dIUkdicl9FVXF3RGktMmlEYmhnaHJfWTdGUkktSXVUSGdQMC1vSEV6VE8zR2F1SVk=
-
-# Google Cloud Speech Services configuration
-Connector_GoogleSpeech_API_KEY_SECRET = INT_ENC:Z0FBQUFBQm8xSVRkNmVXZ1pWcHcydTF2MXF0ZGJoWHBydF85bTczTktiaEJ3Wk1vMW1mZVhDSG1yd0ZxR2ZuSGJTX0N3MWptWXFJTkNTWjh1SUVVTXI4UDVzcGdLMkU5SHJ2TUpkRlRoRWdnSldtYjNTQkh4UDJHY2xmdTdZQ1ZiMTZZcGZxS3RzaHdjV3dtVkZUcEpJcWx0b2xuQVR6ZmpoVFZPY1hNMTV2SnhDaC1IZEh4UUpLTy1ILXA4RG1zamJTbUJ4X0t2M2NkdzJPbEJxSmFpRzV3WC0wZThoVzlxcmpHZ3ZkLVlVY3REZk1vV19WQ05BOWN6cnJ4MWNYYnNiQ0FQSUVnUlpfM3BhMnlsVlZUOG5wM3pzM1lSN1UzWlZKUXRLczlHbjI1LTFvSUJ4SlVXMy1BNk43bE5Hb0RfTTVlWk9oZnFIaVg0SW5pbm9EcXRTTzU1RFlYY3dTcnpKWWNyNjN5T1BGZ0FmX253cEFncmhvZVRuM05KYzhkOEhFMFJsc2NBSEwzZVZ1R0JMOGxsekVwUE55alZaRXFrdzNWWVNGWXNmbnhKeWhQSFo2VXBTUlRPeHdvdVdncEFuOWgydEtsSUFneUN6cGVaTnBSdjNCdVJseGJFdmlMc203UFhLVlYyTENkaGg2dVN6Z2xwT1ZmTmN5bVZGUkM3ZWcyVkt2ckFUVVd3WFFwYnJjNVRobEh2SkVJbXRwUUpEOFJKQ1NUc0Q4NHNqUFhPSDh5cTV6MEcwSDEwRUJCQ2JiTTJlOE5nd3pMMkJaQ1dVYjMwZVVWWnlETmp2dkZ3aXEtQ29WNkxZTFkzYUkxdTlQUU1OTnhWWU12YU9MVnJQa1d2ZjRtUlhneTNubEMxTmp1eUNPOThSMlB3Y1F0T2tCdFNsNFlKalZPV25yR2QycVBUb096RmZ1V0FTaGsxLV9FWDBmenBIOXpMdGpLcUc0TWRoY2hlMFhYTzlET1ZRekw0ZHNwUVBQdVJBX2h6Q2ZzWVZJWTNybTJiekp3WmhmWF9SUFBXQzlqUjctcVlHWWVMZWVQallzR0JGTVF0WmtnWlg1aTM1bFprNVExZXY5dnNvWF93UjhwbkJ3RzNXaVJ2d2RRU3JJVlBvaVh4eTlBRUtqWkJia3dJQVVBV2Nqdm9FUTRUVW1TaHp2ZUwxT0N2ZndxQ2Nka1RYWXF0LWxIWFE0dTFQcVhncFFPM0hFdUUtYlFnemx3WkF4bjA1aDFULUdrZlVZbEJtRGRCdjJyVkdJSXozd0I0dF9zbWhOeHFqRDA4T1NVaWR5cjBwSVgwbllPU294NjZGTnM1bFhIdGpNQUxFOENWd3FCbGpSRFRmRXotQnU0N2lCVEU5RGF6Qi10S2U2NGdadDlrRjZtVE5oZkw5ZWFjXzhCTmxXQzNFTFgxRXVYY3J3YkxnbnlBSm9PY3h4MlM1NVFQbVNDRW5Ld1dvNWMxSmdoTXJuaE1pT2VFeXYwWXBHZ29MZDVlN2lwUUNIeGNCVVdQVi1rRXdJMWFncUlPTXR0MmZVQ1l0d09mZTdzWGFBWUJMUFd3b0RSOU8zeER2UWpNdzAxS0ZJWnB5S3FJdU9wUDJnTTNwMWw3VFVqVXQ3ZGZnU1RkUktkc0NhUHJ0SGFxZ0lVWDEzYjNtU2JfMGNWM1Y0dHlCTzNESEdENC1jUWF5MVppRzR1QlBNSUJySjFfRi1ENHEwcmJ4S3hQUFpXVHA0TG9DZWdoUlo5WnNSM1lCZm1KbEs2ak1yUUU4Wk9JcVJGUkJwc0NvUkMyTjhoTWxtZmVQeDREZVRKZkhYN2duLVNTeGZzdFdBVnhEandJSXB5QjM0azF0ckI3Tk1wSzFhNGVOUVRrNjU0cG9JQ29pN09xOFkwR1lMTlktaGp4TktxdTVtTnNEcldsV2pEZm5nQWpJc2hxY0hjQnVSWUR5VVdaUXBHWUloTzFZUC1oNzJ4UjZ1dnpLcDJxWEZtQlNIMWkzZ0hXWXdKeC1iLXdZWVJhcU04VFlpMU5pd2ZIdTdCdkVWVFVBdmJuRk16bEFFQTh4alBrcTV2RzliT2hGdTVPOXlRMjFuZktiRTZIamQ1VFVqS0hRTXhxcU1mdkgyQ1NjQmZfcjl4c3NJd0RIeDVMZUFBbHJqdEJxWWl3aWdGUEQxR3ZnMkNGdVB4RUxkZi1xOVlFQXh1NjRfbkFEaEJ5TVZlUGFrWVhSTVRPeGxqNlJDTHNsRWRrei1pYjhnUmZrb3BvWkQ2QXBzYjFHNXZoWU1LSExhLWtlYlJTZlJmYUM5Y1Rhb1pkMVYyWTByM3NTS0VXMG1ybm1BTVN2QXRYaXZqX2dKSkZrajZSS2cyVlNOQnd5Y29zMlVyaWlNbTJEb3FuUFFtbWNTNVpZTktUenFZSl91cVFXZjRkQUZyYmtPczU2S1RKQ19ONGFOTHlwX2hOOEE1UHZEVjhnT0xxRjMxTEE4SHhRbmlmTkZwVXJBdlJDbU5oZS05SzI4QVhEWDZaN2ZiSlFwUGRXSnB5TE9MZV9ia3pYcmZVa1dicG5FMHRXUFZXMWJQVDAwOEdDQzJmZEl0ZDhUOEFpZXZWWXl5Q2xwSmFienNCMldlb2NKb2ZRYV9KbUdHRzNUcjU1VUFhMzk1a2J6dDVuNTl6NTdpM0hGa3k0UWVtbF9pdDVsQVp2cndDLUU5dnNYOF9CLS0ySXhBSFdCSnpqV010bllBb3U0cEZZYVF5R2tSNFM5NlRhdS1fb1NqbDBKMkw0V2N0VEZhNExtQlR3ckZ3cVlCeHVXdXJ6X0s4cEtsaG5rVUxCN2RRbHQxTmcyVFBqYUxyOHJzeFBXVUJaRHpXbUoxdHZzMFBzQk1UTUFvX1pGNFNMNDFvZWdTdEUtMUNKMXNIeVlvQk1CeEdpZVdmN0tsSDVZZHJXSGt5c2o2MHdwSTZIMVBhRzM1eU43Q2FtcVNidExxczNJeUx5U2RuUG5EeHpCTlg2SV9WNk1ET3BRNXFuc0pNWlVvZUYtY21oRGtJSmwxQ09QbHBUV3BuS3B5NE9RVkhfellqZjJUQ0diSV94QlhQWmdaaC1TRWxsMUVWSXB0aE1McFZDZDNwQUVKZ2t5cXRTXzlRZVJwN0pZSnJSV21XMlh0TzFRVEl0c2I4QjBxOGRCYkNxek04a011X1lrb2poQ3h2LUhKTGJiUlhneHp5QWFBcE5nMElkNTVzM3JGOWtUQ19wNVBTaVVHUHFDNFJnNXJaWDNBSkMwbi1WbTdtSnFySkhNQl9ZQjZrR2xDcXhTRExhMmNHcGlyWjR3ZU9SSjRZd1l4ZjVPeHNiYk53SW5SYnZPTzNkd1lnZmFseV9tQ3BxM3lNYVBHT0J0elJnMTByZ3VHemxta0tVQzZZRllmQ2VLZ1ZCNDhUUTc3LWNCZXBMekFwWW1fQkQ1NktzNGFMYUdYTU0xbXprY1FONUNlUHNMY3h2NFJMMmhNa3VNdzF4TVFWQk9odnJUMjFJMVd3Z2N6Sms1aEM2SWlWZFViZ0JWTEpUWWM5NmIzOS1oQmRqdkt1NUUycFlVcUxERUZGbnZqTUxIYnJmMDBHZDEzbnJsWEEzSUo3UmNPUDg1dnRUU1FzcWtjTWZwUG9zM0JTY3RqMDdST2UxcXFTM0d0bGkwdFhnMk5LaUlxNWx3V1pLaVlLUFJXZzBzVl9Ia1V1OHdYUEFWOU50UndycGtCdzM0Q0NQamp2VTNqbFBLaGhsbUk5dUI5MjU5OHVySk1oY0drUWtXUloyVVRvOWJmbUVYRzFVeWNQczh2NXJCeVppRlZiWDNJaDhOSmRmX2lURTNVS3NXQXFZT1QtUmdvMWJoVWYxU3lqUUJhbzEyX3I3TXhwbm9wc1FoQ1ZUTlNBRjMyQTBTY2tzbHZ3RFUtTjVxQ0o1QXRTVks2WENwMGZCRGstNU1jN3FhUFJCQThyaFhhMVRsbnlSRXNGRmt3Yk01X21ldmV3bTItWm1JaGpZQWZROEFtT1d1UUtPQlhYVVFqT2NxLUxQenJHX3JfMEdscDRiMXcyZ1ZmU3NFMzVoelZJaDlvT0ZoRGQ2bmtlM0M5ZHlCd2ZMbnRZRkZUWHVBUEx4czNfTmtMckh5eXZrZFBzOEItOGRYOEhsMzBhZ0xlOWFjZzgteVBsdnpPT1pYdUxnbFNXYnhKaVB6QUxVdUJCOFpvU2x2c1FHZV94MDBOVWJhYkxISkswc0U5UmdPWFJLXzZNYklHTjN1QzRKaldKdEVHb0pOU284N3c2LXZGMGVleEZ5NGZ6OGV1dm1tM0J0aTQ3VFlNOEJrdEh3PT0=
-
-# Feature SyncDelta JIRA configuration
-Feature_SyncDelta_JIRA_DELTA_TOKEN_SECRET = INT_ENC:Z0FBQUFBQm8xSVRkTUNsWm4wX0p6eXFDZmJ4dFdHNEs1MV9MUzdrb3RzeC1jVWVYZ0REWHRyZkFiaGZLcUQtTXFBZzZkNzRmQ0gxbEhGbUNlVVFfR1JEQTc0aldkZkgyWnBOcjdlUlZxR0tDTEdKRExULXAyUEtsVmNTMkRKU1BJNnFiM0hlMXo4YndMcHlRMExtZDQ3Zm9vNFhMcEZCcHpBPT0=
-
-# Debug Configuration
-APP_DEBUG_CHAT_WORKFLOW_ENABLED = FALSE
-APP_DEBUG_CHAT_WORKFLOW_DIR = ./test-chat
\ No newline at end of file
diff --git a/env_prod.20251012_121418.backup b/env_prod.20251012_121418.backup
deleted file mode 100644
index c1ba8086..00000000
--- a/env_prod.20251012_121418.backup
+++ /dev/null
@@ -1,90 +0,0 @@
-# Production Environment Configuration
-
-# System Configuration
-APP_ENV_TYPE = prod
-APP_ENV_LABEL = Production Instance
-APP_API_URL = https://gateway.poweron-center.net
-APP_KEY_SYSVAR = CONFIG_KEY
-APP_INIT_PASS_ADMIN_SECRET = PROD_ENC:Z0FBQUFBQm8xSU5pSXoyVEVwNDZ6cmthQTROUkxGUjh1UWF2UU5zaWRuX3p2aHJCVFo2NEstR0RqdnQ5clZmeVliRlhHZGFHTlhZV2dzMmRPZFVEemVlSHd5VHR3cmpNUXRaRlhZSFZ6d1dsX2Y5Zl9lOXdYdEU9
-APP_INIT_PASS_EVENT_SECRET = PROD_ENC:Z0FBQUFBQm8xSU5peGNMWExjWGZxQ2VndXVOSUVGcWhQTWd0N3d0blU3bGJvNjgzNVVNNktCQnZlTEtVckV5RUtQMjMwRTBkdmxEMlZwX0k1M1hlOFFNY3hjaWsyd2JmRGl2UWxfSXEwenVnQ3NmaTlxckp2VXM9
-
-# PostgreSQL Storage (new)
-DB_APP_HOST=gateway-prod-server.postgres.database.azure.com
-DB_APP_DATABASE=poweron_app
-DB_APP_USER=gzxxmcrdhn
-DB_APP_PASSWORD_SECRET = PROD_ENC:Z0FBQUFBQm8xSU5pVmtwYWZQakdWZnJPamVlRWJPa0tnc3daSVVHejVrQ0x1VFZZbHhVSkk0S2tFWl92T2NwWURBMU9UbFROMHZ2TkNKZFlEWjhJZDZ0bnFndC1oYjhNRW1VLWpEYnlDNEJwcGVKckpUVlp6YTg9
-DB_APP_PORT=5432
-
-# PostgreSQL Storage (new)
-DB_CHAT_HOST=gateway-prod-server.postgres.database.azure.com
-DB_CHAT_DATABASE=poweron_chat
-DB_CHAT_USER=gzxxmcrdhn
-DB_CHAT_PASSWORD_SECRET = PROD_ENC:Z0FBQUFBQm8xSU5pZVZnTzBPTDY1Q3c2U1pDV0lxbXhoWnlYSXRDWVhIeGJwSkdNMzMxR2h5a1FRN00xcWtYUE4ySGpqRllSaGM5SmRZZk9Bd2trVDJNZDdWcEFIbTJtel91MHpsazlTQnRsV2docGdBc0RVeEU9
-DB_CHAT_PORT=5432
-
-# PostgreSQL Storage (new)
-DB_MANAGEMENT_HOST=gateway-prod-server.postgres.database.azure.com
-DB_MANAGEMENT_DATABASE=poweron_management
-DB_MANAGEMENT_USER=gzxxmcrdhn
-DB_MANAGEMENT_PASSWORD_SECRET = PROD_ENC:Z0FBQUFBQm8xSU5pQXdaRnVEQUx2MmU5ck9XZzNfaGVoRXlYMlVjSVM5dWNTekhmR2VYNkd6WVhELUlkLWdFWWRWQ1JJLWZ4WUNwclZVRlg3ZHBCS0xwM1laNklTaEs1czFDRTMxYlV2TWNueEJlTHFyNEt4aVk9
-DB_MANAGEMENT_PORT=5432
-
-# Security Configuration
-APP_JWT_KEY_SECRET = PROD_ENC:Z0FBQUFBQm8xSU5pY3JfX1R3cEJhTjAzZGx2amtRSE4yVzZhMmY3a3FHam9BdzBxVWd5R0FRSW1KbmNGS3JDMktKTWptZm4wYmZZZTVDQkh3NVlxSW1MZEdiVWdORng4dm0xV08wZDh0YlBNQTdEbmlnVWduMzNWY1RPX1BqaGtnOTc2ZWNBTnNnd1AtaTNRUExpRThVdzNmdVFHM2hkTjFjcW0ya2szMWNaT3VDeDhXMlJ1NDM4PQ==
-APP_TOKEN_EXPIRY=300
-
-# CORS Configuration
-APP_ALLOWED_ORIGINS=http://localhost:8080,https://playground.poweron-center.net,https://playground-int.poweron-center.net,http://localhost:5176,https://nyla.poweron-center.net,https://nyla-int.poweron-center.net
-
-# Logging configuration
-APP_LOGGING_LOG_LEVEL = DEBUG
-APP_LOGGING_LOG_DIR = /home/site/wwwroot/
-APP_LOGGING_FORMAT = %(asctime)s - %(levelname)s - %(name)s - %(message)s
-APP_LOGGING_DATE_FORMAT = %Y-%m-%d %H:%M:%S
-APP_LOGGING_CONSOLE_ENABLED = True
-APP_LOGGING_FILE_ENABLED = True
-APP_LOGGING_ROTATION_SIZE = 10485760
-APP_LOGGING_BACKUP_COUNT = 5
-
-# Service Redirects
-Service_MSFT_REDIRECT_URI = https://gateway-prod.poweron-center.net/api/msft/auth/callback
-Service_GOOGLE_REDIRECT_URI = https://gateway-prod.poweron-center.net/api/google/auth/callback
-
-# OpenAI configuration
-Connector_AiOpenai_API_URL = https://api.openai.com/v1/chat/completions
-Connector_AiOpenai_API_SECRET = PROD_ENC:Z0FBQUFBQm8xSU5pU05XM2hMaExPMnpYeFpwRVhyYl9JZmRITmlmRDlWOUJSSWE4NTFLZUptSkJhNlEycHBLZmh3WFA2ZmU5VmxHZks1UUNVOUZnckZNdXZ2MTY2dFg1Nl8yWDRrcTRlT0tHYkhyRGZINTEzU25iYVFRMzJGeUZIdlc4LU9GbmpQYmtmU3lJT2VVZ1UzLVd3R25ZQ092SUVnPT0=
-Connector_AiOpenai_MODEL_NAME = gpt-4o
-Connector_AiOpenai_TEMPERATURE = 0.2
-Connector_AiOpenai_MAX_TOKENS = 2000
-
-# Anthropic configuration
-Connector_AiAnthropic_API_URL = https://api.anthropic.com/v1/messages
-Connector_AiAnthropic_API_SECRET = PROD_ENC:Z0FBQUFBQm8xSU5pNTA1RkZ3UllCOXVsNVZzbkw2Rkl1TWxCZ0wwWEVXUm9ReUhBcVl1cGFUdW9FRVh4elVxR0x3NVRxZkc4SkxHVFdzSU1YNG5Rb0FqSHJhdElwWm1iLWdubTVDcUl3UkVjVHNoU0xLa0ZTSFlfTlJUVXg4cVVwUWdlVDBTSFU5SnBzS0ZnVjlQcmtiNzV2UTNMck1IakZ0OWlubUtlWDZnMk4yX2JsZ1U4Wm1yT29fM2d2NVBNOWNBbWtTRWNyQ2tZNjhwSVF6bG5SU3dTenR2MzA3Z19NUT09
-Connector_AiAnthropic_MODEL_NAME = claude-3-5-sonnet-20241022
-Connector_AiAnthropic_TEMPERATURE = 0.2
-Connector_AiAnthropic_MAX_TOKENS = 2000
-
-# Perplexity AI configuration
-Connector_AiPerplexity_API_URL = https://api.perplexity.ai/chat/completions
-Connector_AiPerplexity_API_SECRET = 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
-Connector_AiPerplexity_MODEL_NAME = sonar
-Connector_AiPerplexity_TEMPERATURE = 0.2
-Connector_AiPerplexity_MAX_TOKENS = 2000
-
-# Agent Mail configuration
-Service_MSFT_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
-Service_MSFT_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQm8xSU5pVEhHdlZHU3FNMmhuRGVwaGc3YzIxSjlZNzBCQjlOV2pSYVNXb0t1ZnVwQzZsQzY4cHMtVlZtNF85OEVaV1BMTzdXMmpzaGZpaG1DalJ0bkNPMHA5ZUcwZjNDdGk1TFdxYTJSZnVrVmhhZ2VRUEZxbjJOOGFhWk9EYlY3dmRVTnI=
-Service_MSFT_TENANT_ID = common
-
-# Google Service configuration
-Service_GOOGLE_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
-Service_GOOGLE_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQm8xSU5pV2JEV0lNUXhwa1VTUGh2RWcyYnJHSFQyTmdBOEhwRkJWc3MwOFZlcHJGUmlGOVVFbG1XalNyUXVuaExESy1xeFNIQlRiSFVIWTB6Rm1fNFg0OHZZSkF4ZlBIcFZDMjZHcFRERXJ0WlVFclhHa29Za1BqWGxsM05NZGFRc1BLZnE=
-
-# Tavily Web Search configuration
-Connector_WebTavily_API_KEY_SECRET = PROD_ENC:Z0FBQUFBQm8xSU5pMjhJNS1CZFJubUlkN3ZrTUoxR0Y1QzJFWEJSMk0wQkI0UndqOW1UelVieWhGaTVBcHoxRXo1VjRzVVRROHFIeHMyS3Q5cDZCeUlEMzE1ZlhVTmNveFk5VmFQMm80NTRyVW1TZHVsR3dUN0RtMnd4LW1VWlpqOXJPeXZBTmg4OEM=
-
-# Google Cloud Speech Services configuration
-Connector_GoogleSpeech_API_KEY_SECRET = PROD_ENC:Z0FBQUFBQm8xSU5pNjlJdmFMeERXUUQzR0duRUY4cGRZRzdwQlpnVFAzSzQ5cHZNRnVUZ0xWd3dQMHR3QjVsdF92NmdUQlJGRk1RcG1RYWZzcE9RbEhjQmR5Yk5Ud3ZKTW5jbmpEVGJ2ZkxVeVJpcUxaT2lNREFXaks5WHg5aVlHcXlUZldMdnZGYklHWjlJOWJ6Wm5RSkNmdm5feENjS1E0QUVXTTE5SW5sNFBEeTJ1RjRmVm9SQUNIYmF2U1U2dklsbTVlWFpCcHMwTFF1SUg5NmNfcWhQRFlpeWt0U19HMXNuUHd2RFdrVl9XdUFaY0hWdVBPYWlybU1CdGlCN1A0RzZBbi1IUVJ1TWMxTE9Ea09sTURhcDFZb1JIUW1zUFJybW15MDcxOUtfVXA2N0xwMnFrczA1YTJaN05pRHhOYWNzMjVmUHdhbVdlemF3TEIzN0pJaVo3bGJBMXJnZmNYTXVJVDdmYkRXWTlBT2F2NmN4eTlteUI1SlJTOXc2WWFWUTBCZTJBVHRLVDhEVjBFeHE0Nmk1YkxYd3N3RXgtVUdGdlZFSmk4dHM0QjFmbktsQTctbmJMT0MtMDlKS1pUR0pELXBxckhULUUycjlBZmVJQjFrM0xEUm50U2ZabExtVjZ1WWZ1WnlobUZIOVlndjNydUZfczJUWVVRZURTd1lYazllaER4VU10cXUyVS1ZNG9Ha2hnbTAzOEpGMklFSWpWeVV5eFB2UlVWYmJJakZnOVM2R2lJSXRSM3VzVEZZNUVpNmVjRzdXRUJsT2hzcjhZWERFeGV5c1dFQVM3dkhGY2Q3ckNBRDZCcVdhZnZkdzM3QVNpODZYWE81TEIyZGUycldkSVRvbm5hR3Jib2UzOEtXdUpHQ2FyWDQtMDdQbC1ycEdfUzdXd0U2dHFIVjhoRDJ0YkNsWUpva1dzOGNPdXRpZjVwUldtT3FVN3RrZUhTN3JfX1M3LU9PaXZELWkzRmtMbjgxZGZ6ZjVJNW9RZW1nM2hqUXo4Z2I5Z2tSVTVMdUNLblRxOGQ1Y3F4SGZIbWo4YkFBV3FIbjB6LUxGNHdsQWgxQUM4bzVrblBObFFfVWNaQ3QwejQ1eGFlSXVIcXlyVEZEdzVKNV9pd2o4RW1UVjlqb3VMWnF0V1JTcWF1R0RjdUNjM2lLUHRqZDl2WWtXUnhmbVdxeHA3REFHTkdkMjM4LTllajBWQnd3RHlFSVdiUThfQnduOVFJdmR6OUVGN1lOYjBqclhadHozX21kRzlUT2EtWVBkYWFRSjRGdW80dmlEUTVrVjhWbjJYNGtCeGNtNzRHQXJsRlZyWjBYdHltVDM2MV9IT0RFT2dLLTVBREtsS09HdUxrODRLcEQ1TmRoVDh6WmgybGc5MzgtbmJSYThQd3FFaUcxbmg3eE95RkJVX2hHM20wT1k2c21qd24wSkFWNGROaklQeHZrc21PdTVsdHVxR0pxd3Ztb1NQVHEtd25URHRNa1pqa3BLdVdkTnNFeDNManJST0dOb1RWM2hqekxFTlFSZkd6TlZBY1VQT1NFOVlDQzlPQWVlVXQ4MW0wdGkzd0Myam1lSWE2aEtVVTVNc3N3dENpa1BWRl9ZQ3daYllONWRmRUF0THpleFRmdWRqTFM2aldmLUFuZzFGdkFQNHR6d21SdzRGQ0Q4cU8yV0xGUTVUY01TZlYxSzZ4cmtfUGZvVDhmYmNBX1pibTVTcl9lenJoME9KSnBucUxPRU1PRXBmLWFENEgwRWZOU0RvRDlvQk9ueVp0dXJrUVgtQUk5VldVbV9MS19PYmlua3liWl80Z2hMcFRnTXBkZDA3enIxRWFzaU56TEZKa0hPQUtNY0dCY1pnQ2V3Zml6ZFczWFBESUlLd3BSVEs5ZXlGLUpINDRsd1NBVjBkR1dvbE8wLWZBeEhFQ0hvY3E5UGJsTDdteGdSRjBIZTRobXpsd29PMmhKQkxXY3Znd2FMdWtZU1VkQlVRZXlSZ3FaVnNqcXpwR3N3SktOTDA3aUZIcE9TR1VDcXdaTDhQX2E5VDlwckoyX0xlNmFQcnoydEkwc0s1S08yaVlsM0pwYktUVWl3LU5hQzF2UVZNSm9ZR3QyQWdrUXB2a25QNzhkVEFOYmZ0b1BmTXRCMmVQZTAtYzdOeUlBYlNINlZNZW1nUTFfSV92UlJiWGt6Qms1c1hBc3kzZkVRMzEwNVJDOS1JeVg4YWtVeUJyOTZPQ0FnSUs1Z25sMlY0S1V1c0dIWEpuX2pMQmZ4Z29SY1U0bVZscXNWcjJwRy1UZEFYSXBzQURGblRTelBybU5BeDF6N3hZLXZwSHBkMmlzbHZWN2JkU3hRcE0zQ0hna3QwYWlJX3hBdGcxUHdGRE55cndUNHRvbXU5VTRMRmZDRjhvXzIwajI1Y0RCcmR2OV94cS1XYkNwalNHS2lObHlkNGZBbklycnZMSlJYVnlfakRXb1ZfWUo2MGxzYUNIektYeENGTkUzMUJXRE9WRHRrY2o5UFJHckZza2RQbjNPUkstbG9GZG4yNmxKeEdtbHo4WDZFc0lvT01wZkxuN29ycXl3X1hTN1prRGdvWG9hRFYwNzBwVVpuMW0wQlZYbGZxZjFQUHp2XzBQT3Fqa3lzejVKZmJDMG0wRzhqWV9HY1dxaXB2VFNQUzV2LUJSOXRFRUllak83cUI3RGUtYVBJakF1YUVOV0otT1BxUHJqS0NLdFVHc0tsT2RGcWd6UTU4Yi1kc0JZS1VPT1NXSlc3TDM5ZDVEZlRDOURZU1hMT0YxZ25ndVBUaG1VcGsxWFZSS1RxT1ZZTU1vclZjVU5iYmZMd0VBTXlvdTE0YjdoclZ6ZnNKMmE2Yy1ORmNCMnJNX3dwcVJSN2RSd2d6aENLRXQyTjhkcDlLTFVZMHBydFowNTJoZm1mVHNRVHI1YjhTNnl1Vll4dFZhenZfa0dybk9KYVh6LUluSUo0djUzRFNEdzBoVGt5UU9tMlg5UnBLbk9WaEhoU2txY2tUSXJmemlmNEExb3Q1blI5bE9adHluWVI3NXZQNUtXdmpra05aNy15dTBXdlVqcXhteFVqSXFxNnlQR2FGeVNONkx3NVpQUk1FNk5yTUY4T1hQV1FCdm9PYzdFTGl4QXZkODltSlprbGJ6cWREcEM1VlNwN3V5aWdWYXNkekk4X3U0cjJjZ1k2X190cmNnMlpMQVlLdExxM3pFNkZudVFKci1CalE1U3kzdmotQ01LV0ZzWnp0VUxRblhkdlN6VG1MWHNQdGlrNmF4RnFtd0c3UXNqZFVRZTRFMGl1NFU5T2k3VEpjZXA1U052VkJtdUhDWEpTaDRGQnM0SDQwY2IxdDVNbUtELTQ0R0s0OHpfTHdFOHZ0VmRMTC1FUVpPSkJ4QXRWNnl5MURUdjVyUk53emRwbDBxUnloUmlheXhKY3RBUG1mX3JxM2w0VlZvcE40b2ROeG15NS01RFlvUHdoYllLNVhCZUNEd0dwQnFCLVdZU0RhVEFzR2gxTVpub3FGRnl4VDNiSVZrTnpMQUlxeGJGQzh5WlNZR2NKbklHRVRTaVJ2REduN0hXaGo5MHFGb1FOa0U5TUFwQ09zOXVWMnRRNVlJWmZpaTUxLWFIeWR0UEFtaVNDX1k5Q1p3Y2V4ckVXQVBRYzV1eGwwMWd0SE15WUxiYzUyLTUzTGlyTUhZUDFlRTFjcFpieWQwU0pxRWJXSE53Nkd5aHp5T28wZVd6Z1phLTQ4TmgxU3hvNHpySzExUk5WZlFFS3VpOXNHMDdZU0gzSGxYUlU4WmgwNUlPdlhQcUI0cGtITmQ4SlByczN0THUxNHc0a21vUEp6S1hLNnFRNmFfdlpmUWpJQ1VNYXVEOW1abzlsd2RoRG5pVXRVbjBKV2RFTGFEa3ZYTHByOTJjalc1b3hTWkFmS2RPdVlTUTVkRkpSTnZsMWtnYWZEUm1SR3lBemdON2xiN3pkZlNfX2NSYU5wWHNybHh4V0lnNHJjQ2NON1hiRHMycUdmNC1kay13bUE0OTBPN0xmNDA1NlQxVmRySEJvM1VUN2Y2Sl9KX2pZVHRPWEdfR2RYNUoxY01Va3pXb2VBd3lZb3BSXzU5NVJfWlhEYXFSVDJrUnFHWG42RVZJUVQ2RlJWUEkyQnRnREI3eHNiRERiQ3FUczJsRTBDZ3pUUGZPcjExZUFKc21QUWxVYVBmV2hPZXRGd3lJX3ZTczhCVG1jWFVwanhIZHlyTTdiR2c5cTBVSXBRV1U4ZExtWWdub1pTSHU0cU5aYWJVWmExbXI0MjE3WUVnPT0=
-
-# Feature SyncDelta JIRA configuration
-Feature_SyncDelta_JIRA_DELTA_TOKEN_SECRET = PROD_ENC:Z0FBQUFBQm8xSU5pTDhnTVNzRUhScU8wYnZsZk52bHFkSWxLc18xQmtCeC1HbnNwTzVBbXRNTmQzRjZYaGE2MVlCNGtnWDk1T2I5VXVKNHpKU1VRbXEyN2tRWUJnU2ltZE5qZ3lmNEF6Z1hMTTEwZkk2NUNBYjhmVTJEcWpRUW9HNEVpSGFWdjBWQXQ3eUtHUTFJS3U5QWpaeno0RFNhMUxnPT0=
diff --git a/env_prod.20251013_140140.backup b/env_prod.20251013_140140.backup
deleted file mode 100644
index a0583019..00000000
--- a/env_prod.20251013_140140.backup
+++ /dev/null
@@ -1,94 +0,0 @@
-# Production Environment Configuration
-
-# System Configuration
-APP_ENV_TYPE = prod
-APP_ENV_LABEL = Production Instance
-APP_API_URL = https://gateway.poweron-center.net
-APP_KEY_SYSVAR = CONFIG_KEY
-APP_INIT_PASS_ADMIN_SECRET = PROD_ENC:Z0FBQUFBQm8xSU5pSXoyVEVwNDZ6cmthQTROUkxGUjh1UWF2UU5zaWRuX3p2aHJCVFo2NEstR0RqdnQ5clZmeVliRlhHZGFHTlhZV2dzMmRPZFVEemVlSHd5VHR3cmpNUXRaRlhZSFZ6d1dsX2Y5Zl9lOXdYdEU9
-APP_INIT_PASS_EVENT_SECRET = PROD_ENC:Z0FBQUFBQm8xSU5peGNMWExjWGZxQ2VndXVOSUVGcWhQTWd0N3d0blU3bGJvNjgzNVVNNktCQnZlTEtVckV5RUtQMjMwRTBkdmxEMlZwX0k1M1hlOFFNY3hjaWsyd2JmRGl2UWxfSXEwenVnQ3NmaTlxckp2VXM9
-
-# PostgreSQL Storage (new)
-DB_APP_HOST=gateway-prod-server.postgres.database.azure.com
-DB_APP_DATABASE=poweron_app
-DB_APP_USER=gzxxmcrdhn
-DB_APP_PASSWORD_SECRET = PROD_ENC:Z0FBQUFBQm8xSU5pVmtwYWZQakdWZnJPamVlRWJPa0tnc3daSVVHejVrQ0x1VFZZbHhVSkk0S2tFWl92T2NwWURBMU9UbFROMHZ2TkNKZFlEWjhJZDZ0bnFndC1oYjhNRW1VLWpEYnlDNEJwcGVKckpUVlp6YTg9
-DB_APP_PORT=5432
-
-# PostgreSQL Storage (new)
-DB_CHAT_HOST=gateway-prod-server.postgres.database.azure.com
-DB_CHAT_DATABASE=poweron_chat
-DB_CHAT_USER=gzxxmcrdhn
-DB_CHAT_PASSWORD_SECRET = PROD_ENC:Z0FBQUFBQm8xSU5pZVZnTzBPTDY1Q3c2U1pDV0lxbXhoWnlYSXRDWVhIeGJwSkdNMzMxR2h5a1FRN00xcWtYUE4ySGpqRllSaGM5SmRZZk9Bd2trVDJNZDdWcEFIbTJtel91MHpsazlTQnRsV2docGdBc0RVeEU9
-DB_CHAT_PORT=5432
-
-# PostgreSQL Storage (new)
-DB_MANAGEMENT_HOST=gateway-prod-server.postgres.database.azure.com
-DB_MANAGEMENT_DATABASE=poweron_management
-DB_MANAGEMENT_USER=gzxxmcrdhn
-DB_MANAGEMENT_PASSWORD_SECRET = PROD_ENC:Z0FBQUFBQm8xSU5pQXdaRnVEQUx2MmU5ck9XZzNfaGVoRXlYMlVjSVM5dWNTekhmR2VYNkd6WVhELUlkLWdFWWRWQ1JJLWZ4WUNwclZVRlg3ZHBCS0xwM1laNklTaEs1czFDRTMxYlV2TWNueEJlTHFyNEt4aVk9
-DB_MANAGEMENT_PORT=5432
-
-# Security Configuration
-APP_JWT_KEY_SECRET = PROD_ENC:Z0FBQUFBQm8xSU5pY3JfX1R3cEJhTjAzZGx2amtRSE4yVzZhMmY3a3FHam9BdzBxVWd5R0FRSW1KbmNGS3JDMktKTWptZm4wYmZZZTVDQkh3NVlxSW1MZEdiVWdORng4dm0xV08wZDh0YlBNQTdEbmlnVWduMzNWY1RPX1BqaGtnOTc2ZWNBTnNnd1AtaTNRUExpRThVdzNmdVFHM2hkTjFjcW0ya2szMWNaT3VDeDhXMlJ1NDM4PQ==
-APP_TOKEN_EXPIRY=300
-
-# CORS Configuration
-APP_ALLOWED_ORIGINS=http://localhost:8080,https://playground.poweron-center.net,https://playground-int.poweron-center.net,http://localhost:5176,https://nyla.poweron-center.net,https://nyla-int.poweron-center.net
-
-# Logging configuration
-APP_LOGGING_LOG_LEVEL = DEBUG
-APP_LOGGING_LOG_DIR = /home/site/wwwroot/
-APP_LOGGING_FORMAT = %(asctime)s - %(levelname)s - %(name)s - %(message)s
-APP_LOGGING_DATE_FORMAT = %Y-%m-%d %H:%M:%S
-APP_LOGGING_CONSOLE_ENABLED = True
-APP_LOGGING_FILE_ENABLED = True
-APP_LOGGING_ROTATION_SIZE = 10485760
-APP_LOGGING_BACKUP_COUNT = 5
-
-# Service Redirects
-Service_MSFT_REDIRECT_URI = https://gateway-prod.poweron-center.net/api/msft/auth/callback
-Service_GOOGLE_REDIRECT_URI = https://gateway-prod.poweron-center.net/api/google/auth/callback
-
-# OpenAI configuration
-Connector_AiOpenai_API_URL = https://api.openai.com/v1/chat/completions
-Connector_AiOpenai_API_SECRET = PROD_ENC:Z0FBQUFBQm8xSU5pU05XM2hMaExPMnpYeFpwRVhyYl9JZmRITmlmRDlWOUJSSWE4NTFLZUptSkJhNlEycHBLZmh3WFA2ZmU5VmxHZks1UUNVOUZnckZNdXZ2MTY2dFg1Nl8yWDRrcTRlT0tHYkhyRGZINTEzU25iYVFRMzJGeUZIdlc4LU9GbmpQYmtmU3lJT2VVZ1UzLVd3R25ZQ092SUVnPT0=
-Connector_AiOpenai_MODEL_NAME = gpt-4o
-Connector_AiOpenai_TEMPERATURE = 0.2
-Connector_AiOpenai_MAX_TOKENS = 2000
-
-# Anthropic configuration
-Connector_AiAnthropic_API_URL = https://api.anthropic.com/v1/messages
-Connector_AiAnthropic_API_SECRET = PROD_ENC:Z0FBQUFBQm8xSU5pNTA1RkZ3UllCOXVsNVZzbkw2Rkl1TWxCZ0wwWEVXUm9ReUhBcVl1cGFUdW9FRVh4elVxR0x3NVRxZkc4SkxHVFdzSU1YNG5Rb0FqSHJhdElwWm1iLWdubTVDcUl3UkVjVHNoU0xLa0ZTSFlfTlJUVXg4cVVwUWdlVDBTSFU5SnBzS0ZnVjlQcmtiNzV2UTNMck1IakZ0OWlubUtlWDZnMk4yX2JsZ1U4Wm1yT29fM2d2NVBNOWNBbWtTRWNyQ2tZNjhwSVF6bG5SU3dTenR2MzA3Z19NUT09
-Connector_AiAnthropic_MODEL_NAME = claude-3-5-sonnet-20241022
-Connector_AiAnthropic_TEMPERATURE = 0.2
-Connector_AiAnthropic_MAX_TOKENS = 2000
-
-# Perplexity AI configuration
-Connector_AiPerplexity_API_URL = https://api.perplexity.ai/chat/completions
-Connector_AiPerplexity_API_SECRET = PROD_ENC:Z0FBQUFBQm82Mzk2Q1FGRkJEUkI4LXlQbHYzT2RkdVJEcmM4WGdZTWpJTEhoeUF1NW5LUVpJdDBYN3k1WFN4a2FQSWJSQmd0U0xJbzZDTmFFN05FcXl0Z3V1OEpsZjYydV94TXVjVjVXRTRYSWdLMkd5XzZIbFV6emRCZHpuOUpQeThadE5xcDNDVGV1RHJrUEN0c1BBYXctZFNWcFRuVXhRPT0=
-Connector_AiPerplexity_MODEL_NAME = sonar
-Connector_AiPerplexity_TEMPERATURE = 0.2
-Connector_AiPerplexity_MAX_TOKENS = 2000
-
-# Agent Mail configuration
-Service_MSFT_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
-Service_MSFT_CLIENT_SECRET = KDH8Q~H2OCtdvYy5yx6HOCYEbdnJCq90G21vTcPw
-Service_MSFT_TENANT_ID = common
-
-# Google Service configuration
-Service_GOOGLE_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
-Service_GOOGLE_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQm8xSU5pV2JEV0lNUXhwa1VTUGh2RWcyYnJHSFQyTmdBOEhwRkJWc3MwOFZlcHJGUmlGOVVFbG1XalNyUXVuaExESy1xeFNIQlRiSFVIWTB6Rm1fNFg0OHZZSkF4ZlBIcFZDMjZHcFRERXJ0WlVFclhHa29Za1BqWGxsM05NZGFRc1BLZnE=
-
-# Tavily Web Search configuration
-Connector_WebTavily_API_KEY_SECRET = PROD_ENC:Z0FBQUFBQm8xSU5pMjhJNS1CZFJubUlkN3ZrTUoxR0Y1QzJFWEJSMk0wQkI0UndqOW1UelVieWhGaTVBcHoxRXo1VjRzVVRROHFIeHMyS3Q5cDZCeUlEMzE1ZlhVTmNveFk5VmFQMm80NTRyVW1TZHVsR3dUN0RtMnd4LW1VWlpqOXJPeXZBTmg4OEM=
-
-# Google Cloud Speech Services configuration
-Connector_GoogleSpeech_API_KEY_SECRET = PROD_ENC:Z0FBQUFBQm8xSU5pNjlJdmFMeERXUUQzR0duRUY4cGRZRzdwQlpnVFAzSzQ5cHZNRnVUZ0xWd3dQMHR3QjVsdF92NmdUQlJGRk1RcG1RYWZzcE9RbEhjQmR5Yk5Ud3ZKTW5jbmpEVGJ2ZkxVeVJpcUxaT2lNREFXaks5WHg5aVlHcXlUZldMdnZGYklHWjlJOWJ6Wm5RSkNmdm5feENjS1E0QUVXTTE5SW5sNFBEeTJ1RjRmVm9SQUNIYmF2U1U2dklsbTVlWFpCcHMwTFF1SUg5NmNfcWhQRFlpeWt0U19HMXNuUHd2RFdrVl9XdUFaY0hWdVBPYWlybU1CdGlCN1A0RzZBbi1IUVJ1TWMxTE9Ea09sTURhcDFZb1JIUW1zUFJybW15MDcxOUtfVXA2N0xwMnFrczA1YTJaN05pRHhOYWNzMjVmUHdhbVdlemF3TEIzN0pJaVo3bGJBMXJnZmNYTXVJVDdmYkRXWTlBT2F2NmN4eTlteUI1SlJTOXc2WWFWUTBCZTJBVHRLVDhEVjBFeHE0Nmk1YkxYd3N3RXgtVUdGdlZFSmk4dHM0QjFmbktsQTctbmJMT0MtMDlKS1pUR0pELXBxckhULUUycjlBZmVJQjFrM0xEUm50U2ZabExtVjZ1WWZ1WnlobUZIOVlndjNydUZfczJUWVVRZURTd1lYazllaER4VU10cXUyVS1ZNG9Ha2hnbTAzOEpGMklFSWpWeVV5eFB2UlVWYmJJakZnOVM2R2lJSXRSM3VzVEZZNUVpNmVjRzdXRUJsT2hzcjhZWERFeGV5c1dFQVM3dkhGY2Q3ckNBRDZCcVdhZnZkdzM3QVNpODZYWE81TEIyZGUycldkSVRvbm5hR3Jib2UzOEtXdUpHQ2FyWDQtMDdQbC1ycEdfUzdXd0U2dHFIVjhoRDJ0YkNsWUpva1dzOGNPdXRpZjVwUldtT3FVN3RrZUhTN3JfX1M3LU9PaXZELWkzRmtMbjgxZGZ6ZjVJNW9RZW1nM2hqUXo4Z2I5Z2tSVTVMdUNLblRxOGQ1Y3F4SGZIbWo4YkFBV3FIbjB6LUxGNHdsQWgxQUM4bzVrblBObFFfVWNaQ3QwejQ1eGFlSXVIcXlyVEZEdzVKNV9pd2o4RW1UVjlqb3VMWnF0V1JTcWF1R0RjdUNjM2lLUHRqZDl2WWtXUnhmbVdxeHA3REFHTkdkMjM4LTllajBWQnd3RHlFSVdiUThfQnduOVFJdmR6OUVGN1lOYjBqclhadHozX21kRzlUT2EtWVBkYWFRSjRGdW80dmlEUTVrVjhWbjJYNGtCeGNtNzRHQXJsRlZyWjBYdHltVDM2MV9IT0RFT2dLLTVBREtsS09HdUxrODRLcEQ1TmRoVDh6WmgybGc5MzgtbmJSYThQd3FFaUcxbmg3eE95RkJVX2hHM20wT1k2c21qd24wSkFWNGROaklQeHZrc21PdTVsdHVxR0pxd3Ztb1NQVHEtd25URHRNa1pqa3BLdVdkTnNFeDNManJST0dOb1RWM2hqekxFTlFSZkd6TlZBY1VQT1NFOVlDQzlPQWVlVXQ4MW0wdGkzd0Myam1lSWE2aEtVVTVNc3N3dENpa1BWRl9ZQ3daYllONWRmRUF0THpleFRmdWRqTFM2aldmLUFuZzFGdkFQNHR6d21SdzRGQ0Q4cU8yV0xGUTVUY01TZlYxSzZ4cmtfUGZvVDhmYmNBX1pibTVTcl9lenJoME9KSnBucUxPRU1PRXBmLWFENEgwRWZOU0RvRDlvQk9ueVp0dXJrUVgtQUk5VldVbV9MS19PYmlua3liWl80Z2hMcFRnTXBkZDA3enIxRWFzaU56TEZKa0hPQUtNY0dCY1pnQ2V3Zml6ZFczWFBESUlLd3BSVEs5ZXlGLUpINDRsd1NBVjBkR1dvbE8wLWZBeEhFQ0hvY3E5UGJsTDdteGdSRjBIZTRobXpsd29PMmhKQkxXY3Znd2FMdWtZU1VkQlVRZXlSZ3FaVnNqcXpwR3N3SktOTDA3aUZIcE9TR1VDcXdaTDhQX2E5VDlwckoyX0xlNmFQcnoydEkwc0s1S08yaVlsM0pwYktUVWl3LU5hQzF2UVZNSm9ZR3QyQWdrUXB2a25QNzhkVEFOYmZ0b1BmTXRCMmVQZTAtYzdOeUlBYlNINlZNZW1nUTFfSV92UlJiWGt6Qms1c1hBc3kzZkVRMzEwNVJDOS1JeVg4YWtVeUJyOTZPQ0FnSUs1Z25sMlY0S1V1c0dIWEpuX2pMQmZ4Z29SY1U0bVZscXNWcjJwRy1UZEFYSXBzQURGblRTelBybU5BeDF6N3hZLXZwSHBkMmlzbHZWN2JkU3hRcE0zQ0hna3QwYWlJX3hBdGcxUHdGRE55cndUNHRvbXU5VTRMRmZDRjhvXzIwajI1Y0RCcmR2OV94cS1XYkNwalNHS2lObHlkNGZBbklycnZMSlJYVnlfakRXb1ZfWUo2MGxzYUNIektYeENGTkUzMUJXRE9WRHRrY2o5UFJHckZza2RQbjNPUkstbG9GZG4yNmxKeEdtbHo4WDZFc0lvT01wZkxuN29ycXl3X1hTN1prRGdvWG9hRFYwNzBwVVpuMW0wQlZYbGZxZjFQUHp2XzBQT3Fqa3lzejVKZmJDMG0wRzhqWV9HY1dxaXB2VFNQUzV2LUJSOXRFRUllak83cUI3RGUtYVBJakF1YUVOV0otT1BxUHJqS0NLdFVHc0tsT2RGcWd6UTU4Yi1kc0JZS1VPT1NXSlc3TDM5ZDVEZlRDOURZU1hMT0YxZ25ndVBUaG1VcGsxWFZSS1RxT1ZZTU1vclZjVU5iYmZMd0VBTXlvdTE0YjdoclZ6ZnNKMmE2Yy1ORmNCMnJNX3dwcVJSN2RSd2d6aENLRXQyTjhkcDlLTFVZMHBydFowNTJoZm1mVHNRVHI1YjhTNnl1Vll4dFZhenZfa0dybk9KYVh6LUluSUo0djUzRFNEdzBoVGt5UU9tMlg5UnBLbk9WaEhoU2txY2tUSXJmemlmNEExb3Q1blI5bE9adHluWVI3NXZQNUtXdmpra05aNy15dTBXdlVqcXhteFVqSXFxNnlQR2FGeVNONkx3NVpQUk1FNk5yTUY4T1hQV1FCdm9PYzdFTGl4QXZkODltSlprbGJ6cWREcEM1VlNwN3V5aWdWYXNkekk4X3U0cjJjZ1k2X190cmNnMlpMQVlLdExxM3pFNkZudVFKci1CalE1U3kzdmotQ01LV0ZzWnp0VUxRblhkdlN6VG1MWHNQdGlrNmF4RnFtd0c3UXNqZFVRZTRFMGl1NFU5T2k3VEpjZXA1U052VkJtdUhDWEpTaDRGQnM0SDQwY2IxdDVNbUtELTQ0R0s0OHpfTHdFOHZ0VmRMTC1FUVpPSkJ4QXRWNnl5MURUdjVyUk53emRwbDBxUnloUmlheXhKY3RBUG1mX3JxM2w0VlZvcE40b2ROeG15NS01RFlvUHdoYllLNVhCZUNEd0dwQnFCLVdZU0RhVEFzR2gxTVpub3FGRnl4VDNiSVZrTnpMQUlxeGJGQzh5WlNZR2NKbklHRVRTaVJ2REduN0hXaGo5MHFGb1FOa0U5TUFwQ09zOXVWMnRRNVlJWmZpaTUxLWFIeWR0UEFtaVNDX1k5Q1p3Y2V4ckVXQVBRYzV1eGwwMWd0SE15WUxiYzUyLTUzTGlyTUhZUDFlRTFjcFpieWQwU0pxRWJXSE53Nkd5aHp5T28wZVd6Z1phLTQ4TmgxU3hvNHpySzExUk5WZlFFS3VpOXNHMDdZU0gzSGxYUlU4WmgwNUlPdlhQcUI0cGtITmQ4SlByczN0THUxNHc0a21vUEp6S1hLNnFRNmFfdlpmUWpJQ1VNYXVEOW1abzlsd2RoRG5pVXRVbjBKV2RFTGFEa3ZYTHByOTJjalc1b3hTWkFmS2RPdVlTUTVkRkpSTnZsMWtnYWZEUm1SR3lBemdON2xiN3pkZlNfX2NSYU5wWHNybHh4V0lnNHJjQ2NON1hiRHMycUdmNC1kay13bUE0OTBPN0xmNDA1NlQxVmRySEJvM1VUN2Y2Sl9KX2pZVHRPWEdfR2RYNUoxY01Va3pXb2VBd3lZb3BSXzU5NVJfWlhEYXFSVDJrUnFHWG42RVZJUVQ2RlJWUEkyQnRnREI3eHNiRERiQ3FUczJsRTBDZ3pUUGZPcjExZUFKc21QUWxVYVBmV2hPZXRGd3lJX3ZTczhCVG1jWFVwanhIZHlyTTdiR2c5cTBVSXBRV1U4ZExtWWdub1pTSHU0cU5aYWJVWmExbXI0MjE3WUVnPT0=
-
-# Feature SyncDelta JIRA configuration
-Feature_SyncDelta_JIRA_DELTA_TOKEN_SECRET = PROD_ENC:Z0FBQUFBQm8xSU5pTDhnTVNzRUhScU8wYnZsZk52bHFkSWxLc18xQmtCeC1HbnNwTzVBbXRNTmQzRjZYaGE2MVlCNGtnWDk1T2I5VXVKNHpKU1VRbXEyN2tRWUJnU2ltZE5qZ3lmNEF6Z1hMTTEwZkk2NUNBYjhmVTJEcWpRUW9HNEVpSGFWdjBWQXQ3eUtHUTFJS3U5QWpaeno0RFNhMUxnPT0=
-
-# Debug Configuration
-APP_DEBUG_CHAT_WORKFLOW_ENABLED = FALSE
-APP_DEBUG_CHAT_WORKFLOW_DIR = ./test-chat
\ No newline at end of file
diff --git a/modules/services/serviceAi/mainServiceAi.py b/modules/services/serviceAi/mainServiceAi.py
index 8f2964e2..ed5e318a 100644
--- a/modules/services/serviceAi/mainServiceAi.py
+++ b/modules/services/serviceAi/mainServiceAi.py
@@ -15,16 +15,29 @@ from modules.datamodels.datamodelWeb import (
)
from modules.interfaces.interfaceAiObjects import AiObjects
from modules.shared.configuration import APP_CONFIG
+from modules.services.serviceAi.subCoreAi import SubCoreAi
+from modules.services.serviceAi.subDocumentProcessing import SubDocumentProcessing
+from modules.services.serviceAi.subWebResearch import SubWebResearch
+from modules.services.serviceAi.subDocumentGeneration import SubDocumentGeneration
+from modules.services.serviceAi.subUtilities import SubUtilities
logger = logging.getLogger(__name__)
-
-# Model registry is now provided by interfaces via AiModels
-
-
class AiService:
- """Centralized AI service orchestrating documents, model selection, failover, and web operations.
+ """Lightweight AI service orchestrator that delegates to specialized sub-modules.
+
+    The work is split across five sub-modules:
+ - SubCoreAi: Core AI operations (readImage, generateImage, callAi, planning, text calls)
+ - SubDocumentProcessing: Document chunking, processing, and merging logic
+ - SubWebResearch: Web research and crawling functionality
+ - SubDocumentGeneration: Single-file and multi-file document generation
+ - SubUtilities: Helper functions, text processing, and debugging utilities
+
+ The main service acts as a coordinator:
+ 1. Manages lazy initialization of sub-modules
+ 2. Delegates operations to appropriate sub-modules
+ 3. Maintains the same public API for backward compatibility
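+
+    Example (public API unchanged; a sketch assuming the usual construction path,
+    with `imageData` standing in for caller-supplied bytes):
+        service = AiService(serviceCenter)
+        text = await service.readImage(prompt, imageData, "image/png")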
"""
def __init__(self, serviceCenter=None) -> None:
@@ -37,6 +50,11 @@ class AiService:
# Only depend on interfaces
self.aiObjects = None # Will be initialized in create()
self._extractionService = None # Lazy initialization
+ self._coreAi = None # Lazy initialization
+ self._documentProcessor = None # Lazy initialization
+ self._webResearch = None # Lazy initialization
+ self._documentGenerator = None # Lazy initialization
+ self._utilities = None # Lazy initialization
@property
def extractionService(self):
@@ -46,6 +64,46 @@ class AiService:
self._extractionService = ExtractionService(self.services)
return self._extractionService
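+
+    # The five accessors below follow the same lazy pattern as extractionService:
+    # build the sub-module on first access, cache it, and reuse the instance.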
+ @property
+ def coreAi(self):
+ """Lazy initialization of core AI service."""
+ if self._coreAi is None:
+ logger.info("Lazy initializing SubCoreAi...")
+ self._coreAi = SubCoreAi(self.services, self.aiObjects)
+ return self._coreAi
+
+ @property
+ def documentProcessor(self):
+ """Lazy initialization of document processing service."""
+ if self._documentProcessor is None:
+ logger.info("Lazy initializing SubDocumentProcessing...")
+ self._documentProcessor = SubDocumentProcessing(self.services, self.aiObjects)
+ return self._documentProcessor
+
+ @property
+ def webResearchService(self):
+ """Lazy initialization of web research service."""
+ if self._webResearch is None:
+ logger.info("Lazy initializing SubWebResearch...")
+ self._webResearch = SubWebResearch(self.services, self.aiObjects)
+ return self._webResearch
+
+ @property
+ def documentGenerator(self):
+ """Lazy initialization of document generation service."""
+ if self._documentGenerator is None:
+ logger.info("Lazy initializing SubDocumentGeneration...")
+ self._documentGenerator = SubDocumentGeneration(self.services, self.aiObjects, self.documentProcessor)
+ return self._documentGenerator
+
+ @property
+ def utilities(self):
+ """Lazy initialization of utilities service."""
+ if self._utilities is None:
+ logger.info("Lazy initializing SubUtilities...")
+ self._utilities = SubUtilities(self.services)
+ return self._utilities
+
async def _ensureAiObjectsInitialized(self):
"""Ensure aiObjects is initialized."""
if self.aiObjects is None:
@@ -72,45 +130,8 @@ class AiService:
options: Optional[AiCallOptions] = None,
) -> str:
"""Call AI for image analysis using interface.callImage()."""
- try:
- # Check if imageData is valid
- if not imageData:
- error_msg = "No image data provided"
- self.services.utils.debugLogToFile(f"Error in AI image analysis: {error_msg}", "AI_SERVICE")
- logger.error(f"Error in AI image analysis: {error_msg}")
- return f"Error: {error_msg}"
-
- self.services.utils.debugLogToFile(f"readImage called with prompt, imageData type: {type(imageData)}, length: {len(imageData) if imageData else 0}, mimeType: {mimeType}", "AI_SERVICE")
- logger.info(f"readImage called with prompt, imageData type: {type(imageData)}, length: {len(imageData) if imageData else 0}, mimeType: {mimeType}")
-
- # Always use IMAGE_ANALYSIS operation type for image processing
- if options is None:
- options = AiCallOptions(operationType=OperationType.IMAGE_ANALYSIS)
- else:
- # Override the operation type to ensure image analysis
- options.operationType = OperationType.IMAGE_ANALYSIS
-
- self.services.utils.debugLogToFile(f"Calling aiObjects.callImage with operationType: {options.operationType}", "AI_SERVICE")
- logger.info(f"Calling aiObjects.callImage with operationType: {options.operationType}")
- result = await self.aiObjects.callImage(prompt, imageData, mimeType, options)
-
- # Debug the result
- self.services.utils.debugLogToFile(f"Raw AI result type: {type(result)}, value: {repr(result)}", "AI_SERVICE")
-
- # Check if result is valid
- if not result or (isinstance(result, str) and not result.strip()):
- error_msg = f"No response from AI image analysis (result: {repr(result)})"
- self.services.utils.debugLogToFile(f"Error in AI image analysis: {error_msg}", "AI_SERVICE")
- logger.error(f"Error in AI image analysis: {error_msg}")
- return f"Error: {error_msg}"
-
- self.services.utils.debugLogToFile(f"callImage returned: {result[:200]}..." if len(result) > 200 else result, "AI_SERVICE")
- logger.info(f"callImage returned: {result[:200]}..." if len(result) > 200 else result)
- return result
- except Exception as e:
- self.services.utils.debugLogToFile(f"Error in AI image analysis: {str(e)}", "AI_SERVICE")
- logger.error(f"Error in AI image analysis: {str(e)}")
- return f"Error: {str(e)}"
+ await self._ensureAiObjectsInitialized()
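+        # Note: the input validation and debug logging removed above are assumed to
+        # live in SubCoreAi.readImage now, so behavior should be unchanged.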
+ return await self.coreAi.readImage(prompt, imageData, mimeType, options)
# AI Image Generation
async def generateImage(
@@ -122,1220 +143,16 @@ class AiService:
options: Optional[AiCallOptions] = None,
) -> Dict[str, Any]:
"""Generate an image using AI using interface.generateImage()."""
- try:
- return await self.aiObjects.generateImage(prompt, size, quality, style, options)
- except Exception as e:
- logger.error(f"Error in AI image generation: {str(e)}")
- return {"success": False, "error": str(e)}
+ await self._ensureAiObjectsInitialized()
+ return await self.coreAi.generateImage(prompt, size, quality, style, options)
- # Web Research - Using interface functions
+ # Web Research
async def webResearch(self, request: WebResearchRequest) -> WebResearchActionResult:
"""Perform web research using interface functions."""
- try:
- logger.info(f"WEB RESEARCH STARTED")
- logger.info(f"User Query: {request.user_prompt}")
- logger.info(f"Max Results: {request.max_results}, Max Pages: {request.options.max_pages}")
-
- # Global URL index to track all processed URLs across the entire research session
- global_processed_urls = set()
-
- # Step 1: Find relevant websites - either provided URLs or AI-determined main URLs
- logger.info(f"=== STEP 1: INITIAL MAIN URLS LIST ===")
-
- if request.urls:
- # Use provided URLs as initial main URLs
- websites = request.urls
- logger.info(f"Using provided URLs ({len(websites)}):")
- for i, url in enumerate(websites, 1):
- logger.info(f" {i}. {url}")
- else:
- # Use AI to determine main URLs based on user's intention
- logger.info(f"AI analyzing user intent: '{request.user_prompt}'")
-
- # Use AI to generate optimized Tavily search query and search parameters
- query_optimizer_prompt = f"""You are a search query optimizer.
-
- USER QUERY: {request.user_prompt}
-
- Your task: Create a search query and parameters for the USER QUERY given.
-
- RULES:
- 1. The search query MUST be related to the user query above
- 2. Extract key terms from the user query
- 3. Determine appropriate country/language based on the query context
- 4. Keep search query short (2-6 words)
-
- Return ONLY this JSON format:
- {{
- "user_prompt": "search query based on user query above",
- "country": "Full English country name (ISO-3166; map codes via pycountry/i18n-iso-countries)",
- "language": "language_code_or_null",
- "topic": "general|news|academic_or_null",
- "time_range": "d|w|m|y_or_null",
- "selection_strategy": "single|multiple|specific_page",
- "selection_criteria": "what URLs to prioritize",
- "expected_url_patterns": ["pattern1", "pattern2"],
- "estimated_result_count": number
- }}"""
-
- # Get AI response for query optimization
- ai_request = AiCallRequest(
- prompt=query_optimizer_prompt,
- options=AiCallOptions()
- )
- ai_response_obj = await self.aiObjects.call(ai_request)
- ai_response = ai_response_obj.content
- logger.debug(f"AI query optimizer response: {ai_response}")
-
- # Parse AI response to extract search query
- import json
- try:
- # Clean the response by removing markdown code blocks
- cleaned_response = ai_response.strip()
- if cleaned_response.startswith('```json'):
- cleaned_response = cleaned_response[7:] # Remove ```json
- if cleaned_response.endswith('```'):
- cleaned_response = cleaned_response[:-3] # Remove ```
- cleaned_response = cleaned_response.strip()
-
- query_data = json.loads(cleaned_response)
- search_query = query_data.get("user_prompt", request.user_prompt)
- ai_country = query_data.get("country")
- ai_language = query_data.get("language")
- ai_topic = query_data.get("topic")
- ai_time_range = query_data.get("time_range")
- selection_strategy = query_data.get("selection_strategy", "multiple")
- selection_criteria = query_data.get("selection_criteria", "relevant URLs")
- expected_patterns = query_data.get("expected_url_patterns", [])
- estimated_count = query_data.get("estimated_result_count", request.max_results)
-
- logger.info(f"AI optimized search query: '{search_query}'")
- logger.info(f"Selection strategy: {selection_strategy}")
- logger.info(f"Selection criteria: {selection_criteria}")
- logger.info(f"Expected URL patterns: {expected_patterns}")
- logger.info(f"Estimated result count: {estimated_count}")
-
- except json.JSONDecodeError:
- logger.warning("Failed to parse AI response as JSON, using original query")
- search_query = request.user_prompt
- ai_country = None
- ai_language = None
- ai_topic = None
- ai_time_range = None
- selection_strategy = "multiple"
-
- # Perform the web search with AI-determined parameters
- search_kwargs = {
- "query": search_query,
- "max_results": request.max_results,
- "search_depth": request.options.search_depth,
- "auto_parameters": False # Use explicit parameters
- }
-
- # Add parameters only if they have valid values
- def _normalizeCountry(c: Optional[str]) -> Optional[str]:
- if not c:
- return None
- s = str(c).strip()
- if not s or s.lower() in ['null', 'none', 'undefined']:
- return None
- # Map common codes to full English names when easy to do without extra deps
- mapping = {
- 'ch': 'Switzerland', 'che': 'Switzerland',
- 'de': 'Germany', 'ger': 'Germany', 'deu': 'Germany',
- 'at': 'Austria', 'aut': 'Austria',
-                        'us': 'United States', 'usa': 'United States', 'united states': 'United States',
- 'uk': 'United Kingdom', 'gb': 'United Kingdom', 'gbr': 'United Kingdom'
- }
- key = s.lower()
- if key in mapping:
- return mapping[key]
- # If looks like full name, capitalize first letter only (Tavily accepts English names)
- return s
-
- norm_ai_country = _normalizeCountry(ai_country)
- norm_req_country = _normalizeCountry(request.options.country)
- if norm_ai_country:
- search_kwargs["country"] = norm_ai_country
- elif norm_req_country:
- search_kwargs["country"] = norm_req_country
-
- if ai_language and ai_language not in ['null', '', 'none', 'undefined']:
- search_kwargs["language"] = ai_language
- elif request.options.language and request.options.language not in ['null', '', 'none', 'undefined']:
- search_kwargs["language"] = request.options.language
-
- if ai_topic and ai_topic in ['general', 'news', 'academic']:
- search_kwargs["topic"] = ai_topic
- elif request.options.topic and request.options.topic in ['general', 'news', 'academic']:
- search_kwargs["topic"] = request.options.topic
-
- if ai_time_range and ai_time_range in ['d', 'w', 'm', 'y']:
- search_kwargs["time_range"] = ai_time_range
- elif request.options.time_range and request.options.time_range in ['d', 'w', 'm', 'y']:
- search_kwargs["time_range"] = request.options.time_range
-
- # Constrain by expected domains if provided by AI
- try:
- include_domains = []
- for p in expected_patterns or []:
- if not isinstance(p, str):
- continue
- # Extract bare domain from pattern or URL
- import re
- m = re.search(r"(?:https?://)?([^/\s]+)", p.strip())
- if m:
- domain = m.group(1).lower()
- # strip leading www.
- if domain.startswith('www.'):
- domain = domain[4:]
- include_domains.append(domain)
- # Deduplicate
- if include_domains:
- seen = set()
- uniq = []
- for d in include_domains:
- if d not in seen:
- seen.add(d)
- uniq.append(d)
- search_kwargs["include_domains"] = uniq
- except Exception:
- pass
-
- # Log the parameters being used
- logger.info(f"Search parameters: country={search_kwargs.get('country', 'not_set')}, language={search_kwargs.get('language', 'not_set')}, topic={search_kwargs.get('topic', 'not_set')}, time_range={search_kwargs.get('time_range', 'not_set')}, include_domains={search_kwargs.get('include_domains', [])}")
-
- search_results = await self.aiObjects.search_websites(**search_kwargs)
-
- logger.debug(f"Web search returned {len(search_results)} results:")
- for i, result in enumerate(search_results, 1):
- logger.debug(f" {i}. {result.url} - {result.title}")
-
- # Deduplicate while preserving order
- seen = set()
- search_urls = []
- for r in search_results:
- u = str(r.url)
- if u not in seen:
- seen.add(u)
- search_urls.append(u)
-
- logger.info(f"After initial deduplication: {len(search_urls)} unique URLs from {len(search_results)} search results")
-
- if not search_urls:
- logger.error("No relevant websites found")
- return WebResearchActionResult(success=False, error="No relevant websites found")
-
- # Now use AI to determine the main URLs based on user's intention
- logger.info(f"AI selecting main URLs from {len(search_urls)} search results based on user intent")
-
- # Create a prompt for AI to identify main URLs based on user's intention
- ai_prompt = f"""
- Select the most relevant URLs from these search results:
-
- {chr(10).join([f"{i+1}. {url}" for i, url in enumerate(search_urls)])}
-
- Return only the URLs that are most relevant for the user's query.
- One URL per line.
- """
- # Create AI call request
- ai_request = AiCallRequest(
- prompt=ai_prompt,
- options=AiCallOptions()
- )
- ai_response_obj = await self.aiObjects.call(ai_request)
- ai_response = ai_response_obj.content
- logger.debug(f"AI response for main URL selection: {ai_response}")
-
- # Parse AI response to extract URLs
- websites = []
- for line in ai_response.strip().split('\n'):
- line = line.strip()
- if line and ('http://' in line or 'https://' in line):
- # Extract URL from the line
- for word in line.split():
- if word.startswith('http://') or word.startswith('https://'):
- websites.append(word.rstrip('.,;'))
- break
-
- if not websites:
- logger.warning("AI did not identify any main URLs, using first few search results")
- websites = search_urls[:3] # Fallback to first 3 search results
-
- # Deduplicate while preserving order
- seen = set()
- unique_websites = []
- for url in websites:
- if url not in seen:
- seen.add(url)
- unique_websites.append(url)
-
- websites = unique_websites
- logger.info(f"After AI selection deduplication: {len(websites)} unique URLs from {len(websites)} AI-selected URLs")
-
- logger.info(f"AI selected {len(websites)} main URLs (after deduplication):")
- for i, url in enumerate(websites, 1):
- logger.info(f" {i}. {url}")
-
- # Step 2: Smart website selection using AI interface
- logger.info(f"=== STEP 2: FILTERED URL LIST BY USER PROMPT'S INTENTION ===")
- logger.info(f"AI analyzing {len(websites)} URLs for relevance to: '{request.user_prompt}'")
-
- selectedWebsites, aiResponse = await self.aiObjects.selectRelevantWebsites(websites, request.user_prompt)
-
- logger.debug(f"AI Response: {aiResponse}")
- logger.debug(f"AI selected {len(selectedWebsites)} most relevant URLs:")
- for i, url in enumerate(selectedWebsites, 1):
- logger.debug(f" {i}. {url}")
-
- # Show which were filtered out
- filtered_out = [url for url in websites if url not in selectedWebsites]
- if filtered_out:
- logger.debug(f"Filtered out {len(filtered_out)} less relevant URLs:")
- for i, url in enumerate(filtered_out, 1):
- logger.debug(f" {i}. {url}")
-
- # Step 3+4+5: Recursive crawling with configurable depth
- # Get configuration parameters
- max_depth = int(APP_CONFIG.get("Web_Research_MAX_DEPTH", "2"))
- max_links_per_domain = int(APP_CONFIG.get("Web_Research_MAX_LINKS_PER_DOMAIN", "4"))
- crawl_timeout_minutes = int(APP_CONFIG.get("Web_Research_CRAWL_TIMEOUT_MINUTES", "10"))
- crawl_timeout_seconds = crawl_timeout_minutes * 60
-
- # Use the configured max_depth or the request's pages_search_depth, whichever is smaller
- effective_depth = min(max_depth, request.options.pages_search_depth)
-
- logger.info(f"=== STEP 3+4+5: RECURSIVE CRAWLING (DEPTH {effective_depth}) ===")
- logger.info(f"Starting recursive crawl of {len(selectedWebsites)} main websites...")
- logger.info(f"Search depth: {effective_depth} levels (max configured: {max_depth})")
- logger.info(f"Max links per domain: {max_links_per_domain}")
- logger.info(f"Crawl timeout: {crawl_timeout_minutes} minutes")
-
- # Use recursive crawling with URL index to avoid duplicates
- import asyncio
- try:
- allContent = await asyncio.wait_for(
- self.aiObjects.crawlRecursively(
- urls=selectedWebsites,
- max_depth=effective_depth,
- extract_depth=request.options.extract_depth,
- max_per_domain=max_links_per_domain,
- global_processed_urls=global_processed_urls
- ),
- timeout=crawl_timeout_seconds
- )
- logger.info(f"Crawling completed within timeout: {len(allContent)} pages crawled")
- except asyncio.TimeoutError:
- logger.warning(f"Crawling timed out after {crawl_timeout_minutes} minutes, using partial results")
- # crawlRecursively now handles timeouts gracefully and returns partial results
- # Try to get the partial results that were collected
- allContent = {}
-
- if not allContent:
- logger.error("Could not extract content from any websites")
- return WebResearchActionResult(success=False, error="Could not extract content from any websites")
-
- logger.info(f"=== WEB RESEARCH COMPLETED ===")
- logger.info(f"Successfully crawled {len(allContent)} URLs total")
- logger.info(f"Crawl depth: {effective_depth} levels")
-
- # Create simple result with raw content
- sources = [WebSearchResultItem(title=url, url=url) for url in selectedWebsites]
-
- # Get all additional links (all URLs except main ones)
- additional_links = [url for url in allContent.keys() if url not in selectedWebsites]
-
- # Combine all content into a single result
- combinedContent = ""
- for url, content in allContent.items():
- combinedContent += f"\n\n=== {url} ===\n{content}\n"
-
- documentData = WebResearchDocumentData(
- user_prompt=request.user_prompt,
- websites_analyzed=len(allContent),
- additional_links_found=len(additional_links),
- analysis_result=combinedContent, # Raw content, no analysis
- sources=sources,
- additional_links=additional_links,
- individual_content=allContent, # Individual URL -> content mapping
- debug_info={
- "crawl_depth": effective_depth,
- "max_configured_depth": max_depth,
- "max_links_per_domain": max_links_per_domain,
- "crawl_timeout_minutes": crawl_timeout_minutes,
- "total_urls_crawled": len(allContent),
- "main_urls": len(selectedWebsites),
- "additional_urls": len(additional_links)
- }
- )
-
- document = WebResearchActionDocument(
- documentName=f"web_research_{request.user_prompt[:50]}.json",
- documentData=documentData,
- mimeType="application/json"
- )
-
- return WebResearchActionResult(
- success=True,
- documents=[document],
- resultLabel="web_research_results"
- )
-
- except Exception as e:
- logger.error(f"Error in web research: {str(e)}")
- return WebResearchActionResult(success=False, error=str(e))
-
- def _calculateMaxContextBytes(self, options: Optional[AiCallOptions]) -> int:
- """Calculate maximum context bytes based on model capabilities and options."""
- if options and options.maxContextBytes:
- return options.maxContextBytes
-
- # Default model capabilities (this should be enhanced with actual model registry)
- defaultMaxTokens = 4000
- safetyMargin = options.safetyMargin if options else 0.1
-
- # Calculate bytes (4 chars per token estimation)
- maxContextBytes = int(defaultMaxTokens * (1 - safetyMargin) * 4)
-
- return maxContextBytes
-
- async def _processDocumentsPerChunk(
- self,
- documents: List[ChatDocument],
- prompt: str,
- options: Optional[AiCallOptions] = None
- ) -> str:
- """
- Process documents with per-chunk AI calls and merge results.
- FIXED: Now preserves chunk relationships and document structure.
-
- Args:
- documents: List of ChatDocument objects to process
- prompt: AI prompt for processing
- options: AI call options
-
- Returns:
- Merged AI results as string with preserved document structure
- """
- if not documents:
- return ""
-
- # Get model capabilities for size calculation
- model_capabilities = self._getModelCapabilitiesForContent(prompt, documents, options)
-
- # Build extraction options for chunking with intelligent merging
- extractionOptions: Dict[str, Any] = {
- "prompt": prompt,
- "operationType": options.operationType if options else "general",
- "processDocumentsIndividually": True, # Process each document separately
- "maxSize": model_capabilities["maxContextBytes"],
- "chunkAllowed": True,
- "textChunkSize": model_capabilities["textChunkSize"],
- "imageChunkSize": model_capabilities["imageChunkSize"],
- "imageMaxPixels": 1024 * 1024,
- "imageQuality": 85,
- "mergeStrategy": {
- "useIntelligentMerging": True, # Enable intelligent token-aware merging
- "modelCapabilities": model_capabilities,
- "prompt": prompt,
- "groupBy": "typeGroup",
- "orderBy": "id",
- "mergeType": "concatenate"
- },
- }
-
- logger.debug(f"Per-chunk extraction options: prompt length={len(extractionOptions.get('prompt', ''))} chars, operationType={extractionOptions.get('operationType')}")
-
- try:
- # Extract content with chunking
- extractionResult = self.extractionService.extractContent(documents, extractionOptions)
-
- if not isinstance(extractionResult, list):
- return "[Error: No extraction results]"
-
- # FIXED: Process chunks with proper mapping
- chunkResults = await self._processChunksWithMapping(extractionResult, prompt, options)
-
- # FIXED: Merge with preserved chunk relationships
- mergedContent = self._mergeChunkResults(chunkResults, options)
-
- return mergedContent
-
- except Exception as e:
- logger.error(f"Error in per-chunk processing: {str(e)}")
- return f"[Error in per-chunk processing: {str(e)}]"
-
- async def _processDocumentsPerChunkJson(
- self,
- documents: List[ChatDocument],
- prompt: str,
- options: Optional[AiCallOptions] = None
- ) -> Dict[str, Any]:
- """
- Process documents with per-chunk AI calls and merge results in JSON mode.
- Returns structured JSON document instead of text.
- """
- if not documents:
- return {"metadata": {"title": "Empty Document"}, "sections": []}
-
- # Get model capabilities for size calculation
- model_capabilities = self._getModelCapabilitiesForContent(prompt, documents, options)
-
- # Build extraction options for chunking with intelligent merging
- extractionOptions: Dict[str, Any] = {
- "prompt": prompt,
- "operationType": options.operationType if options else "general",
- "processDocumentsIndividually": True, # Process each document separately
- "maxSize": model_capabilities["maxContextBytes"],
- "chunkAllowed": True,
- "textChunkSize": model_capabilities["textChunkSize"],
- "imageChunkSize": model_capabilities["imageChunkSize"],
- "imageMaxPixels": 1024 * 1024,
- "imageQuality": 85,
- "mergeStrategy": {
- "useIntelligentMerging": True, # Enable intelligent token-aware merging
- "modelCapabilities": model_capabilities,
- "prompt": prompt,
- "groupBy": "typeGroup",
- "orderBy": "id",
- "mergeType": "concatenate"
- },
- }
-
- logger.debug(f"Per-chunk extraction options (JSON mode): prompt length={len(extractionOptions.get('prompt', ''))} chars, operationType={extractionOptions.get('operationType')}")
-
- try:
- # Extract content with chunking
- extractionResult = self.extractionService.extractContent(documents, extractionOptions)
-
- if not isinstance(extractionResult, list):
- return {"metadata": {"title": "Error Document"}, "sections": []}
-
- # Process chunks with proper mapping
- chunkResults = await self._processChunksWithMapping(extractionResult, prompt, options, generate_json=True)
-
- # Merge with JSON mode
- mergedJsonDocument = self._mergeChunkResultsJson(chunkResults, options)
-
- return mergedJsonDocument
-
- except Exception as e:
- logger.error(f"Error in per-chunk processing (JSON mode): {str(e)}")
- return {"metadata": {"title": "Error Document"}, "sections": []}
-
- async def _processChunksWithMapping(
- self,
- extractionResult: List[ContentExtracted],
- prompt: str,
- options: Optional[AiCallOptions] = None,
- generate_json: bool = False
- ) -> List[ChunkResult]:
- """Process chunks with proper mapping to preserve relationships."""
- from modules.datamodels.datamodelExtraction import ChunkResult
- import asyncio
- import time
-
- # Collect all chunks that need processing with proper indexing
- chunks_to_process = []
- chunk_index = 0
-
- for ec in extractionResult:
- # Get document MIME type from metadata
- document_mime_type = None
- for part in ec.parts:
- if part.metadata and 'documentMimeType' in part.metadata:
- document_mime_type = part.metadata['documentMimeType']
- break
-
- for part in ec.parts:
- if part.typeGroup in ("text", "table", "structure", "image", "container", "binary"):
- # Skip empty container chunks (they're just metadata containers)
- if part.typeGroup == "container" and (not part.data or len(part.data.strip()) == 0):
- logger.debug(f"Skipping empty container chunk: mimeType={part.mimeType}, data_length={len(part.data) if part.data else 0}")
- continue
-
- chunks_to_process.append({
- 'part': part,
- 'chunk_index': chunk_index,
- 'document_id': ec.id,
- 'document_mime_type': document_mime_type
- })
- chunk_index += 1
-
- logger.info(f"Processing {len(chunks_to_process)} chunks with proper mapping")
-
- # Process chunks in parallel with proper mapping
- async def process_single_chunk(chunk_info: Dict) -> ChunkResult:
- part = chunk_info['part']
- chunk_index = chunk_info['chunk_index']
- document_id = chunk_info['document_id']
- document_mime_type = chunk_info.get('document_mime_type', part.mimeType)
-
- start_time = time.time()
-
- try:
- # FIXED: Check MIME type first, then fallback to typeGroup
- is_image = (
- (document_mime_type and document_mime_type.startswith('image/')) or
- (part.mimeType and part.mimeType.startswith('image/')) or
- (part.typeGroup == "image")
- )
-
- # Debug logging
- self.services.utils.debugLogToFile(f"Chunk {chunk_index}: document_mime_type={document_mime_type}, part.mimeType={part.mimeType}, part.typeGroup={part.typeGroup}, is_image={is_image}", "AI_SERVICE")
- logger.info(f"Chunk {chunk_index}: document_mime_type={document_mime_type}, part.mimeType={part.mimeType}, part.typeGroup={part.typeGroup}, is_image={is_image}")
-
- if is_image:
- # Use the same extraction prompt for image analysis (contains table JSON format)
- self.services.utils.debugLogToFile(f"Processing image chunk {chunk_index}: mimeType={part.mimeType}, data_length={len(part.data) if part.data else 0}", "AI_SERVICE")
-
- # Check if image data is available
- if not part.data:
- error_msg = f"No image data available for chunk {chunk_index}"
- logger.warning(error_msg)
- ai_result = f"Error: {error_msg}"
- else:
- try:
- ai_result = await self.readImage(
- prompt=prompt,
- imageData=part.data,
- mimeType=part.mimeType,
- options=options
- )
-
- self.services.utils.debugLogToFile(f"Image analysis result for chunk {chunk_index}: length={len(ai_result) if ai_result else 0}, preview={ai_result[:200] if ai_result else 'None'}...", "AI_SERVICE")
-
- # Check if result is empty or None
- if not ai_result or not ai_result.strip():
- logger.warning(f"Image chunk {chunk_index} returned empty response from AI")
- ai_result = "No content detected in image"
-
- except Exception as e:
- logger.error(f"Error processing image chunk {chunk_index}: {str(e)}")
- ai_result = f"Error analyzing image: {str(e)}"
-
- # If generating JSON, clean image analysis result
- if generate_json:
- try:
- import json
- import re
-
- # Clean the response - remove markdown code blocks if present
- cleaned_result = ai_result.strip()
-
- # Remove various markdown patterns
- if cleaned_result.startswith('```json'):
- cleaned_result = re.sub(r'^```json\s*', '', cleaned_result)
- cleaned_result = re.sub(r'\s*```$', '', cleaned_result)
- elif cleaned_result.startswith('```'):
- cleaned_result = re.sub(r'^```\s*', '', cleaned_result)
- cleaned_result = re.sub(r'\s*```$', '', cleaned_result)
-
- # Remove any leading/trailing text that's not JSON
- # Look for the first { and last } to extract JSON
- first_brace = cleaned_result.find('{')
- last_brace = cleaned_result.rfind('}')
-
- if first_brace != -1 and last_brace != -1 and last_brace > first_brace:
- cleaned_result = cleaned_result[first_brace:last_brace + 1]
-
- # Additional cleaning for common AI response issues
- cleaned_result = cleaned_result.strip()
-
- # Validate JSON
- json.loads(cleaned_result)
- ai_result = cleaned_result # Use cleaned version
- self.services.utils.debugLogToFile(f"Image chunk {chunk_index} JSON validation successful", "AI_SERVICE")
-
- except json.JSONDecodeError as e:
- logger.warning(f"Image chunk {chunk_index} returned invalid JSON: {str(e)}")
- logger.warning(f"Raw response was: '{ai_result[:500]}...'")
-
- # Create fallback JSON with the actual response content (not the error message)
- # Use the original AI response content, not the error message
- fallback_content = ai_result if ai_result and ai_result.strip() else "No content detected"
-
- self.services.utils.debugLogToFile(f"IMAGE FALLBACK CONTENT PREVIEW: '{fallback_content[:200]}...'", "AI_SERVICE")
-
- ai_result = json.dumps({
- "metadata": {"title": f"Image Analysis - Chunk {chunk_index}"},
- "sections": [{
- "id": f"image_section_{chunk_index}",
- "type": "paragraph",
- "data": {"text": fallback_content}
- }]
- })
- self.services.utils.debugLogToFile(f"Created fallback JSON for image chunk {chunk_index} with actual content", "AI_SERVICE")
- elif part.typeGroup in ("container", "binary"):
- # Handle ALL container and binary content generically - let AI process any document type
- self.services.utils.debugLogToFile(f"DEBUG: Chunk {chunk_index}: typeGroup={part.typeGroup}, mimeType={part.mimeType}, data_length={len(part.data) if part.data else 0}", "AI_SERVICE")
-
- # Skip empty container chunks (they're just metadata containers)
- if part.typeGroup == "container" and (not part.data or len(part.data.strip()) == 0):
- self.services.utils.debugLogToFile(f"DEBUG: Skipping empty container - mimeType={part.mimeType}, data_length={len(part.data) if part.data else 0}", "AI_SERVICE")
- logger.info(f"Chunk {chunk_index}: Skipping empty container - mimeType={part.mimeType}, data_length={len(part.data) if part.data else 0}")
- # Skip processing this chunk
- pass
- elif part.mimeType and part.data and len(part.data.strip()) > 0:
- # Process any document container as text content
- request_options = options if options is not None else AiCallOptions()
- request_options.operationType = OperationType.GENERAL
- self.services.utils.debugLogToFile(f"EXTRACTION CONTAINER CHUNK {chunk_index}: Processing {part.mimeType} container as text with generate_json={generate_json}", "AI_SERVICE")
- logger.info(f"Chunk {chunk_index}: Processing {part.mimeType} container as text with generate_json={generate_json}")
-
- # Log extraction prompt and context
- self.services.utils.debugLogToFile(f"EXTRACTION PROMPT: {prompt}", "AI_SERVICE")
- self.services.utils.debugLogToFile(f"EXTRACTION CONTEXT LENGTH: {len(part.data) if part.data else 0} characters", "AI_SERVICE")
-
- request = AiCallRequest(
- prompt=prompt,
- context=part.data,
- options=request_options
- )
- response = await self.aiObjects.call(request)
- ai_result = response.content
-
- # Log extraction response
- self.services.utils.debugLogToFile(f"EXTRACTION RESPONSE LENGTH: {len(ai_result) if ai_result else 0} characters", "AI_SERVICE")
-
- # Save full extraction prompt and response to debug file - only if debug enabled
- debug_enabled = self.services.utils.configGet("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False)
- if debug_enabled:
- try:
- import os
- from datetime import datetime, UTC
- ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
- debug_root = "./test-chat/ai"
- os.makedirs(debug_root, exist_ok=True)
- with open(os.path.join(debug_root, f"{ts}_extraction_container_chunk_{chunk_index}.txt"), "w", encoding="utf-8") as f:
- f.write(f"EXTRACTION PROMPT:\n{prompt}\n\n")
- f.write(f"EXTRACTION CONTEXT:\n{part.data if part.data else 'No context'}\n\n")
- f.write(f"EXTRACTION RESPONSE:\n{ai_result if ai_result else 'No response'}\n")
- except Exception:
- pass
-
- # If generating JSON, validate the response
- if generate_json:
- try:
- import json
- import re
-
- # Clean the response - remove markdown code blocks if present
- cleaned_result = ai_result.strip()
-
- # Remove various markdown patterns
- if cleaned_result.startswith('```json'):
- cleaned_result = re.sub(r'^```json\s*', '', cleaned_result)
- cleaned_result = re.sub(r'\s*```$', '', cleaned_result)
- elif cleaned_result.startswith('```'):
- cleaned_result = re.sub(r'^```\s*', '', cleaned_result)
- cleaned_result = re.sub(r'\s*```$', '', cleaned_result)
-
- # Remove any leading/trailing text that's not JSON
- # Look for the first { and last } to extract JSON
- first_brace = cleaned_result.find('{')
- last_brace = cleaned_result.rfind('}')
-
- if first_brace != -1 and last_brace != -1 and last_brace > first_brace:
- cleaned_result = cleaned_result[first_brace:last_brace + 1]
-
- # Additional cleaning for common AI response issues
- cleaned_result = cleaned_result.strip()
-
- # Validate JSON
- json.loads(cleaned_result)
- ai_result = cleaned_result # Use cleaned version
-
- except json.JSONDecodeError as e:
- logger.warning(f"Container chunk {chunk_index} ({part.mimeType}) returned invalid JSON: {str(e)}")
- logger.warning(f"Raw response was: '{ai_result[:500]}...'")
-
- # Create fallback JSON with the actual response content (not the error message)
- # Use the original AI response content, not the error message
- fallback_content = ai_result if ai_result and ai_result.strip() else "No content detected"
-
- self.services.utils.debugLogToFile(f"FALLBACK CONTENT PREVIEW: '{fallback_content[:200]}...'", "AI_SERVICE")
-
- ai_result = json.dumps({
- "metadata": {"title": f"Document Analysis - Chunk {chunk_index}"},
- "sections": [{
- "id": f"analysis_section_{chunk_index}",
- "type": "paragraph",
- "data": {"text": fallback_content}
- }]
- })
- self.services.utils.debugLogToFile(f"Created fallback JSON for container chunk {chunk_index} with actual content", "AI_SERVICE")
- else:
- # Skip empty or invalid container/binary content - don't create a result
- self.services.utils.debugLogToFile(f"DEBUG: Chunk {chunk_index}: Skipping empty container - mimeType={part.mimeType}, data_length={len(part.data) if part.data else 0}", "AI_SERVICE")
- # Return None to indicate this chunk should be completely skipped
- return None
- else:
- # Ensure options is not None and set correct operation type for text
- request_options = options if options is not None else AiCallOptions()
- # FIXED: Set operation type to general for text processing
- request_options.operationType = OperationType.GENERAL
- self.services.utils.debugLogToFile(f"EXTRACTION CHUNK {chunk_index}: Calling aiObjects.call with operationType={request_options.operationType}, generate_json={generate_json}", "AI_SERVICE")
- logger.info(f"Chunk {chunk_index}: Calling aiObjects.call with operationType={request_options.operationType}, generate_json={generate_json}")
-
- # Log extraction context length
- self.services.utils.debugLogToFile(f"EXTRACTION CONTEXT LENGTH: {len(part.data) if part.data else 0} characters", "AI_SERVICE")
-
- # Debug: Log the actual prompt being sent to AI
- logger.debug(f"AI PROMPT PREVIEW: {prompt[:300]}...")
- logger.debug(f"AI CONTEXT PREVIEW: {part.data[:200] if part.data else 'None'}...")
-
- request = AiCallRequest(
- prompt=prompt,
- context=part.data,
- options=request_options
- )
- response = await self.aiObjects.call(request)
-
- # Debug: Log what AI actually returned
- logger.debug(f"AI RESPONSE PREVIEW: {response.content[:300] if response.content else 'None'}...")
- ai_result = response.content
-
- # Log extraction response length
- self.services.utils.debugLogToFile(f"EXTRACTION RESPONSE LENGTH: {len(ai_result) if ai_result else 0} characters", "AI_SERVICE")
-
- # Save extraction response to debug file (without verbose prompt) - only if debug enabled
- debug_enabled = self.services.utils.configGet("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False)
- if debug_enabled:
- try:
- import os
- from datetime import datetime, UTC
- ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
- debug_root = "./test-chat/ai"
- os.makedirs(debug_root, exist_ok=True)
- with open(os.path.join(debug_root, f"{ts}_extraction_chunk_{chunk_index}.txt"), "w", encoding="utf-8") as f:
- f.write(f"EXTRACTION RESPONSE:\n{ai_result if ai_result else 'No response'}\n")
- except Exception:
- pass
-
- # If generating JSON, validate the response
- if generate_json:
- try:
- import json
- import re
-
- # Clean the response - remove markdown code blocks and extra formatting
- cleaned_result = ai_result.strip()
-
- # Remove any markdown code block markers (```json, ```, etc.)
- cleaned_result = re.sub(r'^```(?:json)?\s*', '', cleaned_result, flags=re.MULTILINE)
- cleaned_result = re.sub(r'\s*```\s*$', '', cleaned_result, flags=re.MULTILINE)
-
- # Remove any remaining ``` markers anywhere in the text
- cleaned_result = re.sub(r'```', '', cleaned_result)
-
- # Try to extract JSON from the response if it's embedded in other text
- json_match = re.search(r'\{.*\}', cleaned_result, re.DOTALL)
- if json_match:
- cleaned_result = json_match.group(0)
-
- # Validate JSON
- json.loads(cleaned_result)
- ai_result = cleaned_result # Use cleaned version
-
- except json.JSONDecodeError as e:
- logger.warning(f"Chunk {chunk_index} returned invalid JSON: {str(e)}")
- # Create fallback JSON
- ai_result = json.dumps({
- "metadata": {"title": "Error Section"},
- "sections": [{
- "id": f"error_section_{chunk_index}",
- "type": "paragraph",
- "data": {"text": f"Error parsing JSON: {str(e)}"}
- }]
- })
-
- processing_time = time.time() - start_time
-
- logger.info(f"Chunk {chunk_index} processed: {len(ai_result)} chars in {processing_time:.2f}s")
-
- return ChunkResult(
- originalChunk=part,
- aiResult=ai_result,
- chunkIndex=chunk_index,
- documentId=document_id,
- processingTime=processing_time,
- metadata={
- "success": True,
- "chunkSize": len(part.data) if part.data else 0,
- "resultSize": len(ai_result),
- "typeGroup": part.typeGroup
- }
- )
-
- except Exception as e:
- processing_time = time.time() - start_time
- logger.warning(f"Error processing chunk {chunk_index}: {str(e)}")
-
- return ChunkResult(
- originalChunk=part,
- aiResult=f"[Error processing chunk: {str(e)}]",
- chunkIndex=chunk_index,
- documentId=document_id,
- processingTime=processing_time,
- metadata={
- "success": False,
- "error": str(e),
- "chunkSize": len(part.data) if part.data else 0,
- "typeGroup": part.typeGroup
- }
- )
-
- # Process chunks with concurrency control
- max_concurrent = 5 # Default concurrency
- if options and hasattr(options, 'maxConcurrentChunks'):
- max_concurrent = options.maxConcurrentChunks
- elif options and hasattr(options, 'maxParallelChunks'):
- max_concurrent = options.maxParallelChunks
-
- logger.info(f"Processing {len(chunks_to_process)} chunks with max concurrency: {max_concurrent}")
- self.services.utils.debugLogToFile(f"DEBUG: Chunks to process: {len(chunks_to_process)}", "AI_SERVICE")
- for i, chunk_info in enumerate(chunks_to_process):
- self.services.utils.debugLogToFile(f"DEBUG: Chunk {i}: typeGroup={chunk_info['part'].typeGroup}, mimeType={chunk_info['part'].mimeType}, data_length={len(chunk_info['part'].data) if chunk_info['part'].data else 0}", "AI_SERVICE")
-
- # Create semaphore for concurrency control
- semaphore = asyncio.Semaphore(max_concurrent)
-
- async def process_with_semaphore(chunk_info):
- async with semaphore:
- return await process_single_chunk(chunk_info)
-
- # Process all chunks in parallel with concurrency control
- tasks = [process_with_semaphore(chunk_info) for chunk_info in chunks_to_process]
- self.services.utils.debugLogToFile(f"DEBUG: Created {len(tasks)} tasks for parallel processing", "AI_SERVICE")
- chunk_results = await asyncio.gather(*tasks, return_exceptions=True)
- self.services.utils.debugLogToFile(f"DEBUG: Got {len(chunk_results)} results from parallel processing", "AI_SERVICE")
-
- # Handle any exceptions in the gather itself
- processed_results = []
- for i, result in enumerate(chunk_results):
- if isinstance(result, Exception):
- # Create error ChunkResult
- chunk_info = chunks_to_process[i]
- processed_results.append(ChunkResult(
- originalChunk=chunk_info['part'],
- aiResult=f"[Error in parallel processing: {str(result)}]",
- chunkIndex=chunk_info['chunk_index'],
- documentId=chunk_info['document_id'],
- processingTime=0.0,
- metadata={"success": False, "error": str(result)}
- ))
- elif result is not None:
- # Only add non-None results (skip empty containers)
- processed_results.append(result)
-
- logger.info(f"Completed processing {len(processed_results)} chunks")
- return processed_results
-
- def _mergeChunkResults(
- self,
- chunkResults: List[ChunkResult],
- options: Optional[AiCallOptions] = None
- ) -> str:
- """Merge chunk results while preserving document structure and chunk order."""
-
- if not chunkResults:
- return ""
-
- # Get merging configuration from options
- chunk_separator = "\n\n---\n\n"
- include_document_headers = True
- include_chunk_metadata = False
-
- if options:
- if hasattr(options, 'chunkSeparator'):
- chunk_separator = options.chunkSeparator
- elif hasattr(options, 'mergeStrategy') and options.mergeStrategy:
- chunk_separator = options.mergeStrategy.get("chunkSeparator", "\n\n---\n\n")
-
- # Check for enhanced options
- if hasattr(options, 'preserveChunkMetadata'):
- include_chunk_metadata = options.preserveChunkMetadata
-
- # Group chunk results by document
- results_by_document = {}
- for chunk_result in chunkResults:
- doc_id = chunk_result.documentId
- if doc_id not in results_by_document:
- results_by_document[doc_id] = []
- results_by_document[doc_id].append(chunk_result)
-
- # Sort chunks within each document by chunk index
- for doc_id in results_by_document:
- results_by_document[doc_id].sort(key=lambda x: x.chunkIndex)
-
- # Merge results for each document
- merged_documents = []
-
- for doc_id, doc_chunks in results_by_document.items():
- # Build document header if enabled
- doc_header = ""
- if include_document_headers:
- doc_header = f"\n\n=== DOCUMENT: {doc_id} ===\n\n"
-
- # Merge chunks for this document
- doc_content = ""
- for i, chunk_result in enumerate(doc_chunks):
- # Add chunk separator (except for first chunk)
- if i > 0:
- doc_content += chunk_separator
-
- # Add chunk content with optional metadata
- chunk_metadata = chunk_result.metadata
- if chunk_metadata.get("success", False):
- chunk_content = chunk_result.aiResult
-
- # Add chunk metadata if enabled
- if include_chunk_metadata:
- chunk_info = f"[Chunk {chunk_result.chunkIndex} - {chunk_metadata.get('typeGroup', 'unknown')} - {chunk_metadata.get('chunkSize', 0)} chars]"
- chunk_content = f"{chunk_info}\n{chunk_content}"
-
- doc_content += chunk_content
- else:
- # Handle error chunks
- error_msg = f"[ERROR in chunk {chunk_result.chunkIndex}: {chunk_metadata.get('error', 'Unknown error')}]"
- doc_content += error_msg
-
- merged_documents.append(doc_header + doc_content)
-
- # Join all documents
- final_result = "\n\n".join(merged_documents)
-
- logger.info(f"Merged {len(chunkResults)} chunks from {len(results_by_document)} documents")
- return final_result.strip()
-
- def _mergeChunkResultsClean(
- self,
- chunkResults: List[ChunkResult],
- options: Optional[AiCallOptions] = None
- ) -> str:
- """Merge chunk results in CLEAN mode - no debug metadata or document headers."""
-
- if not chunkResults:
- return ""
-
- # Get merging configuration from options
- chunk_separator = "\n\n"
- include_document_headers = False # CLEAN MODE: No document headers
- include_chunk_metadata = False # CLEAN MODE: No chunk metadata
-
- if options:
- if hasattr(options, 'chunkSeparator'):
- chunk_separator = options.chunkSeparator
- elif hasattr(options, 'mergeStrategy') and options.mergeStrategy:
- chunk_separator = options.mergeStrategy.get("chunkSeparator", "\n\n")
-
- # Group chunk results by document
- results_by_document = {}
- for chunk_result in chunkResults:
- doc_id = chunk_result.documentId
- if doc_id not in results_by_document:
- results_by_document[doc_id] = []
- results_by_document[doc_id].append(chunk_result)
-
- # Sort chunks within each document by chunk index
- for doc_id in results_by_document:
- results_by_document[doc_id].sort(key=lambda x: x.chunkIndex)
-
- # Merge results for each document in CLEAN mode
- merged_documents = []
-
- for doc_id, doc_chunks in results_by_document.items():
- # CLEAN MODE: No document headers
- doc_header = ""
-
- # Merge chunks for this document
- doc_content = ""
- for i, chunk_result in enumerate(doc_chunks):
- # Add chunk separator (except for first chunk)
- if i > 0:
- doc_content += chunk_separator
-
- # Add chunk content without metadata
- chunk_metadata = chunk_result.metadata
- if chunk_metadata.get("success", False):
- chunk_content = chunk_result.aiResult
-
- # CLEAN MODE: Skip container/binary chunks entirely
- if chunk_content.startswith("[Skipped ") and "content:" in chunk_content:
- continue # Skip container/binary chunks in clean mode
-
- # CLEAN MODE: Skip empty or whitespace-only chunks
- if not chunk_content.strip():
- continue # Skip empty chunks in clean mode
-
- # CLEAN MODE: No chunk metadata
- doc_content += chunk_content
- else:
- # Handle error chunks silently in clean mode
- continue
-
- merged_documents.append(doc_header + doc_content)
-
- # Join all documents
-        final_result = "\n\n".join(merged_documents)
-        return final_result.strip()
-
- def _mergeChunkResultsJson(
- self,
- chunkResults: List[ChunkResult],
- options: Optional[AiCallOptions] = None
- ) -> Dict[str, Any]:
- """Merge chunk results in JSON mode - returns structured JSON document."""
- import json
-
- if not chunkResults:
- return {"metadata": {"title": "Empty Document"}, "sections": []}
-
- # Group chunk results by document
- results_by_document = {}
- for chunk_result in chunkResults:
- doc_id = chunk_result.documentId
- if doc_id not in results_by_document:
- results_by_document[doc_id] = []
- results_by_document[doc_id].append(chunk_result)
-
- # Sort chunks within each document by chunk index
- for doc_id in results_by_document:
- results_by_document[doc_id].sort(key=lambda x: x.chunkIndex)
-
- # Merge JSON results for each document
- all_documents = []
- all_sections = []
- document_titles = []
- combined_metadata = {"title": "Merged Document", "splitStrategy": "by_section"}
-
- for doc_id, doc_chunks in results_by_document.items():
- # Process each chunk's JSON result
- for chunk_result in doc_chunks:
- chunk_metadata = chunk_result.metadata
- if chunk_metadata.get("success", False):
- try:
- # Parse JSON from AI result
- chunk_json = json.loads(chunk_result.aiResult)
-
- # Check if this is a multi-file response (has "documents" key)
- if isinstance(chunk_json, dict) and "documents" in chunk_json:
- # This is a multi-file response - merge all documents
- logger.debug(f"Processing multi-file response from chunk {chunk_result.chunkIndex} with {len(chunk_json['documents'])} documents")
-
- # Add all documents from this chunk
- for doc in chunk_json["documents"]:
- # Add chunk context to document
- doc["metadata"] = doc.get("metadata", {})
- doc["metadata"]["source_chunk"] = chunk_result.chunkIndex
- doc["metadata"]["source_document"] = doc_id
- all_documents.append(doc)
-
- # Update combined metadata
- if "metadata" in chunk_json:
- combined_metadata.update(chunk_json["metadata"])
-
- # Extract sections from single-file response (fallback)
- elif isinstance(chunk_json, dict) and "sections" in chunk_json:
- for section in chunk_json["sections"]:
- # Add document context to section
- section["metadata"] = section.get("metadata", {})
- section["metadata"]["source_document"] = doc_id
- section["metadata"]["chunk_index"] = chunk_result.chunkIndex
- all_sections.append(section)
-
- # Extract document title
- if isinstance(chunk_json, dict) and "metadata" in chunk_json:
- title = chunk_json["metadata"].get("title", "")
- if title and title not in document_titles:
- document_titles.append(title)
-
- except json.JSONDecodeError as e:
- logger.warning(f"Failed to parse JSON from chunk {chunk_result.chunkIndex}: {str(e)}")
- # Create a fallback section for invalid JSON
- fallback_section = {
- "id": f"error_section_{chunk_result.chunkIndex}",
- "title": "Error Section",
- "content_type": "paragraph",
- "elements": [{
- "text": f"Error parsing chunk {chunk_result.chunkIndex}: {str(e)}"
- }],
- "order": chunk_result.chunkIndex,
- "metadata": {
- "source_document": doc_id,
- "chunk_index": chunk_result.chunkIndex,
- "error": str(e)
- }
- }
- all_sections.append(fallback_section)
- else:
- # Handle error chunks
- error_section = {
- "id": f"error_section_{chunk_result.chunkIndex}",
- "title": "Error Section",
- "content_type": "paragraph",
- "elements": [{
- "text": f"Error in chunk {chunk_result.chunkIndex}: {chunk_metadata.get('error', 'Unknown error')}"
- }],
- "order": chunk_result.chunkIndex,
- "metadata": {
- "source_document": doc_id,
- "chunk_index": chunk_result.chunkIndex,
- "error": chunk_metadata.get('error', 'Unknown error')
- }
- }
- all_sections.append(error_section)
-
- # Sort sections by order
- all_sections.sort(key=lambda x: x.get("order", 0))
-
- # If we have merged documents from multi-file responses, return them
- if all_documents:
- logger.info(f"Merged {len(all_documents)} documents from {len(chunkResults)} chunks")
- return {
- "metadata": combined_metadata,
- "documents": all_documents
- }
-
- # Otherwise, create merged document with sections (single-file fallback)
- merged_document = {
- "metadata": {
- "title": document_titles[0] if document_titles else "Merged Document",
- "source_documents": list(results_by_document.keys()),
- "extraction_method": "ai_json_extraction",
- "version": "1.0"
- },
- "sections": all_sections,
- "summary": f"Merged document from {len(results_by_document)} source documents",
- "tags": ["merged", "ai_generated"]
- }
-
- logger.info(f"Merged {len(chunkResults)} chunks from {len(results_by_document)} documents (JSON mode)")
- return merged_document
-
- async def _compressContent(self, content: str, targetSize: int, contentType: str) -> str:
- if len(content.encode("utf-8")) <= targetSize:
- return content
-
- try:
-            compressionPrompt = f"""
-            Compress the following {contentType} to at most {targetSize} characters,
-            but keep all important information:
-
-            {content}
-
-            Return only the compressed content, without additional explanations.
-            """
-
- # Service must not call connectors directly; use simple truncation fallback here
- data = content.encode("utf-8")
- return data[:targetSize].decode("utf-8", errors="ignore") + "... [truncated]"
- except Exception as e:
- logger.warning(f"AI compression failed, using truncation: {str(e)}")
- return content[:targetSize] + "... [truncated]"
-
- # ===== DYNAMIC GENERIC AI CALLS IMPLEMENTATION =====
+ await self._ensureAiObjectsInitialized()
+ return await self.webResearchService.webResearch(request)
+
+    # Master AI call: processes a user prompt with any number of input
+    # documents and can deliver one or many output documents, with no size
+    # limitations.
async def callAi(
self,
prompt: str,
@@ -1362,1058 +179,13 @@ class AiService:
Raises:
Exception: If all available models fail
"""
- # Ensure aiObjects is initialized
await self._ensureAiObjectsInitialized()
- if options is None:
- options = AiCallOptions()
+ # Get document processor and generator
+ documentProcessor = self.documentProcessor
+ documentGenerator = self.documentGenerator
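+        # NOTE: callAi is now a thin facade. Routing between planning and
+        # text calls, placeholder expansion, and debug logging live in
+        # SubCoreAi.callAi; the document processor and generator are injected
+        # explicitly rather than resolved inside the submodule.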
- # Normalize placeholders from List[PromptPlaceholder]
- placeholders_dict: Dict[str, str] = {}
- placeholders_meta: Dict[str, bool] = {}
- if placeholders:
- placeholders_dict = {p.label: p.content for p in placeholders}
- placeholders_meta = {p.label: bool(getattr(p, 'summaryAllowed', False)) for p in placeholders}
-
- # Auto-determine call type based on documents and operation type
- call_type = self._determineCallType(documents, options.operationType)
- options.callType = call_type
-
- try:
- # Build the full prompt that will be sent to AI
- if placeholders:
- full_prompt = prompt
- for p in placeholders:
- placeholder = f"{{{{KEY:{p.label}}}}}"
- full_prompt = full_prompt.replace(placeholder, p.content)
- else:
- full_prompt = prompt
-
- self._writeAiResponseDebug(
- label='ai_prompt_debug',
- content=full_prompt,
- partIndex=1,
- modelName=None,
- continuation=False
- )
- except Exception:
- pass
-
- # Handle document generation with specific output format
- if outputFormat:
- result = await self._callAiWithDocumentGeneration(prompt, documents, options, outputFormat, title)
- # Log AI response for debugging
- try:
- if isinstance(result, dict) and 'content' in result:
- self._writeAiResponseDebug(
- label='ai_document_generation',
- content=result['content'],
- partIndex=1,
- modelName=None, # Document generation doesn't return model info
- continuation=False
- )
- except Exception:
- pass
- return result
-
- if call_type == "planning":
- result = await self._callAiPlanning(prompt, placeholders_dict, placeholders_meta, options)
- # Log AI response for debugging
- try:
- self._writeAiResponseDebug(
- label='ai_planning',
- content=result or "",
- partIndex=1,
- modelName=None, # Planning doesn't return model info
- continuation=False
- )
- except Exception:
- pass
- return result
- else:
- # Set processDocumentsIndividually from the legacy parameter if not set in options
- if options.processDocumentsIndividually is None and documents:
- options.processDocumentsIndividually = False # Default to batch processing
-
- # For text calls, we need to build the full prompt with placeholders here
- # since _callAiText doesn't handle placeholders directly
- if placeholders_dict:
- full_prompt = self._buildPromptWithPlaceholders(prompt, placeholders_dict)
- else:
- full_prompt = prompt
-
- result = await self._callAiText(full_prompt, documents, options)
- # Log AI response for debugging (additional logging for text calls)
- try:
- self._writeAiResponseDebug(
- label='ai_text_main',
- content=result or "",
- partIndex=1,
- modelName=None, # Text calls already log internally
- continuation=False
- )
- except Exception:
- pass
- return result
-
- def _determineCallType(self, documents: Optional[List[ChatDocument]], operation_type: str) -> str:
- """
- Determine call type based on documents and operation type.
-
- Criteria: no documents AND operationType is "generate_plan" -> planning
- All other cases -> text
- """
- has_documents = documents is not None and len(documents) > 0
- is_planning_operation = operation_type == OperationType.GENERATE_PLAN
-
- if not has_documents and is_planning_operation:
- return "planning"
- else:
- return "text"
-
- async def _callAiPlanning(
- self,
- prompt: str,
- placeholders: Optional[Dict[str, str]],
- placeholdersMeta: Optional[Dict[str, bool]],
- options: AiCallOptions
- ) -> str:
- """
- Handle planning calls with placeholder system and selective summarization.
- """
- # Ensure aiObjects is initialized
- await self._ensureAiObjectsInitialized()
-
- # Build full prompt with placeholders; if too large, summarize summaryAllowed placeholders proportionally
- effective_placeholders = placeholders or {}
- full_prompt = self._buildPromptWithPlaceholders(prompt, effective_placeholders)
-
- if options.compressPrompt and placeholdersMeta:
- # Determine model capacity
- try:
- caps = self._getModelCapabilitiesForContent(full_prompt, None, options)
- max_bytes = caps.get("maxContextBytes", len(full_prompt.encode("utf-8")))
- except Exception:
- max_bytes = len(full_prompt.encode("utf-8"))
-
- current_bytes = len(full_prompt.encode("utf-8"))
- if current_bytes > max_bytes:
- # Compute total bytes contributed by allowed placeholders (approximate by content length)
- allowed_labels = [l for l, allow in placeholdersMeta.items() if allow]
- allowed_sizes = {l: len((effective_placeholders.get(l) or "").encode("utf-8")) for l in allowed_labels}
- total_allowed = sum(allowed_sizes.values())
-
- overage = current_bytes - max_bytes
- if total_allowed > 0 and overage > 0:
- # Target total for allowed after reduction
- target_allowed = max(total_allowed - overage, 0)
- # Global ratio to apply across allowed placeholders
- ratio = target_allowed / total_allowed if total_allowed > 0 else 1.0
- ratio = max(0.0, min(1.0, ratio))
-
- reduced: Dict[str, str] = {}
- for label, content in effective_placeholders.items():
- if label in allowed_labels and isinstance(content, str) and len(content) > 0:
- old_len = len(content)
- # Reduce by proportional ratio on characters (fallback if empty)
- reduction_factor = ratio if old_len > 0 else 1.0
- reduced[label] = self._reduceText(content, reduction_factor)
- else:
- reduced[label] = content
-
- effective_placeholders = reduced
- full_prompt = self._buildPromptWithPlaceholders(prompt, effective_placeholders)
-
- # If still slightly over, perform a second-pass fine adjustment with updated ratio
- current_bytes = len(full_prompt.encode("utf-8"))
- if current_bytes > max_bytes and total_allowed > 0:
- overage2 = current_bytes - max_bytes
- # Recompute allowed sizes after first reduction
- allowed_sizes2 = {l: len((effective_placeholders.get(l) or "").encode("utf-8")) for l in allowed_labels}
- total_allowed2 = sum(allowed_sizes2.values())
- if total_allowed2 > 0 and overage2 > 0:
- target_allowed2 = max(total_allowed2 - overage2, 0)
- ratio2 = target_allowed2 / total_allowed2
- ratio2 = max(0.0, min(1.0, ratio2))
- reduced2: Dict[str, str] = {}
- for label, content in effective_placeholders.items():
- if label in allowed_labels and isinstance(content, str) and len(content) > 0:
- old_len = len(content)
- reduction_factor = ratio2 if old_len > 0 else 1.0
- reduced2[label] = self._reduceText(content, reduction_factor)
- else:
- reduced2[label] = content
- effective_placeholders = reduced2
- full_prompt = self._buildPromptWithPlaceholders(prompt, effective_placeholders)
-
-
- # Make AI call using AiObjects (let it handle model selection)
- request = AiCallRequest(
- prompt=full_prompt,
- context="", # Context is already included in the prompt
- options=options
+ return await self.coreAi.callAi(
+ prompt, documents, placeholders, options, outputFormat, title,
+ documentProcessor, documentGenerator
)
- response = await self.aiObjects.call(request)
- try:
- logger.debug(f"AI model selected (planning): {getattr(response, 'modelName', 'unknown')}")
- except Exception:
- pass
- return response.content
-
- async def _callAiText(
- self,
- prompt: str,
- documents: Optional[List[ChatDocument]],
- options: AiCallOptions
- ) -> str:
- """
- Handle text calls with document processing through ExtractionService.
- UNIFIED PROCESSING: Always use per-chunk processing for consistency.
- """
- # Ensure aiObjects is initialized
- await self._ensureAiObjectsInitialized()
-
- # UNIFIED PROCESSING: Always use per-chunk processing for consistency
- # This ensures MIME-type checking, chunk mapping, and parallel processing
- return await self._processDocumentsPerChunk(documents, prompt, options)
-
- async def _callAiDirect(
- self,
- prompt: str,
- documents: Optional[List[ChatDocument]],
- options: AiCallOptions
- ) -> Dict[str, Any]:
- """
- Call AI directly with prompt and documents for JSON output.
- Used for multi-file generation - uses the existing generation pipeline.
- """
- # Use the existing generation pipeline that already works
- # This ensures proper document processing and content extraction
- logger.info(f"Using existing generation pipeline for {len(documents) if documents else 0} documents")
-
- # Process documents with JSON merging using the existing pipeline
- result = await self._processDocumentsPerChunkJson(documents, prompt, options)
-
- # Convert single-file result to multi-file format if needed
- if "sections" in result and "documents" not in result:
- logger.info("Converting single-file result to multi-file format")
- # This is a single-file result, convert it to multi-file format
- return {
- "metadata": result.get("metadata", {"title": "Converted Document"}),
- "documents": [{
- "id": "doc_1",
- "title": result.get("metadata", {}).get("title", "Document"),
- "filename": "document.txt",
- "sections": result.get("sections", [])
- }]
- }
-
- return result
-
- async def _processDocumentsPerChunkJsonWithPrompt(
- self,
- documents: List[ChatDocument],
- custom_prompt: str,
- options: Optional[AiCallOptions] = None
- ) -> Dict[str, Any]:
- """
- Process documents with per-chunk AI calls and merge results in JSON mode.
- Uses a custom prompt instead of the default extraction prompt.
- """
- if not documents:
- return {"metadata": {"title": "Empty Document"}, "sections": []}
-
- # Get model capabilities for size calculation
- model_capabilities = self._getModelCapabilitiesForContent(custom_prompt, documents, options)
-
- # Build extraction options for chunking with intelligent merging
- extractionOptions: Dict[str, Any] = {
- "prompt": custom_prompt, # Use the custom prompt instead of default
- "operationType": options.operationType if options else "general",
- "processDocumentsIndividually": True, # Process each document separately
- "maxSize": model_capabilities["maxContextBytes"],
- "chunkAllowed": True,
- "textChunkSize": model_capabilities["textChunkSize"],
- "imageChunkSize": model_capabilities["imageChunkSize"],
- "imageMaxPixels": 1024 * 1024,
- "imageQuality": 85,
- "mergeStrategy": {
- "useIntelligentMerging": True, # Enable intelligent token-aware merging
- "modelCapabilities": model_capabilities,
- "prompt": custom_prompt, # Use the custom prompt
- "groupBy": "typeGroup",
- "orderBy": "id",
- "mergeType": "concatenate"
- },
- }
-
- logger.debug(f"Per-chunk extraction options (JSON mode): prompt length={len(extractionOptions.get('prompt', ''))} chars, operationType={extractionOptions.get('operationType')}")
-
- try:
- # Extract content with chunking
- extractionResult = self.extractionService.extractContent(documents, extractionOptions)
-
- if not isinstance(extractionResult, list):
- return {"metadata": {"title": "Error Document"}, "sections": []}
-
- # Process chunks with proper mapping
- logger.info(f"Processing {len(extractionResult)} chunks with custom prompt")
- logger.debug(f"Custom prompt preview: {custom_prompt[:200]}...")
-
- # Debug: Show what content is being processed (before filtering)
- for i, ec in enumerate(extractionResult):
- if hasattr(ec, 'parts'):
- for j, part in enumerate(ec.parts):
- if not (hasattr(part, 'data') and part.data):
- # Check if this is an empty container chunk (which is expected)
- part_type = getattr(part, 'typeGroup', None)
- part_mime = getattr(part, 'mimeType', '')
-
- is_empty_container = (
- part_type == "container" and
- part_mime and
- 'document' in part_mime.lower()
- )
-
- if not is_empty_container:
- logger.warning(f"Part {j} has no data - typeGroup='{part_type}', mimeType='{part_mime}'")
-
- chunkResults = await self._processChunksWithMapping(extractionResult, custom_prompt, options, generate_json=True)
-
- # Debug: Show what chunks were actually processed (after filtering)
- logger.info(f"After filtering: {len(chunkResults)} chunks will be processed")
-
- # Merge with JSON mode
- mergedJsonDocument = self._mergeChunkResultsJson(chunkResults, options)
-
- # Debug: Show what the AI actually returned
- logger.info(f"AI returned document with keys: {list(mergedJsonDocument.keys())}")
- if 'documents' in mergedJsonDocument:
- logger.info(f"Number of documents: {len(mergedJsonDocument['documents'])}")
- elif 'sections' in mergedJsonDocument:
- logger.info(f"Number of sections: {len(mergedJsonDocument['sections'])}")
-
- return mergedJsonDocument
-
- except Exception as e:
- logger.error(f"Error in per-chunk JSON processing: {str(e)}")
- return {"metadata": {"title": "Error Document"}, "sections": []}
-
- async def _callAiJson(
- self,
- prompt: str,
- documents: Optional[List[ChatDocument]],
- options: AiCallOptions
- ) -> Dict[str, Any]:
- """
- Handle AI calls with document processing for JSON output.
- Returns structured JSON document instead of text.
- """
- # Ensure aiObjects is initialized
- await self._ensureAiObjectsInitialized()
-
- # Process documents with JSON merging
- return await self._processDocumentsPerChunkJson(documents, prompt, options)
-
-
-
- def _getModelCapabilitiesForContent(self, prompt: str, documents: Optional[List[ChatDocument]], options: AiCallOptions) -> Dict[str, int]:
- """
- Get model capabilities for content processing, including appropriate size limits for chunking.
- """
- # Estimate total content size
- prompt_size = len(prompt.encode('utf-8'))
- document_size = 0
- if documents:
- # Rough estimate of document content size
- for doc in documents:
- document_size += doc.fileSize or 0
-
- total_size = prompt_size + document_size
-
- # Use AiObjects to select the best model for this content size
- # We'll simulate the model selection by checking available models
- from modules.interfaces.interfaceAiObjects import aiModels
-
- # Find the best model for this content size and operation
- best_model = None
- best_context_length = 0
-
- for model_name, model_info in aiModels.items():
- context_length = model_info.get("contextLength", 0)
-
- # Skip models with no context length or too small for content
- if context_length == 0:
- continue
-
- # Check if model supports the operation type
- capabilities = model_info.get("capabilities", [])
- if options.operationType == OperationType.IMAGE_ANALYSIS and "image_analysis" not in capabilities:
- continue
- elif options.operationType == OperationType.IMAGE_GENERATION and "image_generation" not in capabilities:
- continue
- elif options.operationType == OperationType.WEB_RESEARCH and "web_search" not in capabilities:
- continue
- elif "text_generation" not in capabilities:
- continue
-
- # Prefer models that can handle the content without chunking, but allow chunking if needed
- if context_length >= total_size * 0.8: # 80% of content size
- if context_length > best_context_length:
- best_model = model_info
- best_context_length = context_length
- elif best_model is None: # Fallback to largest available model
- if context_length > best_context_length:
- best_model = model_info
- best_context_length = context_length
-
- # Fallback to a reasonable default if no model found
- if best_model is None:
- best_model = {
- "contextLength": 128000, # GPT-4o default
- "llmName": "gpt-4o"
- }
-
- # Calculate appropriate sizes
- # Convert tokens to bytes (rough estimate: 1 token ≈ 4 characters)
- context_length_bytes = int(best_model["contextLength"] * 4)
- max_context_bytes = int(context_length_bytes * 0.9) # 90% of context length
- text_chunk_size = int(max_context_bytes * 0.7) # 70% of max context for text chunks
- image_chunk_size = int(max_context_bytes * 0.8) # 80% of max context for image chunks
-
- logger.debug(f"Selected model: {best_model.get('llmName', 'unknown')} with context length: {best_model['contextLength']}")
- logger.debug(f"Content size: {total_size} bytes, Max context: {max_context_bytes} bytes")
- logger.debug(f"Text chunk size: {text_chunk_size} bytes, Image chunk size: {image_chunk_size} bytes")
-
- return {
- "maxContextBytes": max_context_bytes,
- "textChunkSize": text_chunk_size,
- "imageChunkSize": image_chunk_size
- }
-
- def _getModelsForOperation(self, operation_type: str, options: AiCallOptions) -> List[ModelCapabilities]:
- """
- Get models capable of handling the specific operation with capability filtering.
- """
- # Use the actual AI objects model selection instead of hardcoded default
- if hasattr(self, 'aiObjects') and self.aiObjects:
- # Let AiObjects handle the model selection
- return []
- else:
- # Fallback to default model if AiObjects not available
- default_model = ModelCapabilities(
- name="default",
- maxTokens=4000,
- capabilities=["text", "reasoning"] if operation_type == "planning" else ["text"],
- costPerToken=0.001,
- processingTime=1.0,
- isAvailable=True
- )
- return [default_model]
-
- def _buildPromptWithPlaceholders(self, prompt: str, placeholders: Optional[Dict[str, str]]) -> str:
- """
- Build full prompt by replacing placeholders with their content.
- Uses the new {{KEY:placeholder}} format.
- """
- if not placeholders:
- return prompt
-
- full_prompt = prompt
- for placeholder, content in placeholders.items():
- # Replace both old format {{placeholder}} and new format {{KEY:placeholder}}
- full_prompt = full_prompt.replace(f"{{{{{placeholder}}}}}", content)
- full_prompt = full_prompt.replace(f"{{{{KEY:{placeholder}}}}}", content)
-
- return full_prompt
-
- def _writeTraceLog(self, contextText: str, data: Any) -> None:
- """Write raw data to the central trace log file without truncation."""
- try:
- import os
- import json
- from datetime import datetime, UTC
- # Only write if logger is in debug mode
- if logger.level > logging.DEBUG:
- return
- # Get log directory from configuration via service center if possible
- logDir = None
- try:
- logDir = self.services.utils.configGet("APP_LOGGING_LOG_DIR", "./")
- except Exception:
- pass
- if not logDir:
- logDir = "./"
- if not os.path.isabs(logDir):
- # Make it relative to gateway directory
- gatewayDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
- logDir = os.path.join(gatewayDir, logDir)
- os.makedirs(logDir, exist_ok=True)
- traceFile = os.path.join(logDir, "log_trace.log")
- timestamp = datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
- traceEntry = f"[{timestamp}] {contextText}\n" + ("=" * 80) + "\n"
- if data is None:
- traceEntry += "No data provided\n"
- else:
- # Prefer exact text; if dict/list, pretty print JSON
- try:
- if isinstance(data, (dict, list)):
- traceEntry += f"JSON Data:\n{json.dumps(data, indent=2, ensure_ascii=False)}\n"
- else:
- text = str(data)
- traceEntry += f"Text Data:\n{text}\n"
- except Exception:
- traceEntry += f"Data (fallback): {str(data)}\n"
- traceEntry += ("=" * 80) + "\n\n"
- with open(traceFile, "a", encoding="utf-8") as f:
- f.write(traceEntry)
- except Exception:
- # Swallow to avoid recursive logging issues
- pass
-
- def _writeAiResponseDebug(self, label: str, content: str, partIndex: int = 1, modelName: str = None, continuation: bool = None) -> None:
- """Persist raw AI response parts for debugging under test-chat/ai - only if debug enabled."""
- try:
- # Check if debug logging is enabled
- debug_enabled = self.services.utils.configGet("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False)
- if not debug_enabled:
- return
-
- import os
- from datetime import datetime, UTC
- # Base dir: gateway/test-chat/ai (go up 4 levels from this file)
- # .../gateway/modules/services/serviceAi/mainServiceAi.py -> up to gateway root
- gatewayDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
- outDir = os.path.join(gatewayDir, 'test-chat', 'ai')
- os.makedirs(outDir, exist_ok=True)
- ts = datetime.now(UTC).strftime('%Y%m%d-%H%M%S-%f')[:-3]
- suffix = []
- if partIndex is not None:
- suffix.append(f"part{partIndex}")
- if continuation is not None:
- suffix.append(f"cont_{str(continuation).lower()}")
- if modelName:
- safeModel = ''.join(c if c.isalnum() or c in ('-', '_') else '-' for c in modelName)
- suffix.append(safeModel)
- suffixStr = ('_' + '_'.join(suffix)) if suffix else ''
- fname = f"{ts}_{label}{suffixStr}.txt"
- fpath = os.path.join(outDir, fname)
- with open(fpath, 'w', encoding='utf-8') as f:
- f.write(content or '')
- except Exception:
- # Do not raise; best-effort debug write
- pass
-
- def _exceedsTokenLimit(self, text: str, model: ModelCapabilities, safety_margin: float) -> bool:
- """
- Check if text exceeds model token limit with safety margin.
- """
- # Simple character-based estimation (4 chars per token)
- estimated_tokens = len(text) // 4
- max_tokens = int(model.maxTokens * (1 - safety_margin))
- return estimated_tokens > max_tokens
-
- def _reducePlanningPrompt(
- self,
- full_prompt: str,
- placeholders: Optional[Dict[str, str]],
- model: ModelCapabilities,
- options: AiCallOptions
- ) -> str:
- """
- Reduce planning prompt size by summarizing placeholders while preserving prompt structure.
- """
- if not placeholders:
- return self._reduceText(full_prompt, 0.7)
-
- # Reduce placeholders while preserving prompt
- reduced_placeholders = {}
- for placeholder, content in placeholders.items():
- if len(content) > 1000: # Only reduce long content
- reduction_factor = 0.7
- reduced_content = self._reduceText(content, reduction_factor)
- reduced_placeholders[placeholder] = reduced_content
- else:
- reduced_placeholders[placeholder] = content
-
- return self._buildPromptWithPlaceholders(full_prompt, reduced_placeholders)
-
- def _reduceTextPrompt(
- self,
- prompt: str,
- context: str,
- model: ModelCapabilities,
- options: AiCallOptions
- ) -> str:
- """
- Reduce text prompt size using typeGroup-aware chunking and merging.
- """
- max_size = int(model.maxTokens * (1 - options.safetyMargin))
-
- if options.compressPrompt:
- # Reduce both prompt and context
- target_size = max_size
- current_size = len(prompt) + len(context)
- reduction_factor = (target_size * 0.7) / current_size
-
- if reduction_factor < 1.0:
- prompt = self._reduceText(prompt, reduction_factor)
- context = self._reduceText(context, reduction_factor)
- else:
- # Only reduce context, preserve prompt integrity
- max_context_size = max_size - len(prompt)
- if len(context) > max_context_size:
- reduction_factor = max_context_size / len(context)
- context = self._reduceText(context, reduction_factor)
-
- return prompt + "\n\n" + context if context else prompt
-
- def _extractTextFromContentParts(self, extracted_content) -> str:
- """
- Extract text content from ExtractionService ContentPart objects.
- """
- if not extracted_content or not hasattr(extracted_content, 'parts'):
- return ""
-
- text_parts = []
- for part in extracted_content.parts:
- if hasattr(part, 'typeGroup') and part.typeGroup in ['text', 'table', 'structure']:
- if hasattr(part, 'data') and part.data:
- text_parts.append(part.data)
-
- return "\n\n".join(text_parts)
-
- def _reduceText(self, text: str, reduction_factor: float) -> str:
- """
- Reduce text size by the specified factor.
- """
- if reduction_factor >= 1.0:
- return text
-
- target_length = int(len(text) * reduction_factor)
- return text[:target_length] + "... [reduced]"
-
- async def _analyzePromptIntent(self, prompt: str, ai_service=None) -> Dict[str, Any]:
- """Use AI to analyze user prompt and determine processing requirements."""
- if not ai_service:
- return {"is_multi_file": False, "strategy": "single", "criteria": None}
-
- try:
- analysis_prompt = f"""
-Analyze this user request and determine if it requires multiple file output or single file output.
-
-User request: "{prompt}"
-
-Respond with JSON only in this exact format:
-{{
- "is_multi_file": true/false,
- "strategy": "single|per_entity|by_section|by_criteria|custom",
- "criteria": "description of how to split content",
- "file_naming_pattern": "suggested pattern for filenames",
- "reasoning": "brief explanation of the analysis"
-}}
-
-Consider:
-- Does the user want separate files for different entities (customers, products, etc.)?
-- Does the user want to split content into multiple documents?
-- What would be the most logical way to organize the content?
-- What language is the request in? (analyze in the original language)
-
-Return only the JSON response.
-"""
-
- from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType
- request_options = AiCallOptions()
- request_options.operationType = OperationType.GENERAL
-
- request = AiCallRequest(prompt=analysis_prompt, context="", options=request_options)
- response = await ai_service.aiObjects.call(request)
-
- if response and response.content:
- import json
- import re
-
- # Extract JSON from response
- result = response.content.strip()
- json_match = re.search(r'\{.*\}', result, re.DOTALL)
- if json_match:
- result = json_match.group(0)
-
- analysis = json.loads(result)
- return analysis
- else:
- return {"is_multi_file": False, "strategy": "single", "criteria": None}
-
- except Exception as e:
- logger.warning(f"AI prompt analysis failed: {str(e)}, defaulting to single file")
- return {"is_multi_file": False, "strategy": "single", "criteria": None}
-
- def _validateResponseStructure(self, response: Dict[str, Any], prompt_analysis: Dict[str, Any]) -> bool:
- """Validate that AI response matches the expected structure."""
- try:
- if not isinstance(response, dict):
- logger.warning(f"Response validation failed: Response is not a dict, got {type(response)}")
- return False
-
- # Check for multi-file structure
- if prompt_analysis.get("is_multi_file", False):
- has_documents = "documents" in response
- is_documents_list = isinstance(response.get("documents"), list)
- logger.info(f"Multi-file validation: has_documents={has_documents}, is_documents_list={is_documents_list}")
- if has_documents and is_documents_list:
- logger.info(f"Multi-file validation passed: {len(response['documents'])} documents found")
- else:
- logger.warning(f"Multi-file validation failed: documents key present={has_documents}, documents is list={is_documents_list}")
- logger.warning(f"Available keys: {list(response.keys())}")
- return has_documents and is_documents_list
- else:
- has_sections = "sections" in response
- is_sections_list = isinstance(response.get("sections"), list)
- logger.info(f"Single-file validation: has_sections={has_sections}, is_sections_list={is_sections_list}")
- return has_sections and is_sections_list
- except Exception as e:
- logger.warning(f"Response validation failed with exception: {str(e)}")
- return False
-
- async def _callAiWithDocumentGeneration(
- self,
- prompt: str,
- documents: Optional[List[ChatDocument]],
- options: AiCallOptions,
- outputFormat: str,
- title: Optional[str]
- ) -> Dict[str, Any]:
- """
- Handle AI calls with document generation in specific output format.
- Now supports both single-file and multi-file generation.
-
- Args:
- prompt: The main prompt for the AI call
- documents: Optional list of documents to process
- options: AI call configuration options
- outputFormat: Target output format (html, pdf, docx, txt, md, json, csv, xlsx)
- title: Optional title for generated documents
-
- Returns:
- Dict with generated documents and metadata
- """
- try:
- # Use AI to analyze prompt intent
- prompt_analysis = await self._analyzePromptIntent(prompt, self)
- logger.info(f"Prompt analysis result: {prompt_analysis}")
-
- if prompt_analysis.get("is_multi_file", False):
- return await self._callAiWithMultiFileGeneration(
- prompt, documents, options, outputFormat, title, prompt_analysis
- )
- else:
- return await self._callAiWithSingleFileGeneration(
- prompt, documents, options, outputFormat, title
- )
-
- except Exception as e:
- logger.error(f"Error in document generation: {str(e)}")
- return {
- "success": False,
- "error": str(e),
- "content": "",
- "rendered_content": "",
- "mime_type": "text/plain",
- "filename": f"error_{outputFormat}",
- "format": outputFormat,
- "title": title or "Error",
- "documents": []
- }
-
- async def _callAiWithSingleFileGeneration(
- self,
- prompt: str,
- documents: Optional[List[ChatDocument]],
- options: AiCallOptions,
- outputFormat: str,
- title: Optional[str]
- ) -> Dict[str, Any]:
- """Handle single-file document generation (existing functionality)."""
- try:
- # Get format-specific extraction prompt from generation service
- from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
- generation_service = GenerationService(self.services)
-
- # Use default title if not provided
- if not title:
- title = "AI Generated Document"
-
- # Get format-specific extraction prompt
- extractionPrompt = await generation_service.getExtractionPrompt(
- outputFormat=outputFormat,
- userPrompt=prompt,
- title=title,
- aiService=self
- )
-
- # Process documents with format-specific prompt using JSON mode
- # This ensures structured JSON output instead of text
- aiResponseJson = await self._callAiJson(extractionPrompt, documents, options)
-
- # Validate JSON response
- if not isinstance(aiResponseJson, dict) or "sections" not in aiResponseJson:
- raise Exception("AI response is not valid JSON document structure")
-
- # Generate filename from document metadata
- parsedFilename = None
- try:
- if aiResponseJson.get("metadata", {}).get("title"):
- title = aiResponseJson["metadata"]["title"]
- # Clean title for filename
- import re
- parsed = re.sub(r"[^a-zA-Z0-9._-]", "-", title)
- parsed = re.sub(r"-+", "-", parsed).strip('-')
- if parsed:
- parsedFilename = f"{parsed}.{outputFormat}"
- except Exception:
- parsedFilename = None
-
- # Render the JSON content to the specified format
- renderedContent, mimeType = await generation_service.renderReport(
- extractedContent=aiResponseJson,
- outputFormat=outputFormat,
- title=title,
- userPrompt=prompt,
- aiService=self
- )
-
- # Generate meaningful filename (use AI-provided if valid, else fallback)
- from datetime import datetime, UTC
- timestamp = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
- if parsedFilename and parsedFilename.lower().endswith(f".{outputFormat.lower()}"):
- filename = parsedFilename
- else:
- safeTitle = ''.join(c if c.isalnum() else '-' for c in (title or 'document')).strip('-')
- filename = f"{safeTitle or 'document'}-{timestamp}.{outputFormat}"
-
- # Return structured result with document information
- return {
- "success": True,
- "content": aiResponseJson, # Structured JSON document
- "rendered_content": renderedContent, # Formatted content
- "mime_type": mimeType,
- "filename": filename,
- "format": outputFormat,
- "title": title,
- "documents": [{
- "documentName": filename,
- "documentData": renderedContent,
- "mimeType": mimeType
- }],
- "is_multi_file": False
- }
-
- except Exception as e:
- logger.error(f"Error in single-file document generation: {str(e)}")
- raise
-
- async def _callAiWithMultiFileGeneration(
- self,
- prompt: str,
- documents: Optional[List[ChatDocument]],
- options: AiCallOptions,
- outputFormat: str,
- title: Optional[str],
- prompt_analysis: Dict[str, Any]
- ) -> Dict[str, Any]:
- """Handle multi-file document generation using AI analysis."""
- try:
- # Get multi-file extraction prompt based on AI analysis
- from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
- generation_service = GenerationService(self.services)
-
- # Use default title if not provided
- if not title:
- title = "AI Generated Documents"
-
- # Get adaptive extraction prompt
- extraction_prompt = await generation_service.getAdaptiveExtractionPrompt(
- outputFormat=outputFormat,
- userPrompt=prompt,
- title=title,
- promptAnalysis=prompt_analysis,
- aiService=self
- )
-
- logger.info(f"Adaptive extraction prompt length: {len(extraction_prompt)} characters")
- logger.debug(f"Adaptive extraction prompt preview: {extraction_prompt[:500]}...")
-
- # Process with adaptive JSON schema - use the existing pipeline but with adaptive prompt
- logger.info(f"Using adaptive prompt with existing pipeline: {len(extraction_prompt)} chars")
- logger.debug(f"Processing documents: {len(documents) if documents else 0} documents")
-
- # Use the existing pipeline but replace the prompt with our adaptive one
- # This ensures proper document processing while using the multi-file prompt
- ai_response = await self._processDocumentsPerChunkJsonWithPrompt(documents, extraction_prompt, options)
-
- logger.info(f"AI response type: {type(ai_response)}")
- logger.info(f"AI response keys: {list(ai_response.keys()) if isinstance(ai_response, dict) else 'Not a dict'}")
- logger.debug(f"AI response preview: {str(ai_response)[:500]}...")
-
- # Validate response structure
- if not self._validateResponseStructure(ai_response, prompt_analysis):
- # Fallback to single-file if multi-file fails
- logger.warning(f"Multi-file processing failed - Invalid response structure. Expected multi-file but got: {list(ai_response.keys()) if isinstance(ai_response, dict) else type(ai_response)}")
- logger.warning(f"Prompt analysis: {prompt_analysis}")
- logger.warning("Falling back to single-file generation")
- return await self._callAiWithSingleFileGeneration(
- prompt, documents, options, outputFormat, title
- )
-
- # Process multiple documents
- generated_documents = []
- for i, doc_data in enumerate(ai_response.get("documents", [])):
- # Transform AI-generated sections to renderer-compatible format
- transformed_sections = []
- for section in doc_data.get("sections", []):
- # Convert AI format to renderer format
- transformed_section = {
- "id": section.get("id", f"section_{len(transformed_sections) + 1}"),
- "type": section.get("content_type", "paragraph"),
- "data": {
- "text": "",
- "elements": section.get("elements", [])
- },
- "order": section.get("order", len(transformed_sections) + 1)
- }
-
- # Extract text from elements for simple text-based sections
- if section.get("content_type") in ["paragraph", "heading"]:
- text_parts = []
- for element in section.get("elements", []):
- if "text" in element:
- text_parts.append(element["text"])
- transformed_section["data"]["text"] = "\n".join(text_parts)
-
- transformed_sections.append(transformed_section)
-
- # Create complete document structure for rendering
- complete_document = {
- "metadata": {
- "title": doc_data["title"],
- "source_document": "multi_file_generation",
- "document_id": doc_data.get("id", f"doc_{i+1}"),
- "filename": doc_data.get("filename", f"document_{i+1}"),
- "split_strategy": prompt_analysis.get("strategy", "custom")
- },
- "sections": transformed_sections,
- "summary": f"Generated document: {doc_data['title']}",
- "tags": ["multi_file", "ai_generated"]
- }
-
- rendered_content, mime_type = await generation_service.renderReport(
- extractedContent=complete_document,
- outputFormat=outputFormat,
- title=doc_data["title"],
- userPrompt=prompt,
- aiService=self
- )
-
- # Generate proper filename with correct extension
- base_filename = doc_data.get("filename", f"document_{i+1}")
- # Remove any existing extension and add the correct one
- if '.' in base_filename:
- base_filename = base_filename.rsplit('.', 1)[0]
-
- # Add proper extension based on output format
- if outputFormat.lower() == "docx":
- filename = f"{base_filename}.docx"
- elif outputFormat.lower() == "pdf":
- filename = f"{base_filename}.pdf"
- elif outputFormat.lower() == "html":
- filename = f"{base_filename}.html"
- else:
- filename = f"{base_filename}.{outputFormat}"
-
- generated_documents.append({
- "documentName": filename,
- "documentData": rendered_content,
- "mimeType": mime_type
- })
-
- # Save debug files for multi-file generation - only if debug enabled
- debug_enabled = self.services.utils.configGet("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False)
- if debug_enabled:
- try:
- import os
- from datetime import datetime, UTC
- ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
- debug_root = "./test-chat/ai"
- debug_dir = os.path.join(debug_root, f"multifile_output_{ts}")
- os.makedirs(debug_dir, exist_ok=True)
-
- # Save metadata
- with open(os.path.join(debug_dir, "metadata.txt"), "w", encoding="utf-8") as f:
- f.write(f"title: {title}\n")
- f.write(f"format: {outputFormat}\n")
- f.write(f"documents_count: {len(generated_documents)}\n")
- f.write(f"split_strategy: {prompt_analysis.get('strategy', 'custom')}\n")
- f.write(f"prompt_analysis: {prompt_analysis}\n")
-
- # Save each generated document
- for i, doc in enumerate(generated_documents):
- doc_filename = doc["documentName"]
- doc_data = doc["documentData"]
- doc_mime = doc["mimeType"]
-
- # Determine file extension
- if outputFormat.lower() == "docx":
- file_ext = ".docx"
- elif outputFormat.lower() == "pdf":
- file_ext = ".pdf"
- elif outputFormat.lower() == "html":
- file_ext = ".html"
- else:
- file_ext = f".{outputFormat}"
-
- # Save the rendered document
- output_path = os.path.join(debug_dir, f"document_{i+1}_{doc_filename}")
-
- if file_ext in ['.md', '.txt', '.html', '.json', '.csv']:
- # Text-based formats
- with open(output_path, 'w', encoding='utf-8') as f:
- f.write(doc_data)
- else:
- # Binary formats - decode from base64 if needed
- try:
- import base64
- doc_bytes = base64.b64decode(doc_data)
- with open(output_path, 'wb') as f:
- f.write(doc_bytes)
- except Exception:
- # If not base64, save as text
- with open(output_path, 'w', encoding='utf-8') as f:
- f.write(doc_data)
-
- logger.info(f"💾 Debug: Saved multi-file document {i+1}: {output_path}")
-
- logger.info(f"💾 Debug: Multi-file output saved to: {debug_dir}")
-
- except Exception as e:
- logger.warning(f"Failed to save multi-file debug output: {e}")
-
- return {
- "success": True,
- "content": ai_response,
- "rendered_content": None, # Not applicable for multi-file
- "mime_type": None, # Not applicable for multi-file
- "filename": None, # Not applicable for multi-file
- "format": outputFormat,
- "title": title,
- "documents": generated_documents,
- "is_multi_file": True,
- "split_strategy": prompt_analysis.get("strategy", "custom")
- }
-
- except Exception as e:
- logger.error(f"Error in multi-file document generation: {str(e)}")
- # Fallback to single-file
- return await self._callAiWithSingleFileGeneration(
- prompt, documents, options, outputFormat, title
- )
-
diff --git a/modules/services/serviceAi/subCoreAi.py b/modules/services/serviceAi/subCoreAi.py
new file mode 100644
index 00000000..4cd13f8a
--- /dev/null
+++ b/modules/services/serviceAi/subCoreAi.py
@@ -0,0 +1,596 @@
+import logging
+from typing import Dict, Any, List, Optional, Tuple, Union
+from modules.datamodels.datamodelChat import PromptPlaceholder, ChatDocument
+from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, ModelCapabilities, OperationType, Priority
+from modules.interfaces.interfaceAiObjects import AiObjects
+
+logger = logging.getLogger(__name__)
+
+
+class SubCoreAi:
+ """Core AI operations including image analysis, text generation, and planning calls."""
+
+ def __init__(self, services, aiObjects):
+ """Initialize core AI operations.
+
+ Args:
+ services: Service center instance for accessing other services
+ aiObjects: Initialized AiObjects instance
+ """
+ self.services = services
+ self.aiObjects = aiObjects
+
+ # AI Processing Call
+ async def callAi(
+ self,
+ prompt: str,
+ documents: Optional[List[ChatDocument]] = None,
+ placeholders: Optional[List[PromptPlaceholder]] = None,
+ options: Optional[AiCallOptions] = None,
+ outputFormat: Optional[str] = None,
+ title: Optional[str] = None,
+ documentProcessor=None,
+ documentGenerator=None
+ ) -> Union[str, Dict[str, Any]]:
+ """
+ Unified AI call interface that automatically routes to appropriate handler.
+
+ Args:
+ prompt: The main prompt for the AI call
+ documents: Optional list of documents to process
+ placeholders: Optional list of placeholder replacements for planning calls
+ options: AI call configuration options
+ outputFormat: Optional output format (html, pdf, docx, txt, md, json, csv, xlsx) for document generation
+ title: Optional title for generated documents
+ documentProcessor: Document processing service instance
+ documentGenerator: Document generation service instance
+
+ Returns:
+ AI response as string, or dict with documents if outputFormat is specified
+
+ Raises:
+ Exception: If all available models fail
+ """
+ if options is None:
+ options = AiCallOptions()
+
+ # Normalize placeholders from List[PromptPlaceholder]
+ placeholders_dict: Dict[str, str] = {}
+ placeholders_meta: Dict[str, bool] = {}
+ if placeholders:
+ placeholders_dict = {p.label: p.content for p in placeholders}
+ placeholders_meta = {p.label: bool(getattr(p, 'summaryAllowed', False)) for p in placeholders}
+
+ # Auto-determine call type based on documents and operation type
+ call_type = self._determineCallType(documents, options.operationType)
+ options.callType = call_type
+
+ try:
+ # Build the full prompt that will be sent to AI
+ if placeholders:
+ full_prompt = prompt
+ for p in placeholders:
+ placeholder = f"{{{{KEY:{p.label}}}}}"
+ full_prompt = full_prompt.replace(placeholder, p.content)
+ else:
+ full_prompt = prompt
+
+ self._writeAiResponseDebug(
+ label='ai_prompt_debug',
+ content=full_prompt,
+ partIndex=1,
+ modelName=None,
+ continuation=False
+ )
+ except Exception:
+ pass
+
+        # Handle document generation with specific output format
+        if outputFormat and not documentGenerator:
+            logger.warning(f"outputFormat '{outputFormat}' requested but no documentGenerator provided; falling back to plain text handling")
+        if outputFormat and documentGenerator:
+ result = await documentGenerator.callAiWithDocumentGeneration(prompt, documents, options, outputFormat, title)
+ # Log AI response for debugging
+ try:
+ if isinstance(result, dict) and 'content' in result:
+ self._writeAiResponseDebug(
+ label='ai_document_generation',
+ content=result['content'],
+ partIndex=1,
+ modelName=None, # Document generation doesn't return model info
+ continuation=False
+ )
+ except Exception:
+ pass
+ return result
+
+ if call_type == "planning":
+ result = await self._callAiPlanning(prompt, placeholders_dict, placeholders_meta, options)
+ # Log AI response for debugging
+ try:
+ self._writeAiResponseDebug(
+ label='ai_planning',
+ content=result or "",
+ partIndex=1,
+ modelName=None, # Planning doesn't return model info
+ continuation=False
+ )
+ except Exception:
+ pass
+ return result
+ else:
+ # Set processDocumentsIndividually from the legacy parameter if not set in options
+ if options.processDocumentsIndividually is None and documents:
+ options.processDocumentsIndividually = False # Default to batch processing
+
+ # For text calls, we need to build the full prompt with placeholders here
+ # since _callAiText doesn't handle placeholders directly
+ if placeholders_dict:
+ full_prompt = self._buildPromptWithPlaceholders(prompt, placeholders_dict)
+ else:
+ full_prompt = prompt
+
+ if documentProcessor:
+ result = await documentProcessor.callAiText(full_prompt, documents, options)
+ else:
+ # Fallback to direct AI call if no document processor available
+ request = AiCallRequest(
+ prompt=full_prompt,
+ context="",
+ options=options
+ )
+ response = await self.aiObjects.call(request)
+ result = response.content
+
+ # Log AI response for debugging (additional logging for text calls)
+ try:
+ self._writeAiResponseDebug(
+ label='ai_text_main',
+ content=result or "",
+ partIndex=1,
+ modelName=None, # Text calls already log internally
+ continuation=False
+ )
+ except Exception:
+ pass
+ return result
+
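+    # Illustrative usage sketch (assumed wiring, not part of this module):
+    # `services`, `aiObjects`, `report` and `docProcessor` stand in for the
+    # application's real objects.
+    #
+    #     core = SubCoreAi(services, aiObjects)
+    #     answer = await core.callAi(
+    #         prompt="Summarize the attached report",
+    #         documents=[report],              # ChatDocument instances
+    #         options=AiCallOptions(),
+    #         documentProcessor=docProcessor,  # routes text calls to chunked processing
+    #     )
+    #     # With no documents and operationType=GENERATE_PLAN, the same call
+    #     # is routed to the planning path instead.
+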
+ # AI Image Analysis
+ async def readImage(
+ self,
+ prompt: str,
+ imageData: Union[str, bytes],
+ mimeType: str = None,
+ options: Optional[AiCallOptions] = None,
+ ) -> str:
+ """Call AI for image analysis using interface.callImage()."""
+ try:
+ # Check if imageData is valid
+ if not imageData:
+ error_msg = "No image data provided"
+ self.services.utils.debugLogToFile(f"Error in AI image analysis: {error_msg}", "AI_SERVICE")
+ logger.error(f"Error in AI image analysis: {error_msg}")
+ return f"Error: {error_msg}"
+
+ self.services.utils.debugLogToFile(f"readImage called with prompt, imageData type: {type(imageData)}, length: {len(imageData) if imageData else 0}, mimeType: {mimeType}", "AI_SERVICE")
+ logger.info(f"readImage called with prompt, imageData type: {type(imageData)}, length: {len(imageData) if imageData else 0}, mimeType: {mimeType}")
+
+ # Always use IMAGE_ANALYSIS operation type for image processing
+ if options is None:
+ options = AiCallOptions(operationType=OperationType.IMAGE_ANALYSIS)
+ else:
+ # Override the operation type to ensure image analysis
+ options.operationType = OperationType.IMAGE_ANALYSIS
+
+ self.services.utils.debugLogToFile(f"Calling aiObjects.callImage with operationType: {options.operationType}", "AI_SERVICE")
+ logger.info(f"Calling aiObjects.callImage with operationType: {options.operationType}")
+ result = await self.aiObjects.callImage(prompt, imageData, mimeType, options)
+
+ # Debug the result
+ self.services.utils.debugLogToFile(f"Raw AI result type: {type(result)}, value: {repr(result)}", "AI_SERVICE")
+
+ # Check if result is valid
+ if not result or (isinstance(result, str) and not result.strip()):
+ error_msg = f"No response from AI image analysis (result: {repr(result)})"
+ self.services.utils.debugLogToFile(f"Error in AI image analysis: {error_msg}", "AI_SERVICE")
+ logger.error(f"Error in AI image analysis: {error_msg}")
+ return f"Error: {error_msg}"
+
+            logMsg = f"callImage returned: {result[:200]}..." if len(result) > 200 else f"callImage returned: {result}"
+            self.services.utils.debugLogToFile(logMsg, "AI_SERVICE")
+            logger.info(logMsg)
+ return result
+ except Exception as e:
+ self.services.utils.debugLogToFile(f"Error in AI image analysis: {str(e)}", "AI_SERVICE")
+ logger.error(f"Error in AI image analysis: {str(e)}")
+ return f"Error: {str(e)}"
+
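+    # Illustrative sketch (hypothetical file path), given a SubCoreAi
+    # instance `core` as in the sketch after callAi. imageData may be raw
+    # bytes or a base64 string, depending on what the connector expects.
+    #
+    #     with open("chart.png", "rb") as fh:
+    #         raw = fh.read()
+    #     description = await core.readImage(
+    #         prompt="Describe the trend shown in this chart",
+    #         imageData=raw,
+    #         mimeType="image/png",
+    #     )
+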
+ # AI Image Generation
+ async def generateImage(
+ self,
+ prompt: str,
+ size: str = "1024x1024",
+ quality: str = "standard",
+ style: str = "vivid",
+ options: Optional[AiCallOptions] = None,
+ ) -> Dict[str, Any]:
+ """Generate an image using AI using interface.generateImage()."""
+ try:
+ return await self.aiObjects.generateImage(prompt, size, quality, style, options)
+ except Exception as e:
+ logger.error(f"Error in AI image generation: {str(e)}")
+ return {"success": False, "error": str(e)}
+
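+    # Illustrative sketch: the arguments mirror the method defaults; beyond
+    # a "success"/"error" pair on failure, the shape of the returned dict
+    # depends on the underlying connector.
+    #
+    #     result = await core.generateImage(
+    #         prompt="A minimalist logo for a power-grid dashboard",
+    #         size="1024x1024",
+    #         quality="standard",
+    #         style="vivid",
+    #     )
+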
+ def _determineCallType(self, documents: Optional[List[ChatDocument]], operation_type: str) -> str:
+ """
+ Determine call type based on documents and operation type.
+
+ Criteria: no documents AND operationType is "generate_plan" -> planning
+ All other cases -> text
+ """
+ has_documents = documents is not None and len(documents) > 0
+ is_planning_operation = operation_type == OperationType.GENERATE_PLAN
+
+ if not has_documents and is_planning_operation:
+ return "planning"
+ else:
+ return "text"
+
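+    # Routing summary (derived from the rule above):
+    #
+    #     documents present? | operationType == GENERATE_PLAN | call type
+    #     -------------------+--------------------------------+----------
+    #     no                 | yes                            | planning
+    #     no                 | no                             | text
+    #     yes                | any                            | text
+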
+ async def _callAiPlanning(
+ self,
+ prompt: str,
+ placeholders: Optional[Dict[str, str]],
+ placeholdersMeta: Optional[Dict[str, bool]],
+ options: AiCallOptions
+ ) -> str:
+ """
+ Handle planning calls with placeholder system and selective summarization.
+ """
+ # Build full prompt with placeholders; if too large, summarize summaryAllowed placeholders proportionally
+ effective_placeholders = placeholders or {}
+ full_prompt = self._buildPromptWithPlaceholders(prompt, effective_placeholders)
+
+ if options.compressPrompt and placeholdersMeta:
+ # Determine model capacity
+ try:
+ caps = self._getModelCapabilitiesForContent(full_prompt, None, options)
+ max_bytes = caps.get("maxContextBytes", len(full_prompt.encode("utf-8")))
+ except Exception:
+ max_bytes = len(full_prompt.encode("utf-8"))
+
+ current_bytes = len(full_prompt.encode("utf-8"))
+ if current_bytes > max_bytes:
+ # Compute total bytes contributed by allowed placeholders (approximate by content length)
+ allowed_labels = [l for l, allow in placeholdersMeta.items() if allow]
+ allowed_sizes = {l: len((effective_placeholders.get(l) or "").encode("utf-8")) for l in allowed_labels}
+ total_allowed = sum(allowed_sizes.values())
+
+ overage = current_bytes - max_bytes
+ if total_allowed > 0 and overage > 0:
+ # Target total for allowed after reduction
+ target_allowed = max(total_allowed - overage, 0)
+ # Global ratio to apply across allowed placeholders
+ ratio = target_allowed / total_allowed if total_allowed > 0 else 1.0
+ ratio = max(0.0, min(1.0, ratio))
+
+ reduced: Dict[str, str] = {}
+ for label, content in effective_placeholders.items():
+ if label in allowed_labels and isinstance(content, str) and len(content) > 0:
+ old_len = len(content)
+ # Reduce by proportional ratio on characters (fallback if empty)
+ reduction_factor = ratio if old_len > 0 else 1.0
+ reduced[label] = self._reduceText(content, reduction_factor)
+ else:
+ reduced[label] = content
+
+ effective_placeholders = reduced
+ full_prompt = self._buildPromptWithPlaceholders(prompt, effective_placeholders)
+
+ # If still slightly over, perform a second-pass fine adjustment with updated ratio
+ current_bytes = len(full_prompt.encode("utf-8"))
+ if current_bytes > max_bytes and total_allowed > 0:
+ overage2 = current_bytes - max_bytes
+ # Recompute allowed sizes after first reduction
+ allowed_sizes2 = {l: len((effective_placeholders.get(l) or "").encode("utf-8")) for l in allowed_labels}
+ total_allowed2 = sum(allowed_sizes2.values())
+ if total_allowed2 > 0 and overage2 > 0:
+ target_allowed2 = max(total_allowed2 - overage2, 0)
+ ratio2 = target_allowed2 / total_allowed2
+ ratio2 = max(0.0, min(1.0, ratio2))
+ reduced2: Dict[str, str] = {}
+ for label, content in effective_placeholders.items():
+ if label in allowed_labels and isinstance(content, str) and len(content) > 0:
+ old_len = len(content)
+ reduction_factor = ratio2 if old_len > 0 else 1.0
+ reduced2[label] = self._reduceText(content, reduction_factor)
+ else:
+ reduced2[label] = content
+ effective_placeholders = reduced2
+ full_prompt = self._buildPromptWithPlaceholders(prompt, effective_placeholders)
+
+ # Make AI call using AiObjects (let it handle model selection)
+ request = AiCallRequest(
+ prompt=full_prompt,
+ context="", # Context is already included in the prompt
+ options=options
+ )
+ response = await self.aiObjects.call(request)
+ try:
+ logger.debug(f"AI model selected (planning): {getattr(response, 'modelName', 'unknown')}")
+ except Exception:
+ pass
+ return response.content
+
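+    # Worked example of the proportional reduction above (illustrative
+    # numbers): with max_bytes=1000, a 1300-byte prompt, and summaryAllowed
+    # placeholders contributing 600 bytes in total, overage=300, so
+    # target_allowed = 600 - 300 = 300 and ratio = 300/600 = 0.5: each
+    # allowed placeholder is cut to roughly half its characters via
+    # _reduceText, and the second pass fine-tunes any remaining overage.
+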
+ async def _callAiDirect(
+ self,
+ prompt: str,
+ documents: Optional[List[ChatDocument]],
+ options: AiCallOptions,
+ documentProcessor=None
+ ) -> Dict[str, Any]:
+ """
+ Call AI directly with prompt and documents for JSON output.
+ Used for multi-file generation - uses the existing generation pipeline.
+ """
+ # Use the existing generation pipeline that already works
+ # This ensures proper document processing and content extraction
+ logger.info(f"Using existing generation pipeline for {len(documents) if documents else 0} documents")
+
+ if documentProcessor:
+ # Process documents with JSON merging using the existing pipeline
+ result = await documentProcessor.processDocumentsPerChunkJson(documents, prompt, options)
+ else:
+ # Fallback to simple AI call
+ request = AiCallRequest(
+ prompt=prompt,
+ context="",
+ options=options
+ )
+ response = await self.aiObjects.call(request)
+ result = {"metadata": {"title": "AI Response"}, "sections": [{"id": "section_1", "type": "paragraph", "data": {"text": response.content}}]}
+
+ # Convert single-file result to multi-file format if needed
+ if "sections" in result and "documents" not in result:
+ logger.info("Converting single-file result to multi-file format")
+ # This is a single-file result, convert it to multi-file format
+ return {
+ "metadata": result.get("metadata", {"title": "Converted Document"}),
+ "documents": [{
+ "id": "doc_1",
+ "title": result.get("metadata", {}).get("title", "Document"),
+ "filename": "document.txt",
+ "sections": result.get("sections", [])
+ }]
+ }
+
+ return result
+
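+    # Shape of the single-to-multi conversion performed above (illustrative):
+    #
+    #     {"metadata": {"title": "Q3 Report"}, "sections": [...]}
+    #
+    # becomes
+    #
+    #     {"metadata": {"title": "Q3 Report"},
+    #      "documents": [{"id": "doc_1", "title": "Q3 Report",
+    #                     "filename": "document.txt", "sections": [...]}]}
+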
+ def _getModelCapabilitiesForContent(self, prompt: str, documents: Optional[List[ChatDocument]], options: AiCallOptions) -> Dict[str, int]:
+ """
+ Get model capabilities for content processing, including appropriate size limits for chunking.
+ """
+ # Estimate total content size
+ prompt_size = len(prompt.encode('utf-8'))
+ document_size = 0
+ if documents:
+ # Rough estimate of document content size
+ for doc in documents:
+ document_size += doc.fileSize or 0
+
+ total_size = prompt_size + document_size
+
+ # Use AiObjects to select the best model for this content size
+ # We'll simulate the model selection by checking available models
+ from modules.interfaces.interfaceAiObjects import aiModels
+
+ # Find the best model for this content size and operation
+ best_model = None
+ best_context_length = 0
+
+ for model_name, model_info in aiModels.items():
+ context_length = model_info.get("contextLength", 0)
+
+ # Skip models with no context length or too small for content
+ if context_length == 0:
+ continue
+
+ # Check if model supports the operation type
+ capabilities = model_info.get("capabilities", [])
+ if options.operationType == OperationType.IMAGE_ANALYSIS and "image_analysis" not in capabilities:
+ continue
+ elif options.operationType == OperationType.IMAGE_GENERATION and "image_generation" not in capabilities:
+ continue
+ elif options.operationType == OperationType.WEB_RESEARCH and "web_search" not in capabilities:
+ continue
+ elif "text_generation" not in capabilities:
+ continue
+
+ # Prefer models that can handle the content without chunking, but allow chunking if needed
+ if context_length >= total_size * 0.8: # 80% of content size
+ if context_length > best_context_length:
+ best_model = model_info
+ best_context_length = context_length
+ elif best_model is None: # Fallback to largest available model
+ if context_length > best_context_length:
+ best_model = model_info
+ best_context_length = context_length
+
+ # Fallback to a reasonable default if no model found
+ if best_model is None:
+ best_model = {
+ "contextLength": 128000, # GPT-4o default
+ "llmName": "gpt-4o"
+ }
+
+ # Calculate appropriate sizes
+ # Convert tokens to bytes (rough estimate: 1 token ≈ 4 characters)
+ context_length_bytes = int(best_model["contextLength"] * 4)
+ max_context_bytes = int(context_length_bytes * 0.9) # 90% of context length
+ text_chunk_size = int(max_context_bytes * 0.7) # 70% of max context for text chunks
+ image_chunk_size = int(max_context_bytes * 0.8) # 80% of max context for image chunks
+
+ logger.debug(f"Selected model: {best_model.get('llmName', 'unknown')} with context length: {best_model['contextLength']}")
+ logger.debug(f"Content size: {total_size} bytes, Max context: {max_context_bytes} bytes")
+ logger.debug(f"Text chunk size: {text_chunk_size} bytes, Image chunk size: {image_chunk_size} bytes")
+
+ return {
+ "maxContextBytes": max_context_bytes,
+ "textChunkSize": text_chunk_size,
+ "imageChunkSize": image_chunk_size
+ }
+
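+    # Illustrative sizing walk-through (assumed values, not taken from the model
+    # registry): for a model with contextLength = 128000 tokens, the method above
+    # derives its byte budgets as
+    #   context_length_bytes = 128000 * 4   -> 512000
+    #   max_context_bytes    = 512000 * 0.9 -> 460800
+    #   text_chunk_size      = 460800 * 0.7 -> 322560
+    #   image_chunk_size     = 460800 * 0.8 -> 368640
+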
+ def _getModelsForOperation(self, operation_type: str, options: AiCallOptions) -> List[ModelCapabilities]:
+ """
+ Get models capable of handling the specific operation with capability filtering.
+ """
+ # Use the actual AI objects model selection instead of hardcoded default
+ if hasattr(self, 'aiObjects') and self.aiObjects:
+ # Let AiObjects handle the model selection
+ return []
+ else:
+ # Fallback to default model if AiObjects not available
+ default_model = ModelCapabilities(
+ name="default",
+ maxTokens=4000,
+ capabilities=["text", "reasoning"] if operation_type == "planning" else ["text"],
+ costPerToken=0.001,
+ processingTime=1.0,
+ isAvailable=True
+ )
+ return [default_model]
+
+ def _buildPromptWithPlaceholders(self, prompt: str, placeholders: Optional[Dict[str, str]]) -> str:
+ """
+ Build full prompt by replacing placeholders with their content.
+ Uses the new {{KEY:placeholder}} format.
+ """
+ if not placeholders:
+ return prompt
+
+ full_prompt = prompt
+ for placeholder, content in placeholders.items():
+ # Replace both old format {{placeholder}} and new format {{KEY:placeholder}}
+ full_prompt = full_prompt.replace(f"{{{{{placeholder}}}}}", content)
+ full_prompt = full_prompt.replace(f"{{{{KEY:{placeholder}}}}}", content)
+
+ return full_prompt
+
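+    # Minimal usage sketch for _buildPromptWithPlaceholders (hypothetical values):
+    #   placeholders = {"CUSTOMER": "Acme AG"}
+    #   _buildPromptWithPlaceholders("Report for {{CUSTOMER}}", placeholders)
+    #       -> "Report for Acme AG"
+    #   _buildPromptWithPlaceholders("Report for {{KEY:CUSTOMER}}", placeholders)
+    #       -> "Report for Acme AG"
+    # Both the legacy {{placeholder}} and the new {{KEY:placeholder}} spellings
+    # resolve to the same content.
+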
+    def _writeAiResponseDebug(self, label: str, content: str, partIndex: int = 1, modelName: Optional[str] = None, continuation: Optional[bool] = None) -> None:
+ """Persist raw AI response parts for debugging under test-chat/ai - only if debug enabled."""
+ try:
+ # Check if debug logging is enabled
+ debug_enabled = self.services.utils.configGet("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False)
+ if not debug_enabled:
+ return
+
+ import os
+ from datetime import datetime, UTC
+ # Base dir: gateway/test-chat/ai (go up 4 levels from this file)
+ # .../gateway/modules/services/serviceAi/subCoreAi.py -> up to gateway root
+ gatewayDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+ outDir = os.path.join(gatewayDir, 'test-chat', 'ai')
+ os.makedirs(outDir, exist_ok=True)
+ ts = datetime.now(UTC).strftime('%Y%m%d-%H%M%S-%f')[:-3]
+ suffix = []
+ if partIndex is not None:
+ suffix.append(f"part{partIndex}")
+ if continuation is not None:
+ suffix.append(f"cont_{str(continuation).lower()}")
+ if modelName:
+ safeModel = ''.join(c if c.isalnum() or c in ('-', '_') else '-' for c in modelName)
+ suffix.append(safeModel)
+ suffixStr = ('_' + '_'.join(suffix)) if suffix else ''
+ fname = f"{ts}_{label}{suffixStr}.txt"
+ fpath = os.path.join(outDir, fname)
+ with open(fpath, 'w', encoding='utf-8') as f:
+ f.write(content or '')
+ except Exception:
+ # Do not raise; best-effort debug write
+ pass
+
+ def _exceedsTokenLimit(self, text: str, model: ModelCapabilities, safety_margin: float) -> bool:
+ """
+ Check if text exceeds model token limit with safety margin.
+ """
+ # Simple character-based estimation (4 chars per token)
+ estimated_tokens = len(text) // 4
+ max_tokens = int(model.maxTokens * (1 - safety_margin))
+ return estimated_tokens > max_tokens
+
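+    # Worked example for _exceedsTokenLimit (assumed numbers): with
+    # model.maxTokens = 4000 and safety_margin = 0.1, the effective limit is
+    # int(4000 * 0.9) = 3600 tokens. A 20000-character text is estimated at
+    # 20000 // 4 = 5000 tokens, so the method returns True (chunking needed).
+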
+ def _reducePlanningPrompt(
+ self,
+ full_prompt: str,
+ placeholders: Optional[Dict[str, str]],
+ model: ModelCapabilities,
+ options: AiCallOptions
+ ) -> str:
+ """
+ Reduce planning prompt size by summarizing placeholders while preserving prompt structure.
+ """
+ if not placeholders:
+ return self._reduceText(full_prompt, 0.7)
+
+ # Reduce placeholders while preserving prompt
+ reduced_placeholders = {}
+ for placeholder, content in placeholders.items():
+ if len(content) > 1000: # Only reduce long content
+ reduction_factor = 0.7
+ reduced_content = self._reduceText(content, reduction_factor)
+ reduced_placeholders[placeholder] = reduced_content
+ else:
+ reduced_placeholders[placeholder] = content
+
+ return self._buildPromptWithPlaceholders(full_prompt, reduced_placeholders)
+
+ def _reduceTextPrompt(
+ self,
+ prompt: str,
+ context: str,
+ model: ModelCapabilities,
+ options: AiCallOptions
+ ) -> str:
+ """
+ Reduce text prompt size using typeGroup-aware chunking and merging.
+ """
+        # maxTokens is in tokens, but the comparisons below use character lengths (~4 chars per token)
+        max_size = int(model.maxTokens * (1 - options.safetyMargin)) * 4
+
+ if options.compressPrompt:
+ # Reduce both prompt and context
+ target_size = max_size
+ current_size = len(prompt) + len(context)
+ reduction_factor = (target_size * 0.7) / current_size
+
+ if reduction_factor < 1.0:
+ prompt = self._reduceText(prompt, reduction_factor)
+ context = self._reduceText(context, reduction_factor)
+ else:
+ # Only reduce context, preserve prompt integrity
+ max_context_size = max_size - len(prompt)
+ if len(context) > max_context_size:
+ reduction_factor = max_context_size / len(context)
+ context = self._reduceText(context, reduction_factor)
+
+ return prompt + "\n\n" + context if context else prompt
+
+ def _extractTextFromContentParts(self, extracted_content) -> str:
+ """
+ Extract text content from ExtractionService ContentPart objects.
+ """
+ if not extracted_content or not hasattr(extracted_content, 'parts'):
+ return ""
+
+ text_parts = []
+ for part in extracted_content.parts:
+ if hasattr(part, 'typeGroup') and part.typeGroup in ['text', 'table', 'structure']:
+ if hasattr(part, 'data') and part.data:
+ text_parts.append(part.data)
+
+ return "\n\n".join(text_parts)
+
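+    # Sketch of _extractTextFromContentParts behaviour (hypothetical parts):
+    # given parts with typeGroup "text" (data "Intro") and "table"
+    # (data "| a | b |"), the result is "Intro\n\n| a | b |"; image and
+    # binary parts are ignored because they carry no extractable text.
+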
+ def _reduceText(self, text: str, reduction_factor: float) -> str:
+ """
+ Reduce text size by the specified factor.
+ """
+ if reduction_factor >= 1.0:
+ return text
+
+ target_length = int(len(text) * reduction_factor)
+ return text[:target_length] + "... [reduced]"
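+
+    # Example of _reduceText (assumed input): a 1000-character string with
+    # reduction_factor = 0.5 is truncated to its first 500 characters and
+    # suffixed with "... [reduced]" to mark the cut.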
diff --git a/modules/services/serviceAi/subDocumentGeneration.py b/modules/services/serviceAi/subDocumentGeneration.py
new file mode 100644
index 00000000..750616e4
--- /dev/null
+++ b/modules/services/serviceAi/subDocumentGeneration.py
@@ -0,0 +1,459 @@
+import logging
+from typing import Dict, Any, List, Optional, Tuple, Union
+from modules.datamodels.datamodelChat import ChatDocument
+from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType
+
+logger = logging.getLogger(__name__)
+
+
+class SubDocumentGeneration:
+ """Document generation operations including single-file and multi-file generation."""
+
+ def __init__(self, services, aiObjects, documentProcessor):
+ """Initialize document generation service.
+
+ Args:
+ services: Service center instance for accessing other services
+ aiObjects: Initialized AiObjects instance
+ documentProcessor: Document processing service instance
+ """
+ self.services = services
+ self.aiObjects = aiObjects
+ self.documentProcessor = documentProcessor
+
+ async def callAiWithDocumentGeneration(
+ self,
+ prompt: str,
+ documents: Optional[List[ChatDocument]],
+ options: AiCallOptions,
+ outputFormat: str,
+ title: Optional[str]
+ ) -> Dict[str, Any]:
+ """
+ Handle AI calls with document generation in specific output format.
+ Now supports both single-file and multi-file generation.
+
+ Args:
+ prompt: The main prompt for the AI call
+ documents: Optional list of documents to process
+ options: AI call configuration options
+ outputFormat: Target output format (html, pdf, docx, txt, md, json, csv, xlsx)
+ title: Optional title for generated documents
+
+ Returns:
+ Dict with generated documents and metadata
+ """
+ try:
+ # Use AI to analyze prompt intent
+ prompt_analysis = await self._analyzePromptIntent(prompt, self)
+ logger.info(f"Prompt analysis result: {prompt_analysis}")
+
+ if prompt_analysis.get("is_multi_file", False):
+ return await self._callAiWithMultiFileGeneration(
+ prompt, documents, options, outputFormat, title, prompt_analysis
+ )
+ else:
+ return await self._callAiWithSingleFileGeneration(
+ prompt, documents, options, outputFormat, title
+ )
+
+ except Exception as e:
+ logger.error(f"Error in document generation: {str(e)}")
+ return {
+ "success": False,
+ "error": str(e),
+ "content": "",
+ "rendered_content": "",
+ "mime_type": "text/plain",
+ "filename": f"error_{outputFormat}",
+ "format": outputFormat,
+ "title": title or "Error",
+ "documents": []
+ }
+
+ async def _callAiWithSingleFileGeneration(
+ self,
+ prompt: str,
+ documents: Optional[List[ChatDocument]],
+ options: AiCallOptions,
+ outputFormat: str,
+ title: Optional[str]
+ ) -> Dict[str, Any]:
+ """Handle single-file document generation (existing functionality)."""
+ try:
+ # Get format-specific extraction prompt from generation service
+ from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
+ generation_service = GenerationService(self.services)
+
+ # Use default title if not provided
+ if not title:
+ title = "AI Generated Document"
+
+ # Get format-specific extraction prompt
+ extractionPrompt = await generation_service.getExtractionPrompt(
+ outputFormat=outputFormat,
+ userPrompt=prompt,
+ title=title,
+ aiService=self
+ )
+
+ # Process documents with format-specific prompt using JSON mode
+ # This ensures structured JSON output instead of text
+ aiResponseJson = await self._callAiJson(extractionPrompt, documents, options)
+
+ # Validate JSON response
+ if not isinstance(aiResponseJson, dict) or "sections" not in aiResponseJson:
+ raise Exception("AI response is not valid JSON document structure")
+
+ # Generate filename from document metadata
+ parsedFilename = None
+ try:
+ if aiResponseJson.get("metadata", {}).get("title"):
+ title = aiResponseJson["metadata"]["title"]
+ # Clean title for filename
+ import re
+ parsed = re.sub(r"[^a-zA-Z0-9._-]", "-", title)
+ parsed = re.sub(r"-+", "-", parsed).strip('-')
+ if parsed:
+ parsedFilename = f"{parsed}.{outputFormat}"
+ except Exception:
+ parsedFilename = None
+
+ # Render the JSON content to the specified format
+ renderedContent, mimeType = await generation_service.renderReport(
+ extractedContent=aiResponseJson,
+ outputFormat=outputFormat,
+ title=title,
+ userPrompt=prompt,
+ aiService=self
+ )
+
+ # Generate meaningful filename (use AI-provided if valid, else fallback)
+ from datetime import datetime, UTC
+ timestamp = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
+ if parsedFilename and parsedFilename.lower().endswith(f".{outputFormat.lower()}"):
+ filename = parsedFilename
+ else:
+ safeTitle = ''.join(c if c.isalnum() else '-' for c in (title or 'document')).strip('-')
+ filename = f"{safeTitle or 'document'}-{timestamp}.{outputFormat}"
+
+ # Return structured result with document information
+ return {
+ "success": True,
+ "content": aiResponseJson, # Structured JSON document
+ "rendered_content": renderedContent, # Formatted content
+ "mime_type": mimeType,
+ "filename": filename,
+ "format": outputFormat,
+ "title": title,
+ "documents": [{
+ "documentName": filename,
+ "documentData": renderedContent,
+ "mimeType": mimeType
+ }],
+ "is_multi_file": False
+ }
+
+ except Exception as e:
+ logger.error(f"Error in single-file document generation: {str(e)}")
+ raise
+
+ async def _callAiWithMultiFileGeneration(
+ self,
+ prompt: str,
+ documents: Optional[List[ChatDocument]],
+ options: AiCallOptions,
+ outputFormat: str,
+ title: Optional[str],
+ prompt_analysis: Dict[str, Any]
+ ) -> Dict[str, Any]:
+ """Handle multi-file document generation using AI analysis."""
+ try:
+ # Get multi-file extraction prompt based on AI analysis
+ from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
+ generation_service = GenerationService(self.services)
+
+ # Use default title if not provided
+ if not title:
+ title = "AI Generated Documents"
+
+ # Get adaptive extraction prompt
+ extraction_prompt = await generation_service.getAdaptiveExtractionPrompt(
+ outputFormat=outputFormat,
+ userPrompt=prompt,
+ title=title,
+ promptAnalysis=prompt_analysis,
+ aiService=self
+ )
+
+ logger.info(f"Adaptive extraction prompt length: {len(extraction_prompt)} characters")
+ logger.debug(f"Adaptive extraction prompt preview: {extraction_prompt[:500]}...")
+
+ # Process with adaptive JSON schema - use the existing pipeline but with adaptive prompt
+ logger.info(f"Using adaptive prompt with existing pipeline: {len(extraction_prompt)} chars")
+ logger.debug(f"Processing documents: {len(documents) if documents else 0} documents")
+
+ # Use the existing pipeline but replace the prompt with our adaptive one
+ # This ensures proper document processing while using the multi-file prompt
+ ai_response = await self.documentProcessor.processDocumentsPerChunkJsonWithPrompt(documents, extraction_prompt, options)
+
+ logger.info(f"AI response type: {type(ai_response)}")
+ logger.info(f"AI response keys: {list(ai_response.keys()) if isinstance(ai_response, dict) else 'Not a dict'}")
+ logger.debug(f"AI response preview: {str(ai_response)[:500]}...")
+
+ # Validate response structure
+ if not self._validateResponseStructure(ai_response, prompt_analysis):
+ # Fallback to single-file if multi-file fails
+ logger.warning(f"Multi-file processing failed - Invalid response structure. Expected multi-file but got: {list(ai_response.keys()) if isinstance(ai_response, dict) else type(ai_response)}")
+ logger.warning(f"Prompt analysis: {prompt_analysis}")
+ logger.warning("Falling back to single-file generation")
+ return await self._callAiWithSingleFileGeneration(
+ prompt, documents, options, outputFormat, title
+ )
+
+ # Process multiple documents
+ generated_documents = []
+ for i, doc_data in enumerate(ai_response.get("documents", [])):
+ # Transform AI-generated sections to renderer-compatible format
+ transformed_sections = []
+ for section in doc_data.get("sections", []):
+ # Convert AI format to renderer format
+ transformed_section = {
+ "id": section.get("id", f"section_{len(transformed_sections) + 1}"),
+ "type": section.get("content_type", "paragraph"),
+ "data": {
+ "text": "",
+ "elements": section.get("elements", [])
+ },
+ "order": section.get("order", len(transformed_sections) + 1)
+ }
+
+ # Extract text from elements for simple text-based sections
+ if section.get("content_type") in ["paragraph", "heading"]:
+ text_parts = []
+ for element in section.get("elements", []):
+ if "text" in element:
+ text_parts.append(element["text"])
+ transformed_section["data"]["text"] = "\n".join(text_parts)
+
+ transformed_sections.append(transformed_section)
+
+ # Create complete document structure for rendering
+ complete_document = {
+ "metadata": {
+ "title": doc_data["title"],
+ "source_document": "multi_file_generation",
+ "document_id": doc_data.get("id", f"doc_{i+1}"),
+ "filename": doc_data.get("filename", f"document_{i+1}"),
+ "split_strategy": prompt_analysis.get("strategy", "custom")
+ },
+ "sections": transformed_sections,
+ "summary": f"Generated document: {doc_data['title']}",
+ "tags": ["multi_file", "ai_generated"]
+ }
+
+ rendered_content, mime_type = await generation_service.renderReport(
+ extractedContent=complete_document,
+ outputFormat=outputFormat,
+ title=doc_data["title"],
+ userPrompt=prompt,
+ aiService=self
+ )
+
+ # Generate proper filename with correct extension
+ base_filename = doc_data.get("filename", f"document_{i+1}")
+ # Remove any existing extension and add the correct one
+ if '.' in base_filename:
+ base_filename = base_filename.rsplit('.', 1)[0]
+
+                # Add the proper extension based on the output format
+                filename = f"{base_filename}.{outputFormat.lower()}"
+
+ generated_documents.append({
+ "documentName": filename,
+ "documentData": rendered_content,
+ "mimeType": mime_type
+ })
+
+ # Save debug files for multi-file generation - only if debug enabled
+ debug_enabled = self.services.utils.configGet("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False)
+ if debug_enabled:
+ try:
+ import os
+ from datetime import datetime, UTC
+ ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
+ debug_root = "./test-chat/ai"
+ debug_dir = os.path.join(debug_root, f"multifile_output_{ts}")
+ os.makedirs(debug_dir, exist_ok=True)
+
+ # Save metadata
+ with open(os.path.join(debug_dir, "metadata.txt"), "w", encoding="utf-8") as f:
+ f.write(f"title: {title}\n")
+ f.write(f"format: {outputFormat}\n")
+ f.write(f"documents_count: {len(generated_documents)}\n")
+ f.write(f"split_strategy: {prompt_analysis.get('strategy', 'custom')}\n")
+ f.write(f"prompt_analysis: {prompt_analysis}\n")
+
+ # Save each generated document
+ for i, doc in enumerate(generated_documents):
+ doc_filename = doc["documentName"]
+ doc_data = doc["documentData"]
+ doc_mime = doc["mimeType"]
+
+                        # Determine the file extension from the output format
+                        file_ext = f".{outputFormat.lower()}"
+
+ # Save the rendered document
+ output_path = os.path.join(debug_dir, f"document_{i+1}_{doc_filename}")
+
+ if file_ext in ['.md', '.txt', '.html', '.json', '.csv']:
+ # Text-based formats
+ with open(output_path, 'w', encoding='utf-8') as f:
+ f.write(doc_data)
+ else:
+ # Binary formats - decode from base64 if needed
+ try:
+ import base64
+ doc_bytes = base64.b64decode(doc_data)
+ with open(output_path, 'wb') as f:
+ f.write(doc_bytes)
+ except Exception:
+ # If not base64, save as text
+ with open(output_path, 'w', encoding='utf-8') as f:
+ f.write(doc_data)
+
+ logger.info(f"💾 Debug: Saved multi-file document {i+1}: {output_path}")
+
+ logger.info(f"💾 Debug: Multi-file output saved to: {debug_dir}")
+
+ except Exception as e:
+ logger.warning(f"Failed to save multi-file debug output: {e}")
+
+ return {
+ "success": True,
+ "content": ai_response,
+ "rendered_content": None, # Not applicable for multi-file
+ "mime_type": None, # Not applicable for multi-file
+ "filename": None, # Not applicable for multi-file
+ "format": outputFormat,
+ "title": title,
+ "documents": generated_documents,
+ "is_multi_file": True,
+ "split_strategy": prompt_analysis.get("strategy", "custom")
+ }
+
+ except Exception as e:
+ logger.error(f"Error in multi-file document generation: {str(e)}")
+ # Fallback to single-file
+ return await self._callAiWithSingleFileGeneration(
+ prompt, documents, options, outputFormat, title
+ )
+
+ async def _callAiJson(
+ self,
+ prompt: str,
+ documents: Optional[List[ChatDocument]],
+ options: AiCallOptions
+ ) -> Dict[str, Any]:
+ """
+ Handle AI calls with document processing for JSON output.
+ Returns structured JSON document instead of text.
+ """
+ # Process documents with JSON merging
+ return await self.documentProcessor.processDocumentsPerChunkJson(documents, prompt, options)
+
+ async def _analyzePromptIntent(self, prompt: str, ai_service=None) -> Dict[str, Any]:
+ """Use AI to analyze user prompt and determine processing requirements."""
+ if not ai_service:
+ return {"is_multi_file": False, "strategy": "single", "criteria": None}
+
+ try:
+ analysis_prompt = f"""
+Analyze this user request and determine if it requires multiple file output or single file output.
+
+User request: "{prompt}"
+
+Respond with JSON only in this exact format:
+{{
+ "is_multi_file": true/false,
+ "strategy": "single|per_entity|by_section|by_criteria|custom",
+ "criteria": "description of how to split content",
+ "file_naming_pattern": "suggested pattern for filenames",
+ "reasoning": "brief explanation of the analysis"
+}}
+
+Consider:
+- Does the user want separate files for different entities (customers, products, etc.)?
+- Does the user want to split content into multiple documents?
+- What would be the most logical way to organize the content?
+- What language is the request in? (analyze in the original language)
+
+Return only the JSON response.
+"""
+
+ from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType
+ request_options = AiCallOptions()
+ request_options.operationType = OperationType.GENERAL
+
+ request = AiCallRequest(prompt=analysis_prompt, context="", options=request_options)
+ response = await ai_service.aiObjects.call(request)
+
+ if response and response.content:
+ import json
+ import re
+
+ # Extract JSON from response
+ result = response.content.strip()
+ json_match = re.search(r'\{.*\}', result, re.DOTALL)
+ if json_match:
+ result = json_match.group(0)
+
+ analysis = json.loads(result)
+ return analysis
+ else:
+ return {"is_multi_file": False, "strategy": "single", "criteria": None}
+
+ except Exception as e:
+ logger.warning(f"AI prompt analysis failed: {str(e)}, defaulting to single file")
+ return {"is_multi_file": False, "strategy": "single", "criteria": None}
+
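+    # Illustrative analysis result (hypothetical prompt, not a recorded AI
+    # response): for "Create a separate fact sheet for each product" the
+    # method is expected to yield something like
+    #   {
+    #       "is_multi_file": True,
+    #       "strategy": "per_entity",
+    #       "criteria": "one file per product",
+    #       "file_naming_pattern": "<product>-fact-sheet",
+    #       "reasoning": "The user asks for one document per entity."
+    #   }
+    # Any parse failure falls back to the single-file default above.
+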
+ def _validateResponseStructure(self, response: Dict[str, Any], prompt_analysis: Dict[str, Any]) -> bool:
+ """Validate that AI response matches the expected structure."""
+ try:
+ if not isinstance(response, dict):
+ logger.warning(f"Response validation failed: Response is not a dict, got {type(response)}")
+ return False
+
+ # Check for multi-file structure
+ if prompt_analysis.get("is_multi_file", False):
+ has_documents = "documents" in response
+ is_documents_list = isinstance(response.get("documents"), list)
+ logger.info(f"Multi-file validation: has_documents={has_documents}, is_documents_list={is_documents_list}")
+ if has_documents and is_documents_list:
+ logger.info(f"Multi-file validation passed: {len(response['documents'])} documents found")
+ else:
+ logger.warning(f"Multi-file validation failed: documents key present={has_documents}, documents is list={is_documents_list}")
+ logger.warning(f"Available keys: {list(response.keys())}")
+ return has_documents and is_documents_list
+ else:
+ has_sections = "sections" in response
+ is_sections_list = isinstance(response.get("sections"), list)
+ logger.info(f"Single-file validation: has_sections={has_sections}, is_sections_list={is_sections_list}")
+ return has_sections and is_sections_list
+ except Exception as e:
+ logger.warning(f"Response validation failed with exception: {str(e)}")
+ return False
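+
+    # Minimal payloads accepted by _validateResponseStructure (sketch):
+    #   multi-file  -> {"documents": [{"title": "A", "sections": []}]}
+    #   single-file -> {"sections": [{"id": "s1", "type": "paragraph"}]}
+    # Anything else (missing key, non-list value, non-dict response) fails
+    # validation and triggers the single-file fallback.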
diff --git a/modules/services/serviceAi/subDocumentProcessing.py b/modules/services/serviceAi/subDocumentProcessing.py
new file mode 100644
index 00000000..e9e087d2
--- /dev/null
+++ b/modules/services/serviceAi/subDocumentProcessing.py
@@ -0,0 +1,1042 @@
+import logging
+from typing import Dict, Any, List, Optional, Tuple, Union
+from modules.datamodels.datamodelChat import ChatDocument
+from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, ModelCapabilities, OperationType, Priority
+from modules.datamodels.datamodelExtraction import ChunkResult, ContentExtracted
+from modules.services.serviceExtraction.mainServiceExtraction import ExtractionService
+
+logger = logging.getLogger(__name__)
+
+
+class SubDocumentProcessing:
+ """Document processing operations including chunking, processing, and merging."""
+
+ def __init__(self, services, aiObjects):
+ """Initialize document processing service.
+
+ Args:
+ services: Service center instance for accessing other services
+ aiObjects: Initialized AiObjects instance
+ """
+ self.services = services
+ self.aiObjects = aiObjects
+ self._extractionService = None
+
+ @property
+ def extractionService(self):
+ """Lazy initialization of extraction service."""
+ if self._extractionService is None:
+ logger.info("Lazy initializing ExtractionService...")
+ self._extractionService = ExtractionService(self.services)
+ return self._extractionService
+
+ def _calculateMaxContextBytes(self, options: Optional[AiCallOptions]) -> int:
+ """Calculate maximum context bytes based on model capabilities and options."""
+ if options and options.maxContextBytes:
+ return options.maxContextBytes
+
+ # Default model capabilities (this should be enhanced with actual model registry)
+ defaultMaxTokens = 4000
+ safetyMargin = options.safetyMargin if options else 0.1
+
+ # Calculate bytes (4 chars per token estimation)
+ maxContextBytes = int(defaultMaxTokens * (1 - safetyMargin) * 4)
+
+ return maxContextBytes
+
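+    # Default budget example: with defaultMaxTokens = 4000 and
+    # safetyMargin = 0.1, the fallback is int(4000 * 0.9 * 4) = 14400 bytes.
+    # An explicit options.maxContextBytes always takes precedence.
+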
+ async def processDocumentsPerChunk(
+ self,
+ documents: List[ChatDocument],
+ prompt: str,
+ options: Optional[AiCallOptions] = None
+ ) -> str:
+ """
+ Process documents with per-chunk AI calls and merge results.
+ FIXED: Now preserves chunk relationships and document structure.
+
+ Args:
+ documents: List of ChatDocument objects to process
+ prompt: AI prompt for processing
+ options: AI call options
+
+ Returns:
+ Merged AI results as string with preserved document structure
+ """
+ if not documents:
+ return ""
+
+ # Get model capabilities for size calculation
+ model_capabilities = self._getModelCapabilitiesForContent(prompt, documents, options)
+
+ # Build extraction options for chunking with intelligent merging
+ extractionOptions: Dict[str, Any] = {
+ "prompt": prompt,
+ "operationType": options.operationType if options else "general",
+ "processDocumentsIndividually": True, # Process each document separately
+ "maxSize": model_capabilities["maxContextBytes"],
+ "chunkAllowed": True,
+ "textChunkSize": model_capabilities["textChunkSize"],
+ "imageChunkSize": model_capabilities["imageChunkSize"],
+ "imageMaxPixels": 1024 * 1024,
+ "imageQuality": 85,
+ "mergeStrategy": {
+ "useIntelligentMerging": True, # Enable intelligent token-aware merging
+ "modelCapabilities": model_capabilities,
+ "prompt": prompt,
+ "groupBy": "typeGroup",
+ "orderBy": "id",
+ "mergeType": "concatenate"
+ },
+ }
+
+ logger.debug(f"Per-chunk extraction options: prompt length={len(extractionOptions.get('prompt', ''))} chars, operationType={extractionOptions.get('operationType')}")
+
+ try:
+ # Extract content with chunking
+ extractionResult = self.extractionService.extractContent(documents, extractionOptions)
+
+ if not isinstance(extractionResult, list):
+ return "[Error: No extraction results]"
+
+ # FIXED: Process chunks with proper mapping
+ chunkResults = await self._processChunksWithMapping(extractionResult, prompt, options)
+
+ # FIXED: Merge with preserved chunk relationships
+ mergedContent = self._mergeChunkResults(chunkResults, options)
+
+ return mergedContent
+
+ except Exception as e:
+ logger.error(f"Error in per-chunk processing: {str(e)}")
+ return f"[Error in per-chunk processing: {str(e)}]"
+
+ async def processDocumentsPerChunkJson(
+ self,
+ documents: List[ChatDocument],
+ prompt: str,
+ options: Optional[AiCallOptions] = None
+ ) -> Dict[str, Any]:
+ """
+ Process documents with per-chunk AI calls and merge results in JSON mode.
+ Returns structured JSON document instead of text.
+ """
+ if not documents:
+ return {"metadata": {"title": "Empty Document"}, "sections": []}
+
+ # Get model capabilities for size calculation
+ model_capabilities = self._getModelCapabilitiesForContent(prompt, documents, options)
+
+ # Build extraction options for chunking with intelligent merging
+ extractionOptions: Dict[str, Any] = {
+ "prompt": prompt,
+ "operationType": options.operationType if options else "general",
+ "processDocumentsIndividually": True, # Process each document separately
+ "maxSize": model_capabilities["maxContextBytes"],
+ "chunkAllowed": True,
+ "textChunkSize": model_capabilities["textChunkSize"],
+ "imageChunkSize": model_capabilities["imageChunkSize"],
+ "imageMaxPixels": 1024 * 1024,
+ "imageQuality": 85,
+ "mergeStrategy": {
+ "useIntelligentMerging": True, # Enable intelligent token-aware merging
+ "modelCapabilities": model_capabilities,
+ "prompt": prompt,
+ "groupBy": "typeGroup",
+ "orderBy": "id",
+ "mergeType": "concatenate"
+ },
+ }
+
+ logger.debug(f"Per-chunk extraction options (JSON mode): prompt length={len(extractionOptions.get('prompt', ''))} chars, operationType={extractionOptions.get('operationType')}")
+
+ try:
+ # Extract content with chunking
+ extractionResult = self.extractionService.extractContent(documents, extractionOptions)
+
+ if not isinstance(extractionResult, list):
+ return {"metadata": {"title": "Error Document"}, "sections": []}
+
+ # Process chunks with proper mapping
+ chunkResults = await self._processChunksWithMapping(extractionResult, prompt, options, generate_json=True)
+
+ # Merge with JSON mode
+ mergedJsonDocument = self._mergeChunkResultsJson(chunkResults, options)
+
+ return mergedJsonDocument
+
+ except Exception as e:
+ logger.error(f"Error in per-chunk processing (JSON mode): {str(e)}")
+ return {"metadata": {"title": "Error Document"}, "sections": []}
+
+ async def processDocumentsPerChunkJsonWithPrompt(
+ self,
+ documents: List[ChatDocument],
+ custom_prompt: str,
+ options: Optional[AiCallOptions] = None
+ ) -> Dict[str, Any]:
+ """
+ Process documents with per-chunk AI calls and merge results in JSON mode.
+ Uses a custom prompt instead of the default extraction prompt.
+ """
+ if not documents:
+ return {"metadata": {"title": "Empty Document"}, "sections": []}
+
+ # Get model capabilities for size calculation
+ model_capabilities = self._getModelCapabilitiesForContent(custom_prompt, documents, options)
+
+ # Build extraction options for chunking with intelligent merging
+ extractionOptions: Dict[str, Any] = {
+ "prompt": custom_prompt, # Use the custom prompt instead of default
+ "operationType": options.operationType if options else "general",
+ "processDocumentsIndividually": True, # Process each document separately
+ "maxSize": model_capabilities["maxContextBytes"],
+ "chunkAllowed": True,
+ "textChunkSize": model_capabilities["textChunkSize"],
+ "imageChunkSize": model_capabilities["imageChunkSize"],
+ "imageMaxPixels": 1024 * 1024,
+ "imageQuality": 85,
+ "mergeStrategy": {
+ "useIntelligentMerging": True, # Enable intelligent token-aware merging
+ "modelCapabilities": model_capabilities,
+ "prompt": custom_prompt, # Use the custom prompt
+ "groupBy": "typeGroup",
+ "orderBy": "id",
+ "mergeType": "concatenate"
+ },
+ }
+
+ logger.debug(f"Per-chunk extraction options (JSON mode): prompt length={len(extractionOptions.get('prompt', ''))} chars, operationType={extractionOptions.get('operationType')}")
+
+ try:
+ # Extract content with chunking
+ extractionResult = self.extractionService.extractContent(documents, extractionOptions)
+
+ if not isinstance(extractionResult, list):
+ return {"metadata": {"title": "Error Document"}, "sections": []}
+
+ # Process chunks with proper mapping
+ logger.info(f"Processing {len(extractionResult)} chunks with custom prompt")
+ logger.debug(f"Custom prompt preview: {custom_prompt[:200]}...")
+
+ # Debug: Show what content is being processed (before filtering)
+ for i, ec in enumerate(extractionResult):
+ if hasattr(ec, 'parts'):
+ for j, part in enumerate(ec.parts):
+ if not (hasattr(part, 'data') and part.data):
+ # Check if this is an empty container chunk (which is expected)
+ part_type = getattr(part, 'typeGroup', None)
+ part_mime = getattr(part, 'mimeType', '')
+
+ is_empty_container = (
+ part_type == "container" and
+ part_mime and
+ 'document' in part_mime.lower()
+ )
+
+ if not is_empty_container:
+ logger.warning(f"Part {j} has no data - typeGroup='{part_type}', mimeType='{part_mime}'")
+
+ chunkResults = await self._processChunksWithMapping(extractionResult, custom_prompt, options, generate_json=True)
+
+ # Debug: Show what chunks were actually processed (after filtering)
+ logger.info(f"After filtering: {len(chunkResults)} chunks will be processed")
+
+ # Merge with JSON mode
+ mergedJsonDocument = self._mergeChunkResultsJson(chunkResults, options)
+
+ # Debug: Show what the AI actually returned
+ logger.info(f"AI returned document with keys: {list(mergedJsonDocument.keys())}")
+ if 'documents' in mergedJsonDocument:
+ logger.info(f"Number of documents: {len(mergedJsonDocument['documents'])}")
+ elif 'sections' in mergedJsonDocument:
+ logger.info(f"Number of sections: {len(mergedJsonDocument['sections'])}")
+
+ return mergedJsonDocument
+
+ except Exception as e:
+ logger.error(f"Error in per-chunk JSON processing: {str(e)}")
+ return {"metadata": {"title": "Error Document"}, "sections": []}
+
+ async def callAiText(
+ self,
+ prompt: str,
+ documents: Optional[List[ChatDocument]],
+ options: AiCallOptions
+ ) -> str:
+ """
+ Handle text calls with document processing through ExtractionService.
+ UNIFIED PROCESSING: Always use per-chunk processing for consistency.
+ """
+ # UNIFIED PROCESSING: Always use per-chunk processing for consistency
+ # This ensures MIME-type checking, chunk mapping, and parallel processing
+ return await self.processDocumentsPerChunk(documents, prompt, options)
+
+ async def _processChunksWithMapping(
+ self,
+ extractionResult: List[ContentExtracted],
+ prompt: str,
+ options: Optional[AiCallOptions] = None,
+ generate_json: bool = False
+ ) -> List[ChunkResult]:
+ """Process chunks with proper mapping to preserve relationships."""
+ from modules.datamodels.datamodelExtraction import ChunkResult
+ import asyncio
+ import time
+
+ # Collect all chunks that need processing with proper indexing
+ chunks_to_process = []
+ chunk_index = 0
+
+ for ec in extractionResult:
+ # Get document MIME type from metadata
+ document_mime_type = None
+ for part in ec.parts:
+ if part.metadata and 'documentMimeType' in part.metadata:
+ document_mime_type = part.metadata['documentMimeType']
+ break
+
+ for part in ec.parts:
+ if part.typeGroup in ("text", "table", "structure", "image", "container", "binary"):
+ # Skip empty container chunks (they're just metadata containers)
+ if part.typeGroup == "container" and (not part.data or len(part.data.strip()) == 0):
+ logger.debug(f"Skipping empty container chunk: mimeType={part.mimeType}, data_length={len(part.data) if part.data else 0}")
+ continue
+
+ chunks_to_process.append({
+ 'part': part,
+ 'chunk_index': chunk_index,
+ 'document_id': ec.id,
+ 'document_mime_type': document_mime_type
+ })
+ chunk_index += 1
+
+ logger.info(f"Processing {len(chunks_to_process)} chunks with proper mapping")
+
+ # Process chunks in parallel with proper mapping
+ async def process_single_chunk(chunk_info: Dict) -> ChunkResult:
+ part = chunk_info['part']
+ chunk_index = chunk_info['chunk_index']
+ document_id = chunk_info['document_id']
+ document_mime_type = chunk_info.get('document_mime_type', part.mimeType)
+
+ start_time = time.time()
+
+ try:
+ # FIXED: Check MIME type first, then fallback to typeGroup
+ is_image = (
+ (document_mime_type and document_mime_type.startswith('image/')) or
+ (part.mimeType and part.mimeType.startswith('image/')) or
+ (part.typeGroup == "image")
+ )
+
+ # Debug logging
+ self.services.utils.debugLogToFile(f"Chunk {chunk_index}: document_mime_type={document_mime_type}, part.mimeType={part.mimeType}, part.typeGroup={part.typeGroup}, is_image={is_image}", "AI_SERVICE")
+ logger.info(f"Chunk {chunk_index}: document_mime_type={document_mime_type}, part.mimeType={part.mimeType}, part.typeGroup={part.typeGroup}, is_image={is_image}")
+
+ if is_image:
+ # Use the same extraction prompt for image analysis (contains table JSON format)
+ self.services.utils.debugLogToFile(f"Processing image chunk {chunk_index}: mimeType={part.mimeType}, data_length={len(part.data) if part.data else 0}", "AI_SERVICE")
+
+ # Check if image data is available
+ if not part.data:
+ error_msg = f"No image data available for chunk {chunk_index}"
+ logger.warning(error_msg)
+ ai_result = f"Error: {error_msg}"
+ else:
+ try:
+ # Import here to avoid circular imports
+ from modules.services.serviceAi.subCoreAi import SubCoreAi
+ core_ai = SubCoreAi(self.services, self.aiObjects)
+
+ ai_result = await core_ai.readImage(
+ prompt=prompt,
+ imageData=part.data,
+ mimeType=part.mimeType,
+ options=options
+ )
+
+ self.services.utils.debugLogToFile(f"Image analysis result for chunk {chunk_index}: length={len(ai_result) if ai_result else 0}, preview={ai_result[:200] if ai_result else 'None'}...", "AI_SERVICE")
+
+ # Check if result is empty or None
+ if not ai_result or not ai_result.strip():
+ logger.warning(f"Image chunk {chunk_index} returned empty response from AI")
+ ai_result = "No content detected in image"
+
+ except Exception as e:
+ logger.error(f"Error processing image chunk {chunk_index}: {str(e)}")
+ ai_result = f"Error analyzing image: {str(e)}"
+
+ # If generating JSON, clean image analysis result
+ if generate_json:
+ try:
+ import json
+ import re
+
+ # Clean the response - remove markdown code blocks if present
+ cleaned_result = ai_result.strip()
+
+ # Remove various markdown patterns
+ if cleaned_result.startswith('```json'):
+ cleaned_result = re.sub(r'^```json\s*', '', cleaned_result)
+ cleaned_result = re.sub(r'\s*```$', '', cleaned_result)
+ elif cleaned_result.startswith('```'):
+ cleaned_result = re.sub(r'^```\s*', '', cleaned_result)
+ cleaned_result = re.sub(r'\s*```$', '', cleaned_result)
+
+ # Remove any leading/trailing text that's not JSON
+ # Look for the first { and last } to extract JSON
+ first_brace = cleaned_result.find('{')
+ last_brace = cleaned_result.rfind('}')
+
+ if first_brace != -1 and last_brace != -1 and last_brace > first_brace:
+ cleaned_result = cleaned_result[first_brace:last_brace + 1]
+
+ # Additional cleaning for common AI response issues
+ cleaned_result = cleaned_result.strip()
+
+ # Validate JSON
+ json.loads(cleaned_result)
+ ai_result = cleaned_result # Use cleaned version
+ self.services.utils.debugLogToFile(f"Image chunk {chunk_index} JSON validation successful", "AI_SERVICE")
+
+ except json.JSONDecodeError as e:
+ logger.warning(f"Image chunk {chunk_index} returned invalid JSON: {str(e)}")
+ logger.warning(f"Raw response was: '{ai_result[:500]}...'")
+
+ # Create fallback JSON with the actual response content (not the error message)
+ # Use the original AI response content, not the error message
+ fallback_content = ai_result if ai_result and ai_result.strip() else "No content detected"
+
+ self.services.utils.debugLogToFile(f"IMAGE FALLBACK CONTENT PREVIEW: '{fallback_content[:200]}...'", "AI_SERVICE")
+
+ ai_result = json.dumps({
+ "metadata": {"title": f"Image Analysis - Chunk {chunk_index}"},
+ "sections": [{
+ "id": f"image_section_{chunk_index}",
+ "type": "paragraph",
+ "data": {"text": fallback_content}
+ }]
+ })
+ self.services.utils.debugLogToFile(f"Created fallback JSON for image chunk {chunk_index} with actual content", "AI_SERVICE")
+ elif part.typeGroup in ("container", "binary"):
+ # Handle ALL container and binary content generically - let AI process any document type
+ self.services.utils.debugLogToFile(f"DEBUG: Chunk {chunk_index}: typeGroup={part.typeGroup}, mimeType={part.mimeType}, data_length={len(part.data) if part.data else 0}", "AI_SERVICE")
+
+ # Skip empty container chunks (they're just metadata containers)
+ if part.typeGroup == "container" and (not part.data or len(part.data.strip()) == 0):
+ self.services.utils.debugLogToFile(f"DEBUG: Skipping empty container - mimeType={part.mimeType}, data_length={len(part.data) if part.data else 0}", "AI_SERVICE")
+ logger.info(f"Chunk {chunk_index}: Skipping empty container - mimeType={part.mimeType}, data_length={len(part.data) if part.data else 0}")
+                        # Return None so this chunk is dropped entirely; falling through
+                        # would reference ai_result before it is ever assigned
+                        return None
+ elif part.mimeType and part.data and len(part.data.strip()) > 0:
+ # Process any document container as text content
+ request_options = options if options is not None else AiCallOptions()
+ request_options.operationType = OperationType.GENERAL
+ self.services.utils.debugLogToFile(f"EXTRACTION CONTAINER CHUNK {chunk_index}: Processing {part.mimeType} container as text with generate_json={generate_json}", "AI_SERVICE")
+ logger.info(f"Chunk {chunk_index}: Processing {part.mimeType} container as text with generate_json={generate_json}")
+
+ # Log extraction prompt and context
+ self.services.utils.debugLogToFile(f"EXTRACTION PROMPT: {prompt}", "AI_SERVICE")
+ self.services.utils.debugLogToFile(f"EXTRACTION CONTEXT LENGTH: {len(part.data) if part.data else 0} characters", "AI_SERVICE")
+
+ request = AiCallRequest(
+ prompt=prompt,
+ context=part.data,
+ options=request_options
+ )
+ response = await self.aiObjects.call(request)
+ ai_result = response.content
+
+ # Log extraction response
+ self.services.utils.debugLogToFile(f"EXTRACTION RESPONSE LENGTH: {len(ai_result) if ai_result else 0} characters", "AI_SERVICE")
+
+ # Save full extraction prompt and response to debug file - only if debug enabled
+ debug_enabled = self.services.utils.configGet("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False)
+ if debug_enabled:
+ try:
+ import os
+ from datetime import datetime, UTC
+ ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
+ debug_root = "./test-chat/ai"
+ os.makedirs(debug_root, exist_ok=True)
+ with open(os.path.join(debug_root, f"{ts}_extraction_container_chunk_{chunk_index}.txt"), "w", encoding="utf-8") as f:
+ f.write(f"EXTRACTION PROMPT:\n{prompt}\n\n")
+ f.write(f"EXTRACTION CONTEXT:\n{part.data if part.data else 'No context'}\n\n")
+ f.write(f"EXTRACTION RESPONSE:\n{ai_result if ai_result else 'No response'}\n")
+ except Exception:
+ pass
+
+ # If generating JSON, validate the response
+ if generate_json:
+ try:
+ import json
+ import re
+
+ # Clean the response - remove markdown code blocks if present
+ cleaned_result = ai_result.strip()
+
+ # Remove various markdown patterns
+ if cleaned_result.startswith('```json'):
+ cleaned_result = re.sub(r'^```json\s*', '', cleaned_result)
+ cleaned_result = re.sub(r'\s*```$', '', cleaned_result)
+ elif cleaned_result.startswith('```'):
+ cleaned_result = re.sub(r'^```\s*', '', cleaned_result)
+ cleaned_result = re.sub(r'\s*```$', '', cleaned_result)
+
+ # Remove any leading/trailing text that's not JSON
+ # Look for the first { and last } to extract JSON
+ first_brace = cleaned_result.find('{')
+ last_brace = cleaned_result.rfind('}')
+
+ if first_brace != -1 and last_brace != -1 and last_brace > first_brace:
+ cleaned_result = cleaned_result[first_brace:last_brace + 1]
+
+ # Additional cleaning for common AI response issues
+ cleaned_result = cleaned_result.strip()
+
+ # Validate JSON
+ json.loads(cleaned_result)
+ ai_result = cleaned_result # Use cleaned version
+
+ except json.JSONDecodeError as e:
+ logger.warning(f"Container chunk {chunk_index} ({part.mimeType}) returned invalid JSON: {str(e)}")
+ logger.warning(f"Raw response was: '{ai_result[:500]}...'")
+
+ # Create fallback JSON with the actual response content (not the error message)
+ # Use the original AI response content, not the error message
+ fallback_content = ai_result if ai_result and ai_result.strip() else "No content detected"
+
+ self.services.utils.debugLogToFile(f"FALLBACK CONTENT PREVIEW: '{fallback_content[:200]}...'", "AI_SERVICE")
+
+ ai_result = json.dumps({
+ "metadata": {"title": f"Document Analysis - Chunk {chunk_index}"},
+ "sections": [{
+ "id": f"analysis_section_{chunk_index}",
+ "type": "paragraph",
+ "data": {"text": fallback_content}
+ }]
+ })
+ self.services.utils.debugLogToFile(f"Created fallback JSON for container chunk {chunk_index} with actual content", "AI_SERVICE")
+ else:
+ # Skip empty or invalid container/binary content - don't create a result
+ self.services.utils.debugLogToFile(f"DEBUG: Chunk {chunk_index}: Skipping empty container - mimeType={part.mimeType}, data_length={len(part.data) if part.data else 0}", "AI_SERVICE")
+ # Return None to indicate this chunk should be completely skipped
+ return None
+ else:
+ # Ensure options is not None and set correct operation type for text
+ request_options = options if options is not None else AiCallOptions()
+ # FIXED: Set operation type to general for text processing
+ request_options.operationType = OperationType.GENERAL
+ self.services.utils.debugLogToFile(f"EXTRACTION CHUNK {chunk_index}: Calling aiObjects.call with operationType={request_options.operationType}, generate_json={generate_json}", "AI_SERVICE")
+ logger.info(f"Chunk {chunk_index}: Calling aiObjects.call with operationType={request_options.operationType}, generate_json={generate_json}")
+
+ # Log extraction context length
+ self.services.utils.debugLogToFile(f"EXTRACTION CONTEXT LENGTH: {len(part.data) if part.data else 0} characters", "AI_SERVICE")
+
+ # Debug: Log the actual prompt being sent to AI
+ logger.debug(f"AI PROMPT PREVIEW: {prompt[:300]}...")
+ logger.debug(f"AI CONTEXT PREVIEW: {part.data[:200] if part.data else 'None'}...")
+
+ request = AiCallRequest(
+ prompt=prompt,
+ context=part.data,
+ options=request_options
+ )
+ response = await self.aiObjects.call(request)
+
+ # Debug: Log what AI actually returned
+ logger.debug(f"AI RESPONSE PREVIEW: {response.content[:300] if response.content else 'None'}...")
+ ai_result = response.content
+
+ # Log extraction response length
+ self.services.utils.debugLogToFile(f"EXTRACTION RESPONSE LENGTH: {len(ai_result) if ai_result else 0} characters", "AI_SERVICE")
+
+ # Save extraction response to debug file (without verbose prompt) - only if debug enabled
+ debug_enabled = self.services.utils.configGet("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False)
+ if debug_enabled:
+ try:
+ import os
+ from datetime import datetime, UTC
+ ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
+ debug_root = "./test-chat/ai"
+ os.makedirs(debug_root, exist_ok=True)
+ with open(os.path.join(debug_root, f"{ts}_extraction_chunk_{chunk_index}.txt"), "w", encoding="utf-8") as f:
+ f.write(f"EXTRACTION RESPONSE:\n{ai_result if ai_result else 'No response'}\n")
+ except Exception:
+ pass
+
+ # If generating JSON, validate the response
+ if generate_json:
+ try:
+ import json
+ import re
+
+ # Clean the response - remove markdown code blocks and extra formatting
+ cleaned_result = ai_result.strip()
+
+ # Remove any markdown code block markers (```json, ```, etc.)
+ cleaned_result = re.sub(r'^```(?:json)?\s*', '', cleaned_result, flags=re.MULTILINE)
+ cleaned_result = re.sub(r'\s*```\s*$', '', cleaned_result, flags=re.MULTILINE)
+
+ # Remove any remaining ``` markers anywhere in the text
+ cleaned_result = re.sub(r'```', '', cleaned_result)
+
+ # Try to extract JSON from the response if it's embedded in other text
+ json_match = re.search(r'\{.*\}', cleaned_result, re.DOTALL)
+ if json_match:
+ cleaned_result = json_match.group(0)
+
+ # Validate JSON
+ json.loads(cleaned_result)
+ ai_result = cleaned_result # Use cleaned version
+
+ except json.JSONDecodeError as e:
+ logger.warning(f"Chunk {chunk_index} returned invalid JSON: {str(e)}")
+ # Create fallback JSON
+ ai_result = json.dumps({
+ "metadata": {"title": "Error Section"},
+ "sections": [{
+ "id": f"error_section_{chunk_index}",
+ "type": "paragraph",
+ "data": {"text": f"Error parsing JSON: {str(e)}"}
+ }]
+ })
+
+ processing_time = time.time() - start_time
+
+ logger.info(f"Chunk {chunk_index} processed: {len(ai_result)} chars in {processing_time:.2f}s")
+
+ return ChunkResult(
+ originalChunk=part,
+ aiResult=ai_result,
+ chunkIndex=chunk_index,
+ documentId=document_id,
+ processingTime=processing_time,
+ metadata={
+ "success": True,
+ "chunkSize": len(part.data) if part.data else 0,
+ "resultSize": len(ai_result),
+ "typeGroup": part.typeGroup
+ }
+ )
+
+ except Exception as e:
+ processing_time = time.time() - start_time
+ logger.warning(f"Error processing chunk {chunk_index}: {str(e)}")
+
+ return ChunkResult(
+ originalChunk=part,
+ aiResult=f"[Error processing chunk: {str(e)}]",
+ chunkIndex=chunk_index,
+ documentId=document_id,
+ processingTime=processing_time,
+ metadata={
+ "success": False,
+ "error": str(e),
+ "chunkSize": len(part.data) if part.data else 0,
+ "typeGroup": part.typeGroup
+ }
+ )
+
+ # Process chunks with concurrency control
+ max_concurrent = 5 # Default concurrency
+ if options and hasattr(options, 'maxConcurrentChunks'):
+ max_concurrent = options.maxConcurrentChunks
+ elif options and hasattr(options, 'maxParallelChunks'):
+ max_concurrent = options.maxParallelChunks
+
+ logger.info(f"Processing {len(chunks_to_process)} chunks with max concurrency: {max_concurrent}")
+ self.services.utils.debugLogToFile(f"DEBUG: Chunks to process: {len(chunks_to_process)}", "AI_SERVICE")
+ for i, chunk_info in enumerate(chunks_to_process):
+ self.services.utils.debugLogToFile(f"DEBUG: Chunk {i}: typeGroup={chunk_info['part'].typeGroup}, mimeType={chunk_info['part'].mimeType}, data_length={len(chunk_info['part'].data) if chunk_info['part'].data else 0}", "AI_SERVICE")
+
+ # Create semaphore for concurrency control
+ semaphore = asyncio.Semaphore(max_concurrent)
+
+ async def process_with_semaphore(chunk_info):
+ async with semaphore:
+ return await process_single_chunk(chunk_info)
+
+ # Process all chunks in parallel with concurrency control
+ tasks = [process_with_semaphore(chunk_info) for chunk_info in chunks_to_process]
+ self.services.utils.debugLogToFile(f"DEBUG: Created {len(tasks)} tasks for parallel processing", "AI_SERVICE")
+ chunk_results = await asyncio.gather(*tasks, return_exceptions=True)
+ self.services.utils.debugLogToFile(f"DEBUG: Got {len(chunk_results)} results from parallel processing", "AI_SERVICE")
+
+ # Handle any exceptions in the gather itself
+ processed_results = []
+ for i, result in enumerate(chunk_results):
+ if isinstance(result, Exception):
+ # Create error ChunkResult
+ chunk_info = chunks_to_process[i]
+ processed_results.append(ChunkResult(
+ originalChunk=chunk_info['part'],
+ aiResult=f"[Error in parallel processing: {str(result)}]",
+ chunkIndex=chunk_info['chunk_index'],
+ documentId=chunk_info['document_id'],
+ processingTime=0.0,
+ metadata={"success": False, "error": str(result)}
+ ))
+ elif result is not None:
+ # Only add non-None results (skip empty containers)
+ processed_results.append(result)
+
+ logger.info(f"Completed processing {len(processed_results)} chunks")
+ return processed_results
+
+ def _mergeChunkResults(
+ self,
+ chunkResults: List[ChunkResult],
+ options: Optional[AiCallOptions] = None
+ ) -> str:
+ """Merge chunk results while preserving document structure and chunk order."""
+
+ if not chunkResults:
+ return ""
+
+ # Get merging configuration from options
+ chunk_separator = "\n\n---\n\n"
+ include_document_headers = True
+ include_chunk_metadata = False
+
+ if options:
+ if hasattr(options, 'chunkSeparator'):
+ chunk_separator = options.chunkSeparator
+ elif hasattr(options, 'mergeStrategy') and options.mergeStrategy:
+ chunk_separator = options.mergeStrategy.get("chunkSeparator", "\n\n---\n\n")
+
+ # Check for enhanced options
+ if hasattr(options, 'preserveChunkMetadata'):
+ include_chunk_metadata = options.preserveChunkMetadata
+
+ # Group chunk results by document
+ results_by_document = {}
+ for chunk_result in chunkResults:
+ doc_id = chunk_result.documentId
+ if doc_id not in results_by_document:
+ results_by_document[doc_id] = []
+ results_by_document[doc_id].append(chunk_result)
+
+ # Sort chunks within each document by chunk index
+ for doc_id in results_by_document:
+ results_by_document[doc_id].sort(key=lambda x: x.chunkIndex)
+
+ # Merge results for each document
+ merged_documents = []
+
+ for doc_id, doc_chunks in results_by_document.items():
+ # Build document header if enabled
+ doc_header = ""
+ if include_document_headers:
+ doc_header = f"\n\n=== DOCUMENT: {doc_id} ===\n\n"
+
+ # Merge chunks for this document
+ doc_content = ""
+ for i, chunk_result in enumerate(doc_chunks):
+ # Add chunk separator (except for first chunk)
+ if i > 0:
+ doc_content += chunk_separator
+
+ # Add chunk content with optional metadata
+ chunk_metadata = chunk_result.metadata
+ if chunk_metadata.get("success", False):
+ chunk_content = chunk_result.aiResult
+
+ # Add chunk metadata if enabled
+ if include_chunk_metadata:
+ chunk_info = f"[Chunk {chunk_result.chunkIndex} - {chunk_metadata.get('typeGroup', 'unknown')} - {chunk_metadata.get('chunkSize', 0)} chars]"
+ chunk_content = f"{chunk_info}\n{chunk_content}"
+
+ doc_content += chunk_content
+ else:
+ # Handle error chunks
+ error_msg = f"[ERROR in chunk {chunk_result.chunkIndex}: {chunk_metadata.get('error', 'Unknown error')}]"
+ doc_content += error_msg
+
+ merged_documents.append(doc_header + doc_content)
+
+ # Join all documents
+ final_result = "\n\n".join(merged_documents)
+
+ logger.info(f"Merged {len(chunkResults)} chunks from {len(results_by_document)} documents")
+ return final_result.strip()
+
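+    # Shape of the merged text output (illustrative, two chunks of one
+    # document with the default separator and headers enabled):
+    #
+    #   === DOCUMENT: doc_1 ===
+    #
+    #   <chunk 0 result>
+    #
+    #   ---
+    #
+    #   <chunk 1 result>
+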
+ def _mergeChunkResultsClean(
+ self,
+ chunkResults: List[ChunkResult],
+ options: Optional[AiCallOptions] = None
+ ) -> str:
+ """Merge chunk results in CLEAN mode - no debug metadata or document headers."""
+
+ if not chunkResults:
+ return ""
+
+ # Get merging configuration from options
+ chunk_separator = "\n\n"
+ include_document_headers = False # CLEAN MODE: No document headers
+ include_chunk_metadata = False # CLEAN MODE: No chunk metadata
+
+ if options:
+ if hasattr(options, 'chunkSeparator'):
+ chunk_separator = options.chunkSeparator
+ elif hasattr(options, 'mergeStrategy') and options.mergeStrategy:
+ chunk_separator = options.mergeStrategy.get("chunkSeparator", "\n\n")
+
+ # Group chunk results by document
+ results_by_document = {}
+ for chunk_result in chunkResults:
+ doc_id = chunk_result.documentId
+ if doc_id not in results_by_document:
+ results_by_document[doc_id] = []
+ results_by_document[doc_id].append(chunk_result)
+
+ # Sort chunks within each document by chunk index
+ for doc_id in results_by_document:
+ results_by_document[doc_id].sort(key=lambda x: x.chunkIndex)
+
+ # Merge results for each document in CLEAN mode
+ merged_documents = []
+
+ for doc_id, doc_chunks in results_by_document.items():
+ # CLEAN MODE: No document headers
+ doc_header = ""
+
+ # Merge chunks for this document
+ doc_content = ""
+ for i, chunk_result in enumerate(doc_chunks):
+ # Add chunk separator (except for first chunk)
+ if i > 0:
+ doc_content += chunk_separator
+
+ # Add chunk content without metadata
+ chunk_metadata = chunk_result.metadata
+ if chunk_metadata.get("success", False):
+ chunk_content = chunk_result.aiResult
+
+ # CLEAN MODE: Skip container/binary chunks entirely
+ if chunk_content.startswith("[Skipped ") and "content:" in chunk_content:
+ continue # Skip container/binary chunks in clean mode
+
+ # CLEAN MODE: Skip empty or whitespace-only chunks
+ if not chunk_content.strip():
+ continue # Skip empty chunks in clean mode
+
+ # CLEAN MODE: No chunk metadata
+ doc_content += chunk_content
+ else:
+ # Handle error chunks silently in clean mode
+ continue
+
+ merged_documents.append(doc_header + doc_content)
+
+ # Join all documents
+ final_result = "\n\n".join(merged_documents)
+
+ return final_result.strip()
+
+ def _mergeChunkResultsJson(
+ self,
+ chunkResults: List[ChunkResult],
+ options: Optional[AiCallOptions] = None
+ ) -> Dict[str, Any]:
+ """Merge chunk results in JSON mode - returns structured JSON document."""
+ import json
+
+ if not chunkResults:
+ return {"metadata": {"title": "Empty Document"}, "sections": []}
+
+ # Group chunk results by document
+ results_by_document = {}
+ for chunk_result in chunkResults:
+ doc_id = chunk_result.documentId
+ if doc_id not in results_by_document:
+ results_by_document[doc_id] = []
+ results_by_document[doc_id].append(chunk_result)
+
+ # Sort chunks within each document by chunk index
+ for doc_id in results_by_document:
+ results_by_document[doc_id].sort(key=lambda x: x.chunkIndex)
+
+ # Merge JSON results for each document
+ all_documents = []
+ all_sections = []
+ document_titles = []
+ combined_metadata = {"title": "Merged Document", "splitStrategy": "by_section"}
+
+ for doc_id, doc_chunks in results_by_document.items():
+ # Process each chunk's JSON result
+ for chunk_result in doc_chunks:
+ chunk_metadata = chunk_result.metadata
+ if chunk_metadata.get("success", False):
+ try:
+ # Parse JSON from AI result
+ chunk_json = json.loads(chunk_result.aiResult)
+
+ # Check if this is a multi-file response (has "documents" key)
+ if isinstance(chunk_json, dict) and "documents" in chunk_json:
+ # This is a multi-file response - merge all documents
+ logger.debug(f"Processing multi-file response from chunk {chunk_result.chunkIndex} with {len(chunk_json['documents'])} documents")
+
+ # Add all documents from this chunk
+ for doc in chunk_json["documents"]:
+ # Add chunk context to document
+ doc["metadata"] = doc.get("metadata", {})
+ doc["metadata"]["source_chunk"] = chunk_result.chunkIndex
+ doc["metadata"]["source_document"] = doc_id
+ all_documents.append(doc)
+
+ # Update combined metadata
+ if "metadata" in chunk_json:
+ combined_metadata.update(chunk_json["metadata"])
+
+ # Extract sections from single-file response (fallback)
+ elif isinstance(chunk_json, dict) and "sections" in chunk_json:
+ for section in chunk_json["sections"]:
+ # Add document context to section
+ section["metadata"] = section.get("metadata", {})
+ section["metadata"]["source_document"] = doc_id
+ section["metadata"]["chunk_index"] = chunk_result.chunkIndex
+ all_sections.append(section)
+
+ # Extract document title
+ if isinstance(chunk_json, dict) and "metadata" in chunk_json:
+ title = chunk_json["metadata"].get("title", "")
+ if title and title not in document_titles:
+ document_titles.append(title)
+
+ except json.JSONDecodeError as e:
+ logger.warning(f"Failed to parse JSON from chunk {chunk_result.chunkIndex}: {str(e)}")
+ # Create a fallback section for invalid JSON
+ fallback_section = {
+ "id": f"error_section_{chunk_result.chunkIndex}",
+ "title": "Error Section",
+ "content_type": "paragraph",
+ "elements": [{
+ "text": f"Error parsing chunk {chunk_result.chunkIndex}: {str(e)}"
+ }],
+ "order": chunk_result.chunkIndex,
+ "metadata": {
+ "source_document": doc_id,
+ "chunk_index": chunk_result.chunkIndex,
+ "error": str(e)
+ }
+ }
+ all_sections.append(fallback_section)
+ else:
+ # Handle error chunks
+ error_section = {
+ "id": f"error_section_{chunk_result.chunkIndex}",
+ "title": "Error Section",
+ "content_type": "paragraph",
+ "elements": [{
+ "text": f"Error in chunk {chunk_result.chunkIndex}: {chunk_metadata.get('error', 'Unknown error')}"
+ }],
+ "order": chunk_result.chunkIndex,
+ "metadata": {
+ "source_document": doc_id,
+ "chunk_index": chunk_result.chunkIndex,
+ "error": chunk_metadata.get('error', 'Unknown error')
+ }
+ }
+ all_sections.append(error_section)
+
+ # Sort sections by order
+ all_sections.sort(key=lambda x: x.get("order", 0))
+
+ # If we have merged documents from multi-file responses, return them
+ if all_documents:
+ logger.info(f"Merged {len(all_documents)} documents from {len(chunkResults)} chunks")
+ return {
+ "metadata": combined_metadata,
+ "documents": all_documents
+ }
+
+ # Otherwise, create merged document with sections (single-file fallback)
+ merged_document = {
+ "metadata": {
+ "title": document_titles[0] if document_titles else "Merged Document",
+ "source_documents": list(results_by_document.keys()),
+ "extraction_method": "ai_json_extraction",
+ "version": "1.0"
+ },
+ "sections": all_sections,
+ "summary": f"Merged document from {len(results_by_document)} source documents",
+ "tags": ["merged", "ai_generated"]
+ }
+
+ logger.info(f"Merged {len(chunkResults)} chunks from {len(results_by_document)} documents (JSON mode)")
+ return merged_document
+
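+    # Illustrative sketch of the single-file fallback shape returned by
+    # _mergeChunkResultsJson above (field values are hypothetical):
+    #   {
+    #     "metadata": {"title": "Merged Document", "source_documents": ["doc_1"],
+    #                  "extraction_method": "ai_json_extraction", "version": "1.0"},
+    #     "sections": [{"id": "section_1", "order": 0, "elements": [...]}],
+    #     "summary": "Merged document from 1 source documents",
+    #     "tags": ["merged", "ai_generated"]
+    #   }
+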
+ def _getModelCapabilitiesForContent(self, prompt: str, documents: Optional[List[ChatDocument]], options: AiCallOptions) -> Dict[str, int]:
+ """
+ Get model capabilities for content processing, including appropriate size limits for chunking.
+ """
+ # Estimate total content size
+ prompt_size = len(prompt.encode('utf-8'))
+ document_size = 0
+ if documents:
+ # Rough estimate of document content size
+ for doc in documents:
+ document_size += doc.fileSize or 0
+
+ total_size = prompt_size + document_size
+
+ # Use AiObjects to select the best model for this content size
+ # We'll simulate the model selection by checking available models
+ from modules.interfaces.interfaceAiObjects import aiModels
+
+ # Find the best model for this content size and operation
+ best_model = None
+ best_context_length = 0
+
+ for model_name, model_info in aiModels.items():
+ context_length = model_info.get("contextLength", 0)
+
+ # Skip models with no context length or too small for content
+ if context_length == 0:
+ continue
+
+ # Check if model supports the operation type
+ capabilities = model_info.get("capabilities", [])
+ if options.operationType == OperationType.IMAGE_ANALYSIS and "image_analysis" not in capabilities:
+ continue
+ elif options.operationType == OperationType.IMAGE_GENERATION and "image_generation" not in capabilities:
+ continue
+ elif options.operationType == OperationType.WEB_RESEARCH and "web_search" not in capabilities:
+ continue
+ elif "text_generation" not in capabilities:
+ continue
+
+            # Prefer models that can handle the content without chunking, but allow chunking if needed.
+            # context_length is in tokens while total_size is in bytes, so convert
+            # (~4 bytes per token) before comparing against 80% of the content size.
+            if context_length * 4 >= total_size * 0.8:
+ if context_length > best_context_length:
+ best_model = model_info
+ best_context_length = context_length
+ elif best_model is None: # Fallback to largest available model
+ if context_length > best_context_length:
+ best_model = model_info
+ best_context_length = context_length
+
+ # Fallback to a reasonable default if no model found
+ if best_model is None:
+ best_model = {
+ "contextLength": 128000, # GPT-4o default
+ "llmName": "gpt-4o"
+ }
+
+ # Calculate appropriate sizes
+ # Convert tokens to bytes (rough estimate: 1 token ≈ 4 characters)
+ context_length_bytes = int(best_model["contextLength"] * 4)
+ max_context_bytes = int(context_length_bytes * 0.9) # 90% of context length
+ text_chunk_size = int(max_context_bytes * 0.7) # 70% of max context for text chunks
+ image_chunk_size = int(max_context_bytes * 0.8) # 80% of max context for image chunks
+
+ logger.debug(f"Selected model: {best_model.get('llmName', 'unknown')} with context length: {best_model['contextLength']}")
+ logger.debug(f"Content size: {total_size} bytes, Max context: {max_context_bytes} bytes")
+ logger.debug(f"Text chunk size: {text_chunk_size} bytes, Image chunk size: {image_chunk_size} bytes")
+
+ return {
+ "maxContextBytes": max_context_bytes,
+ "textChunkSize": text_chunk_size,
+ "imageChunkSize": image_chunk_size
+ }
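+    # Worked example of the budget arithmetic above for the 128k-token default:
+    #   context_length_bytes = 128_000 * 4   = 512_000
+    #   max_context_bytes    = 512_000 * 0.9 = 460_800
+    #   text_chunk_size      = 460_800 * 0.7 = 322_560
+    #   image_chunk_size     = 460_800 * 0.8 = 368_640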
diff --git a/modules/services/serviceAi/subUtilities.py b/modules/services/serviceAi/subUtilities.py
new file mode 100644
index 00000000..0f5bcc4d
--- /dev/null
+++ b/modules/services/serviceAi/subUtilities.py
@@ -0,0 +1,316 @@
+import logging
+from typing import Dict, Any, List, Optional, Tuple, Union
+from modules.datamodels.datamodelAi import ModelCapabilities, AiCallOptions
+
+logger = logging.getLogger(__name__)
+
+
+class SubUtilities:
+ """Utility functions for text processing, debugging, and helper operations."""
+
+ def __init__(self, services):
+ """Initialize utilities service.
+
+ Args:
+ services: Service center instance for accessing other services
+ """
+ self.services = services
+
+ def _writeTraceLog(self, contextText: str, data: Any) -> None:
+ """Write raw data to the central trace log file without truncation."""
+ try:
+ import os
+ import json
+ from datetime import datetime, UTC
+ # Only write if logger is in debug mode
+ if logger.level > logging.DEBUG:
+ return
+ # Get log directory from configuration via service center if possible
+ logDir = None
+ try:
+ logDir = self.services.utils.configGet("APP_LOGGING_LOG_DIR", "./")
+ except Exception:
+ pass
+ if not logDir:
+ logDir = "./"
+ if not os.path.isabs(logDir):
+ # Make it relative to gateway directory
+ gatewayDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+ logDir = os.path.join(gatewayDir, logDir)
+ os.makedirs(logDir, exist_ok=True)
+ traceFile = os.path.join(logDir, "log_trace.log")
+ timestamp = datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
+ traceEntry = f"[{timestamp}] {contextText}\n" + ("=" * 80) + "\n"
+ if data is None:
+ traceEntry += "No data provided\n"
+ else:
+ # Prefer exact text; if dict/list, pretty print JSON
+ try:
+ if isinstance(data, (dict, list)):
+ traceEntry += f"JSON Data:\n{json.dumps(data, indent=2, ensure_ascii=False)}\n"
+ else:
+ text = str(data)
+ traceEntry += f"Text Data:\n{text}\n"
+ except Exception:
+ traceEntry += f"Data (fallback): {str(data)}\n"
+ traceEntry += ("=" * 80) + "\n\n"
+ with open(traceFile, "a", encoding="utf-8") as f:
+ f.write(traceEntry)
+ except Exception:
+ # Swallow to avoid recursive logging issues
+ pass
+
+    def _writeAiResponseDebug(self, label: str, content: str, partIndex: int = 1, modelName: Optional[str] = None, continuation: Optional[bool] = None) -> None:
+ """Persist raw AI response parts for debugging under test-chat/ai - only if debug enabled."""
+ try:
+ # Check if debug logging is enabled
+ debug_enabled = self.services.utils.configGet("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False)
+ if not debug_enabled:
+ return
+
+ import os
+ from datetime import datetime, UTC
+ # Base dir: gateway/test-chat/ai (go up 4 levels from this file)
+ # .../gateway/modules/services/serviceAi/subUtilities.py -> up to gateway root
+ gatewayDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+ outDir = os.path.join(gatewayDir, 'test-chat', 'ai')
+ os.makedirs(outDir, exist_ok=True)
+ ts = datetime.now(UTC).strftime('%Y%m%d-%H%M%S-%f')[:-3]
+ suffix = []
+ if partIndex is not None:
+ suffix.append(f"part{partIndex}")
+ if continuation is not None:
+ suffix.append(f"cont_{str(continuation).lower()}")
+ if modelName:
+ safeModel = ''.join(c if c.isalnum() or c in ('-', '_') else '-' for c in modelName)
+ suffix.append(safeModel)
+ suffixStr = ('_' + '_'.join(suffix)) if suffix else ''
+ fname = f"{ts}_{label}{suffixStr}.txt"
+ fpath = os.path.join(outDir, fname)
+ with open(fpath, 'w', encoding='utf-8') as f:
+ f.write(content or '')
+ except Exception:
+ # Do not raise; best-effort debug write
+ pass
+
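+    # Example of a debug filename produced above (timestamp, label, and model
+    # are illustrative): 20251014-011325-123_response_part1_cont_false_gpt-4o.txt
+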
+ def _exceedsTokenLimit(self, text: str, model: ModelCapabilities, safety_margin: float) -> bool:
+ """
+ Check if text exceeds model token limit with safety margin.
+ """
+ # Simple character-based estimation (4 chars per token)
+ estimated_tokens = len(text) // 4
+ max_tokens = int(model.maxTokens * (1 - safety_margin))
+ return estimated_tokens > max_tokens
+
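+    # Worked example for _exceedsTokenLimit above: with maxTokens=4000 and
+    # safety_margin=0.2 the budget is 3200 tokens, so any text longer than
+    # about 12,800 characters (4 chars/token) is reported as exceeding the limit.
+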
+ def _reduceText(self, text: str, reduction_factor: float) -> str:
+ """
+ Reduce text size by the specified factor.
+ """
+ if reduction_factor >= 1.0:
+ return text
+
+ target_length = int(len(text) * reduction_factor)
+ return text[:target_length] + "... [reduced]"
+
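+    # Example: _reduceText("abcdefghij", 0.5) keeps the first 5 characters and
+    # returns "abcde... [reduced]"; factors >= 1.0 return the text unchanged.
+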
+ def _extractTextFromContentParts(self, extracted_content) -> str:
+ """
+ Extract text content from ExtractionService ContentPart objects.
+ """
+ if not extracted_content or not hasattr(extracted_content, 'parts'):
+ return ""
+
+ text_parts = []
+ for part in extracted_content.parts:
+ if hasattr(part, 'typeGroup') and part.typeGroup in ['text', 'table', 'structure']:
+ if hasattr(part, 'data') and part.data:
+ text_parts.append(part.data)
+
+ return "\n\n".join(text_parts)
+
+ def _buildPromptWithPlaceholders(self, prompt: str, placeholders: Optional[Dict[str, str]]) -> str:
+        """
+        Build the full prompt by replacing placeholders with their content.
+        Supports both the legacy {{placeholder}} and the new {{KEY:placeholder}} formats.
+        """
+ if not placeholders:
+ return prompt
+
+ full_prompt = prompt
+ for placeholder, content in placeholders.items():
+ # Replace both old format {{placeholder}} and new format {{KEY:placeholder}}
+ full_prompt = full_prompt.replace(f"{{{{{placeholder}}}}}", content)
+ full_prompt = full_prompt.replace(f"{{{{KEY:{placeholder}}}}}", content)
+
+ return full_prompt
+
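+    # Example: _buildPromptWithPlaceholders("Summarize: {{KEY:doc}}", {"doc": "Q3 report"})
+    # returns "Summarize: Q3 report"; the legacy "Summarize: {{doc}}" form yields the same.
+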
+ def _reducePlanningPrompt(
+ self,
+ full_prompt: str,
+ placeholders: Optional[Dict[str, str]],
+ model: ModelCapabilities,
+ options: AiCallOptions
+ ) -> str:
+ """
+ Reduce planning prompt size by summarizing placeholders while preserving prompt structure.
+ """
+ if not placeholders:
+ return self._reduceText(full_prompt, 0.7)
+
+ # Reduce placeholders while preserving prompt
+ reduced_placeholders = {}
+ for placeholder, content in placeholders.items():
+ if len(content) > 1000: # Only reduce long content
+ reduction_factor = 0.7
+ reduced_content = self._reduceText(content, reduction_factor)
+ reduced_placeholders[placeholder] = reduced_content
+ else:
+ reduced_placeholders[placeholder] = content
+
+ return self._buildPromptWithPlaceholders(full_prompt, reduced_placeholders)
+
+ def _reduceTextPrompt(
+ self,
+ prompt: str,
+ context: str,
+ model: ModelCapabilities,
+ options: AiCallOptions
+ ) -> str:
+        """
+        Reduce prompt and context so the combined text fits the model's budget,
+        using the same ~4 characters per token estimate as _exceedsTokenLimit.
+        """
+        max_size = int(model.maxTokens * 4 * (1 - options.safetyMargin))  # budget in characters
+
+ if options.compressPrompt:
+ # Reduce both prompt and context
+ target_size = max_size
+ current_size = len(prompt) + len(context)
+ reduction_factor = (target_size * 0.7) / current_size
+
+ if reduction_factor < 1.0:
+ prompt = self._reduceText(prompt, reduction_factor)
+ context = self._reduceText(context, reduction_factor)
+ else:
+ # Only reduce context, preserve prompt integrity
+ max_context_size = max_size - len(prompt)
+ if len(context) > max_context_size:
+ reduction_factor = max_context_size / len(context)
+ context = self._reduceText(context, reduction_factor)
+
+ return prompt + "\n\n" + context if context else prompt
+
+ async def _compressContent(self, content: str, targetSize: int, contentType: str) -> str:
+ """Compress content to target size."""
+ if len(content.encode("utf-8")) <= targetSize:
+ return content
+
+        try:
+            # Intended AI compression prompt (translated from German, currently
+            # unused because services must not call connectors directly):
+            #   "Compress the following {contentType} to at most {targetSize}
+            #    characters, but keep all important information. Return only the
+            #    compressed content, without additional explanations."
+            # Fall back to a UTF-8-safe byte truncation instead.
+            data = content.encode("utf-8")
+            return data[:targetSize].decode("utf-8", errors="ignore") + "... [truncated]"
+        except Exception as e:
+            logger.warning(f"Compression failed, using character truncation: {str(e)}")
+            return content[:targetSize] + "... [truncated]"
+
+ def _getModelCapabilitiesForContent(self, prompt: str, documents: Optional[List], options: AiCallOptions) -> Dict[str, int]:
+ """
+ Get model capabilities for content processing, including appropriate size limits for chunking.
+ """
+ # Estimate total content size
+ prompt_size = len(prompt.encode('utf-8'))
+ document_size = 0
+ if documents:
+ # Rough estimate of document content size
+ for doc in documents:
+ document_size += getattr(doc, 'fileSize', 0) or 0
+
+ total_size = prompt_size + document_size
+
+ # Use AiObjects to select the best model for this content size
+ # We'll simulate the model selection by checking available models
+        from modules.interfaces.interfaceAiObjects import aiModels
+        from modules.datamodels.datamodelAi import OperationType
+
+ # Find the best model for this content size and operation
+ best_model = None
+ best_context_length = 0
+
+ for model_name, model_info in aiModels.items():
+ context_length = model_info.get("contextLength", 0)
+
+ # Skip models with no context length or too small for content
+ if context_length == 0:
+ continue
+
+ # Check if model supports the operation type
+            capabilities = model_info.get("capabilities", [])
+ if options.operationType == OperationType.IMAGE_ANALYSIS and "image_analysis" not in capabilities:
+ continue
+ elif options.operationType == OperationType.IMAGE_GENERATION and "image_generation" not in capabilities:
+ continue
+ elif options.operationType == OperationType.WEB_RESEARCH and "web_search" not in capabilities:
+ continue
+ elif "text_generation" not in capabilities:
+ continue
+
+            # Prefer models that can handle the content without chunking, but allow chunking if needed.
+            # context_length is in tokens while total_size is in bytes, so convert
+            # (~4 bytes per token) before comparing against 80% of the content size.
+            if context_length * 4 >= total_size * 0.8:
+ if context_length > best_context_length:
+ best_model = model_info
+ best_context_length = context_length
+ elif best_model is None: # Fallback to largest available model
+ if context_length > best_context_length:
+ best_model = model_info
+ best_context_length = context_length
+
+ # Fallback to a reasonable default if no model found
+ if best_model is None:
+ best_model = {
+ "contextLength": 128000, # GPT-4o default
+ "llmName": "gpt-4o"
+ }
+
+ # Calculate appropriate sizes
+ # Convert tokens to bytes (rough estimate: 1 token ≈ 4 characters)
+ context_length_bytes = int(best_model["contextLength"] * 4)
+ max_context_bytes = int(context_length_bytes * 0.9) # 90% of context length
+ text_chunk_size = int(max_context_bytes * 0.7) # 70% of max context for text chunks
+ image_chunk_size = int(max_context_bytes * 0.8) # 80% of max context for image chunks
+
+ logger.debug(f"Selected model: {best_model.get('llmName', 'unknown')} with context length: {best_model['contextLength']}")
+ logger.debug(f"Content size: {total_size} bytes, Max context: {max_context_bytes} bytes")
+ logger.debug(f"Text chunk size: {text_chunk_size} bytes, Image chunk size: {image_chunk_size} bytes")
+
+ return {
+ "maxContextBytes": max_context_bytes,
+ "textChunkSize": text_chunk_size,
+ "imageChunkSize": image_chunk_size
+ }
+
+ def _getModelsForOperation(self, operation_type: str, options: AiCallOptions) -> List[ModelCapabilities]:
+ """
+ Get models capable of handling the specific operation with capability filtering.
+ """
+        # SubUtilities does not hold an aiObjects reference by default; if a caller
+        # has attached one, return an empty list so AiObjects performs the selection.
+        if hasattr(self, 'aiObjects') and self.aiObjects:
+            return []
+ else:
+ # Fallback to default model if AiObjects not available
+ default_model = ModelCapabilities(
+ name="default",
+ maxTokens=4000,
+ capabilities=["text", "reasoning"] if operation_type == "planning" else ["text"],
+ costPerToken=0.001,
+ processingTime=1.0,
+ isAvailable=True
+ )
+ return [default_model]
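+
+
+if __name__ == "__main__":
+    # Minimal usage sketch of the pure text helpers (illustrative only).
+    # Passing services=None is safe here because these helpers never touch
+    # the service center; real callers pass the service center instance.
+    utils = SubUtilities(services=None)
+    print(utils._reduceText("x" * 100, 0.5))
+    print(utils._buildPromptWithPlaceholders(
+        "Summarize: {{KEY:doc}}", {"doc": "quarterly numbers"}))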
diff --git a/modules/services/serviceAi/subWebResearch.py b/modules/services/serviceAi/subWebResearch.py
new file mode 100644
index 00000000..953324aa
--- /dev/null
+++ b/modules/services/serviceAi/subWebResearch.py
@@ -0,0 +1,384 @@
+import logging
+from typing import Dict, Any, List, Optional, Tuple, Union
+from modules.datamodels.datamodelWeb import (
+ WebResearchRequest,
+ WebResearchActionResult,
+ WebResearchDocumentData,
+ WebResearchActionDocument,
+ WebSearchResultItem,
+)
+from modules.interfaces.interfaceAiObjects import AiObjects
+from modules.shared.configuration import APP_CONFIG
+
+logger = logging.getLogger(__name__)
+
+
+class SubWebResearch:
+ """Web research operations including search, crawling, and analysis."""
+
+ def __init__(self, services, aiObjects):
+ """Initialize web research service.
+
+ Args:
+ services: Service center instance for accessing other services
+ aiObjects: Initialized AiObjects instance
+ """
+ self.services = services
+ self.aiObjects = aiObjects
+
+ async def webResearch(self, request: WebResearchRequest) -> WebResearchActionResult:
+ """Perform web research using interface functions."""
+ try:
+            logger.info("WEB RESEARCH STARTED")
+ logger.info(f"User Query: {request.user_prompt}")
+ logger.info(f"Max Results: {request.max_results}, Max Pages: {request.options.max_pages}")
+
+ # Global URL index to track all processed URLs across the entire research session
+ global_processed_urls = set()
+
+ # Step 1: Find relevant websites - either provided URLs or AI-determined main URLs
+            logger.info("=== STEP 1: INITIAL MAIN URLS LIST ===")
+
+ if request.urls:
+ # Use provided URLs as initial main URLs
+ websites = request.urls
+ logger.info(f"Using provided URLs ({len(websites)}):")
+ for i, url in enumerate(websites, 1):
+ logger.info(f" {i}. {url}")
+ else:
+ # Use AI to determine main URLs based on user's intention
+ logger.info(f"AI analyzing user intent: '{request.user_prompt}'")
+
+ # Use AI to generate optimized Tavily search query and search parameters
+ query_optimizer_prompt = f"""You are a search query optimizer.
+
+ USER QUERY: {request.user_prompt}
+
+ Your task: Create a search query and parameters for the USER QUERY given.
+
+ RULES:
+ 1. The search query MUST be related to the user query above
+ 2. Extract key terms from the user query
+ 3. Determine appropriate country/language based on the query context
+ 4. Keep search query short (2-6 words)
+
+ Return ONLY this JSON format:
+ {{
+ "user_prompt": "search query based on user query above",
+                    "country": "full English country name (e.g. Switzerland), not an ISO code, or null",
+ "language": "language_code_or_null",
+ "topic": "general|news|academic_or_null",
+ "time_range": "d|w|m|y_or_null",
+ "selection_strategy": "single|multiple|specific_page",
+ "selection_criteria": "what URLs to prioritize",
+ "expected_url_patterns": ["pattern1", "pattern2"],
+ "estimated_result_count": number
+ }}"""
+
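+                # Illustrative example of the JSON the optimizer is expected to
+                # return (all values are hypothetical):
+                #   {"user_prompt": "swiss electricity prices 2025",
+                #    "country": "Switzerland", "language": "de", "topic": "news",
+                #    "time_range": "m", "selection_strategy": "multiple",
+                #    "selection_criteria": "official and news sources",
+                #    "expected_url_patterns": ["admin.ch", "nzz.ch"],
+                #    "estimated_result_count": 5}
+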
+ # Get AI response for query optimization
+ from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions
+ ai_request = AiCallRequest(
+ prompt=query_optimizer_prompt,
+ options=AiCallOptions()
+ )
+ ai_response_obj = await self.aiObjects.call(ai_request)
+ ai_response = ai_response_obj.content
+ logger.debug(f"AI query optimizer response: {ai_response}")
+
+ # Parse AI response to extract search query
+ import json
+ try:
+ # Clean the response by removing markdown code blocks
+ cleaned_response = ai_response.strip()
+ if cleaned_response.startswith('```json'):
+ cleaned_response = cleaned_response[7:] # Remove ```json
+ if cleaned_response.endswith('```'):
+ cleaned_response = cleaned_response[:-3] # Remove ```
+ cleaned_response = cleaned_response.strip()
+
+ query_data = json.loads(cleaned_response)
+ search_query = query_data.get("user_prompt", request.user_prompt)
+ ai_country = query_data.get("country")
+ ai_language = query_data.get("language")
+ ai_topic = query_data.get("topic")
+ ai_time_range = query_data.get("time_range")
+ selection_strategy = query_data.get("selection_strategy", "multiple")
+ selection_criteria = query_data.get("selection_criteria", "relevant URLs")
+ expected_patterns = query_data.get("expected_url_patterns", [])
+ estimated_count = query_data.get("estimated_result_count", request.max_results)
+
+ logger.info(f"AI optimized search query: '{search_query}'")
+ logger.info(f"Selection strategy: {selection_strategy}")
+ logger.info(f"Selection criteria: {selection_criteria}")
+ logger.info(f"Expected URL patterns: {expected_patterns}")
+ logger.info(f"Estimated result count: {estimated_count}")
+
+                except json.JSONDecodeError:
+                    logger.warning("Failed to parse AI response as JSON, using original query")
+                    search_query = request.user_prompt
+                    ai_country = None
+                    ai_language = None
+                    ai_topic = None
+                    ai_time_range = None
+                    selection_strategy = "multiple"
+                    # Also initialize the fields read later so this fallback path
+                    # cannot raise NameError when building search parameters.
+                    selection_criteria = "relevant URLs"
+                    expected_patterns = []
+                    estimated_count = request.max_results
+
+ # Perform the web search with AI-determined parameters
+ search_kwargs = {
+ "query": search_query,
+ "max_results": request.max_results,
+ "search_depth": request.options.search_depth,
+ "auto_parameters": False # Use explicit parameters
+ }
+
+ # Add parameters only if they have valid values
+ def _normalizeCountry(c: Optional[str]) -> Optional[str]:
+ if not c:
+ return None
+ s = str(c).strip()
+ if not s or s.lower() in ['null', 'none', 'undefined']:
+ return None
+ # Map common codes to full English names when easy to do without extra deps
+ mapping = {
+ 'ch': 'Switzerland', 'che': 'Switzerland',
+ 'de': 'Germany', 'ger': 'Germany', 'deu': 'Germany',
+ 'at': 'Austria', 'aut': 'Austria',
+                    'us': 'United States', 'usa': 'United States', 'united states': 'United States',
+ 'uk': 'United Kingdom', 'gb': 'United Kingdom', 'gbr': 'United Kingdom'
+ }
+ key = s.lower()
+ if key in mapping:
+ return mapping[key]
+                # Otherwise assume it is already a full English name (Tavily accepts these as-is)
+ return s
+
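+            # Examples: _normalizeCountry("ch") -> "Switzerland",
+            # _normalizeCountry("Germany") -> "Germany", _normalizeCountry("null") -> None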
+ norm_ai_country = _normalizeCountry(ai_country)
+ norm_req_country = _normalizeCountry(request.options.country)
+ if norm_ai_country:
+ search_kwargs["country"] = norm_ai_country
+ elif norm_req_country:
+ search_kwargs["country"] = norm_req_country
+
+ if ai_language and ai_language not in ['null', '', 'none', 'undefined']:
+ search_kwargs["language"] = ai_language
+ elif request.options.language and request.options.language not in ['null', '', 'none', 'undefined']:
+ search_kwargs["language"] = request.options.language
+
+ if ai_topic and ai_topic in ['general', 'news', 'academic']:
+ search_kwargs["topic"] = ai_topic
+ elif request.options.topic and request.options.topic in ['general', 'news', 'academic']:
+ search_kwargs["topic"] = request.options.topic
+
+ if ai_time_range and ai_time_range in ['d', 'w', 'm', 'y']:
+ search_kwargs["time_range"] = ai_time_range
+ elif request.options.time_range and request.options.time_range in ['d', 'w', 'm', 'y']:
+ search_kwargs["time_range"] = request.options.time_range
+
+ # Constrain by expected domains if provided by AI
+ try:
+ include_domains = []
+ for p in expected_patterns or []:
+ if not isinstance(p, str):
+ continue
+ # Extract bare domain from pattern or URL
+ import re
+ m = re.search(r"(?:https?://)?([^/\s]+)", p.strip())
+ if m:
+ domain = m.group(1).lower()
+ # strip leading www.
+ if domain.startswith('www.'):
+ domain = domain[4:]
+ include_domains.append(domain)
+ # Deduplicate
+ if include_domains:
+ seen = set()
+ uniq = []
+ for d in include_domains:
+ if d not in seen:
+ seen.add(d)
+ uniq.append(d)
+ search_kwargs["include_domains"] = uniq
+ except Exception:
+ pass
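+            # Example: a pattern like "https://www.admin.ch/energy" is reduced to the
+            # bare domain "admin.ch" before being passed as include_domains.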
+
+ # Log the parameters being used
+ logger.info(f"Search parameters: country={search_kwargs.get('country', 'not_set')}, language={search_kwargs.get('language', 'not_set')}, topic={search_kwargs.get('topic', 'not_set')}, time_range={search_kwargs.get('time_range', 'not_set')}, include_domains={search_kwargs.get('include_domains', [])}")
+
+ search_results = await self.aiObjects.search_websites(**search_kwargs)
+
+ logger.debug(f"Web search returned {len(search_results)} results:")
+ for i, result in enumerate(search_results, 1):
+ logger.debug(f" {i}. {result.url} - {result.title}")
+
+ # Deduplicate while preserving order
+ seen = set()
+ search_urls = []
+ for r in search_results:
+ u = str(r.url)
+ if u not in seen:
+ seen.add(u)
+ search_urls.append(u)
+
+ logger.info(f"After initial deduplication: {len(search_urls)} unique URLs from {len(search_results)} search results")
+
+ if not search_urls:
+ logger.error("No relevant websites found")
+ return WebResearchActionResult(success=False, error="No relevant websites found")
+
+ # Now use AI to determine the main URLs based on user's intention
+ logger.info(f"AI selecting main URLs from {len(search_urls)} search results based on user intent")
+
+ # Create a prompt for AI to identify main URLs based on user's intention
+ ai_prompt = f"""
+ Select the most relevant URLs from these search results:
+
+ {chr(10).join([f"{i+1}. {url}" for i, url in enumerate(search_urls)])}
+
+ Return only the URLs that are most relevant for the user's query.
+ One URL per line.
+ """
+ # Create AI call request
+ ai_request = AiCallRequest(
+ prompt=ai_prompt,
+ options=AiCallOptions()
+ )
+ ai_response_obj = await self.aiObjects.call(ai_request)
+ ai_response = ai_response_obj.content
+ logger.debug(f"AI response for main URL selection: {ai_response}")
+
+ # Parse AI response to extract URLs
+ websites = []
+ for line in ai_response.strip().split('\n'):
+ line = line.strip()
+ if line and ('http://' in line or 'https://' in line):
+ # Extract URL from the line
+ for word in line.split():
+ if word.startswith('http://') or word.startswith('https://'):
+ websites.append(word.rstrip('.,;'))
+ break
+
+ if not websites:
+ logger.warning("AI did not identify any main URLs, using first few search results")
+ websites = search_urls[:3] # Fallback to first 3 search results
+
+ # Deduplicate while preserving order
+ seen = set()
+ unique_websites = []
+ for url in websites:
+ if url not in seen:
+ seen.add(url)
+ unique_websites.append(url)
+
+            logger.info(f"After AI selection deduplication: {len(unique_websites)} unique URLs from {len(websites)} AI-selected URLs")
+            websites = unique_websites
+
+ logger.info(f"AI selected {len(websites)} main URLs (after deduplication):")
+ for i, url in enumerate(websites, 1):
+ logger.info(f" {i}. {url}")
+
+ # Step 2: Smart website selection using AI interface
+            logger.info("=== STEP 2: FILTERED URL LIST BY USER PROMPT'S INTENTION ===")
+ logger.info(f"AI analyzing {len(websites)} URLs for relevance to: '{request.user_prompt}'")
+
+ selectedWebsites, aiResponse = await self.aiObjects.selectRelevantWebsites(websites, request.user_prompt)
+
+ logger.debug(f"AI Response: {aiResponse}")
+ logger.debug(f"AI selected {len(selectedWebsites)} most relevant URLs:")
+ for i, url in enumerate(selectedWebsites, 1):
+ logger.debug(f" {i}. {url}")
+
+ # Show which were filtered out
+ filtered_out = [url for url in websites if url not in selectedWebsites]
+ if filtered_out:
+ logger.debug(f"Filtered out {len(filtered_out)} less relevant URLs:")
+ for i, url in enumerate(filtered_out, 1):
+ logger.debug(f" {i}. {url}")
+
+ # Step 3+4+5: Recursive crawling with configurable depth
+ # Get configuration parameters
+ max_depth = int(APP_CONFIG.get("Web_Research_MAX_DEPTH", "2"))
+ max_links_per_domain = int(APP_CONFIG.get("Web_Research_MAX_LINKS_PER_DOMAIN", "4"))
+ crawl_timeout_minutes = int(APP_CONFIG.get("Web_Research_CRAWL_TIMEOUT_MINUTES", "10"))
+ crawl_timeout_seconds = crawl_timeout_minutes * 60
+
+ # Use the configured max_depth or the request's pages_search_depth, whichever is smaller
+ effective_depth = min(max_depth, request.options.pages_search_depth)
+
+ logger.info(f"=== STEP 3+4+5: RECURSIVE CRAWLING (DEPTH {effective_depth}) ===")
+ logger.info(f"Starting recursive crawl of {len(selectedWebsites)} main websites...")
+ logger.info(f"Search depth: {effective_depth} levels (max configured: {max_depth})")
+ logger.info(f"Max links per domain: {max_links_per_domain}")
+ logger.info(f"Crawl timeout: {crawl_timeout_minutes} minutes")
+
+ # Use recursive crawling with URL index to avoid duplicates
+ import asyncio
+ try:
+ allContent = await asyncio.wait_for(
+ self.aiObjects.crawlRecursively(
+ urls=selectedWebsites,
+ max_depth=effective_depth,
+ extract_depth=request.options.extract_depth,
+ max_per_domain=max_links_per_domain,
+ global_processed_urls=global_processed_urls
+ ),
+ timeout=crawl_timeout_seconds
+ )
+ logger.info(f"Crawling completed within timeout: {len(allContent)} pages crawled")
+            except asyncio.TimeoutError:
+                logger.warning(f"Crawling timed out after {crawl_timeout_minutes} minutes")
+                # crawlRecursively handles per-page timeouts internally, but once the
+                # outer wait_for fires its partial results are not recoverable here,
+                # so proceed with empty content and report the failure below.
+                allContent = {}
+
+ if not allContent:
+ logger.error("Could not extract content from any websites")
+ return WebResearchActionResult(success=False, error="Could not extract content from any websites")
+
+            logger.info("=== WEB RESEARCH COMPLETED ===")
+ logger.info(f"Successfully crawled {len(allContent)} URLs total")
+ logger.info(f"Crawl depth: {effective_depth} levels")
+
+ # Create simple result with raw content
+ sources = [WebSearchResultItem(title=url, url=url) for url in selectedWebsites]
+
+ # Get all additional links (all URLs except main ones)
+ additional_links = [url for url in allContent.keys() if url not in selectedWebsites]
+
+ # Combine all content into a single result
+ combinedContent = ""
+ for url, content in allContent.items():
+ combinedContent += f"\n\n=== {url} ===\n{content}\n"
+
+ documentData = WebResearchDocumentData(
+ user_prompt=request.user_prompt,
+ websites_analyzed=len(allContent),
+ additional_links_found=len(additional_links),
+ analysis_result=combinedContent, # Raw content, no analysis
+ sources=sources,
+ additional_links=additional_links,
+ individual_content=allContent, # Individual URL -> content mapping
+ debug_info={
+ "crawl_depth": effective_depth,
+ "max_configured_depth": max_depth,
+ "max_links_per_domain": max_links_per_domain,
+ "crawl_timeout_minutes": crawl_timeout_minutes,
+ "total_urls_crawled": len(allContent),
+ "main_urls": len(selectedWebsites),
+ "additional_urls": len(additional_links)
+ }
+ )
+
+ document = WebResearchActionDocument(
+ documentName=f"web_research_{request.user_prompt[:50]}.json",
+ documentData=documentData,
+ mimeType="application/json"
+ )
+
+ return WebResearchActionResult(
+ success=True,
+ documents=[document],
+ resultLabel="web_research_results"
+ )
+
+ except Exception as e:
+ logger.error(f"Error in web research: {str(e)}")
+ return WebResearchActionResult(success=False, error=str(e))
diff --git a/test_extractor_formats.py b/test_extractor_formats.py
deleted file mode 100644
index 201622ff..00000000
--- a/test_extractor_formats.py
+++ /dev/null
@@ -1,117 +0,0 @@
-#!/usr/bin/env python3
-"""
-Test script to demonstrate enhanced extractor format support.
-Shows all supported file extensions and MIME types for each extractor.
-"""
-
-import sys
-import os
-from pathlib import Path
-
-# Add the gateway module to the path
-sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'modules'))
-
-from modules.services.serviceExtraction.subRegistry import ExtractorRegistry
-
-def test_extractor_formats():
- """Test and display all supported formats from extractors."""
- print("🔍 Testing Plug-and-Play Extractor System")
- print("=" * 60)
-
- # Create registry
- registry = ExtractorRegistry()
-
- # Get all supported formats
- formats = registry.getAllSupportedFormats()
-
- print("\n📋 Supported File Extensions by Extractor:")
- print("-" * 50)
- for extractor_name, extensions in formats["extensions"].items():
- if extensions:
- print(f" {extractor_name:20} → {', '.join(extensions)}")
- else:
- print(f" {extractor_name:20} → (all extensions - fallback)")
-
- print("\n📋 Supported MIME Types by Extractor:")
- print("-" * 50)
- for extractor_name, mime_types in formats["mime_types"].items():
- if mime_types:
- print(f" {extractor_name:20} → {', '.join(mime_types)}")
- else:
- print(f" {extractor_name:20} → (all MIME types - fallback)")
-
- # Test individual extractors
- print("\n🧪 Testing Individual Extractors:")
- print("-" * 50)
-
- # Get all registered extractors
- for key, extractor in registry._map.items():
- if hasattr(extractor, 'getSupportedExtensions') and hasattr(extractor, 'getSupportedMimeTypes'):
- extensions = extractor.getSupportedExtensions()
- mime_types = extractor.getSupportedMimeTypes()
- print(f"\n {extractor.__class__.__name__}:")
- print(f" Extensions: {extensions}")
- print(f" MIME Types: {mime_types}")
-
- # Test detection with various file types
- print("\n🔬 Testing File Detection:")
- print("-" * 50)
-
- test_files = [
- # Document formats
- ("document.pdf", "application/pdf"),
- ("spreadsheet.xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"),
- ("presentation.pptx", "application/vnd.openxmlformats-officedocument.presentationml.presentation"),
- ("document.docx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document"),
-
- # Text and code files
- ("readme.txt", "text/plain"),
- ("readme.md", "text/markdown"),
- ("app.log", "text/plain"),
- ("Main.java", "text/x-java-source"),
- ("script.js", "text/javascript"),
- ("component.tsx", "text/typescript"),
- ("main.py", "text/x-python"),
- ("config.yaml", "text/x-yaml"),
- ("package.json", "application/json"),
- ("data.csv", "text/csv"),
- ("config.xml", "application/xml"),
- ("webpage.html", "text/html"),
- ("styles.css", "text/css"),
- ("script.sh", "text/x-sh"),
- ("Dockerfile", "text/plain"),
- (".gitignore", "text/plain"),
- ("app.config", "text/plain"),
- ("database.sql", "text/x-sql"),
- ("schema.ddl", "application/sql"),
-
- # Images
- ("image.png", "image/png"),
- ("photo.jpg", "image/jpeg"),
-
- # Unknown
- ("unknown.xyz", "application/octet-stream")
- ]
-
- for filename, mime_type in test_files:
- extractor = registry.resolve(mime_type, filename)
- if extractor:
- print(f" {filename:25} ({mime_type:50}) → {extractor.__class__.__name__}")
- else:
- print(f" {filename:25} ({mime_type:50}) → No extractor found")
-
- print("\n✅ Plug-and-Play extractor system test completed!")
- print("\nKey improvements:")
- print(" • 🔌 TRUE PLUG-AND-PLAY: Just add extractor file, it auto-registers!")
- print(" • 📋 No more manual registration of file types")
- print(" • 🔍 Auto-discovery scans extractors directory")
- print(" • 📝 Each extractor declares its own supported formats")
- print(" • 🚀 Easy to add new file types - just create new extractor")
- print(" • 🧹 Clean, maintainable code with no redundancy")
- print("\nTo add a new file type:")
- print(" 1. Create extractorXyz.py in extractors/ directory")
- print(" 2. Implement Extractor interface with getSupportedExtensions()")
- print(" 3. That's it! No registry changes needed!")
-
-if __name__ == "__main__":
- test_extractor_formats()
diff --git a/test_image_processing.py b/test_image_processing.py
deleted file mode 100644
index ae993083..00000000
--- a/test_image_processing.py
+++ /dev/null
@@ -1,83 +0,0 @@
-"""
-Simple test to verify image processing works correctly.
-"""
-
-import asyncio
-import sys
-import os
-import base64
-import logging
-
-# Add the gateway module to the path
-sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'modules'))
-
-from modules.datamodels.datamodelAi import AiCallOptions, OperationType
-from modules.services.serviceAi.mainServiceAi import AiService
-
-# Set up logging
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
-logger = logging.getLogger(__name__)
-
-async def test_image_processing():
- """Test image processing with a simple base64 image."""
- print("🧪 Testing image processing...")
- logger.info("🧪 Testing image processing...")
-
- try:
- print("🔧 Initializing AI service...")
- logger.info("🔧 Initializing AI service...")
-
- # Initialize AI service
- ai_service = await AiService.create()
- print("✅ AI service initialized successfully")
- logger.info("✅ AI service initialized successfully")
-
- # Create a simple test image (1x1 pixel PNG in base64)
- test_image_base64 = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChwGA60e6kgAAAABJRU5ErkJggg=="
- print(f"📸 Test image base64 length: {len(test_image_base64)}")
- logger.info(f"📸 Test image base64 length: {len(test_image_base64)}")
-
- # Test the readImage method directly
- print("📸 Testing readImage method...")
- logger.info("📸 Testing readImage method...")
-
- result = await ai_service.readImage(
- prompt="What do you see in this image?",
- imageData=test_image_base64,
- mimeType="image/png"
- )
-
- print(f"✅ Image processing result: {result}")
- logger.info(f"✅ Image processing result: {result}")
-
- return True
-
- except Exception as e:
- print(f"❌ Image processing test failed: {str(e)}")
- logger.error(f"❌ Image processing test failed: {str(e)}")
- import traceback
- traceback.print_exc()
- logger.error(f"Traceback: {traceback.format_exc()}")
- return False
-
-async def main():
- """Main function to run the image processing test."""
- print("🎯 Starting Image Processing Test")
- print("=" * 60)
- logger.info("🎯 Starting Image Processing Test")
- logger.info("=" * 60)
-
- success = await test_image_processing()
-
- if success:
- print("🎉 Image processing test completed successfully!")
- logger.info("🎉 Image processing test completed successfully!")
- else:
- print("❌ Image processing test failed!")
- logger.error("❌ Image processing test failed!")
-
- print("=" * 60)
- logger.info("=" * 60)
-
-if __name__ == "__main__":
- asyncio.run(main())
diff --git a/test_multifile_processing.py b/test_multifile_processing.py
deleted file mode 100644
index 737127bf..00000000
--- a/test_multifile_processing.py
+++ /dev/null
@@ -1,263 +0,0 @@
-#!/usr/bin/env python3
-"""
-Test script for multi-file processing implementation.
-This script tests the new multi-file functionality without breaking existing single-file processing.
-"""
-
-import asyncio
-import json
-import logging
-from typing import Dict, Any, List
-
-# Setup logging
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
-
-async def test_multi_file_detection():
- """Test AI-powered multi-file detection."""
- print("=== Testing Multi-File Detection ===")
-
- # Mock AI service for testing
- class MockAiService:
- async def call(self, request):
- class MockResponse:
- def __init__(self, content):
- self.content = content
- return MockResponse('{"is_multi_file": true, "strategy": "per_entity", "criteria": "customer_id", "file_naming_pattern": "{customer_name}_data.docx", "reasoning": "User wants separate files for each customer"}')
-
- class MockAiObjects:
- def __init__(self):
- self.call = MockAiService().call
-
- # Import the AI service
- try:
- from modules.services.serviceAi.mainServiceAi import AiService
-
- # Create mock service center
- class MockServiceCenter:
- def __init__(self):
- self.utils = MockUtils()
-
- class MockUtils:
- def debugLogToFile(self, message, category):
- print(f"[{category}] {message}")
-
- # Create AI service instance
- ai_service = AiService(MockServiceCenter())
- ai_service.aiObjects = MockAiObjects()
-
- # Test prompts
- test_prompts = [
- "Create one file for each customer in the document",
- "Split the data into separate files by category",
- "Generate individual files for each product",
- "Create a single report with all data",
- "Erstelle eine Datei für jeden Kunden", # German
- "Créer un fichier par section" # French
- ]
-
- for prompt in test_prompts:
- print(f"\nTesting prompt: '{prompt}'")
- try:
- analysis = await ai_service._analyzePromptIntent(prompt, ai_service)
- print(f" Analysis: {analysis}")
-
- if analysis.get("is_multi_file"):
- print(f" ✓ Detected as multi-file with strategy: {analysis.get('strategy')}")
- else:
- print(f" ✓ Detected as single-file")
-
- except Exception as e:
- print(f" ✗ Error: {str(e)}")
-
- print("\n=== Multi-File Detection Test Complete ===")
- return True
-
- except ImportError as e:
- print(f"Import error: {e}")
- print("Make sure you're running from the gateway directory")
- return False
- except Exception as e:
- print(f"Error during testing: {e}")
- return False
-
-async def test_json_schema_validation():
- """Test JSON schema validation for both single and multi-file."""
- print("\n=== Testing JSON Schema Validation ===")
-
- try:
- from modules.services.serviceGeneration.subJsonSchema import (
- get_document_subJsonSchema,
- get_multi_document_subJsonSchema,
- get_adaptive_json_schema,
- validate_json_document
- )
-
- # Test single document schema
- single_doc_schema = get_document_subJsonSchema()
- print(f"✓ Single document schema loaded: {len(single_doc_schema)} properties")
-
- # Test multi-document schema
- multi_doc_schema = get_multi_document_subJsonSchema()
- print(f"✓ Multi-document schema loaded: {len(multi_doc_schema)} properties")
-
- # Test adaptive schema selection
- single_analysis = {"is_multi_file": False}
- multi_analysis = {"is_multi_file": True}
-
- single_schema = get_adaptive_json_schema(single_analysis)
- multi_schema = get_adaptive_json_schema(multi_analysis)
-
- print(f"✓ Adaptive schema selection working")
- print(f" Single-file schema type: {single_schema.get('type', 'unknown')}")
- print(f" Multi-file schema type: {multi_schema.get('type', 'unknown')}")
-
- # Test validation with sample data
- single_doc_data = {
- "metadata": {"title": "Test Document"},
- "sections": [
- {
- "id": "section_1",
- "content_type": "paragraph",
- "elements": [{"text": "Test content"}],
- "order": 1
- }
- ]
- }
-
- multi_doc_data = {
- "metadata": {
- "title": "Test Documents",
- "splitStrategy": "per_entity"
- },
- "documents": [
- {
- "id": "doc_1",
- "title": "Document 1",
- "filename": "doc1.docx",
- "sections": [
- {
- "id": "section_1",
- "content_type": "paragraph",
- "elements": [{"text": "Content 1"}],
- "order": 1
- }
- ]
- }
- ]
- }
-
- single_valid = validate_json_document(single_doc_data)
- multi_valid = validate_json_document(multi_doc_data)
-
- print(f"✓ Single document validation: {'PASS' if single_valid else 'FAIL'}")
- print(f"✓ Multi-document validation: {'PASS' if multi_valid else 'FAIL'}")
-
- print("\n=== JSON Schema Validation Test Complete ===")
- return True
-
- except ImportError as e:
- print(f"Import error: {e}")
- return False
- except Exception as e:
- print(f"Error during schema testing: {e}")
- return False
-
-async def test_prompt_builder():
- """Test adaptive prompt building."""
- print("\n=== Testing Prompt Builder ===")
-
- try:
- from modules.services.serviceGeneration.subPromptBuilder import (
- buildAdaptiveExtractionPrompt,
- buildGenericExtractionPrompt
- )
-
- # Mock services
- class MockServices:
- def __init__(self):
- self.utils = MockUtils()
-
- class MockUtils:
- def debugLogToFile(self, message, category):
- print(f"[{category}] {message}")
-
- services = MockServices()
-
- # Test adaptive prompt building
- prompt_analysis = {
- "is_multi_file": True,
- "strategy": "per_entity",
- "criteria": "customer_id",
- "file_naming_pattern": "{customer_name}_data.docx"
- }
-
- adaptive_prompt = await buildAdaptiveExtractionPrompt(
- outputFormat="docx",
- userPrompt="Create one file for each customer",
- title="Customer Data",
- promptAnalysis=prompt_analysis,
- aiService=None,
- services=services
- )
-
- print(f"✓ Adaptive prompt generated: {len(adaptive_prompt)} characters")
- print(f" Contains multi-file instructions: {'documents' in adaptive_prompt}")
-
- # Test generic prompt building
- generic_prompt = await buildGenericExtractionPrompt(
- outputFormat="docx",
- userPrompt="Create a single report",
- title="Report",
- aiService=None,
- services=services
- )
-
- print(f"✓ Generic prompt generated: {len(generic_prompt)} characters")
- print(f" Contains single-file instructions: {'sections' in generic_prompt}")
-
- print("\n=== Prompt Builder Test Complete ===")
- return True
-
- except ImportError as e:
- print(f"Import error: {e}")
- return False
- except Exception as e:
- print(f"Error during prompt builder testing: {e}")
- return False
-
-async def main():
- """Run all tests."""
- print("Starting Multi-File Processing Tests...")
- print("=" * 50)
-
- tests = [
- test_multi_file_detection,
- test_json_schema_validation,
- test_prompt_builder
- ]
-
- results = []
- for test in tests:
- try:
- result = await test()
- results.append(result)
- except Exception as e:
- print(f"Test failed with exception: {e}")
- results.append(False)
-
- print("\n" + "=" * 50)
- print("Test Results Summary:")
- print(f" Tests run: {len(tests)}")
- print(f" Passed: {sum(results)}")
- print(f" Failed: {len(tests) - sum(results)}")
-
- if all(results):
- print("\n🎉 All tests passed! Multi-file processing is ready.")
- else:
- print("\n⚠️ Some tests failed. Check the implementation.")
-
- return all(results)
-
-if __name__ == "__main__":
- asyncio.run(main())