From cfeaccdf020a520e49bca27b61f68ee015b0e663 Mon Sep 17 00:00:00 2001 From: Ida Dittrich Date: Thu, 27 Nov 2025 13:18:07 +0100 Subject: [PATCH] finished pagination feature with filter and search options --- docs/code-documentation/aicore-component.md | 1046 +++++++ .../architecture-overview.md | 209 ++ .../connectors-component.md | 1241 +++++++++ .../datamodels-interfaces-component.md | 1832 +++++++++++++ docs/code-documentation/features-component.md | 981 +++++++ .../gateway-development-framework.md | 2281 ++++++++++++++++ .../interactive-workflow-planning-ui.md | 1133 ++++++++ docs/code-documentation/security-api.md | 508 ++++ docs/code-documentation/security-component.md | 1399 ++++++++++ .../services-api-reference.md | 2399 +++++++++++++++++ docs/code-documentation/services-component.md | 1530 +++++++++++ .../code-documentation/workflows-component.md | 1245 +++++++++ .../workflow-routes-frontend.md | 1677 ++++++++++++ .../20251127-113948-147_m_1_0_0/message.json | 19 + .../message_text.txt | 1 + .../20251127-113957-841_m_1_1_0/message.json | 19 + .../message_text.txt | 6 + .../20251127-113958-980_m_1_1_0/message.json | 19 + .../message_text.txt | 3 + .../20251127-114024-001_m_1_1_1/message.json | 19 + .../message_text.txt | 4 + .../document_001_metadata.json | 12 + .../document_001_test_document_memo.docx | Bin 0 -> 36938 bytes .../document_002_metadata.json | 12 + .../document_002_structured_content.json | 106 + .../20251127-114110-763_m_1_1_2/message.json | 19 + .../message_text.txt | 4 + .../document_001_memo_analysis_report.txt | 24 + .../document_001_metadata.json | 12 + .../document_002_metadata.json | 12 + .../document_002_structured_content_1.json | 134 + .../20251127-114147-275_m_1_1_3/message.json | 19 + .../message_text.txt | 4 + .../document_001_memo_analysis_summary.txt | 10 + .../document_001_metadata.json | 12 + .../document_002_metadata.json | 12 + .../document_002_structured_content_2.json | 52 + .../20251127-114229-639_m_1_1_4/message.json | 
19 + .../message_text.txt | 4 + .../document_001_memo_analysis_report_1.txt | 22 + .../document_001_metadata.json | 12 + .../document_002_metadata.json | 12 + .../document_002_structured_content_3.json | 136 + .../20251127-114301-221_m_1_1_5/message.json | 19 + .../message_text.txt | 4 + .../document_001_memo_summary.txt | 12 + .../document_001_metadata.json | 12 + .../document_002_metadata.json | 12 + .../document_002_structured_content_4.json | 62 + .../20251127-114309-479_m_1_1_0/message.json | 19 + .../message_text.txt | 4 + .../20251127-114310-696_m_1_0_0/message.json | 19 + .../message_text.txt | 4 + ...251127-113945-043-userintention_prompt.txt | 28 + ...1127-113947-044-userintention_response.txt | 8 + ...51127-113949-045-intentanalysis_prompt.txt | 30 + ...127-113951-046-intentanalysis_response.txt | 14 + .../20251127-113951-047-taskplan_prompt.txt | 100 + .../20251127-113957-048-taskplan_response.txt | 20 + .../20251127-113959-049-actionplan_prompt.txt | 87 + ...0251127-114003-050-actionplan_response.txt | 10 + .../20251127-114003-051-paramplan_prompt.txt | 62 + ...20251127-114005-052-paramplan_response.txt | 10 + ...-114007-053-document_generation_prompt.txt | 84 + ...14016-054-document_generation_response.txt | 89 + ...7-055-document_generation_final_result.txt | 105 + ...127-114022-056-renderer_styling_prompt.txt | 66 + ...7-114022-057-renderer_styling_response.txt | 56 + ...14022-058-document_generation_response.txt | 4 + ...27-114024-059-contentvalidation_prompt.txt | 57 + ...-114031-060-contentvalidation_response.txt | 24 + .../20251127-114031-061-refinement_prompt.txt | 79 + ...0251127-114032-062-refinement_response.txt | 6 + .../20251127-114033-063-actionplan_prompt.txt | 92 + ...0251127-114037-064-actionplan_response.txt | 10 + .../20251127-114038-065-paramplan_prompt.txt | 62 + ...20251127-114040-066-paramplan_response.txt | 10 + ...1127-114102-067-extraction_merged_text.txt | 103 + ...-114103-068-document_generation_prompt.txt | 189 ++ 
...14109-069-document_generation_response.txt | 117 + ...9-070-document_generation_final_result.txt | 133 + ...14109-071-document_generation_response.txt | 4 + ...27-114110-072-contentvalidation_prompt.txt | 57 + ...-114115-073-contentvalidation_response.txt | 24 + .../20251127-114115-074-refinement_prompt.txt | 77 + ...0251127-114116-075-refinement_response.txt | 6 + .../20251127-114118-076-actionplan_prompt.txt | 96 + ...0251127-114127-077-actionplan_response.txt | 10 + .../20251127-114127-078-paramplan_prompt.txt | 62 + ...20251127-114129-079-paramplan_response.txt | 10 + ...1127-114140-080-extraction_merged_text.txt | 11 + ...-114141-081-document_generation_prompt.txt | 97 + ...14146-082-document_generation_response.txt | 49 + ...6-083-document_generation_final_result.txt | 51 + ...14146-084-document_generation_response.txt | 4 + ...27-114147-085-contentvalidation_prompt.txt | 57 + ...-114155-086-contentvalidation_response.txt | 24 + .../20251127-114155-087-refinement_prompt.txt | 75 + ...0251127-114157-088-refinement_response.txt | 6 + .../20251127-114158-089-actionplan_prompt.txt | 100 + ...0251127-114159-090-actionplan_response.txt | 10 + .../20251127-114200-091-paramplan_prompt.txt | 62 + ...20251127-114201-092-paramplan_response.txt | 10 + ...1127-114214-093-extraction_merged_text.txt | 65 + ...-114214-094-document_generation_prompt.txt | 151 ++ ...14228-095-document_generation_response.txt | 121 + ...8-096-document_generation_final_result.txt | 135 + ...14228-097-document_generation_response.txt | 4 + ...27-114229-098-contentvalidation_prompt.txt | 57 + ...-114235-099-contentvalidation_response.txt | 24 + .../20251127-114235-100-refinement_prompt.txt | 80 + ...0251127-114237-101-refinement_response.txt | 6 + .../20251127-114238-102-actionplan_prompt.txt | 104 + ...0251127-114240-103-actionplan_response.txt | 10 + .../20251127-114241-104-paramplan_prompt.txt | 62 + ...20251127-114242-105-paramplan_response.txt | 10 + 
...1127-114250-106-extraction_merged_text.txt | 14 + ...-114251-107-document_generation_prompt.txt | 100 + ...14259-108-document_generation_response.txt | 57 + ...9-109-document_generation_final_result.txt | 61 + ...14259-110-document_generation_response.txt | 4 + ...27-114301-111-contentvalidation_prompt.txt | 57 + ...-114306-112-contentvalidation_response.txt | 24 + .../20251127-114306-113-refinement_prompt.txt | 75 + ...0251127-114308-114-refinement_response.txt | 6 + 125 files changed, 22171 insertions(+) create mode 100644 docs/code-documentation/aicore-component.md create mode 100644 docs/code-documentation/architecture-overview.md create mode 100644 docs/code-documentation/connectors-component.md create mode 100644 docs/code-documentation/datamodels-interfaces-component.md create mode 100644 docs/code-documentation/features-component.md create mode 100644 docs/code-documentation/gateway-development-framework.md create mode 100644 docs/code-documentation/interactive-workflow-planning-ui.md create mode 100644 docs/code-documentation/security-api.md create mode 100644 docs/code-documentation/security-component.md create mode 100644 docs/code-documentation/services-api-reference.md create mode 100644 docs/code-documentation/services-component.md create mode 100644 docs/code-documentation/workflows-component.md create mode 100644 docs/frontend-documentation/workflow-routes-frontend.md create mode 100644 logs/debug/messages/20251127-113948-147_m_1_0_0/message.json create mode 100644 logs/debug/messages/20251127-113948-147_m_1_0_0/message_text.txt create mode 100644 logs/debug/messages/20251127-113957-841_m_1_1_0/message.json create mode 100644 logs/debug/messages/20251127-113957-841_m_1_1_0/message_text.txt create mode 100644 logs/debug/messages/20251127-113958-980_m_1_1_0/message.json create mode 100644 logs/debug/messages/20251127-113958-980_m_1_1_0/message_text.txt create mode 100644 logs/debug/messages/20251127-114024-001_m_1_1_1/message.json create mode 
100644 logs/debug/messages/20251127-114024-001_m_1_1_1/message_text.txt create mode 100644 logs/debug/messages/20251127-114024-001_m_1_1_1/round1_task1_action1_results/document_001_metadata.json create mode 100644 logs/debug/messages/20251127-114024-001_m_1_1_1/round1_task1_action1_results/document_001_test_document_memo.docx create mode 100644 logs/debug/messages/20251127-114024-001_m_1_1_1/round1_task1_action1_results/document_002_metadata.json create mode 100644 logs/debug/messages/20251127-114024-001_m_1_1_1/round1_task1_action1_results/document_002_structured_content.json create mode 100644 logs/debug/messages/20251127-114110-763_m_1_1_2/message.json create mode 100644 logs/debug/messages/20251127-114110-763_m_1_1_2/message_text.txt create mode 100644 logs/debug/messages/20251127-114110-763_m_1_1_2/round1_task1_action2_results/document_001_memo_analysis_report.txt create mode 100644 logs/debug/messages/20251127-114110-763_m_1_1_2/round1_task1_action2_results/document_001_metadata.json create mode 100644 logs/debug/messages/20251127-114110-763_m_1_1_2/round1_task1_action2_results/document_002_metadata.json create mode 100644 logs/debug/messages/20251127-114110-763_m_1_1_2/round1_task1_action2_results/document_002_structured_content_1.json create mode 100644 logs/debug/messages/20251127-114147-275_m_1_1_3/message.json create mode 100644 logs/debug/messages/20251127-114147-275_m_1_1_3/message_text.txt create mode 100644 logs/debug/messages/20251127-114147-275_m_1_1_3/round1_task1_action3_results/document_001_memo_analysis_summary.txt create mode 100644 logs/debug/messages/20251127-114147-275_m_1_1_3/round1_task1_action3_results/document_001_metadata.json create mode 100644 logs/debug/messages/20251127-114147-275_m_1_1_3/round1_task1_action3_results/document_002_metadata.json create mode 100644 logs/debug/messages/20251127-114147-275_m_1_1_3/round1_task1_action3_results/document_002_structured_content_2.json create mode 100644 
logs/debug/messages/20251127-114229-639_m_1_1_4/message.json create mode 100644 logs/debug/messages/20251127-114229-639_m_1_1_4/message_text.txt create mode 100644 logs/debug/messages/20251127-114229-639_m_1_1_4/round1_task1_action4_results/document_001_memo_analysis_report_1.txt create mode 100644 logs/debug/messages/20251127-114229-639_m_1_1_4/round1_task1_action4_results/document_001_metadata.json create mode 100644 logs/debug/messages/20251127-114229-639_m_1_1_4/round1_task1_action4_results/document_002_metadata.json create mode 100644 logs/debug/messages/20251127-114229-639_m_1_1_4/round1_task1_action4_results/document_002_structured_content_3.json create mode 100644 logs/debug/messages/20251127-114301-221_m_1_1_5/message.json create mode 100644 logs/debug/messages/20251127-114301-221_m_1_1_5/message_text.txt create mode 100644 logs/debug/messages/20251127-114301-221_m_1_1_5/round1_task1_action5_results/document_001_memo_summary.txt create mode 100644 logs/debug/messages/20251127-114301-221_m_1_1_5/round1_task1_action5_results/document_001_metadata.json create mode 100644 logs/debug/messages/20251127-114301-221_m_1_1_5/round1_task1_action5_results/document_002_metadata.json create mode 100644 logs/debug/messages/20251127-114301-221_m_1_1_5/round1_task1_action5_results/document_002_structured_content_4.json create mode 100644 logs/debug/messages/20251127-114309-479_m_1_1_0/message.json create mode 100644 logs/debug/messages/20251127-114309-479_m_1_1_0/message_text.txt create mode 100644 logs/debug/messages/20251127-114310-696_m_1_0_0/message.json create mode 100644 logs/debug/messages/20251127-114310-696_m_1_0_0/message_text.txt create mode 100644 logs/debug/prompts/20251127-113945-043-userintention_prompt.txt create mode 100644 logs/debug/prompts/20251127-113947-044-userintention_response.txt create mode 100644 logs/debug/prompts/20251127-113949-045-intentanalysis_prompt.txt create mode 100644 logs/debug/prompts/20251127-113951-046-intentanalysis_response.txt 
create mode 100644 logs/debug/prompts/20251127-113951-047-taskplan_prompt.txt create mode 100644 logs/debug/prompts/20251127-113957-048-taskplan_response.txt create mode 100644 logs/debug/prompts/20251127-113959-049-actionplan_prompt.txt create mode 100644 logs/debug/prompts/20251127-114003-050-actionplan_response.txt create mode 100644 logs/debug/prompts/20251127-114003-051-paramplan_prompt.txt create mode 100644 logs/debug/prompts/20251127-114005-052-paramplan_response.txt create mode 100644 logs/debug/prompts/20251127-114007-053-document_generation_prompt.txt create mode 100644 logs/debug/prompts/20251127-114016-054-document_generation_response.txt create mode 100644 logs/debug/prompts/20251127-114017-055-document_generation_final_result.txt create mode 100644 logs/debug/prompts/20251127-114022-056-renderer_styling_prompt.txt create mode 100644 logs/debug/prompts/20251127-114022-057-renderer_styling_response.txt create mode 100644 logs/debug/prompts/20251127-114022-058-document_generation_response.txt create mode 100644 logs/debug/prompts/20251127-114024-059-contentvalidation_prompt.txt create mode 100644 logs/debug/prompts/20251127-114031-060-contentvalidation_response.txt create mode 100644 logs/debug/prompts/20251127-114031-061-refinement_prompt.txt create mode 100644 logs/debug/prompts/20251127-114032-062-refinement_response.txt create mode 100644 logs/debug/prompts/20251127-114033-063-actionplan_prompt.txt create mode 100644 logs/debug/prompts/20251127-114037-064-actionplan_response.txt create mode 100644 logs/debug/prompts/20251127-114038-065-paramplan_prompt.txt create mode 100644 logs/debug/prompts/20251127-114040-066-paramplan_response.txt create mode 100644 logs/debug/prompts/20251127-114102-067-extraction_merged_text.txt create mode 100644 logs/debug/prompts/20251127-114103-068-document_generation_prompt.txt create mode 100644 logs/debug/prompts/20251127-114109-069-document_generation_response.txt create mode 100644 
logs/debug/prompts/20251127-114109-070-document_generation_final_result.txt create mode 100644 logs/debug/prompts/20251127-114109-071-document_generation_response.txt create mode 100644 logs/debug/prompts/20251127-114110-072-contentvalidation_prompt.txt create mode 100644 logs/debug/prompts/20251127-114115-073-contentvalidation_response.txt create mode 100644 logs/debug/prompts/20251127-114115-074-refinement_prompt.txt create mode 100644 logs/debug/prompts/20251127-114116-075-refinement_response.txt create mode 100644 logs/debug/prompts/20251127-114118-076-actionplan_prompt.txt create mode 100644 logs/debug/prompts/20251127-114127-077-actionplan_response.txt create mode 100644 logs/debug/prompts/20251127-114127-078-paramplan_prompt.txt create mode 100644 logs/debug/prompts/20251127-114129-079-paramplan_response.txt create mode 100644 logs/debug/prompts/20251127-114140-080-extraction_merged_text.txt create mode 100644 logs/debug/prompts/20251127-114141-081-document_generation_prompt.txt create mode 100644 logs/debug/prompts/20251127-114146-082-document_generation_response.txt create mode 100644 logs/debug/prompts/20251127-114146-083-document_generation_final_result.txt create mode 100644 logs/debug/prompts/20251127-114146-084-document_generation_response.txt create mode 100644 logs/debug/prompts/20251127-114147-085-contentvalidation_prompt.txt create mode 100644 logs/debug/prompts/20251127-114155-086-contentvalidation_response.txt create mode 100644 logs/debug/prompts/20251127-114155-087-refinement_prompt.txt create mode 100644 logs/debug/prompts/20251127-114157-088-refinement_response.txt create mode 100644 logs/debug/prompts/20251127-114158-089-actionplan_prompt.txt create mode 100644 logs/debug/prompts/20251127-114159-090-actionplan_response.txt create mode 100644 logs/debug/prompts/20251127-114200-091-paramplan_prompt.txt create mode 100644 logs/debug/prompts/20251127-114201-092-paramplan_response.txt create mode 100644 
logs/debug/prompts/20251127-114214-093-extraction_merged_text.txt create mode 100644 logs/debug/prompts/20251127-114214-094-document_generation_prompt.txt create mode 100644 logs/debug/prompts/20251127-114228-095-document_generation_response.txt create mode 100644 logs/debug/prompts/20251127-114228-096-document_generation_final_result.txt create mode 100644 logs/debug/prompts/20251127-114228-097-document_generation_response.txt create mode 100644 logs/debug/prompts/20251127-114229-098-contentvalidation_prompt.txt create mode 100644 logs/debug/prompts/20251127-114235-099-contentvalidation_response.txt create mode 100644 logs/debug/prompts/20251127-114235-100-refinement_prompt.txt create mode 100644 logs/debug/prompts/20251127-114237-101-refinement_response.txt create mode 100644 logs/debug/prompts/20251127-114238-102-actionplan_prompt.txt create mode 100644 logs/debug/prompts/20251127-114240-103-actionplan_response.txt create mode 100644 logs/debug/prompts/20251127-114241-104-paramplan_prompt.txt create mode 100644 logs/debug/prompts/20251127-114242-105-paramplan_response.txt create mode 100644 logs/debug/prompts/20251127-114250-106-extraction_merged_text.txt create mode 100644 logs/debug/prompts/20251127-114251-107-document_generation_prompt.txt create mode 100644 logs/debug/prompts/20251127-114259-108-document_generation_response.txt create mode 100644 logs/debug/prompts/20251127-114259-109-document_generation_final_result.txt create mode 100644 logs/debug/prompts/20251127-114259-110-document_generation_response.txt create mode 100644 logs/debug/prompts/20251127-114301-111-contentvalidation_prompt.txt create mode 100644 logs/debug/prompts/20251127-114306-112-contentvalidation_response.txt create mode 100644 logs/debug/prompts/20251127-114306-113-refinement_prompt.txt create mode 100644 logs/debug/prompts/20251127-114308-114-refinement_response.txt diff --git a/docs/code-documentation/aicore-component.md b/docs/code-documentation/aicore-component.md new file mode 
100644 index 00000000..336eb71d --- /dev/null +++ b/docs/code-documentation/aicore-component.md @@ -0,0 +1,1046 @@ +# AI Core Component Documentation + +## Overview + +The `aicore` module is the **centralized AI infrastructure layer** that provides a **plugin-based architecture** for integrating multiple AI providers (OpenAI, Anthropic, Perplexity, Tavily) into the application. It acts as an abstraction layer between high-level AI services and specific AI provider APIs, enabling dynamic model discovery, intelligent model selection, and automatic failover. + +**Key Responsibilities:** +- Dynamic discovery and registration of AI connectors (plugins) +- Model registry with unified model metadata +- Intelligent model selection based on operation type, context size, and optimization criteria +- Automatic failover between models +- Standardized interface for AI operations across all providers + +## Architecture + +### System Architecture Overview + +```mermaid +graph TB + subgraph "Application Layer" + Routes[FastAPI Routes
routeWorkflows.py
routeChatPlayground.py] + end + + subgraph "Service Layer" + AiService[AiService
mainServiceAi.py] + Methods[callAiPlanning
callAiDocuments
callAiText] + AiService --> Methods + end + + subgraph "Interface Layer" + AiObjects[AiObjects
interfaceAiObjects.py] + CallHandler[call request
Handles failover & model calls] + AiObjects --> CallHandler + end + + subgraph "AI Core Layer" + Registry[ModelRegistry
discoverConnectors
registerConnector
getAvailableModels] + Selector[ModelSelector
selectModel
getFailoverModelList
scoring logic] + Base[BaseConnectorAi
getModels
getConnectorType
getCachedModels] + + Registry -.-> Selector + Selector -.-> Base + end + + subgraph "Plugin Connectors" + OpenAI[aicorePluginOpenai] + Anthropic[aicorePluginAnthropic] + Perplexity[aicorePluginPerplexity] + Tavily[aicorePluginTavily] + end + + subgraph "AI Provider APIs" + OpenAI_API[OpenAI API
api.openai.com] + Anthropic_API[Anthropic API
api.anthropic.com] + Perplexity_API[Perplexity API
api.perplexity.ai] + Tavily_API[Tavily API
        api.tavily.com] + end + + Routes --> AiService + AiService --> AiObjects + AiObjects --> Registry + AiObjects --> Selector + + Base --> OpenAI + Base --> Anthropic + Base --> Perplexity + Base --> Tavily + + OpenAI --> OpenAI_API + Anthropic --> Anthropic_API + Perplexity --> Perplexity_API + Tavily --> Tavily_API + + style Routes fill:#e1f5ff + style AiService fill:#fff3e0 + style AiObjects fill:#f3e5f5 + style Registry fill:#e8f5e9 + style Selector fill:#e8f5e9 + style Base fill:#e8f5e9 + style OpenAI fill:#fff9c4 + style Anthropic fill:#fff9c4 + style Perplexity fill:#fff9c4 + style Tavily fill:#fff9c4 +``` + +### Component Structure + +The aicore module is organized into several key files: + +- **aicoreBase.py**: Defines the abstract base class that all AI connectors must inherit from, establishing the contract for plugin implementations +- **aicoreModelRegistry.py**: Manages the centralized registry of all available AI models across all connectors +- **aicoreModelSelector.py**: Implements the intelligent model selection algorithm based on multiple criteria +- **aicorePlugin*.py files**: Individual connector implementations for each AI provider (OpenAI, Anthropic, Perplexity, Tavily, and potentially internal systems) + +Each plugin file follows the naming convention `aicorePlugin*.py`, which enables the automatic discovery mechanism to find and register them at startup without requiring manual configuration. + +### Core Components + +#### 1. **BaseConnectorAi** (`aicoreBase.py`) +The abstract base class that establishes the contract for all AI connector implementations. This class ensures that every AI provider plugin implements a consistent interface, making the system extensible and maintainable. 
+ +**Core Responsibilities:** + +The base connector defines several essential methods that every plugin must implement: + +- **Model Discovery**: Each connector provides its list of available models through `getModels()`, which returns comprehensive metadata about each model including capabilities, costs, and performance characteristics +- **Connector Identification**: The `getConnectorType()` method returns a unique identifier string for the connector (such as "openai" or "anthropic"), used throughout the system for routing and logging +- **Cached Model Access**: The `getCachedModels()` method provides performance optimization by returning cached model metadata with automatic TTL (Time-To-Live) validation +- **Model Lookup**: Utility methods like `getModelByDisplayName()` enable quick retrieval of specific models by their unique identifiers +- **Cache Management**: The `clearCache()` method allows manual cache invalidation when model configurations need immediate refresh + +**Critical Design Principle - Unique Display Names:** + +The system enforces a strict uniqueness constraint on model display names across all connectors. While the `name` field (used for actual API calls) can be duplicated across different model instances (for example, "gpt-4o" might have multiple instances for different use cases), the `displayName` must be globally unique. This serves as the primary key in the model registry and prevents configuration conflicts. Examples of unique display names include "OpenAI GPT-4o", "OpenAI GPT-4o Instance Vision", and "Anthropic Claude 3 Opus". + +**Performance Optimization Through Caching:** + +To minimize unnecessary operations, the base connector implements a sophisticated caching mechanism with a 5-minute TTL. When `getCachedModels()` is called, the system checks if cached data exists and if the last update timestamp is within the 300-second window. 
If the cache is still valid, it returns the cached models immediately, avoiding the overhead of regenerating model metadata. If the cache has expired, it automatically refreshes by calling `getModels()` and updates both the cache and timestamp. This approach significantly reduces computational overhead during high-frequency operations while ensuring data freshness. + +#### 2. **ModelRegistry** (`aicoreModelRegistry.py`) +The centralized registry serves as the single source of truth for all available AI models in the system. It acts as a dynamic inventory management system, automatically discovering, validating, and organizing models from all registered connectors. + +**Automatic Plugin Discovery:** + +The registry implements a sophisticated auto-discovery mechanism that scans the aicore directory for any files matching the pattern `aicorePlugin*.py`. This pattern-based discovery enables zero-configuration extensibility - developers can add new AI providers simply by creating a properly named file, and the system automatically detects and integrates it during startup. The discovery process imports each plugin module, inspects its classes to find those inheriting from BaseConnectorAi, and instantiates them for registration. + +**Dynamic Registration and Validation:** + +When a connector is registered through `registerConnector()`, the registry performs critical validation steps. It calls the connector's `getCachedModels()` method to retrieve all available models, then validates that each model's `displayName` is unique across the entire registry. If a duplicate is detected, the registration fails with a detailed error message identifying both the existing and conflicting model configurations. This strict validation prevents configuration errors that could lead to unpredictable model selection behavior. + +**Intelligent Refresh Mechanism:** + +The registry maintains model freshness through a dual-refresh strategy. 
First, it implements automatic periodic refresh with a 5-minute interval - when any query method is called, the system checks if the last refresh timestamp exceeds this threshold and triggers an automatic update if needed. Second, it provides a `refreshModels()` method with a force parameter, allowing manual refresh operations that bypass the TTL check. This is particularly useful during development or when connector configurations change dynamically. + +**Comprehensive Query Interface:** + +The registry exposes a rich query interface for model retrieval: + +- **Direct Lookup**: `getModel(displayName)` provides O(1) access to specific models using their unique identifier +- **Complete Inventory**: `getModels()` returns the full catalog of registered models +- **Connector Filtering**: `getModelsByConnector(connectorType)` enables retrieval of all models from a specific provider +- **Availability Filtering**: `getAvailableModels()` returns only models currently marked as available, filtering out any disabled or problematic models +- **Reverse Lookup**: `getConnectorForModel(displayName)` retrieves the connector instance responsible for a specific model, enabling direct connector interaction +- **Statistical Analysis**: `getModelStats()` provides aggregate metrics including model counts by connector, capability, and priority + +**Singleton Pattern:** + +The registry is implemented as a global singleton instance (modelRegistry) that can be imported and used throughout the application, ensuring consistent model access and preventing duplicate registries. + +#### 3. **ModelSelector** (`aicoreModelSelector.py`) +The intelligent model selection engine implements a sophisticated scoring algorithm that evaluates available models against multiple criteria to determine the optimal choice for each AI operation. 
Rather than using hard-coded rules or simple priority lists, the selector employs a weighted scoring system that considers operation compatibility, resource constraints, and performance preferences to create a ranked failover list. + +**Selection Algorithm:** + +```mermaid +flowchart TD + Start[AI Call Request] --> GetModels[Get Available Models
from Registry] + GetModels --> OpFilter[Filter by Operation Type
MUST support requested operation] + OpFilter --> SizeFilter[Filter by Prompt Size
Prompt must fit within 80% of context] + SizeFilter --> Scoring[Calculate Score for Each Model] + + Scoring --> Score1[Operation Type Rating × 1000
PRIMARY sorting criteria] + Scoring --> Score2[Size Rating
How well prompt+context fits] + Scoring --> Score3[Processing Mode Rating
Compatibility score] + Scoring --> Score4[Priority Rating
Speed/Quality/Cost preference] + + Score1 --> Combine[Combine All Scores] + Score2 --> Combine + Score3 --> Combine + Score4 --> Combine + + Combine --> Sort[Sort by Total Score
Descending] + Sort --> Failover[Create Failover List] + Failover --> Return[Return Best Model
+ Fallback Models] + + style Start fill:#e1f5ff + style OpFilter fill:#fff3e0 + style SizeFilter fill:#fff3e0 + style Scoring fill:#f3e5f5 + style Sort fill:#e8f5e9 + style Return fill:#c8e6c9 +``` + +**Detailed Algorithm Process:** + +**Phase 1: Operation Type Filtering (Mandatory Constraint)** + +The first filtering phase is absolute - a model must explicitly support the requested operation type to be considered. Each model in the registry declares its supported operations through an `operationTypes` list, where each operation (such as PLAN, DATA_ANALYSE, DATA_GENERATE, IMAGE_ANALYSE) is associated with a performance rating from 1-10. Models lacking the required operation type are immediately excluded from consideration, regardless of their other characteristics. This ensures that specialized operations like image analysis are only routed to vision-capable models, and web search operations are directed to appropriate connectors. + +**Phase 2: Context Size Validation (Resource Constraint)** + +After operation filtering, the selector validates that each remaining model can physically accommodate the input. The system calculates the approximate token count for both the prompt and context (using a 4-byte-per-token approximation), then compares this against 80% of each model's declared context length. This 80% threshold provides a safety margin for message formatting overhead, system prompts, and output token reservation. Models with insufficient context capacity are filtered out, preventing runtime failures due to context length violations. For models with zero context length (indicating unlimited capacity), this check is bypassed. + +**Phase 3: Multi-Factor Scoring (Quality Assessment)** + +Each model that passes both mandatory filters receives a composite score calculated from four weighted components: + +- **Operation Type Rating (Primary Factor)**: Multiplied by 1000 to establish it as the dominant sorting criterion. 
A model rated 9/10 for DATA_ANALYSE will score 9000 points from this factor alone, while a model rated 7/10 scores only 7000. This massive weighting ensures that operation-specific optimization takes precedence over other factors. + +- **Size Efficiency Rating**: Measures how efficiently the model utilizes its context window. If the prompt+context fits comfortably (total size ≤ 80% of capacity), the rating equals (actual size / maximum allowed size), rewarding larger models for handling substantial content. If the content exceeds the limit (shouldn't happen after filtering, but serves as safety), the rating inverts to (maximum / actual), penalizing undersized models. + +- **Processing Mode Compatibility**: Evaluates alignment between the model's processing mode (BASIC, ADVANCED, DETAILED) and the requested mode. Perfect matches score 1.0, while compatible mismatches receive fractional scores (e.g., 0.5 for ADVANCED model handling BASIC request). This allows flexible matching while preferring mode-appropriate models. + +- **Priority Optimization**: Applies user preference for speed, quality, or cost efficiency. For SPEED priority, models with high `speedRating` values score better. For QUALITY, `qualityRating` dominates. For COST, the system inverts cost metrics to favor inexpensive models while adding weighted bonuses for speed and quality. BALANCED priority treats all factors equally. + +**Phase 4: Ranking and Failover List Generation** + +After scoring, models are sorted in descending order by their composite scores. The resulting list represents an optimal failover chain - the first model is the best match for the specific request, while subsequent models serve as progressively less optimal but still viable alternatives. This ranked list is returned for use by the call handler, which attempts models in order until one succeeds. 
+ +**Primary Methods:** + +The selector exposes two main methods: `selectModel()` returns only the top-ranked model (index 0 of the failover list), while `getFailoverModelList()` returns the complete ranked list for failover handling. Both methods accept the same parameters: the prompt text, context data, AI call options, and the list of available models. + +**Global Singleton:** + +Like the registry, the selector is implemented as a global singleton (modelSelector) for consistent access throughout the application. + +#### 4. **Plugin Connectors** (`aicorePlugin*.py`) +Each plugin connector represents a concrete implementation of the BaseConnectorAi interface, tailored to a specific AI provider's API specifications and capabilities. These plugins serve as translation layers between the system's standardized interface and the provider-specific API requirements. + +**Architectural Pattern:** + +Each connector follows a consistent architectural pattern with four main components: + +**Initialization and Configuration:** +The constructor loads provider-specific configuration from the application's environment settings, including API keys, endpoint URLs, and any provider-specific parameters. It also initializes an HTTP client (typically using httpx) with appropriate timeouts, retry logic, and authentication headers. This separation of configuration from code enables easy deployment across different environments without code changes. + +**Connector Identification:** +The `getConnectorType()` method returns a simple string identifier for the connector, such as "openai", "anthropic", "perplexity", or "tavily". This identifier is used throughout the system for logging, routing, and model attribution. It must be unique across all connectors and is stored in every model's metadata. + +**Model Catalog Definition:** +The `getModels()` method returns a comprehensive list of AiModel instances, each representing a distinct AI model or model configuration. 
Each model entry includes: + +- **Identity**: Unique `displayName` (e.g., "OpenAI GPT-4o") and API `name` (e.g., "gpt-4o") +- **Technical Specifications**: Context window size in tokens, maximum output tokens, default temperature +- **Economic Metrics**: Cost per 1000 input tokens and output tokens, enabling accurate cost tracking +- **Performance Characteristics**: Speed rating (1-10) indicating response time, quality rating (1-10) for output quality +- **Operational Capabilities**: List of supported operation types with performance ratings for each +- **Execution Reference**: A callable reference (`functionCall`) pointing to the method that handles API communication +- **Strategic Attributes**: Priority classification (SPEED, QUALITY, COST, BALANCED) and processing mode (BASIC, ADVANCED, DETAILED) + +**API Communication Implementation:** + +Connectors implement one or more call methods (such as `callAiBasic()`, `callAiImage()`, or specialized methods) that handle the actual communication with the AI provider's API. 
These methods: + +- Accept standardized `AiModelCall` objects containing messages, model reference, and options +- Transform the standardized request format into the provider's specific API format (different providers use varying JSON schemas for requests) +- Execute HTTP requests with appropriate error handling, timeouts, and retry logic +- Parse provider-specific response formats back into standardized `AiModelResponse` objects +- Calculate actual costs based on token usage reported by the provider +- Handle provider-specific error codes and translate them into meaningful exceptions + +**Provider-Specific Adaptations:** + +Each connector adapts to its provider's unique characteristics: + +- **OpenAI Connectors**: Support both text completion and vision capabilities, handle rate limiting, manage model versioning +- **Anthropic Connectors**: Implement Claude-specific message formatting, handle thinking tokens, manage conversation context +- **Perplexity Connectors**: Integrate web search capabilities, handle citation extraction, manage search-enhanced responses +- **Tavily Connectors**: Implement web crawling protocols, handle structured data extraction, manage crawl depth and scope + +## Connection to serviceAi + +The `aicore` module is the **foundation layer** that `serviceAi` (AI Service) builds upon. Here's how they connect: + +### Integration Flow + +```mermaid +sequenceDiagram + participant App as Application
(app.py) + participant Service as Service Layer
(mainServiceAi.py) + participant Interface as Interface Layer
(interfaceAiObjects.py) + participant Core as AI Core
(aicore/) + participant Provider as AI Provider APIs + + App->>Service: HTTP Request + Service->>Interface: callAiDocuments/Planning + Interface->>Core: AiCallRequest + Core->>Core: Model Selection + Core->>Provider: API Call + Provider-->>Core: API Response + Core-->>Interface: AiCallResponse + Interface-->>Service: Processed Result + Service-->>App: HTTP Response +``` + +### Initialization Sequence + +```mermaid +sequenceDiagram + participant App as app.py + participant Lifecycle as featuresLifecycle + participant Service as AiService + participant AiObjects as AiObjects + participant Registry as ModelRegistry + participant Plugins as Plugin Connectors + + App->>Lifecycle: lifespan startup + Lifecycle->>Lifecycle: start() + Lifecycle->>Service: create AiService + Service->>AiObjects: AiObjects.create() + + AiObjects->>AiObjects: _discoverAndRegisterConnectors() + AiObjects->>Registry: discoverConnectors() + + Registry->>Registry: Scan aicore folder
for aicorePlugin*.py + Registry->>Plugins: Import & instantiate connectors + + loop For each discovered connector + AiObjects->>Registry: registerConnector(connector) + Registry->>Plugins: connector.getModels() + Plugins-->>Registry: List[AiModel] + Registry->>Registry: Validate displayName uniqueness + Registry->>Registry: Store models with displayName as key + end + + Registry-->>AiObjects: Registration complete + AiObjects-->>Service: Initialized with all models + Service-->>Lifecycle: AiService ready + Lifecycle-->>App: Startup complete + + Note over Registry: Models cached for 5 minutes
with auto-refresh +``` + +### Service-to-Core Communication + +The communication between the service layer and aicore follows a well-defined request-response pattern with multiple abstraction layers, each serving a specific purpose in the overall architecture. + +**High-Level Service Operations:** + +The AiService class (in `mainServiceAi.py`) provides domain-specific methods that application features and workflows can invoke. These methods abstract away the complexity of AI operations, presenting simple interfaces like `callAiPlanning()` for task planning and `callAiDocuments()` for document processing. + +When `callAiPlanning()` is invoked, it handles prompt construction by integrating placeholders and building a complete prompt string. It then creates an AiCallRequest object configured specifically for planning operations - with operation type set to PLAN, priority set to QUALITY (since planning requires accurate reasoning), and processing mode set to DETAILED (to ensure comprehensive analysis). This request is passed to `aiObjects.call()`, initiating the core AI processing chain. + +The `callAiDocuments()` method follows a similar pattern but with more flexibility - it accepts custom options, handles document attachments, and can process various output formats. It manages document extraction, prompt building with continuation contexts, and result formatting, while delegating the actual AI communication to the aicore layer. + +**Interface Layer Orchestration:** + +The AiObjects class (in `interfaceAiObjects.py`) serves as the orchestration layer, coordinating between the service layer's high-level requests and the aicore's model selection and execution capabilities. When its `call()` method receives an AiCallRequest, it follows a three-phase process: + +**Phase 1 - Model Selection:** +The interface queries the modelRegistry to retrieve all currently available models. 
It then invokes the modelSelector's `getFailoverModelList()` method, passing the request's prompt, context, and options. The selector returns a prioritized list of suitable models, ranked from most to least optimal for the specific request characteristics. + +**Phase 2 - Failover Execution:** +The interface iterates through the failover list, attempting each model in sequence. For each attempt, it calls the internal `_callWithModel()` method, which constructs a standardized AiModelCall object and invokes the model's `functionCall` reference. This reference points to the connector's API communication method, which executes the actual HTTP request to the AI provider. + +If the model call succeeds, the interface immediately returns the AiCallResponse to the service layer, completing the request. If an exception occurs (due to API errors, rate limits, or other issues), the interface logs the error with detailed context and proceeds to the next model in the failover list. + +**Phase 3 - Completion or Failure:** +If any model succeeds, the operation completes successfully. If all models in the failover list fail (a rare but possible scenario during API outages or configuration errors), the interface returns an AiCallResponse with an error message and error count, allowing the service layer to handle the failure gracefully. 
+ +**Cross-Cutting Concerns:** + +Throughout this communication flow, several cross-cutting concerns are handled automatically: + +- **Metrics Collection**: Every AI call records timing, token usage, costs, and error counts for monitoring and optimization +- **Progress Tracking**: Long-running operations emit progress updates through callbacks for user feedback +- **Content Chunking**: Large content that exceeds model context limits is automatically chunked and processed in segments +- **Token Management**: The system calculates token usage estimates and reserves appropriate context space for prompts, system messages, and expected outputs + +### Key Integration Points + +1. **Model Selection**: `serviceAi` delegates to `modelSelector` for choosing the right model +2. **Failover Handling**: `AiObjects.call()` automatically tries multiple models if one fails +3. **Operation Types**: `serviceAi` defines operation types (PLAN, DATA_ANALYSE, etc.) that `aicore` uses for selection +4. **Standardized Interface**: All AI calls go through `AiCallRequest`/`AiCallResponse` regardless of provider + +## Connection to the Application + +### Application Flow + +```mermaid +sequenceDiagram + participant User + participant Route as FastAPI Route + participant Workflow as Workflow/Feature + participant Service as AiService + participant Objects as AiObjects + participant Registry as ModelRegistry + participant Selector as ModelSelector + participant Plugin as Plugin Connector + participant API as AI Provider API + + User->>Route: HTTP Request + Route->>Workflow: Call workflow + Workflow->>Service: callAiDocuments() + Service->>Objects: aiObjects.call(request) + Objects->>Registry: getAvailableModels() + Registry-->>Objects: List of models + Objects->>Selector: getFailoverModelList() + Selector-->>Objects: Sorted model list + + loop Try each model until success + Objects->>Plugin: model.functionCall() + Plugin->>API: HTTP Request + + alt Success + API-->>Plugin: Response + 
Plugin-->>Objects: AiModelResponse + Objects-->>Service: AiCallResponse + else Error + API-->>Plugin: Error + Plugin-->>Objects: Exception + Objects->>Objects: Try next model + end + end + + Service-->>Workflow: Result + Workflow-->>Route: Response + Route-->>User: HTTP Response + + Note over Objects,Plugin: Automatic failover
tries next best model +``` + +### Example: Chat Workflow + +**User Request**: "Analyze this document and extract key information" + +```mermaid +sequenceDiagram + participant User + participant Route as Route Handler
routeChatPlayground.py + participant Workflow as Workflow Layer + participant AiService as AiService
mainServiceAi.py + participant AiObjects as AiObjects
interfaceAiObjects.py + participant Registry as ModelRegistry + participant Selector as ModelSelector + participant Connector as aicorePluginOpenai.py + participant OpenAI as OpenAI API + + User->>Route: POST /chat/message
"Analyze document" + Route->>Workflow: featureWorkflow.run(request) + + Workflow->>AiService: callAiDocuments()
operationType=DATA_EXTRACT + Note over AiService: Build prompt with placeholders + + AiService->>AiObjects: aiObjects.call(request) + AiObjects->>Registry: getAvailableModels() + Registry-->>AiObjects: List of models + + AiObjects->>Selector: getFailoverModelList() + Note over Selector: Filter by DATA_EXTRACT
Score and sort models + Selector-->>AiObjects: [GPT-3.5, GPT-4, ...] + + AiObjects->>Connector: model.functionCall(AiModelCall) + Note over Connector: Format for OpenAI API + + Connector->>OpenAI: HTTP POST with messages + OpenAI-->>Connector: JSON response + + Connector-->>AiObjects: AiModelResponse + AiObjects-->>AiService: AiCallResponse + Note over AiService: Handle looping if needed + + AiService-->>Workflow: Extracted content + Workflow-->>Route: Result with documents + Route-->>User: HTTP 200 + JSON response + + Note over User,OpenAI: Full request/response cycle
with automatic failover +``` + +**Detailed Flow Breakdown:** + +**Step 1: HTTP Request Reception** +When a user sends a chat message through the frontend, it arrives as an HTTP POST request to the `/chat/message` endpoint defined in `routeChatPlayground.py`. The route handler receives a ChatMessageRequest containing the user's message, any attached documents, and conversation context. The handler immediately delegates to the workflow system by calling `featureWorkflow.run(request)`, which orchestrates the entire chat processing pipeline. + +**Step 2: Workflow Orchestration** +The workflow layer (living between routes and services) analyzes the user's request to determine the appropriate processing strategy. For a document analysis request, it identifies that document extraction is needed and invokes `serviceCenter.ai.callAiDocuments()`. This call includes the constructed prompt ("Extract key information from documents"), the attached chat documents, and explicitly configured options specifying DATA_EXTRACT as the operation type - signaling that this is an information extraction task rather than generation or analysis. + +**Step 3: Service Layer Processing** +The AiService receives the document processing request and performs several preparatory operations. It builds the complete prompt by replacing any placeholder markers with actual content (such as document titles, user context, or system instructions). It validates the documents and converts them into the appropriate format for AI processing. For lengthy responses that might span multiple AI generations, it sets up a looping mechanism that can handle continuation contexts. Finally, it creates an AiCallRequest object and passes it to `aiObjects.call()`, transitioning into the core AI layer. + +**Step 4: Intelligent Model Selection** +The AiObjects interface queries the modelRegistry to retrieve all currently available and healthy models. 
It then invokes the modelSelector with the full request context - passing the prompt text, any additional context, and the configured options. The selector executes its multi-phase filtering and scoring algorithm, ultimately returning a prioritized failover list. For a DATA_EXTRACT operation, this list typically starts with fast, cost-efficient models (like GPT-3.5 Turbo or Claude Haiku) since extraction doesn't require the highest reasoning capabilities. + +**Step 5: Model Execution with Failover** +AiObjects begins iterating through the failover list, attempting each model in sequence. For the first model (assume GPT-3.5 Turbo from OpenAI), it constructs an AiModelCall object containing the formatted messages and invokes the model's registered `functionCall`, which points to the OpenAI connector's API method. The connector transforms the standardized request into OpenAI's specific JSON format, adds authentication headers, and sends an HTTP POST request to `api.openai.com/v1/chat/completions`. + +If the OpenAI API responds successfully, the connector parses the JSON response, extracts the generated text, calculates costs based on reported token usage, and wraps everything in an AiModelResponse object. This response flows back through AiObjects, which converts it to an AiCallResponse and returns it to the service layer. + +If the API call fails (network timeout, rate limit, API error), the connector throws an exception. AiObjects catches this exception, logs detailed error information including the model name and error type, and immediately proceeds to the next model in the failover list. This process continues until either a model succeeds or the entire list is exhausted. + +**Step 6: Response Assembly and Delivery** +Once the AiService receives a successful AiCallResponse, it processes the content according to the request specifications. 
For document extraction, this might involve parsing structured JSON from the AI's response, validating the extracted data against expected schemas, and formatting it for frontend consumption. The processed result flows back up through the workflow layer, which adds any workflow-specific metadata (execution time, step logs), and finally reaches the route handler. The handler constructs an HTTP response with appropriate status codes and headers, delivering the extracted information back to the waiting frontend client. + +**Error Handling Throughout:** +At every step, comprehensive error handling ensures graceful degradation. If document processing fails, the workflow might retry with different parameters or return a helpful error message. If all AI models fail, the system returns a structured error response rather than crashing. Each failure point is logged with sufficient context for debugging and monitoring. + +### Configuration + +**Environment-Based Secrets Management:** + +The aicore system loads all sensitive configuration through the application's central `APP_CONFIG` system, which reads from environment files (env_dev.env, env_int.env, env_prod.env) based on the deployment environment. Each AI provider connector requires its API key stored under a standardized naming convention: `Connector_Ai<Provider>_API_SECRET`. For example, the OpenAI connector looks for `Connector_AiOpenai_API_SECRET`, while Anthropic uses `Connector_AiAnthropic_API_SECRET`. This convention enables consistent configuration management across all providers and environments. + +Additional provider-specific settings follow similar naming patterns with descriptive suffixes. The SECRET suffix indicates that these values contain sensitive information and should never be committed to version control or exposed in logs. Configuration loading happens during connector initialization, allowing different API keys per environment without code changes.
+ +**Plugin-Level Model Configuration:** + +Each plugin file contains hard-coded model definitions specifying technical and economic characteristics. These configurations include: + +- **Capacity Parameters**: Context window sizes (in tokens) define maximum input lengths, while max token settings limit output generation length +- **Economic Metrics**: Input and output costs per 1000 tokens enable accurate cost tracking and budget management +- **Performance Characteristics**: Speed ratings (1-10 scale) indicate typical response time, while quality ratings reflect output sophistication and accuracy +- **Operational Capabilities**: Operation type ratings specify which tasks each model handles well, with ratings from 1-10 for supported operations +- **Strategic Classifications**: Priority tags (SPEED, QUALITY, COST, BALANCED) and processing mode designations (BASIC, ADVANCED, DETAILED) guide selection algorithms + +These plugin-level configurations represent the static characteristics of models and change only when model capabilities are updated or new models are added. They're versioned with the code rather than stored in environment variables, since they're not environment-specific or sensitive. + +## Key Features + +### 1. **Dynamic Plugin Architecture** + +```mermaid +graph LR + subgraph "Auto-Discovery Process" + Scan[Scan aicore folder
for aicorePlugin*.py] + Import[Import module dynamically] + Find[Find BaseConnectorAi
subclasses] + Instantiate[Instantiate connector] + Register[Register in ModelRegistry] + end + + subgraph "Plugin Files" + P1[aicorePluginOpenai.py] + P2[aicorePluginAnthropic.py] + P3[aicorePluginPerplexity.py] + P4[aicorePluginTavily.py] + P5[aicorePlugin*.py
Add new plugins here] + end + + Scan --> P1 + Scan --> P2 + Scan --> P3 + Scan --> P4 + Scan --> P5 + + P1 --> Import + P2 --> Import + P3 --> Import + P4 --> Import + P5 --> Import + + Import --> Find + Find --> Instantiate + Instantiate --> Register + + Register --> Models[All Models Available
in ModelRegistry] + + style Scan fill:#e1f5ff + style Register fill:#c8e6c9 + style P5 fill:#fff9c4 + style Models fill:#e8f5e9 +``` + +**Key Benefits:** +- New AI providers can be added by creating `aicorePlugin*.py` files +- No code changes needed in core logic +- Automatic discovery and registration + +### 2. **Intelligent Model Selection** + +The model selection engine goes far beyond simple rule-based routing by implementing a sophisticated multi-criteria decision system: + +**Holistic Evaluation:** +Rather than selecting models based on a single factor, the selector considers operation type compatibility (can this model handle planning vs. extraction?), resource constraints (will the prompt fit?), performance preferences (does the user prioritize speed or quality?), and cost implications. Each factor contributes to a weighted score that reflects the model's overall suitability. + +**Context-Aware Decisions:** +The selector analyzes not just what operation is requested, but also the size and complexity of the input. A simple data extraction from a small document might route to a fast, economical model like GPT-3.5 Turbo, while complex multi-document analysis with a large prompt routes to more capable models like GPT-4 or Claude Opus. This context-awareness optimizes the trade-off between cost and capability. + +**Ranked Failover Lists:** +Instead of returning a single "best" model, the selector produces a complete ranked list representing a spectrum from optimal to acceptable. This ranked list serves as a failover chain - if the top model fails due to rate limits or transient errors, the system immediately tries the second-ranked model without user intervention or workflow delays. This approach significantly improves system reliability and reduces user-facing errors. + +### 3. **Automatic Failover** + +```mermaid +flowchart TD + Start[AI Call Request] --> GetList[Get Failover Model List
Sorted by Score] + GetList --> Loop{Models
Available?} + + Loop -->|Yes| Try[Try Model #N] + Try --> Call[Call model.functionCall] + + Call --> Success{Success?} + Success -->|Yes| Return[Return Response] + Success -->|No| Log[Log Error with Details] + + Log --> More{More Models
in List?} + More -->|Yes| Next[Try Next Model] + Next --> Loop + More -->|No| Fail[All Models Failed] + + Loop -->|No| Error[Return Error Response] + Fail --> Error + Return --> End[Response to Caller] + Error --> End + + style Start fill:#e1f5ff + style Try fill:#fff3e0 + style Success fill:#f3e5f5 + style Return fill:#c8e6c9 + style Error fill:#ffcdd2 + style Next fill:#fff9c4 +``` + +**Key Benefits:** +- If primary model fails, automatically tries next best +- Logs each attempt with detailed error information +- Ensures high availability of AI operations +- No manual intervention required + +### 4. **Model Caching** + +```mermaid +stateDiagram-v2 + [*] --> Empty: System Start + Empty --> Loading: First Request + Loading --> Cached: getModels() called + Cached --> Valid: Check TTL + Valid --> Cached: TTL < 5 min + Valid --> Expired: TTL >= 5 min + Expired --> Loading: Refresh + Loading --> Cached: Cache Updated + Cached --> [*]: Return Models + + note right of Cached + Models cached for 5 minutes + Reduces API calls + Improves performance + end note + + note right of Loading + Calls connector.getModels() + Updates _last_cache_update + Stores in _models_cache + end note +``` + +**Key Benefits:** +- 5-minute TTL cache for model metadata +- Reduces repeated API calls +- Improves performance +- Manual cache clearing available via `clearCache()` + +### 5. **Unified Interface** + +One of the aicore system's most powerful design principles is its provider-agnostic abstraction layer: + +**Universal Request Format:** +Regardless of whether the eventual API call goes to OpenAI, Anthropic, Perplexity, or any other provider, the requesting code always uses the same AiCallRequest structure. This insulates application code from the complexity and variability of different provider APIs. Developers can write workflow logic once, and the system handles all provider-specific transformations transparently. 
+ +**Standardized Response Structure:** +Every AI operation returns an AiCallResponse object with the same structure and semantics, whether it came from GPT-4, Claude, or a specialized search model. This consistency simplifies response handling code - no need for provider-specific parsing logic or conditional handling based on which model was used. + +**Consistent Error Semantics:** +Different AI providers report errors in vastly different formats - OpenAI uses different status codes and error structures than Anthropic, which differs from Perplexity. The aicore connectors translate all these provider-specific error formats into consistent error responses with standardized error counts and messages. This enables unified error handling logic throughout the application. + +**Normalized Metrics:** +Cost calculations, timing measurements, and token usage reporting follow the same format regardless of provider. This enables apples-to-apples comparisons of different models' performance and economics, facilitating data-driven decisions about model selection strategies. + +### 6. **Operation Type System** + +The operation type taxonomy provides semantic categorization of AI tasks, enabling intelligent routing and specialized model selection: + +**Task-Based Classification:** +Rather than selecting models based on generic "intelligence" levels, the system classifies each request by what it's trying to accomplish. This task-based approach recognizes that different models excel at different types of operations - a model optimized for rapid extraction might not be ideal for deep analytical reasoning, even if both are "capable" in an abstract sense. + +**Operation Type Catalog:** + +- **PLAN**: Strategic reasoning operations including task decomposition, action sequencing, and decision planning. These operations require strong logical reasoning and the ability to consider multiple factors simultaneously. Typically routed to high-capability models like GPT-4 or Claude Opus. 
+ +- **DATA_ANALYSE**: Analytical operations that examine data to identify patterns, draw insights, or make assessments. Requires good comprehension and reasoning but not necessarily creative generation. Often uses balanced models that provide good analysis without premium costs. + +- **DATA_GENERATE**: Creative content generation including report writing, document creation, and structured output generation. Emphasizes coherent, well-structured output over analytical depth. Can often use mid-tier models effectively. + +- **DATA_EXTRACT**: Information extraction and parsing operations that pull structured data from unstructured sources. Speed and accuracy matter more than sophisticated reasoning. Frequently routed to fast, economical models like GPT-3.5 Turbo or Claude Haiku. + +- **IMAGE_ANALYSE**: Vision operations including image understanding, OCR, visual question answering, and scene description. Requires specialized vision-capable models with multimodal understanding. Automatically routes to GPT-4 Vision, Claude Vision, or similar models. + +- **IMAGE_GENERATE**: Image creation and generation operations. Routes to specialized generative models like DALL-E or Stable Diffusion connectors. + +- **WEB_SEARCH**: Real-time web search operations that query current information. Routes to search-specialized connectors like Perplexity that integrate web search APIs. + +- **WEB_CRAWL**: Web content extraction and crawling operations. Routes to specialized web crawling connectors like Tavily that handle website traversal and content extraction. + +**Performance Rating System:** +Each model declares not just which operations it supports, but how well it performs each operation on a 1-10 scale. A model might rate 9/10 for DATA_ANALYSE but only 6/10 for DATA_GENERATE, reflecting its strengths in analytical over creative tasks. These ratings form the primary sorting criterion in model selection, ensuring task-appropriate routing. + +### 7. 
**Content-Aware Chunking** + +When content exceeds a model's context capacity, the system employs sophisticated chunking strategies rather than simply failing: + +**Model-Specific Chunk Sizing:** +Chunking decisions are based on each model's specific capabilities rather than using universal chunk sizes. A model with a 128K token context window receives much larger chunks than one with a 16K limit. The system calculates optimal chunk sizes by considering the model's total context length, subtracting reserved space for prompts and system messages, and applying a safety margin (typically 70-80% utilization). + +**Comprehensive Token Accounting:** +Naive chunking might only consider content size, but the aicore system accounts for all token consumers: the user prompt (which repeats with each chunk), system message overhead (message formatting and instructions), output token reservation (space the model needs for its response), and protocol overhead (JSON structure and metadata). This comprehensive accounting prevents context overflow errors during generation. + +**Intelligent Result Merging:** +After processing multiple chunks, their results must be intelligently combined. Simple concatenation can produce disjointed or redundant output. The system employs content-type-aware merging strategies - text chunks are merged with appropriate spacing and deduplication, structured data is merged while preserving relationships, and vision results are aggregated with context preservation. The merging system maintains coherence across chunk boundaries, producing results that read as unified responses rather than fragmented pieces. + +**Progressive Processing:** +For very large documents, chunking enables progressive processing where each chunk can be processed as soon as it's prepared, rather than waiting for the entire document. This streaming approach reduces perceived latency and enables progress reporting to users, showing incremental completion rather than a black box wait. 
+ +## Data Models + +### Core Data Models (`datamodelAi.py`) + +```mermaid +classDiagram + class AiModel { + +string name + +string displayName + +string connectorType + +string apiUrl + +float temperature + +int maxTokens + +int contextLength + +float costPer1kTokensInput + +float costPer1kTokensOutput + +int speedRating + +int qualityRating + +callable functionCall + +PriorityEnum priority + +ProcessingModeEnum processingMode + +List~OperationTypeRating~ operationTypes + +string version + +callable calculatePriceUsd + } + + class AiCallRequest { + +string prompt + +string context + +AiCallOptions options + +List~ContentPart~ contentParts + } + + class AiCallOptions { + +OperationTypeEnum operationType + +PriorityEnum priority + +ProcessingModeEnum processingMode + +bool compressPrompt + +bool compressContext + } + + class AiCallResponse { + +string content + +string modelName + +float priceUsd + +float processingTime + +int bytesSent + +int bytesReceived + +int errorCount + } + + class OperationTypeEnum { + <<enumeration>> + PLAN + DATA_ANALYSE + DATA_GENERATE + DATA_EXTRACT + IMAGE_ANALYSE + IMAGE_GENERATE + WEB_SEARCH + WEB_CRAWL + } + + class PriorityEnum { + <<enumeration>> + BALANCED + SPEED + QUALITY + COST + } + + class ProcessingModeEnum { + <<enumeration>> + BASIC + ADVANCED + DETAILED + } + + AiCallRequest --> AiCallOptions + AiCallOptions --> OperationTypeEnum + AiCallOptions --> PriorityEnum + AiCallOptions --> ProcessingModeEnum + AiModel --> PriorityEnum + AiModel --> ProcessingModeEnum + + note for AiModel "Unique displayName required\nacross all connectors" + note for AiCallRequest "Input to AI system" + note for AiCallResponse "Output from AI system" +``` + +**Core Data Model Descriptions:** + +**AiModel:** Represents a complete model configuration with all metadata required for selection, execution, and cost tracking. The `name` field contains the API-level identifier used in actual provider calls, while `displayName` serves as the globally unique identifier within the registry.
Technical specifications like `contextLength` (maximum input tokens) and `maxTokens` (maximum output tokens) inform chunking and validation logic. Economic fields (`costPer1kTokensInput`, `costPer1kTokensOutput`) enable precise cost tracking across all operations. Performance metrics (`speedRating`, `qualityRating`) influence selection algorithms. The `functionCall` field holds a callable reference to the connector method that executes API communication. The `operationTypes` list defines which operation types this model supports and how well it performs each, using ratings from 1-10. + +**AiCallRequest:** Encapsulates all information needed to execute an AI operation. The `prompt` contains the primary instruction or question, while optional `context` provides supporting information. The `options` object configures operation behavior including type, priority, and processing mode. For multi-modal requests (like vision operations), the `contentParts` list can contain multiple pieces of content with different MIME types. + +**AiCallOptions:** Configures how an AI operation should be executed. The `operationType` determines what kind of operation this is (planning, analysis, generation, etc.), which drives model selection. The `priority` indicates whether to optimize for speed, quality, cost, or balance. The `processingMode` suggests the depth of processing required (basic for simple tasks, detailed for complex reasoning). Boolean flags like `compressPrompt` and `compressContext` control whether the system should attempt content compression to fit context limits. + +**AiCallResponse:** Contains the complete result of an AI operation including the generated `content`, the `modelName` that produced it, and comprehensive metrics. Cost tracking is provided via `priceUsd`, calculated based on actual token usage reported by the provider. 
Performance metrics include `processingTime` (wall-clock time for the operation), `bytesSent` and `bytesReceived` (for network monitoring), and `errorCount` (zero for success, greater than zero indicating partial or complete failure). + +## Best Practices + +### Adding a New AI Provider + +The plugin architecture makes adding new AI providers straightforward through a four-step process: + +**Step 1: Create the Plugin File** + +Create a new file in the `modules/aicore` directory following the naming convention `aicorePlugin<Name>.py`, where `<Name>` is a descriptive name for the AI service (e.g., `aicorePluginCohere` for Cohere AI). The filename itself triggers automatic discovery - the system scans for any file matching the `aicorePlugin*.py` pattern during initialization. + +**Step 2: Implement the Connector Class** + +Within your plugin file, create a class that inherits from BaseConnectorAi. This class must implement several required methods: + +**Connector Identification:** +The `getConnectorType()` method returns a simple string identifier (lowercase, no spaces) that uniquely identifies this connector throughout the system. This identifier appears in logs, model metadata, and routing decisions. + +**Model Catalog Definition:** +The `getModels()` method returns a list of AiModel instances, one for each model configuration you want to expose.
Each AiModel requires comprehensive metadata including: +- A unique displayName that differs from all other models in the system (e.g., "Cohere Command-R Plus") +- The API model name used in actual API calls +- Technical specifications (context length, max output tokens, temperature) +- Economic data (input and output costs per 1000 tokens) +- Performance ratings (speed and quality on 1-10 scales) +- Operational capabilities defined via `createOperationTypeRatings()`, specifying which operation types the model supports and how well (rating 1-10 for each) +- A reference to the callable method that handles API communication (typically a method on your connector class) + +**API Communication Method:** +Implement one or more async methods (like `callAi()`) that accept an AiModelCall object and return an AiModelResponse. This method handles the actual HTTP communication with your provider's API. It must: +- Extract messages from the AiModelCall +- Transform them into the provider's expected JSON format +- Execute the HTTP request with proper authentication and error handling +- Parse the provider's response format +- Extract the generated text and any usage statistics +- Calculate costs based on token usage +- Return everything wrapped in an AiModelResponse object + +**Step 3: Configure Environment Variables** + +Add the necessary configuration to your environment files (env_dev.env, env_int.env, env_prod.env). At minimum, this includes the API key for authentication, but might also include endpoint URLs, organization IDs, or other provider-specific settings. Use descriptive configuration key names following the convention `Connector_Ai_<Name>_SECRET` for sensitive values. + +**Step 4: Automatic Integration** + +No manual registration or configuration code changes are required.
When the application next starts, the modelRegistry's discovery mechanism automatically: +- Scans the aicore directory +- Finds your new plugin file +- Imports the module +- Instantiates your connector class +- Calls getModels() to retrieve available models +- Validates displayName uniqueness +- Registers all models in the global registry + +Your new AI provider is now fully integrated and will participate in model selection for appropriate operation types. The system logs will show discovery and registration messages confirming successful integration. + +### Model Selection Guidelines + +- **PLAN operations**: Use high-quality models (GPT-4, Claude 3 Opus) +- **DATA_GENERATE**: Balanced models for quality/cost trade-off +- **DATA_EXTRACT**: Speed-optimized models for bulk processing +- **IMAGE_ANALYSE**: Vision-capable models only +- **WEB_SEARCH**: Specialized search connectors (Perplexity, Tavily) + +### Error Handling Philosophy + +The aicore system implements a comprehensive error handling strategy designed for resilience and observability: + +**Automatic Failover:** +When you invoke `aiObjects.call()` with a request, the system automatically attempts multiple models from the failover list until one succeeds. Each failure is logged with detailed context (model name, error type, error message) but doesn't interrupt the execution flow. Only if all models in the failover list fail does the method return an error response. + +**Graceful Degradation:** +Rather than throwing exceptions that crash workflows, the system returns AiCallResponse objects even in failure scenarios. These error responses have `errorCount` greater than zero and contain descriptive error messages in the `content` field. This allows calling code to inspect the errorCount property and decide how to handle partial failures - whether to retry with different parameters, fall back to alternative processing paths, or present user-friendly error messages. 
+ +**Comprehensive Logging:** +Every error is logged with sufficient context for debugging: the attempted model's displayName, the operation type, the error type (network timeout, API error, rate limit, etc.), and the full error message. This creates an audit trail for troubleshooting production issues without requiring verbose debug logging during normal operations. + +**Error Classification:** +The system distinguishes between transient errors (network timeouts, temporary API issues) that warrant trying another model, and permanent errors (authentication failures, malformed requests) that indicate configuration problems requiring immediate attention. Transient errors trigger failover silently, while permanent errors are logged at higher severity levels. + +## Performance Considerations + +### Caching +- Model registry caches for 5 minutes +- Connector models cached individually +- Reduces discovery overhead + +### Failover Strategy +- Models sorted by score (best first) +- Failed models logged with detailed errors +- Next best model tried automatically + +### Chunking +- Large content automatically chunked based on model limits +- Conservative 70-80% utilization for safety +- Intelligent merging of chunk results + +### Cost Optimization +- Model selector considers cost ratings +- Price calculated per call for tracking +- Can prioritize by cost with `PriorityEnum.COST` + +## Troubleshooting + +### Common Issues + +1. **"No models available"** + - Check API keys in environment configuration + - Verify connector plugins exist in `aicore/` folder + - Check logs for connector initialization errors + +2. **"No suitable model found"** + - Check if operation type is supported by any model + - Verify prompt size isn't too large for all models + - Review model filtering criteria in logs + +3. **"All models failed"** + - Check API connectivity and keys + - Review model-specific error messages in logs + - Verify request format is correct + +4. 
**"Duplicate displayName"** + - Each model must have unique `displayName` + - Check all plugin files for name conflicts + - Naming convention: `<Provider> <ModelName>` (e.g., "OpenAI GPT-4o") + +## Future Enhancements + +- **Streaming Support**: Real-time response streaming for chat interfaces +- **Model Health Monitoring**: Track success rates and performance metrics +- **Cost Budgets**: Automatic model selection based on budget constraints +- **Custom Scoring**: User-defined scoring functions for model selection +- **A/B Testing**: Compare different models for the same operation +- **Rate Limiting**: Built-in rate limit handling per provider + +## Quick Reference + +### Common Usage Patterns + +**1. Making AI Calls:** + +There are two primary approaches for invoking AI operations in the system: + +**Via AiService (Recommended Approach):** +The recommended pattern uses the high-level service methods like `callAiPlanning()`, `callAiDocuments()`, or `callAiText()`. These methods are accessed through the serviceCenter and handle all complexity internally. For planning operations, you call `serviceCenter.ai.callAiPlanning()` with a prompt string and optional placeholder list. Placeholders allow dynamic content injection - the system replaces markers like `{TASK}` with actual content before sending to the AI. This approach provides automatic prompt building, placeholder resolution, and response formatting. + +**Direct via AiObjects (Advanced Use):** +For specialized scenarios requiring fine-grained control, you can construct an AiCallRequest manually and invoke `aiObjects.call()` directly. This requires creating an AiCallOptions object with explicit operation type and priority settings, then awaiting the call. The response object contains the generated content plus metrics like token usage, processing time, and costs. This approach is typically used within service implementations or for custom AI workflows. + +**2.
Querying Available Models:** + +The modelRegistry provides comprehensive model inventory access: + +**Complete Inventory Access:** +Calling `modelRegistry.getAvailableModels()` returns all currently available and healthy models across all registered connectors. This list automatically excludes any models marked as unavailable due to configuration issues or connector errors. + +**Connector-Specific Filtering:** +Use `modelRegistry.getModelsByConnector("openai")` to retrieve only models from a specific provider. This is useful when implementing provider-specific features or debugging connector issues. Pass the connector type string (openai, anthropic, perplexity, tavily) as the parameter. + +**Direct Model Lookup:** +For retrieving a specific model's full metadata, use `modelRegistry.getModel("OpenAI GPT-4o")` with the exact displayName. This returns the complete AiModel object including capabilities, costs, ratings, and the functionCall reference. + +**Statistical Overview:** +The `modelRegistry.getModelStats()` method provides aggregate statistics including total model count, availability counts, breakdowns by connector type, capability distribution, and priority classifications. This is valuable for monitoring system health and model distribution. + +**3. Understanding Model Selection:** + +To understand how the system selects models for specific requests: + +**Generating Failover Lists:** +Invoke `modelSelector.getFailoverModelList()` with your prompt, context, options, and the list of available models. The selector executes its full filtering and scoring algorithm, returning a ranked list ordered from most to least suitable. The first element represents the optimal choice, while subsequent elements serve as fallback options. + +**Analyzing Selection Results:** +Each model in the failover list has been validated for operation type compatibility and context size constraints. 
Their ordering reflects the composite score from operation ratings, size efficiency, processing mode alignment, and priority preferences. Examining this list helps understand why specific models were chosen or excluded for particular operations. + +### Operation Types Reference + +| Operation Type | Description | Best Models | Use Case | +|---------------|-------------|-------------|----------| +| `PLAN` | Task planning, action selection | GPT-4, Claude Opus | Workflow planning, decision making | +| `DATA_ANALYSE` | Data analysis and insights | GPT-4, Claude Sonnet | Document analysis, pattern detection | +| `DATA_GENERATE` | Content generation | GPT-4, Claude Sonnet | Report creation, document generation | +| `DATA_EXTRACT` | Information extraction | GPT-3.5, Claude Haiku | Text extraction, data parsing | +| `IMAGE_ANALYSE` | Image/vision analysis | GPT-4 Vision, Claude Vision | Image understanding, OCR | +| `IMAGE_GENERATE` | Image generation | DALL-E, Stable Diffusion | Image creation | +| `WEB_SEARCH` | Web search operations | Perplexity | Real-time web search | +| `WEB_CRAWL` | Web crawling | Tavily | Website content extraction | + +### Priority Reference + +| Priority | Description | Selection Behavior | +|----------|-------------|-------------------| +| `BALANCED` | Balance speed, quality, cost | Default selection | +| `SPEED` | Prioritize fast response | Favor high speedRating models | +| `QUALITY` | Prioritize high-quality output | Favor high qualityRating models | +| `COST` | Prioritize low cost | Favor low-cost models | + +### Processing Mode Reference + +| Mode | Description | When to Use | +|------|-------------|-------------| +| `BASIC` | Simple, straightforward processing | Quick tasks, simple questions | +| `ADVANCED` | Complex reasoning required | Multi-step tasks, analysis | +| `DETAILED` | Comprehensive, thorough output | Planning, detailed generation | + +### Module Import Structure + +The aicore system is organized across several module paths 
for clean separation of concerns: + +**Core Infrastructure Components:** +- The base connector interface lives at `modules.aicore.aicoreBase` and exports BaseConnectorAi +- The global model registry singleton is imported from `modules.aicore.aicoreModelRegistry` as modelRegistry +- The global model selector singleton is imported from `modules.aicore.aicoreModelSelector` as modelSelector + +**Data Model Definitions:** +All AI-related data models are centralized in `modules.datamodels.datamodelAi`, including: +- AiModel: Complete model metadata and configuration +- AiCallRequest and AiCallResponse: Request/response wrapper objects +- AiCallOptions: Configuration options for AI operations +- OperationTypeEnum, PriorityEnum, ProcessingModeEnum: Enumeration types for operation classification + +**Interface and Service Layers:** +- The AiObjects interface class is available at `modules.interfaces.interfaceAiObjects` +- The high-level AiService class is located at `modules.services.serviceAi.mainServiceAi` + +Most application code interacts with the service layer rather than importing core components directly, maintaining proper architectural separation. + +## Summary + +The `aicore` module is the **backbone of AI operations** in the application, providing: +- **Abstraction**: Single interface for multiple AI providers +- **Intelligence**: Smart model selection and automatic failover +- **Flexibility**: Plugin architecture for easy provider addition +- **Reliability**: Caching, failover, and error handling +- **Performance**: Context-aware chunking and optimization + +It connects to `serviceAi` as the **foundation layer**, enabling high-level AI services to operate without knowledge of specific AI provider implementations. The entire system integrates seamlessly into the application through the service layer architecture. 
+ +--- + +**Related Documentation:** +- [Services API Reference](./services-api-reference.md) +- [Architecture Overview](./architecture-overview.md) +- [Security Component](./security-component.md) + diff --git a/docs/code-documentation/architecture-overview.md b/docs/code-documentation/architecture-overview.md new file mode 100644 index 00000000..5d3495c8 --- /dev/null +++ b/docs/code-documentation/architecture-overview.md @@ -0,0 +1,209 @@ +# Architecture Overview + +High-level architecture diagram of the Gateway project. + +```mermaid +graph TB + %% Entry Point + App[app.py
FastAPI Application] + + %% Middleware Layer + App --> Security[Security
Auth, CSRF, JWT, Token Refresh] + App --> CORS[CORS Middleware] + + %% API Layer + App --> Routes[Routes
API Endpoints] + + %% Business Logic Layer + Routes --> Features[Features
Business Logic Modules] + Routes --> Services[Services
Service Layer] + + %% Features can use Services + Features --> Services + + %% Data Access Layer + Services --> Interfaces[Interfaces
Data Access Layer] + Features --> Interfaces + + %% External Connections + Interfaces --> Connectors[Connectors
External System Connections] + Interfaces --> Database[(Database
PostgreSQL)] + + %% Connectors connect to external systems + Connectors --> Database + Connectors --> External[External Systems
Jira, ClickUp, Google, etc.] + + %% Shared Resources + App -.-> Shared[Shared Modules
Configuration, Logging, Utils] + Routes -.-> Shared + Features -.-> Shared + Services -.-> Shared + Interfaces -.-> Shared + + %% Data Models used throughout + Routes -.-> DataModels[Data Models
Request/Response Schemas] + Features -.-> DataModels + Services -.-> DataModels + Interfaces -.-> DataModels + + %% Feature Lifecycle Management + App --> FeaturesLifecycle[Features Lifecycle
Startup/Shutdown Management] + FeaturesLifecycle --> Features + + %% Styling + classDef entryPoint fill:#e1f5ff,stroke:#01579b,stroke-width:3px + classDef apiLayer fill:#f3e5f5,stroke:#4a148c,stroke-width:2px + classDef businessLogic fill:#e8f5e9,stroke:#1b5e20,stroke-width:2px + classDef dataAccess fill:#fff3e0,stroke:#e65100,stroke-width:2px + classDef external fill:#fce4ec,stroke:#880e4f,stroke-width:2px + classDef shared fill:#f5f5f5,stroke:#424242,stroke-width:1px,stroke-dasharray: 5 5 + + class App entryPoint + class Routes,Security,CORS apiLayer + class Features,Services businessLogic + class Interfaces,Connectors dataAccess + class Database,External external + class Shared,DataModels,FeaturesLifecycle shared +``` + +## Data Flow Diagram + +The following sequence diagram shows how data flows through the same architectural layers from the architecture diagram above. + +```mermaid +sequenceDiagram + participant Client + participant App as app.py
FastAPI Application + participant Security as Security
Auth, CSRF, JWT + participant Routes as Routes
API Endpoints + participant Features as Features
Business Logic + participant Services as Services
Service Layer + participant Interfaces as Interfaces
Data Access Layer + participant Connectors as Connectors
External Connections + participant Database as Database
PostgreSQL + participant External as External Systems
Jira, ClickUp, etc. + + %% Request Flow + Client->>App: HTTP Request + App->>Security: Validate Auth & CSRF + Security-->>App: Authenticated + App->>Routes: Validated Request + Routes->>Routes: Validate Data Models + + alt Route delegates to Features + Routes->>Features: Delegate Request + Features->>Services: Use Service (optional) + Services-->>Features: Service Result + Features->>Interfaces: Request Data Access + else Route delegates to Services + Routes->>Services: Delegate Request + Services->>Interfaces: Request Data Access + end + + Interfaces->>Connectors: Query Request + + alt Database Query + Connectors->>Database: Execute Query + Database-->>Connectors: Raw Data + else External API Call + Connectors->>External: API Call + External-->>Connectors: API Response + end + + %% Response Flow + Connectors-->>Interfaces: Raw Data + Interfaces->>Interfaces: Transform to Domain Objects + Interfaces-->>Features: Domain Objects + Interfaces-->>Services: Domain Objects + + Features->>Features: Process Business Logic + Services->>Services: Process Service Logic + + Features-->>Routes: Processed Data + Services-->>Routes: Processed Data + + Routes->>Routes: Serialize to Response Models + Routes-->>App: HTTP Response + App-->>Client: HTTP Response +``` + +### Request Flow (Top to Bottom) + +1. **Client** sends HTTP request to **app.py** +2. **app.py** forwards to **Security** middleware for authentication and CSRF validation +3. **Security** validates and returns authenticated request to **app.py** +4. **app.py** forwards validated request to **Routes** +5. **Routes** validate request data using Data Models, then delegate to either: + - **Features** (which may use **Services**), or + - **Services** directly +6. **Features/Services** call **Interfaces** for data access +7. **Interfaces** use **Connectors** to execute queries +8. **Connectors** query **Database** or call **External Systems** + +### Response Flow (Bottom to Top) + +1. 
**Database/External Systems** return raw data to **Connectors** +2. **Connectors** pass raw data to **Interfaces** +3. **Interfaces** transform raw data into domain objects +4. **Interfaces** return domain objects to **Features/Services** +5. **Features/Services** process business logic and return processed data to **Routes** +6. **Routes** serialize data to response models and return HTTP response to **app.py** +7. **app.py** returns HTTP response to **Client** + +### Data Transformations + +- **Routes**: HTTP Request ↔ Validated Data Models (Pydantic) +- **Features/Services**: Data Models ↔ Domain Objects (Business Logic Processing) +- **Interfaces**: Domain Objects ↔ Raw Data (SQL/API Format) +- **Connectors**: Raw Data ↔ Database Queries/API Calls + +## Layer Descriptions + +### Entry Point Layer +**app.py** - The FastAPI application entry point that orchestrates the entire system. It initializes logging, configures CORS and security middleware, registers all route routers, and manages the application lifecycle (startup/shutdown). This is where the application server starts and all components are wired together. + +### API Layer +**Routes** - HTTP endpoints that define the REST API surface. Routes receive client requests, validate input using data models, delegate to features or services for business logic, and return structured responses. Each route module handles a specific domain (e.g., Real Estate, Chat, Workflows, Security). + +**Security** - Middleware and services that handle authentication, authorization, CSRF protection, JWT token management, and token refresh. Ensures all requests are properly authenticated and authorized before reaching business logic. See [Security Component Documentation](./security-component.md) for detailed documentation. + +**CORS** - Cross-Origin Resource Sharing middleware that controls which external domains can access the API, enabling secure cross-origin requests from web applications. 
+ +### Business Logic Layer +**Features** - Domain-specific business logic modules that implement core functionality for specific use cases (e.g., Real Estate management, Chat workflows, Data neutralization). Features are stateless and orchestrate services to fulfill business requirements. They can be called directly from routes or managed by the Features Lifecycle for background processing. + +**Services** - Reusable, composable service components that provide cross-cutting functionality (AI processing, document extraction, content generation, chat operations, ticket management, etc.). Services encapsulate complex operations and can be used by multiple features. They typically use interfaces to access data and may call other services. + +### Data Access Layer +**Interfaces** - Abstraction layer that provides a clean, domain-oriented API for accessing data. Interfaces hide the complexity of database connections and external system integrations, offering high-level methods for CRUD operations. They handle user context, access control, and data transformation between the application and persistence layers. + +**Connectors** - Concrete implementations that handle low-level communication with external systems. Database connectors manage PostgreSQL connections, query execution, and transaction handling. External connectors integrate with third-party services (Jira, ClickUp, Google Voice, SharePoint) using their specific APIs and protocols. + +### External Systems Layer +**Database** - PostgreSQL databases that persist application data. Multiple databases may exist for different domains (e.g., chat data, real estate data, management data). Connectors handle all database interactions. + +**External Systems** - Third-party services and APIs that the application integrates with. These include ticketing systems (Jira, ClickUp), cloud services (Google Voice, SharePoint), and other external platforms. Connectors abstract away the specifics of each integration. 
+ +### Shared Resources Layer +**Shared Modules** - Common utilities and infrastructure used throughout the application. Includes configuration management, logging utilities, time/date helpers, JSON processing, attribute utilities, and audit logging. These modules provide cross-cutting concerns that don't belong to any specific domain. + +**Data Models** - Pydantic models that define data structures for requests, responses, and database entities. They provide validation, serialization, and type safety across all layers. Models are organized by domain (e.g., Real Estate, Chat, Security, AI). + +**Features Lifecycle** - Manages the startup and shutdown of features that require background processing, scheduled tasks, or event-driven operations. Coordinates initialization and cleanup of features that need persistent processes or event listeners. + +## Request/Response Flow Summary + +For a detailed visual representation, see the [Data Flow Diagram](#data-flow-diagram) above. + +**Simplified Request Flow**: `Client Request` → `CORS` → `Security (Auth/CSRF)` → `Routes` → `Features/Services` → `Interfaces` → `Connectors` → `Database/External Systems` + +**Simplified Response Flow**: `Database/External Systems` → `Connectors` → `Interfaces` → `Features/Services` → `Routes` → `Transform & Log` → `Client Response` + +## Key Architectural Patterns + +- **Layered Architecture**: Clear separation between API, business logic, and data access layers +- **Dependency Injection**: Services and interfaces are injected where needed +- **Interface Abstraction**: Interfaces abstract away database and connector details +- **Stateless Design**: Features operate statelessly without session management +- **Shared Utilities**: Common functionality centralized in shared modules + diff --git a/docs/code-documentation/connectors-component.md b/docs/code-documentation/connectors-component.md new file mode 100644 index 00000000..252fabfd --- /dev/null +++ 
b/docs/code-documentation/connectors-component.md @@ -0,0 +1,1241 @@ +# Connectors Component Documentation + +## Table of Contents + +1. [Overview](#overview) +2. [Architecture](#architecture) +3. [Database Connectors](#database-connectors) +4. [Voice Connector](#voice-connector) +5. [Ticket Connectors](#ticket-connectors) +6. [Integration Patterns](#integration-patterns) +7. [Configuration](#configuration) +8. [Design Principles](#design-principles) + +--- + +## Overview + +The Connectors component provides abstraction layers for external systems and data storage mechanisms. It acts as the bridge between the application's business logic and external services, databases, and third-party APIs. This component implements the **Adapter Pattern** to provide consistent interfaces regardless of the underlying technology. + +### Purpose and Scope + +The connectors component serves three primary functions: + +1. **Data Persistence** - Abstracts database operations for both JSON file-based and PostgreSQL storage +2. **Voice Processing** - Integrates Google Cloud Speech services for voice recognition, translation, and synthesis +3. **Ticket Management** - Connects to external ticketing systems (JIRA, ClickUp) for synchronization + +### Component Structure + +``` +modules/connectors/ +├── connectorDbJson.py # JSON file-based database +├── connectorDbPostgre.py # PostgreSQL database +├── connectorVoiceGoogle.py # Google Cloud Speech services +├── connectorTicketsJira.py # JIRA integration +└── connectorTicketsClickup.py # ClickUp integration +``` + +--- + +## Architecture + +### Connector Hierarchy + +```mermaid +graph TD + A[Application Layer
routes/, workflows/, features/] --> B[Interface Layer
modules/interfaces/] + B --> C[Connector Layer
modules/connectors/] + C --> D[External Systems] + + B --> B1[interfaceDbAppObjects.py
AppObjects] + B --> B2[interfaceDbChatObjects.py
ChatObjects] + B --> B3[interfaceVoiceObjects.py
VoiceObjects] + B --> B4[interfaceTicketObjects.py
TicketInterface] + + C --> C1[connectorDbJson.py
DatabaseConnector] + C --> C2[connectorDbPostgre.py
DatabaseConnector] + C --> C3[connectorVoiceGoogle.py
ConnectorGoogleSpeech] + C --> C4[connectorTicketsJira.py
ConnectorTicketJira] + C --> C5[connectorTicketsClickup.py
ConnectorTicketClickup] + + B1 --> C1 + B1 --> C2 + B2 --> C1 + B2 --> C2 + B3 --> C3 + B4 --> C4 + B4 --> C5 + + C1 --> D1[JSON Files] + C2 --> D2[PostgreSQL] + C3 --> D3[Google Cloud APIs] + C4 --> D4[JIRA API] + C5 --> D5[ClickUp API] + + style A fill:#e1f5ff + style B fill:#fff9e6 + style C fill:#e8f5e9 + style D fill:#fce4ec +``` + +### Layered Architecture Pattern + +The connectors follow a three-tier architecture: + +1. **Application Layer**: Business logic, workflows, services +2. **Interface Layer**: Domain-specific abstractions (AppObjects, ChatObjects, etc.) +3. **Connector Layer**: Technology-specific implementations +4. **External Systems**: Databases, APIs, cloud services + +This separation ensures: +- **Loose Coupling**: Application code doesn't depend on specific technologies +- **Testability**: Connectors can be mocked or swapped +- **Flexibility**: Easy migration between storage backends or service providers +- **Maintainability**: Changes to external systems are isolated to connector layer + +--- + +## Database Connectors + +### Overview + +The application supports two database connector implementations that provide identical public APIs but different storage mechanisms. This allows deployment flexibility without code changes. + +### DatabaseConnector Interface + +Both database connectors implement a common interface using duck typing (no formal interface class). 
They provide: + +- **CRUD Operations**: Create, read, update, delete records +- **Schema Management**: Dynamic table creation from Pydantic models +- **Context Management**: User-aware operations for audit trails +- **Concurrency Control**: Thread-safe operations with locking mechanisms +- **Initial Record Tracking**: System table for bootstrap data + +### JSON Database Connector + +#### Purpose and Use Cases + +The JSON connector is ideal for: +- **Development Environments**: Fast setup without database infrastructure +- **Small Deployments**: Low-volume applications +- **Portable Data**: Easy backup and version control +- **Testing**: Simplified test data management + +#### Storage Structure + +```mermaid +graph LR + A[Database Host Directory] --> B[Database Name Directory] + B --> C[Table1 Directory] + B --> D[Table2 Directory] + B --> E[_system.json] + + C --> C1[record1.json] + C --> C2[record2.json] + C --> C3[_metadata.json] + + D --> D1[record3.json] + D --> D2[record4.json] + + style E fill:#ffeb3b + style C3 fill:#ffeb3b +``` + +**File System Layout:** +- Each database is a directory +- Each table is a subdirectory +- Each record is a separate JSON file +- Metadata files track record IDs and indexes +- System table stores initial record references + +#### Key Features + +**Atomic Operations:** +- Temporary file creation with validation +- Atomic move operations to prevent corruption +- Lock management for concurrent access + +**Caching Strategy:** +- In-memory table cache for performance +- Metadata cache for quick record lookups +- Intelligent cache invalidation + +**Concurrency Control:** +- File-level locks with timeout protection +- Table-level locks for metadata operations +- Deadlock prevention through lock ordering + +### PostgreSQL Database Connector + +#### Purpose and Use Cases + +The PostgreSQL connector is designed for: +- **Production Environments**: High-performance, reliable storage +- **Multi-User Systems**: Concurrent access with ACID 
guarantees +- **Large Datasets**: Efficient querying and indexing +- **Scalability**: Horizontal and vertical scaling capabilities + +#### Schema Architecture + +```mermaid +erDiagram + _system ||--o{ Tables : tracks_initial_records + Tables ||--o{ Records : contains + + _system { + varchar table_name PK + varchar initial_id + double _createdAt + double _modifiedAt + } + + Tables { + varchar id PK + text field1 + jsonb field2 + double _createdAt + double _modifiedAt + varchar _createdBy + varchar _modifiedBy + } +``` + +#### Dynamic Schema Generation + +The connector automatically: +- Creates tables from Pydantic models +- Maps Python types to SQL types +- Adds metadata columns automatically +- Creates indexes for foreign key fields +- Performs additive migrations (adds missing columns) + +**Type Mapping:** +- `str` → `TEXT` +- `int` → `INTEGER` +- `float` → `DOUBLE PRECISION` +- `bool` → `BOOLEAN` +- `dict/list` → `JSONB` (enables flexible document storage) + +#### JSONB Support + +The connector uses PostgreSQL's JSONB type for complex fields: +- Efficient binary JSON storage +- Indexable JSON content +- Native JSON operators +- Flexible schema evolution + +### Database Connector Selection + +The system selects connectors through import statements in interface files: + +```mermaid +graph TD + A[Interface Initialization] --> B{Check DB_HOST Config} + B -->|File Path| C[Import connectorDbJson] + B -->|Host:Port| D[Import connectorDbPostgre] + C --> E[Create DatabaseConnector] + D --> E + E --> F[Initialize System] + + style B fill:#fff3e0 +``` + +**Selection Criteria:** +- Configuration-driven through `config.ini` +- Import statement determines implementation +- Transparent to application layer +- No runtime switching (decided at startup) + +### Common Operations Flow + +```mermaid +sequenceDiagram + participant App as Application + participant Iface as Interface Layer + participant DB as DatabaseConnector + participant Storage as Storage Backend + + App->>Iface: 
getRecordset(Model, filters) + Iface->>DB: getRecordset(model_class, recordFilter) + + alt PostgreSQL + DB->>Storage: SELECT * FROM table WHERE... + Storage-->>DB: Rows + else JSON + DB->>Storage: Read files from directory + Storage-->>DB: JSON objects + end + + DB->>DB: Apply filters + DB->>DB: Handle JSONB parsing + DB-->>Iface: List of records + Iface->>Iface: Apply UAM filters + Iface-->>App: Filtered records + + Note over DB,Storage: Both connectors provide
identical interface +``` + +### Transaction Handling + +**PostgreSQL:** +- Uses database transactions +- ACID compliance +- Automatic rollback on errors +- Connection pooling and retry logic + +**JSON:** +- File-level atomicity +- Lock-based isolation +- Manual rollback through file operations +- Lock timeout protection + +### Performance Considerations + +| Aspect | JSON Connector | PostgreSQL Connector | +|--------|---------------|---------------------| +| **Read Speed** | Fast for small datasets, degrades with size | Consistent, optimized with indexes | +| **Write Speed** | Fast for single records | Fast with connection pooling | +| **Concurrent Access** | Limited by file locking | Excellent with MVCC | +| **Query Capability** | In-memory filtering only | Full SQL with JSONB operators | +| **Scalability** | Limited to single server | Horizontal and vertical scaling | +| **Memory Usage** | High (full table caching) | Low (database managed) | + +--- + +## Voice Connector + +### Overview + +The `ConnectorGoogleSpeech` provides integration with Google Cloud AI services for voice processing, offering a complete pipeline for speech recognition, translation, and text-to-speech synthesis. + +### Architecture + +```mermaid +graph TB + A[Voice Interface] --> B[ConnectorGoogleSpeech] + + B --> C[Speech-to-Text Client] + B --> D[Translation Client] + B --> E[Text-to-Speech Client] + + C --> F[Google Cloud Speech-to-Text API] + D --> G[Google Cloud Translation API] + E --> H[Google Cloud Text-to-Speech API] + + F --> I[Audio Processing] + G --> J[Language Translation] + H --> K[Voice Synthesis] + + style B fill:#e8f5e9 + style F fill:#e3f2fd + style G fill:#e3f2fd + style H fill:#e3f2fd +``` + +### Core Capabilities + +#### 1. 
Speech-to-Text Processing + +**Audio Format Support:** +- WEBM OPUS (primary web recording format) +- WAV (Linear PCM) +- MP3 +- FLAC +- OGG + +**Processing Pipeline:** + +```mermaid +sequenceDiagram + participant Client + participant Connector + participant Validator + participant API as Google Speech API + + Client->>Connector: speechToText(audioContent) + Connector->>Validator: validateAudioFormat() + + Validator->>Validator: Detect format + Validator->>Validator: Extract sample rate + Validator->>Validator: Determine channels + Validator-->>Connector: Format metadata + + Connector->>API: recognize(config, audio) + + alt Success + API-->>Connector: Transcription + confidence + Connector-->>Client: Success response + else API Error + API-->>Connector: Error + Connector->>Connector: Try fallback configs + Connector->>API: recognize(fallback_config) + API-->>Connector: Result + Connector-->>Client: Response + end +``` + +**Audio Format Detection:** +- Magic byte pattern recognition +- Header parsing for metadata extraction +- Automatic format-specific configuration +- Deep scanning for ambiguous formats + +**Fallback Strategy:** +Multiple configurations tried automatically: +1. Detected format with detected parameters +2. Alternative encodings (LINEAR16, WEBM_OPUS) +3. Standard sample rates (8kHz, 16kHz, 44.1kHz, 48kHz) +4. Different recognition models (latest_long, phone_call, latest_short) + +#### 2. Translation Services + +**Features:** +- Automatic language detection +- HTML entity decoding +- Bidirectional translation +- Preserves text formatting + +**Translation Flow:** + +```mermaid +graph LR + A[Input Text] --> B[Google Translation API] + B --> C[Detect Source Language] + C --> D[Translate to Target] + D --> E[Decode HTML Entities] + E --> F[Return Result] + + style B fill:#e3f2fd +``` + +#### 3. 
Text-to-Speech Synthesis + +**Voice Selection:** +- Language-specific voices +- Gender-based voice selection +- Neural voice quality +- Multiple voice variants per language + +**Synthesis Process:** + +```mermaid +sequenceDiagram + participant Client + participant Connector + participant API as Google TTS API + + Client->>Connector: textToSpeech(text, language, voice) + + alt Voice Specified + Connector->>API: synthesize_speech(voice) + else No Voice + Connector->>Client: Error: no default voice + end + + API->>API: Generate audio + API-->>Connector: MP3 audio data + Connector-->>Client: Audio content + metadata +``` + +### Complete Pipeline: Speech-to-Translated-Text + +The connector provides an integrated pipeline: + +```mermaid +graph TD + A[Audio Input] --> B[Speech-to-Text] + B --> C[Original Text] + C --> D[Translation] + D --> E[Translated Text] + + B -.->|Confidence Score| F[Metadata] + D -.->|Source Language| F + F --> G[Complete Response] + + style A fill:#ffebee + style C fill:#fff3e0 + style E fill:#e8f5e9 + style G fill:#e1f5fe +``` + +**Use Cases:** +- Real-time voice translation +- Multilingual voice assistants +- International call centers +- Language learning applications + +### Authentication and Configuration + +**Credential Management:** +- Service account JSON key stored in configuration +- Parsed and loaded at initialization +- No file system dependency +- Credentials object creation from JSON + +**Configuration Parameters:** +- `Connector_GoogleSpeech_API_KEY_SECRET`: Service account JSON (encrypted) + +### Error Handling and Resilience + +**Retry Mechanisms:** +- Multiple encoding attempts +- Sample rate fallbacks +- Model fallbacks +- Graceful degradation + +**Validation:** +- Audio length verification +- Format compatibility checks +- Content quality analysis +- Silence detection + +### Integration Points + +```mermaid +graph TB + A[Routes Layer] --> B[VoiceObjects Interface] + B --> C[ConnectorGoogleSpeech] + + 
A1[/voice-google/speech-to-text] --> B + A2[/voice-google/translate] --> B + A3[/voice-google/text-to-speech] --> B + A4[/voice-google/languages] --> B + A5[/voice-google/voices] --> B + A6[WebSocket /ws/realtime-interpreter] --> B + + style A1 fill:#e8f5e9 + style A2 fill:#e8f5e9 + style A3 fill:#e8f5e9 + style A4 fill:#fff3e0 + style A5 fill:#fff3e0 + style A6 fill:#ffebee +``` + +--- + +## Ticket Connectors + +### Overview + +Ticket connectors provide unified access to external project management and ticketing systems. They enable bidirectional synchronization of tasks and tickets with external platforms. + +### Common Interface Pattern + +Both ticket connectors implement a common base pattern: + +**Core Operations:** +- `readAttributes()`: Fetch field metadata from the system +- `readTasks()`: Read tickets/tasks with pagination +- `writeTasks()`: Update tickets/tasks in bulk + +```mermaid +classDiagram + class TicketBase { + <<interface>> + +readAttributes() list~TicketFieldAttribute~ + +readTasks(limit) list~dict~ + +writeTasks(tasklist) None + } + + class ConnectorTicketJira { + -apiUsername: str + -apiToken: str + -apiUrl: str + -projectCode: str + -ticketType: str + +readAttributes() + +readTasks() + +writeTasks() + } + + class ConnectorTicketClickup { + -apiToken: str + -teamId: str + -listId: str + -apiUrl: str + +readAttributes() + +readTasks() + +writeTasks() + } + + TicketBase <|-- ConnectorTicketJira + TicketBase <|-- ConnectorTicketClickup +``` + +### JIRA Connector + +#### Authentication and Configuration + +**Required Parameters:** +- `apiUsername`: JIRA account username +- `apiToken`: API authentication token +- `apiUrl`: JIRA instance URL +- `projectCode`: Project identifier +- `ticketType`: Issue type filter + +#### Field Discovery + +```mermaid +sequenceDiagram + participant App + participant Connector + participant JIRA as JIRA API + + App->>Connector: readAttributes() + Connector->>JIRA: POST /search/jql + JIRA-->>Connector: Issue with all fields + + 
alt Fields Available + Connector->>Connector: Extract field mappings + Connector-->>App: List of attributes + else No Fields + Connector->>JIRA: GET /field + JIRA-->>Connector: All field definitions + Connector-->>App: Field list + end +``` + +**Field Mapping:** +- Maps JIRA field IDs to human-readable names +- Supports custom fields +- Handles complex field types (ADF, arrays, objects) + +#### Pagination Strategy + +The connector uses JIRA's cursor-based pagination: + +```mermaid +graph TD + A[Start] --> B[Initial Request] + B --> C{Issues Returned?} + C -->|No| D[End] + C -->|Yes| E[Process Issues] + E --> F{Has Next Page Token?} + F -->|No| D + F -->|Yes| G[Request Next Page] + G --> H{Safety Cap Reached?} + H -->|Yes| D + H -->|No| C + + style D fill:#e8f5e9 + style H fill:#ffebee +``` + +**Pagination Features:** +- Cursor-based continuation +- Duplicate detection +- Safety cap (1000 pages max) +- Configurable page size +- Loop prevention + +#### Task Updates + +**Update Flow:** + +```mermaid +sequenceDiagram + participant App + participant Connector + participant JIRA + + App->>Connector: writeTasks([task1, task2]) + + loop For each task + Connector->>Connector: Extract task ID + Connector->>Connector: Map fields + Connector->>Connector: Convert to ADF format + Connector->>JIRA: PUT /issue/{id} + + alt Success + JIRA-->>Connector: 204 No Content + else Error + JIRA-->>Connector: Error response + Connector->>Connector: Log error + end + end + + Connector-->>App: Complete +``` + +**Field Processing:** +- Automatic ADF (Atlassian Document Format) conversion for rich text +- Custom field handling +- Empty field validation +- Selective field updates + +### ClickUp Connector + +#### Authentication and Configuration + +**Required Parameters:** +- `apiToken`: ClickUp API token +- `teamId`: Workspace/team identifier +- `listId`: Optional list filter +- `apiUrl`: API endpoint (default: https://api.clickup.com/api/v2) + +#### Hierarchical Data Access + +```mermaid +graph 
TD + A[Team Level] --> B[Space Level] + B --> C[Folder Level] + C --> D[List Level] + D --> E[Task Level] + E --> F[Subtask Level] + + G[Connector] -.->|listId specified| D + G -.->|listId not specified| A + + style G fill:#fff3e0 +``` + +**Access Patterns:** +- List-specific access when `listId` provided +- Team-wide search when no `listId` +- Automatic subtask inclusion + +#### Field Discovery + +ClickUp provides both: +1. **Custom Fields**: From list-specific field API +2. **Core Fields**: Standard task properties + +```mermaid +graph LR + A[readAttributes] --> B{listId Present?} + B -->|Yes| C[GET /list/id/field] + B -->|No| D[Return Core Fields Only] + + C --> E[Merge Custom + Core Fields] + D --> F[Return Fields] + E --> F + + style C fill:#e3f2fd +``` + +**Core Fields:** +- ID +- Name +- Status +- Assignees +- Date Created +- Due Date + +#### Task Retrieval + +**Pagination:** +- Page-based pagination +- Configurable page size (100 default) +- Automatic page iteration + +```mermaid +sequenceDiagram + participant Connector + participant API as ClickUp API + + loop Until no more tasks + Connector->>API: GET /list/id/task?page={n} + API-->>Connector: Tasks array + + alt Tasks returned < page size + Connector->>Connector: Stop pagination + else More tasks possible + Connector->>Connector: Increment page + end + end +``` + +#### Task Updates + +**Update Strategy:** + +```mermaid +graph TD + A[Task Update Request] --> B[Extract Task ID] + B --> C[Extract Fields] + C --> D{Field Type?} + + D -->|name/summary| E[Update name] + D -->|status| F[Update status] + D -->|custom field| G[Add to custom_fields array] + D -->|other| H[Add to description] + + E --> I[Build Payload] + F --> I + G --> I + H --> I + + I --> J[PUT /task/id] + + style J fill:#e3f2fd +``` + +**Field Mapping:** +- Heuristic field name matching +- Custom field special handling +- Best-effort unknown field mapping + +### Ticket Interface Integration + +The ticket connectors are wrapped by the 
`TicketInterface` for field mapping: + +```mermaid +graph TB + A[Workflow/Feature] --> B[TicketService] + B --> C[TicketInterface] + C --> D[Connector Factory] + + D -->|connectorType='Jira'| E[ConnectorTicketJira] + D -->|connectorType='ClickUp'| F[ConnectorTicketClickup] + + C --> G[Task Sync Definition] + G --> H[Field Mapping] + + E --> I[External System] + F --> I + + style C fill:#fff3e0 + style G fill:#e8f5e9 +``` + +**Task Sync Definition:** +- Maps internal field names to external field paths +- Specifies read/write directions +- Handles nested field access +- Enables field transformations + +**Example Flow:** + +```mermaid +sequenceDiagram + participant Workflow + participant Interface as TicketInterface + participant Connector + participant External as External System + + Workflow->>Interface: exportTicketsAsList() + Interface->>Connector: readTasks() + Connector->>External: API Request + External-->>Connector: Raw tickets + Connector-->>Interface: Tickets list + Interface->>Interface: _transformTicketRecords() + Interface-->>Workflow: Transformed data + + Note over Interface: Applies field mapping
from sync definition +``` + +--- + +## Integration Patterns + +### Connector Initialization Patterns + +#### 1. Singleton Pattern (Database Connectors) + +Database connectors use singleton-like patterns through interface factories: + +```mermaid +graph TD + A[Request 1] --> B[getAppInterface] + C[Request 2] --> B + D[Request 3] --> B + + B --> E{Instance Exists?} + E -->|No| F[Create AppObjects] + E -->|Yes| G[Return Cached Instance] + + F --> H[Initialize DatabaseConnector] + H --> I[Store in _gatewayInterfaces] + + G --> J[Return Interface] + I --> J + + style B fill:#fff3e0 + style I fill:#e8f5e9 +``` + +**Benefits:** +- Reuses database connections +- Maintains context consistency +- Reduces initialization overhead +- Per-user instances for security + +#### 2. Factory Pattern (Ticket Connectors) + +Ticket connectors use factory pattern for runtime selection: + +```mermaid +graph TD + A[Service Request] --> B[createTicketInterfaceByType] + B --> C{Connector Type?} + + C -->|'jira'| D[Import JIRA Connector] + C -->|'clickup'| E[Import ClickUp Connector] + C -->|unknown| F[Raise ValueError] + + D --> G[Create Connector Instance] + E --> G + + G --> H[Wrap in TicketInterface] + H --> I[Return Interface] + + style B fill:#fff3e0 +``` + +**Advantages:** +- Runtime connector selection +- Easy addition of new connectors +- Consistent interface wrapping +- Configuration-driven behavior + +#### 3. 
Dependency Injection (Voice Connector) + +Voice connector uses lazy initialization with dependency injection: + +```mermaid +sequenceDiagram + participant Route + participant Interface as VoiceObjects + participant Connector as ConnectorGoogleSpeech + + Route->>Interface: getVoiceInterface(user) + Interface->>Interface: _getGoogleSpeechConnector() + + alt First Call + Interface->>Connector: __init__() + Connector->>Connector: Load credentials + Connector->>Connector: Initialize clients + Connector-->>Interface: Connector instance + Interface->>Interface: Cache connector + else Subsequent Calls + Interface-->>Route: Cached connector + end + + Interface-->>Route: Voice interface +``` + +### Context Management Pattern + +All connectors support context updates for audit trails: + +```mermaid +graph LR + A[User Login] --> B[Create Interface] + B --> C[Initialize Connector] + C --> D[Set userId Context] + + E[User Switch] --> F[updateContext] + F --> G[Update userId] + F --> H[Clear Caches] + + I[All Operations] --> J[Include userId in metadata] + J --> K[_createdBy] + J --> L[_modifiedBy] + + style D fill:#e8f5e9 + style G fill:#fff3e0 +``` + +**Context Metadata:** +- `_createdBy`: User who created record +- `_modifiedBy`: User who last modified record +- `_createdAt`: Creation timestamp +- `_modifiedAt`: Modification timestamp + +### Error Handling Pattern + +Connectors implement consistent error handling: + +```mermaid +graph TD + A[Operation Start] --> B{Try Operation} + B -->|Success| C[Return Result] + B -->|Error| D[Log Error] + + D --> E{Retry Possible?} + E -->|Yes| F[Execute Fallback] + E -->|No| G[Return Error Response] + + F --> H{Success?} + H -->|Yes| C + H -->|No| G + + G --> I[Structured Error Response] + + style C fill:#e8f5e9 + style G fill:#ffebee +``` + +**Error Response Structure:** +- Consistent dictionary format +- `success`: Boolean indicator +- `error`: Descriptive error message +- Additional context fields +- No exceptions propagated to 
application layer + +--- + +## Configuration + +### Database Configuration + +Each database interface reads specific configuration keys: + +**App Database (User/Mandate Management):** +- `DB_APP_HOST`: Database host or file path +- `DB_APP_DATABASE`: Database name +- `DB_APP_USER`: Database username +- `DB_APP_PASSWORD_SECRET`: Encrypted password +- `DB_APP_PORT`: Database port (default: 5432) + +**Chat Database:** +- `DB_CHAT_HOST` +- `DB_CHAT_DATABASE` +- `DB_CHAT_USER` +- `DB_CHAT_PASSWORD_SECRET` +- `DB_CHAT_PORT` + +**Management Database:** +- `DB_MANAGEMENT_HOST` +- `DB_MANAGEMENT_DATABASE` +- `DB_MANAGEMENT_USER` +- `DB_MANAGEMENT_PASSWORD_SECRET` +- `DB_MANAGEMENT_PORT` + +**Real Estate Database:** +- `DB_REAL_ESTATE_HOST` +- `DB_REAL_ESTATE_DATABASE` +- `DB_REAL_ESTATE_USER` +- `DB_REAL_ESTATE_PASSWORD_SECRET` +- `DB_REAL_ESTATE_PORT` + +### Voice Connector Configuration + +**Google Cloud Credentials:** +- `Connector_GoogleSpeech_API_KEY_SECRET`: Complete service account JSON key (encrypted) + +### Ticket Connector Configuration + +Ticket connectors receive configuration at runtime through `connectorParams`: + +**JIRA Configuration:** +- `apiUsername`: JIRA username +- `apiToken`: API token +- `apiUrl`: JIRA instance URL +- `projectCode`: Project key +- `ticketType`: Issue type filter + +**ClickUp Configuration:** +- `apiToken`: ClickUp API token +- `teamId`: Workspace ID +- `listId`: Optional list ID +- `apiUrl`: API base URL + +### Configuration Flow + +```mermaid +sequenceDiagram + participant Config as config.ini + participant Security as Security Module + participant Interface + participant Connector + + Config->>Security: Read encrypted values + Security->>Security: Decrypt secrets + Security-->>Interface: Configuration values + + Interface->>Connector: Initialize with config + Connector->>Connector: Validate configuration + + alt Valid Config + Connector->>Connector: Establish connections + Connector-->>Interface: Ready + else Invalid Config + 
Connector-->>Interface: Raise Exception + end +``` + +--- + +## Design Principles + +### 1. Abstraction and Encapsulation + +**Principle:** Hide implementation details behind consistent interfaces. + +```mermaid +graph LR + A[Application Code] --> B[Interface Layer] + B -.->|Never directly accesses| C[Connector Details] + B --> D[Public API] + D --> C + + style B fill:#e8f5e9 + style C fill:#ffebee +``` + +**Benefits:** +- Technology independence +- Easy testing with mocks +- Simplified application code +- Future-proof architecture + +### 2. Duck Typing over Formal Interfaces + +**Rationale:** Python's duck typing provides flexibility without interface boilerplate. + +Both database connectors provide identical methods without inheriting from a common base class. This allows: +- Natural Python idioms +- Easy addition of connector-specific features +- No multiple inheritance complexity +- Freedom in implementation + +### 3. Configuration over Code + +**Principle:** Behavior should be configurable without code changes. + +```mermaid +graph TD + A[Deployment Requirements] --> B[Configuration Files] + B --> C[Runtime Behavior] + + B1[Development] --> B + B2[Staging] --> B + B3[Production] --> B + + C --> C1[Connector Selection] + C --> C2[Connection Parameters] + C --> C3[Feature Flags] + + style B fill:#fff3e0 +``` + +**Implementation:** +- External configuration files +- Environment-specific settings +- Encrypted secrets support +- No hardcoded credentials + +### 4. Fail-Safe Defaults + +**Principle:** System should work out-of-the-box with sensible defaults. + +**Examples:** +- JSON connector for development (no DB setup) +- Default sample rates for audio processing +- Automatic format detection +- Graceful degradation + +### 5. Explicit Error Handling + +**Principle:** Errors should be caught, logged, and returned as data structures. 
+ +```mermaid +graph TD + A[Operation] --> B{Success?} + B -->|Yes| C[Return Success Response] + B -->|No| D[Catch Exception] + + D --> E[Log Error with Context] + E --> F[Create Error Response] + F --> G[Return Error Structure] + + style C fill:#e8f5e9 + style G fill:#ffebee +``` + +**Benefits:** +- No unexpected exceptions +- Consistent error format +- Rich error context for debugging +- Application can handle errors gracefully + +### 6. Single Responsibility + +**Principle:** Each connector has one clear purpose. + +- **Database Connectors**: Only handle data persistence +- **Voice Connector**: Only handle voice processing +- **Ticket Connectors**: Only handle external ticket systems + +Business logic, validation, and transformations belong in higher layers. + +### 7. Dependency Inversion + +**Principle:** High-level modules don't depend on low-level modules. + +```mermaid +graph TD + A[Workflow Layer] --> B[Service Layer] + B --> C[Interface Layer] + C --> D[Connector Layer] + + A -.->|Does not depend on| D + B -.->|Does not depend on| D + + style A fill:#e3f2fd + style D fill:#e8f5e9 +``` + +The application depends on interfaces (duck-typed contracts), not concrete implementations. + +### 8. Idempotency Where Possible + +**Principle:** Operations should be safe to retry. + +**Implementation:** +- Record updates are idempotent (same result if repeated) +- Duplicate detection in pagination +- Transaction rollback on errors +- Atomic file operations + +### 9. Progressive Enhancement + +**Principle:** Core functionality works simply; advanced features add complexity only when needed. + +**Examples:** +- Basic audio format → Automatic fallbacks +- Simple field mapping → Complex transformations +- Single database → Multiple database support +- Direct API calls → Retry logic + +### 10. Audit Trail by Design + +**Principle:** All data modifications tracked automatically. 
+ +```mermaid +graph LR + A[Create/Modify Operation] --> B[Add Metadata] + B --> C[_createdAt] + B --> D[_createdBy] + B --> E[_modifiedAt] + B --> F[_modifiedBy] + + G[User Context] --> B + H[Current Timestamp] --> B + + style B fill:#fff3e0 +``` + +**Benefits:** +- Automatic compliance +- Debugging support +- Security auditing +- User accountability + +--- + +## Summary + +The Connectors component provides a robust, flexible abstraction layer for external system integration. Key strengths include: + +- **Technology Independence**: Application code unaware of specific storage or service implementations +- **Flexibility**: Easy swapping between implementations without code changes +- **Reliability**: Comprehensive error handling and retry mechanisms +- **Performance**: Optimized for each technology (caching for JSON, connection pooling for PostgreSQL) +- **Maintainability**: Clear separation of concerns and consistent patterns +- **Extensibility**: New connectors can be added with minimal impact + +The component enables the application to work seamlessly across different deployment scenarios while maintaining clean architecture and separation of concerns. + diff --git a/docs/code-documentation/datamodels-interfaces-component.md b/docs/code-documentation/datamodels-interfaces-component.md new file mode 100644 index 00000000..8fd749c2 --- /dev/null +++ b/docs/code-documentation/datamodels-interfaces-component.md @@ -0,0 +1,1832 @@ +# Datamodels and Interfaces Component + +## Overview + +The Datamodels and Interfaces components form the core data layer of the Gateway application. They provide a clean separation between data structures (datamodels) and data access logic (interfaces), enabling type-safe, maintainable, and scalable data operations throughout the application. + +## Component Architecture + +```mermaid +graph TB + subgraph "Application Layer" + App[Application
Routes, Services, Features] + end + + subgraph "Interfaces Layer" + IF_RealEstate[Real Estate Interface] + IF_Chat[Chat Interface] + IF_App[App Interface] + IF_Component[Component Interface] + IF_AI[AI Interface] + IF_Ticket[Ticket Interface] + IF_Voice[Voice Interface] + end + + subgraph "Access Control Layer" + AC_RealEstate[Real Estate Access] + AC_Chat[Chat Access] + AC_App[App Access] + AC_Component[Component Access] + end + + subgraph "Database Connectors" + Connector_Postgre[PostgreSQL Connector] + end + + subgraph "Databases" + DB_RealEstate[(Real Estate
Database)] + DB_Chat[(Chat
Database)] + DB_App[(App
Database)] + DB_Component[(Component
Database)] + end + + subgraph "External Systems" + External_AI[AI APIs
OpenAI, Anthropic, etc.] + External_Tickets[Ticket Systems
Jira, ClickUp] + External_Voice[Voice Services
Google Cloud] + end + + App -.->|uses| IF_RealEstate + App -.->|uses| IF_Chat + App -.->|uses| IF_App + App -.->|uses| IF_Component + App -.->|uses| IF_AI + App -.->|uses| IF_Ticket + App -.->|uses| IF_Voice + + IF_RealEstate --> AC_RealEstate + IF_Chat --> AC_Chat + IF_App --> AC_App + IF_Component --> AC_Component + + AC_RealEstate --> Connector_Postgre + AC_Chat --> Connector_Postgre + AC_App --> Connector_Postgre + AC_Component --> Connector_Postgre + + Connector_Postgre --> DB_RealEstate + Connector_Postgre --> DB_Chat + Connector_Postgre --> DB_App + Connector_Postgre --> DB_Component + + IF_AI --> External_AI + IF_Ticket --> External_Tickets + IF_Voice --> External_Voice + + IF_RealEstate -.->|uses| DM_RealEstate[Real Estate
Datamodels] + IF_Chat -.->|uses| DM_Chat[Chat
Datamodels] + IF_App -.->|uses| DM_UAM[User & Mandate
Datamodels] + IF_Component -.->|uses| DM_Files[File
Datamodels] + IF_AI -.->|uses| DM_AI[AI
Datamodels] +``` + +## Data Flow + +```mermaid +sequenceDiagram + participant Route as API Route + participant Service as Service Layer + participant Interface as Interface + participant Access as Access Control + participant Connector as Database Connector + participant DB as Database + + Route->>Service: Request with User Context + Service->>Interface: Initialize with User + Interface->>Access: Check Permissions + Access-->>Interface: Permission Granted + Service->>Interface: CRUD Operation + Interface->>Access: Validate Access + Access-->>Interface: Access Validated + Interface->>Connector: Execute Query + Connector->>DB: SQL Query + DB-->>Connector: Result Set + Connector-->>Interface: Data Objects + Interface->>Access: Apply Filtering + Access-->>Interface: Filtered Data + Interface-->>Service: Datamodel Instances + Service-->>Route: Response Data +``` + +## Component Structure + +### Datamodels Structure + +``` +modules/datamodels/ +├── datamodelRealEstate.py # Real estate domain models +├── datamodelChat.py # Chat workflow models +├── datamodelAi.py # AI operation models +├── datamodelUam.py # User and mandate models +├── datamodelSecurity.py # Security and authentication models +├── datamodelFiles.py # File management models +├── datamodelDocument.py # Document structure models +├── datamodelExtraction.py # Content extraction models +├── datamodelPagination.py # Pagination models +├── datamodelVoice.py # Voice settings models +├── datamodelTickets.py # Ticket system models +├── datamodelNeutralizer.py # Data neutralization models +├── datamodelTools.py # Tool definitions +├── datamodelUtils.py # Utility models +└── __init__.py # Package exports +``` + +### Interfaces Structure + +``` +modules/interfaces/ +├── interfaceDbRealEstateObjects.py # Real estate data access +├── interfaceDbRealEstateAccess.py # Real estate access control +├── interfaceDbChatObjects.py # Chat data access +├── interfaceDbChatAccess.py # Chat access control +├── 
interfaceDbAppObjects.py # App/user management access +├── interfaceDbAppAccess.py # App access control +├── interfaceDbComponentObjects.py # Component management access +├── interfaceDbComponentAccess.py # Component access control +├── interfaceAiObjects.py # AI operations interface +├── interfaceTicketObjects.py # Ticket system interface +└── interfaceVoiceObjects.py # Voice operations interface +``` + +--- + +## Datamodels Component + +### datamodelRealEstate.py + +```mermaid +erDiagram + Projekt { + string id PK + string mandateId + string label + string statusProzess + json perimeter + json baulinie + json parzellen + json dokumente + json kontextInformationen + } + + Parzelle { + string id PK + string mandateId + string label + string kontextGemeinde FK + json perimeter + json baulinie + string bauzone + float az + float bz + json dokumente + json kontextInformationen + } + + Land { + string id PK + string mandateId + string label + string abk + json dokumente + json kontextInformationen + } + + Kanton { + string id PK + string mandateId + string label + string id_land FK + string abk + json dokumente + json kontextInformationen + } + + Gemeinde { + string id PK + string mandateId + string label + string id_kanton FK + string plz + json dokumente + json kontextInformationen + } + + Dokument { + string id PK + string mandateId + string label + string versionsbezeichnung + string dokumentTyp + string dokumentReferenz + string quelle + string mimeType + json kategorienTags + } + + Kontext { + string id PK + string thema + string inhalt + } + + GeoPunkt { + string koordinatensystem + float x + float y + float z + string referenz + } + + GeoPolylinie { + string id PK + bool closed + json punkte + } + + Projekt ||--o{ Parzelle : contains + Projekt ||--o{ Dokument : references + Projekt ||--o{ Kontext : has + + Parzelle ||--o{ GeoPolylinie : contains + Parzelle ||--o{ GeoPunkt : contains + Parzelle }o--|| Gemeinde : located_in + + Land ||--o{ Kanton : contains + 
Kanton ||--o{ Gemeinde : contains + + Land ||--o{ Dokument : has + Land ||--o{ Kontext : has + Kanton ||--o{ Dokument : has + Kanton ||--o{ Kontext : has + Gemeinde ||--o{ Dokument : has + Gemeinde ||--o{ Kontext : has + + GeoPolylinie ||--o{ GeoPunkt : contains +``` + +### datamodelChat.py + +```mermaid +erDiagram + ChatWorkflow { + string id PK + string mandateId + string status + string name + int currentRound + int currentTask + int currentAction + int totalTasks + int totalActions + float lastActivity + float startedAt + string workflowMode + int maxSteps + json logs + json messages + json stats + json tasks + } + + ChatMessage { + string id PK + string workflowId FK + string parentMessageId FK + string message + string summary + string role + string status + int sequenceNr + float publishedAt + bool success + string actionId + json documents + } + + ChatLog { + string id PK + string workflowId FK + string message + string type + float timestamp + string status + float progress + json performance + } + + ChatStat { + string id PK + string workflowId FK + float processingTime + int bytesSent + int bytesReceived + int errorCount + string process + string engine + float priceUsd + } + + ChatDocument { + string id PK + string messageId FK + string fileId FK + string fileName + int fileSize + string mimeType + int roundNumber + int taskNumber + int actionNumber + string actionId + } + + TaskPlan { + string overview + json tasks + string userMessage + } + + TaskItem { + string id PK + string workflowId FK + string userInput + string status + string error + float startedAt + float finishedAt + json actionList + int retryCount + int retryMax + bool rollbackOnFailure + json dependencies + string feedback + float processingTime + json resultLabels + } + + TaskStep { + string id PK + string objective + json dependencies + json successCriteria + string estimatedComplexity + string userMessage + string dataType + json expectedFormats + json qualityRequirements + } + + 
ActionItem { + string id PK + string execMethod + string execAction + json execParameters + string execResultLabel + json expectedDocumentFormats + string userMessage + string status + string error + int retryCount + int retryMax + float processingTime + float timestamp + string result + } + + ActionResult { + bool success + string error + json documents + string resultLabel + } + + AutomationDefinition { + string id PK + string mandateId + string label + string schedule + string template + json placeholders + bool active + string eventId + string status + json executionLogs + } + + ChatWorkflow ||--o{ ChatMessage : contains + ChatWorkflow ||--o{ ChatLog : contains + ChatWorkflow ||--o{ ChatStat : contains + ChatWorkflow ||--o{ TaskPlan : has + ChatWorkflow ||--o{ AutomationDefinition : defines + + ChatMessage ||--o{ ChatDocument : references + TaskPlan ||--o{ TaskStep : contains + TaskItem ||--o{ ActionItem : contains + ActionItem ||--o{ ActionResult : produces +``` + +### datamodelAi.py + +```mermaid +erDiagram + AiModel { + string name PK + string displayName + string connectorType + string apiUrl + float temperature + int maxTokens + int contextLength + float costPer1kTokensInput + float costPer1kTokensOutput + int speedRating + int qualityRating + string priority + string processingMode + json operationTypes + int minContextLength + bool isAvailable + string version + string lastUpdated + } + + OperationTypeRating { + string operationType + int rating + } + + AiCallOptions { + string operationType + string priority + bool compressPrompt + bool compressContext + bool processDocumentsIndividually + float maxCost + int maxProcessingTime + string processingMode + string resultFormat + float safetyMargin + float temperature + int maxParts + } + + AiCallRequest { + string prompt + string context + json options + json contentParts + } + + AiCallResponse { + string content + string modelName + float priceUsd + float processingTime + int bytesSent + int bytesReceived + 
int errorCount + } + + AiModelCall { + json messages + json model + json options + } + + AiModelResponse { + string content + bool success + string error + string modelId + float processingTime + json tokensUsed + json metadata + } + + AiModel ||--o{ OperationTypeRating : has + AiCallRequest ||--|| AiCallOptions : uses + AiCallRequest ||--|| AiCallResponse : produces + AiCallRequest }o--|| AiModel : uses + AiModelCall }o--|| AiModel : uses + AiModelCall ||--|| AiCallOptions : uses + AiModelCall ||--|| AiModelResponse : produces +``` + +### datamodelUam.py + +```mermaid +erDiagram + Mandate { + string id PK + string name + string language + bool enabled + } + + User { + string id PK + string username + string email + string fullName + string language + bool enabled + string privilege + string authenticationAuthority + string mandateId FK + } + + UserConnection { + string id PK + string userId FK + string authority + string externalId + string externalUsername + string externalEmail + string status + float connectedAt + float lastChecked + float expiresAt + string tokenStatus + float tokenExpiresAt + } + + UserInDB { + string id PK + string username + string email + string fullName + string language + bool enabled + string privilege + string authenticationAuthority + string mandateId FK + string hashedPassword + } + + Mandate ||--o{ User : contains + User ||--o{ UserConnection : has + User ||--|| UserInDB : extends +``` + +### datamodelSecurity.py + +```mermaid +erDiagram + Token { + string id PK + string userId FK + string authority + string connectionId FK + string tokenAccess + string tokenType + float expiresAt + string tokenRefresh + float createdAt + string status + float revokedAt + string revokedBy + string reason + string sessionId + string mandateId FK + } + + AuthEvent { + string id PK + string userId FK + string eventType + float timestamp + string ipAddress + string userAgent + bool success + string details + } + + Token ||--o{ AuthEvent : generates +``` 
+ +### datamodelFiles.py + +```mermaid +erDiagram + FileItem { + string id PK + string mandateId FK + string fileName + string mimeType + string fileHash + int fileSize + float creationDate + } + + FilePreview { + string content + string mimeType + string fileName + bool isText + string encoding + int size + } + + FileData { + string id PK + string data + bool base64Encoded + } + + FileItem ||--|| FilePreview : generates + FileItem ||--|| FileData : contains +``` + +### datamodelDocument.py + +```mermaid +erDiagram + StructuredDocument { + json metadata + json sections + string summary + json tags + } + + DocumentMetadata { + string title + string author + datetime createdAt + json sourceDocuments + string extractionMethod + string version + } + + DocumentSection { + string id PK + string title + string contentType + json elements + int order + json metadata + } + + Paragraph { + string text + json formatting + json metadata + } + + Heading { + string text + int level + json metadata + } + + CodeBlock { + string code + string language + json metadata + } + + Image { + string data + string altText + string caption + json metadata + } + + BulletList { + json items + string listType + json metadata + } + + ListItem { + string text + json subitems + json metadata + } + + TableData { + json headers + json rows + string caption + json metadata + } + + StructuredDocument ||--|| DocumentMetadata : has + StructuredDocument ||--o{ DocumentSection : contains + DocumentSection ||--o{ Paragraph : can_contain + DocumentSection ||--o{ Heading : can_contain + DocumentSection ||--o{ CodeBlock : can_contain + DocumentSection ||--o{ Image : can_contain + DocumentSection ||--o{ BulletList : can_contain + DocumentSection ||--o{ TableData : can_contain + BulletList ||--o{ ListItem : contains + ListItem ||--o{ ListItem : contains +``` + +### datamodelExtraction.py + +```mermaid +erDiagram + ContentExtracted { + string id PK + json parts + json summary + } + + ContentPart { + string id PK 
+ string parentId FK + string label + string typeGroup + string mimeType + string data + json metadata + } + + ExtractionOptions { + string prompt + string operationType + bool processDocumentsIndividually + int imageMaxPixels + int imageQuality + json mergeStrategy + bool chunkAllowed + int maxSize + int textChunkSize + int imageChunkSize + bool enableParallelProcessing + int maxConcurrentChunks + } + + MergeStrategy { + string groupBy + string orderBy + string mergeType + int maxSize + json textMerge + json tableMerge + json structureMerge + json aiResultMerge + bool preserveChunks + string chunkSeparator + bool preserveMetadata + json metadataFields + string onError + bool validateContent + bool useIntelligentMerging + string prompt + json capabilities + } + + PartResult { + json originalPart + string aiResult + int partIndex + string documentId + float processingTime + json metadata + } + + ChunkResult { + json originalChunk + string aiResult + int chunkIndex + string documentId + float processingTime + json metadata + } + + ContentExtracted ||--o{ ContentPart : contains + ContentPart ||--o{ ContentPart : parent_of + ContentExtracted ||--|| ExtractionOptions : uses + ExtractionOptions ||--|| MergeStrategy : uses + ContentPart ||--|| PartResult : produces + ContentPart ||--|| ChunkResult : produces +``` + +### datamodelPagination.py + +```mermaid +erDiagram + PaginationParams { + int page + int pageSize + string sortBy + string sortOrder + } + + PaginationRequest { + json params + json sortFields + } + + SortField { + string field + string order + } + + PaginatedResult { + json items + json metadata + json params + } + + PaginationMetadata { + int page + int pageSize + int totalItems + int totalPages + bool hasNext + bool hasPrevious + } + + PaginationRequest ||--|| PaginationParams : uses + PaginationRequest ||--o{ SortField : contains + PaginatedResult ||--|| PaginationMetadata : has + PaginatedResult ||--o{ PaginationParams : uses +``` + +### 
datamodelVoice.py + +```mermaid +erDiagram + VoiceSettings { + string id PK + string userId FK + string language + string voice + json settings + } +``` + +### datamodelTickets.py + +```mermaid +erDiagram + TicketFieldAttribute { + string fieldName PK + string fieldType + json fieldConfig + } +``` + +### datamodelNeutralizer.py + +```mermaid +erDiagram + DataNeutraliserConfig { + string id PK + string mandateId FK + string name + bool enabled + json attributes + } + + DataNeutralizerAttributes { + string fieldName PK + string neutralizationType + json options + } + + DataNeutraliserConfig ||--o{ DataNeutralizerAttributes : contains +``` + +### datamodelUtils.py + +```mermaid +erDiagram + Prompt { + string id PK + string name + string content + json metadata + } +``` + +### datamodelTools.py + +```mermaid +erDiagram + CountryCodes { + string ISO2Code PK + string tavilyName + string perplexityName + } +``` + +**Note**: `CountryCodes` is a utility class (not a Pydantic BaseModel) that provides static methods for country code mapping. It contains a mapping dictionary but is not persisted to the database. + +### datamodelJson.py + +No database models - contains JSON template constants and supported section types. + +--- + +## Interfaces Component + +### Overview + +The Interfaces component provides a clean abstraction layer for data access operations. Interfaces handle CRUD operations, user context management, access control, and integration with database connectors and external systems. + +### Objects vs Access Files + +Interfaces are split into two file types: + +#### Objects Files (`interface*Objects.py`) + +**Purpose**: Business logic and CRUD operations for data entities. 
+ +**Responsibilities**: +- CRUD operations (Create, Read, Update, Delete) +- Data validation and transformation +- Business rule enforcement +- Database/external system communication +- User context management +- Pagination and filtering + +**Pattern**: Each Objects file contains methods for manipulating domain entities (e.g., `createProjekt()`, `getWorkflow()`, `updateUser()`). + +#### Access Files (`interface*Access.py`) + +**Purpose**: Permission checking and data filtering based on user privileges. + +**Responsibilities**: +- User privilege validation +- Mandate-based filtering +- Record ownership checking +- Access control attribute generation (`_hideView`, `_hideEdit`, `_hideDelete`) +- Permission decision logic + +**Pattern**: Access files contain two main methods: +- `uam()`: Unified Access Management - filters recordsets and adds access control flags +- `canModify()`: Checks if user can create/update/delete records + +**Relationship**: + +```mermaid +graph TB + Objects[Objects File] --> Access[Access File] + Access --> UAM[uam Method] + Access --> CanModify[canModify Method] + + Objects --> CRUD[CRUD Operations] + CRUD --> AccessCheck{Check Access} + AccessCheck -->|Read| UAM + AccessCheck -->|Write| CanModify + + UAM --> Filter[Filter Records] + UAM --> Flags[Add Access Flags] + + CanModify --> Permission{Permission?} + Permission -->|Yes| Allow[Allow Operation] + Permission -->|No| Deny[Deny Operation] +``` + +### Interface Types + +Interfaces are categorized into two types based on their data source: + +#### Database Interfaces + +**Why Database Connectors?**: These interfaces manage persistent data stored in PostgreSQL databases. 
They use database connectors to: +- Store structured data with relationships +- Ensure data consistency and integrity +- Provide ACID transactions +- Support complex queries and filtering +- Enable mandate-based data isolation + +**Characteristics**: +- Use `DatabaseConnector` for PostgreSQL access +- Implement Access classes for permission control +- Support pagination and sorting +- Apply mandate-based filtering automatically +- Track record ownership (`_createdBy`) + +#### External System Interfaces + +**Why External Connectors?**: These interfaces integrate with external APIs and services. They use connectors to: +- Communicate with third-party systems +- Transform data between formats +- Handle API authentication and rate limiting +- Provide abstraction over external service complexity + +**Characteristics**: +- Use specialized connectors (e.g., `ConnectorGoogleSpeech`, `ConnectorTicketJira`) +- May not require user context (system-level operations) +- Focus on data transformation and synchronization +- Handle external API errors and retries + +### Real Estate Interface (`interfaceDbRealEstateObjects.py`) + +**Type**: Database Interface +**Database**: PostgreSQL (Real Estate database) +**Access Control**: `interfaceDbRealEstateAccess.py` → `RealEstateAccess` + +**Purpose**: Manages real estate domain data including projects, parcels, administrative entities, and geographic information. + +**Why Database Connector**: Real estate data requires persistent storage with complex relationships (projects → parcels → administrative units), geographic data (polygons, points), and mandate-based isolation for multi-tenant scenarios. 
+ +**CRUD Operations**: + +```mermaid +graph TB + subgraph "Projekt Operations" + PCreate[createProjekt] + PGet[getProjekt] + PGetAll[getProjekte] + PUpdate[updateProjekt] + PDelete[deleteProjekt] + end + + subgraph "Parzelle Operations" + ParCreate[createParzelle] + ParGet[getParzelle] + ParGetAll[getParzellen] + ParUpdate[updateParzelle] + ParDelete[deleteParzelle] + end + + subgraph "Dokument Operations" + DocCreate[createDokument] + DocGet[getDokument] + DocGetAll[getDokumente] + DocUpdate[updateDokument] + DocDelete[deleteDokument] + end + + subgraph "Administrative Hierarchy" + GemCreate[createGemeinde] + GemGet[getGemeinde] + GemGetAll[getGemeinden] + GemUpdate[updateGemeinde] + GemDelete[deleteGemeinde] + + KanCreate[createKanton] + KanGet[getKanton] + KanGetAll[getKantone] + KanUpdate[updateKanton] + KanDelete[deleteKanton] + + LanCreate[createLand] + LanGet[getLand] + LanGetAll[getLaender] + LanUpdate[updateLand] + LanDelete[deleteLand] + end + + subgraph "Kontext Operations" + KonCreate[createKontext] + KonGet[getKontext] + KonGetAll[getKontexte] + KonUpdate[updateKontext] + KonDelete[deleteKontext] + end +``` + +**Complete CRUD List**: + +**Projekt**: +- `createProjekt(projekt: Projekt) → Projekt` +- `getProjekt(projektId: str) → Optional[Projekt]` +- `getProjekte(recordFilter: Optional[Dict]) → List[Projekt]` +- `updateProjekt(projektId: str, updateData: Dict) → Optional[Projekt]` +- `deleteProjekt(projektId: str) → bool` + +**Parzelle**: +- `createParzelle(parzelle: Parzelle) → Parzelle` +- `getParzelle(parzelleId: str) → Optional[Parzelle]` +- `getParzellen(recordFilter: Optional[Dict]) → List[Parzelle]` +- `updateParzelle(parzelleId: str, updateData: Dict) → Optional[Parzelle]` +- `deleteParzelle(parzelleId: str) → bool` + +**Dokument**: +- `createDokument(dokument: Dokument) → Dokument` +- `getDokument(dokumentId: str) → Optional[Dokument]` +- `getDokumente(recordFilter: Optional[Dict]) → List[Dokument]` +- `updateDokument(dokumentId: str, 
updateData: Dict) → Optional[Dokument]` +- `deleteDokument(dokumentId: str) → bool` + +**Gemeinde**: +- `createGemeinde(gemeinde: Gemeinde) → Gemeinde` +- `getGemeinde(gemeindeId: str) → Optional[Gemeinde]` +- `getGemeinden(recordFilter: Optional[Dict]) → List[Gemeinde]` +- `updateGemeinde(gemeindeId: str, updateData: Dict) → Optional[Gemeinde]` +- `deleteGemeinde(gemeindeId: str) → bool` + +**Kanton**: +- `createKanton(kanton: Kanton) → Kanton` +- `getKanton(kantonId: str) → Optional[Kanton]` +- `getKantone(recordFilter: Optional[Dict]) → List[Kanton]` +- `updateKanton(kantonId: str, updateData: Dict) → Optional[Kanton]` +- `deleteKanton(kantonId: str) → bool` + +**Land**: +- `createLand(land: Land) → Land` +- `getLand(landId: str) → Optional[Land]` +- `getLaender(recordFilter: Optional[Dict]) → List[Land]` +- `updateLand(landId: str, updateData: Dict) → Optional[Land]` +- `deleteLand(landId: str) → bool` + +**Kontext**: +- `createKontext(kontext: Kontext) → Kontext` +- `getKontext(kontextId: str) → Optional[Kontext]` +- `getKontexte(recordFilter: Optional[Dict]) → List[Kontext]` +- `updateKontext(kontextId: str, updateData: Dict) → Optional[Kontext]` +- `deleteKontext(kontextId: str) → bool` + +**Access Control Flow**: + +```mermaid +graph TB + Request[CRUD Request] --> Objects[RealEstateObjects] + Objects --> Access[RealEstateAccess] + + Access --> CheckPriv[Check Privilege] + CheckPriv -->|SYSADMIN| AllData[All Records] + CheckPriv -->|ADMIN| MandateData[Mandate Records] + CheckPriv -->|USER| OwnData[Own Records] + + AllData --> UAM[uam Method] + MandateData --> UAM + OwnData --> UAM + + UAM --> Filter[Filter by mandateId] + UAM --> CheckOwn[Check _createdBy] + UAM --> AddFlags[Add _hideView/_hideEdit/_hideDelete] + + Filter --> Return[Return Filtered Data] + CheckOwn --> Return + AddFlags --> Return +``` + +**Key Features**: +- Geographic data support (GeoPolylinie, GeoPunkt) +- Multi-level administrative hierarchy (Land → Kanton → Gemeinde) +- Location name 
resolution (converts names to IDs for filtering) +- Document versioning and management +- Context information for projects and administrative units + +### Chat Interface (`interfaceDbChatObjects.py`) + +**Type**: Database Interface +**Database**: PostgreSQL (Chat database) +**Access Control**: `interfaceDbChatAccess.py` → `ChatAccess` + +**Purpose**: Manages chat workflows, messages, logs, statistics, and automation definitions for AI-powered conversation workflows. + +**Why Database Connector**: Chat workflows require persistent storage for conversation history, workflow state, performance metrics, and automation configurations. Data must be queryable, filterable, and mandate-isolated. + +**CRUD Operations**: + +```mermaid +graph TB + subgraph "ChatWorkflow Operations" + WfGet[getWorkflows - with pagination] + WfGetOne[getWorkflow - by ID] + WfCreate[createWorkflow] + WfUpdate[updateWorkflow] + WfDelete[deleteWorkflow - cascade] + end + + subgraph "ChatMessage Operations" + MsgGet[getMessages - by workflowId, pagination] + MsgCreate[createMessage] + MsgUpdate[updateMessage] + MsgDelete[deleteMessage] + MsgDeleteFile[deleteFileFromMessage] + end + + subgraph "ChatDocument Operations" + DocGet[getDocuments - by messageId] + DocCreate[createDocument] + end + + subgraph "ChatLog Operations" + LogGet[getLogs - by workflowId, pagination] + LogCreate[createLog] + end + + subgraph "ChatStat Operations" + StatGet[getStats - by workflowId] + StatCreate[createStat] + end + + subgraph "AutomationDefinition Operations" + AutoGet[getAllAutomationDefinitions - pagination] + AutoGetOne[getAutomationDefinition - by ID] + AutoCreate[createAutomationDefinition] + AutoUpdate[updateAutomationDefinition] + AutoDelete[deleteAutomationDefinition] + end + + subgraph "Utility Operations" + Unified[getUnifiedChatData - workflow snapshot] + end +``` + +**Complete CRUD List**: + +**ChatWorkflow**: +- `getWorkflows(pagination: Optional[PaginationParams]) → Union[List[Dict], PaginatedResult]` 
+- `getWorkflow(workflowId: str) → Optional[ChatWorkflow]` +- `createWorkflow(workflowData: Dict) → ChatWorkflow` +- `updateWorkflow(workflowId: str, workflowData: Dict) → ChatWorkflow` +- `deleteWorkflow(workflowId: str) → bool` (cascades to messages, logs, stats) + +**ChatMessage**: +- `getMessages(workflowId: str, pagination: Optional[PaginationParams]) → Union[List[ChatMessage], PaginatedResult]` +- `createMessage(messageData: Dict) → ChatMessage` +- `updateMessage(messageId: str, messageData: Dict) → Dict` +- `deleteMessage(workflowId: str, messageId: str) → bool` +- `deleteFileFromMessage(workflowId: str, messageId: str, fileId: str) → bool` + +**ChatDocument**: +- `getDocuments(messageId: str) → List[ChatDocument]` +- `createDocument(documentData: Dict) → ChatDocument` + +**ChatLog**: +- `getLogs(workflowId: str, pagination: Optional[PaginationParams]) → Union[List[ChatLog], PaginatedResult]` +- `createLog(logData: Dict) → ChatLog` + +**ChatStat**: +- `getStats(workflowId: str) → List[ChatStat]` +- `createStat(statData: Dict) → ChatStat` + +**AutomationDefinition**: +- `getAllAutomationDefinitions(pagination: Optional[PaginationParams]) → Union[List[Dict], PaginatedResult]` +- `getAutomationDefinition(automationId: str) → Optional[Dict]` +- `createAutomationDefinition(automationData: Dict) → Dict` +- `updateAutomationDefinition(automationId: str, automationData: Dict) → Dict` +- `deleteAutomationDefinition(automationId: str) → bool` + +**Utility Methods**: +- `getUnifiedChatData(workflowId: str, afterTimestamp: Optional[float]) → Dict` (returns workflow snapshot with messages, logs, stats) + +**Access Control Flow**: + +```mermaid +graph TB + Request[CRUD Request] --> Objects[ChatObjects] + Objects --> Access[ChatAccess] + + Access --> CheckPriv[Check Privilege] + CheckPriv -->|SYSADMIN| AllWorkflows[All Workflows] + CheckPriv -->|ADMIN| MandateWorkflows[Mandate Workflows] + CheckPriv -->|USER| OwnWorkflows[Own Workflows] + + AllWorkflows --> UAM[uam Method] 
+ MandateWorkflows --> UAM + OwnWorkflows --> UAM + + UAM --> FilterWorkflow[Filter by workflowId mandate] + UAM --> CheckOwn[Check _createdBy] + UAM --> AddFlags[Add access flags] + + FilterWorkflow --> CheckChild[Check Child Access] + CheckChild -->|Message| CheckWorkflow[Check workflow ownership] + CheckChild -->|Log| CheckWorkflow + CheckChild -->|Stat| CheckWorkflow + + CheckWorkflow --> Return[Return Filtered Data] + CheckOwn --> Return + AddFlags --> Return +``` + +**Key Features**: +- Multi-round workflow support with state tracking +- Normalized data model (workflows, messages, logs, stats in separate tables) +- Cascade delete (deleting workflow removes all related data) +- Pagination support for large datasets +- Unified data retrieval for workflow snapshots +- Automation workflow definitions +- Document attachment management + +### App Interface (`interfaceDbAppObjects.py`) + +**Type**: Database Interface +**Database**: PostgreSQL (App database) +**Access Control**: `interfaceDbAppAccess.py` → `AppAccess` + +**Purpose**: Manages users, mandates, authentication tokens, and application-level configuration. + +**Why Database Connector**: User accounts, mandates, and authentication data require secure, persistent storage with strict access control. This is the foundation for all other interfaces' user context. 
+ +**CRUD Operations**: + +```mermaid +graph TB + subgraph "User Operations" + UserGet[getUsersByMandate - pagination] + UserGetByUsername[getUserByUsername] + UserGetOne[getUser - by ID] + UserCreate[createUser - with password hash] + UserUpdate[updateUser] + UserDelete[deleteUser] + end + + subgraph "UserConnection Operations" + ConnGet[getUserConnections - by userId] + ConnGetToken[getConnectionToken - by connectionId] + end + + subgraph "Mandate Operations" + ManGet[getAllMandates - pagination] + ManGetOne[getMandate - by ID] + ManCreate[createMandate] + ManUpdate[updateMandate] + ManDelete[deleteMandate] + end + + subgraph "Neutralization Config" + NeuGet[getNeutralizationConfig] + NeuCreate[createOrUpdateNeutralizationConfig] + NeuGetAttrs[getNeutralizationAttributes] + NeuDeleteAttrs[deleteNeutralizationAttributes] + end + + subgraph "Initialization" + InitRoot[getRootInterface - system init] + InitRecords[_initRootMandate, _initAdminUser, _initEventUser] + end +``` + +**Complete CRUD List**: + +**User**: +- `getUsersByMandate(mandateId: str, pagination: Optional[PaginationParams]) → Union[List[User], PaginatedResult]` +- `getUserByUsername(username: str) → Optional[User]` +- `getUser(userId: str) → Optional[User]` +- `createUser(userData: Dict, password: Optional[str]) → User` (hashes password with Argon2) +- `updateUser(userId: str, updateData: Union[Dict, User]) → User` +- `deleteUser(userId: str) → bool` + +**UserConnection**: +- `getUserConnections(userId: str) → List[UserConnection]` +- `getConnectionToken(connectionId: str) → Optional[Token]` + +**Mandate**: +- `getAllMandates(pagination: Optional[PaginationParams]) → Union[List[Mandate], PaginatedResult]` +- `getMandate(mandateId: str) → Optional[Mandate]` +- `createMandate(name: str, language: str) → Mandate` +- `updateMandate(mandateId: str, updateData: Dict) → Mandate` +- `deleteMandate(mandateId: str) → bool` + +**DataNeutraliserConfig**: +- `getNeutralizationConfig() → 
Optional[DataNeutraliserConfig]` +- `createOrUpdateNeutralizationConfig(configData: Dict) → DataNeutraliserConfig` +- `getNeutralizationAttributes(file_id: str) → List[Dict]` +- `deleteNeutralizationAttributes(file_id: str) → bool` + +**Special Methods**: +- `getRootInterface() → AppObjects` (creates interface with system admin privileges for initialization) +- `getInitialId(model_class: type) → Optional[str]` (gets first record ID for a model) + +**Access Control Flow**: + +```mermaid +graph TB + Request[CRUD Request] --> Objects[AppObjects] + Objects --> Access[AppAccess] + + Access --> CheckPriv[Check Privilege] + CheckPriv -->|SYSADMIN| FullAccess[Full System Access] + CheckPriv -->|ADMIN| MandateAccess[Mandate Access Only] + CheckPriv -->|USER| SelfAccess[Own User Record Only] + + FullAccess --> UserOps[User Operations] + MandateAccess --> UserOps + SelfAccess --> UserOps + + UserOps --> CheckOwn[Check Ownership] + CheckOwn -->|User CRUD| SYSADMINOnly{SYSADMIN?} + CheckOwn -->|Mandate CRUD| SYSADMINOnly + CheckOwn -->|Own Profile| Allow[Allow] + + SYSADMINOnly -->|Yes| Allow + SYSADMINOnly -->|No| Deny[Deny] +``` + +**Key Features**: +- Password hashing with Argon2 +- Multi-provider authentication support (local, external) +- System initialization (Root mandate, Admin user, Event user) +- Mandate-based user isolation +- Token management for external connections +- Data neutralization configuration + +### Component Interface (`interfaceDbComponentObjects.py`) + +**Type**: Database Interface +**Database**: PostgreSQL (Component/Management database) +**Access Control**: `interfaceDbComponentAccess.py` → `ComponentAccess` + +**Purpose**: Manages component-level data including files, prompts, and voice settings used across the application. + +**Why Database Connector**: Files, prompts, and voice settings require persistent storage with metadata, ownership tracking, and mandate isolation. Files need binary storage with preview generation. 
+ +**CRUD Operations**: + +```mermaid +graph TB + subgraph "File Operations" + FileGetAll[getAllFiles - pagination] + FileGet[getFile - by ID] + FileCreate[createFile - name, mimeType, content] + FileUpdate[updateFile] + FileDelete[deleteFile] + FileGetData[getFileData - binary content] + FileGetContent[getFileContent - preview] + FileCreateData[createFileData - store binary] + end + + subgraph "Prompt Operations" + PromptGetAll[getAllPrompts - pagination] + PromptGet[getPrompt - by ID] + PromptCreate[createPrompt] + PromptUpdate[updatePrompt] + PromptDelete[deletePrompt] + end + + subgraph "VoiceSettings Operations" + VoiceGet[getVoiceSettings - by userId] + VoiceCreate[createVoiceSettings] + VoiceUpdate[updateVoiceSettings] + VoiceDelete[deleteVoiceSettings] + VoiceGetOrCreate[getOrCreateVoiceSettings] + end + + subgraph "Utilities" + MimeType[getMimeType - from fileName] + end +``` + +**Complete CRUD List**: + +**FileItem**: +- `getAllFiles(pagination: Optional[PaginationParams]) → Union[List[FileItem], PaginatedResult]` +- `getFile(fileId: str) → Optional[FileItem]` +- `createFile(name: str, mimeType: str, content: bytes) → FileItem` (creates FileItem and FileData) +- `updateFile(fileId: str, updateData: Dict) → Dict` +- `deleteFile(fileId: str) → bool` (deletes FileItem and FileData) +- `getFileData(fileId: str) → Optional[bytes]` (raw binary content) +- `getFileContent(fileId: str) → Optional[FilePreview]` (generates preview) +- `createFileData(fileId: str, data: bytes) → bool` (stores binary data) + +**Prompt**: +- `getAllPrompts(pagination: Optional[PaginationParams]) → Union[List[Prompt], PaginatedResult]` +- `getPrompt(promptId: str) → Optional[Prompt]` +- `createPrompt(promptData: Dict) → Dict` +- `updatePrompt(promptId: str, updateData: Dict) → Dict` +- `deletePrompt(promptId: str) → bool` + +**VoiceSettings**: +- `getVoiceSettings(userId: Optional[str]) → Optional[VoiceSettings]` +- `createVoiceSettings(settingsData: Dict) → Dict` +- 
`updateVoiceSettings(userId: str, updateData: Dict) → Dict` +- `deleteVoiceSettings(userId: str) → bool` +- `getOrCreateVoiceSettings(userId: Optional[str]) → VoiceSettings` + +**Utility Methods**: +- `getMimeType(fileName: str) → str` (detects MIME type from extension) + +**Access Control Flow**: + +```mermaid +graph TB + Request[CRUD Request] --> Objects[ComponentObjects] + Objects --> Access[ComponentAccess] + + Access --> CheckPriv[Check Privilege] + CheckPriv -->|SYSADMIN| AllFiles[All Files] + CheckPriv -->|ADMIN| MandateFiles[Mandate Files] + CheckPriv -->|USER| OwnFiles[Own Files] + + AllFiles --> UAM[uam Method] + MandateFiles --> UAM + OwnFiles --> UAM + + UAM --> FilterMandate[Filter by mandateId] + UAM --> CheckOwn[Check _createdBy] + UAM --> AddFlags[Add access flags] + + FilterMandate --> Return[Return Filtered Data] + CheckOwn --> Return + AddFlags --> Return +``` + +**Key Features**: +- Binary file storage with metadata +- Automatic preview generation (text, images, etc.) +- MIME type detection +- Prompt template management with initialization +- Voice settings per user +- File hash calculation for deduplication + +### AI Interface (`interfaceAiObjects.py`) + +**Type**: External System Interface +**Connectors**: Dynamic discovery via `modelRegistry` +**Access Control**: None (system-level operations) + +**Purpose**: Provides centralized AI operations with dynamic model discovery, automatic model selection, and failover handling. + +**Why External Connector**: AI operations require communication with external APIs (OpenAI, Anthropic, Perplexity, Tavily) and internal AI services. The interface abstracts model selection, handles API calls, manages failover, and tracks costs. 
+ +**Operations**: + +```mermaid +graph TB + subgraph "AI Call Operations" + Call[call - main entry point] + CallText[call with text/context] + CallParts[call with content parts] + end + + subgraph "Model Information" + ModelInfo[getModelInfo - by displayName] + ModelsByTag[getModelsByTag - filter by tag] + end + + subgraph "Internal Processing" + SelectModel[_selectModel - dynamic selection] + ProcessPart[_processContentPartWithFallback] + MergeResults[_mergePartResults] + CallWithModel[_callWithModel - execute API call] + end +``` + +**Complete Operations List**: + +**AI Calls**: +- `call(request: AiCallRequest, progressCallback=None) → AiCallResponse` (main entry point, handles text/context or content parts) +- `getModelInfo(displayName: str) → Dict[str, Any]` (get model metadata) +- `getModelsByTag(tag: str) → List[str]` (filter models by tag) + +**Internal Methods** (used by call): +- `_selectModel(prompt: str, context: str, options: AiCallOptions) → str` (selects best model) +- `_callWithTextContext(request: AiCallRequest) → AiCallResponse` (handles traditional text/context calls) +- `_callWithContentParts(request: AiCallRequest, progressCallback) → AiCallResponse` (handles content parts with chunking) +- `_processContentPartWithFallback(...) 
→ AiCallResponse` (processes single part with failover) +- `_callWithModel(model, prompt, context, options) → AiCallResponse` (executes actual API call) + +**Model Selection and Failover Flow**: + +```mermaid +graph TB + Request[AI Call Request] --> CheckType{Request Type?} + + CheckType -->|Text/Context| TextPath[Text/Context Path] + CheckType -->|Content Parts| PartsPath[Content Parts Path] + + TextPath --> GetFailover[Get Failover Model List] + PartsPath --> GetFailover + + GetFailover --> SelectModel[Select Best Model] + SelectModel --> FilterOp[Filter by Operation Type] + FilterOp --> RateCap[Rate by Capabilities] + RateCap --> ApplyPriority[Apply Priority Rules] + ApplyPriority --> TryModel[Try Model Call] + + TryModel --> Success{Success?} + Success -->|Yes| Return[Return Response] + Success -->|No| NextModel{More Models?} + + NextModel -->|Yes| TryModel + NextModel -->|No| Error[Return Error Response] + + PartsPath --> ProcessEach[Process Each Part] + ProcessEach --> Chunk[Model-Aware Chunking] + Chunk --> TryModel + ProcessEach --> Merge[Merge Results] + Merge --> Return +``` + +**Supported External Systems**: +- **OpenAI**: GPT models via `aicorePluginOpenai` +- **Anthropic**: Claude models via `aicorePluginAnthropic` +- **Perplexity**: Search-enabled models via `aicorePluginPerplexity` +- **Tavily**: Web search API via `aicorePluginTavily` +- **Internal**: Custom models via `aicorePluginInternal` + +**Key Features**: +- Dynamic model discovery (auto-registers available connectors) +- Operation type-based model selection (e.g., IMAGE_ANALYSE, TEXT_GENERATION) +- Automatic failover (tries multiple models on failure) +- Model-aware chunking (respects model context limits) +- Content part processing (handles images, text, tables) +- Cost tracking (calculates USD cost per call) +- Progress callbacks for long-running operations + +### Ticket Interface (`interfaceTicketObjects.py`) + +**Type**: External System Interface +**Connectors**: `ConnectorTicketJira`, 
`ConnectorTicketClickup` +**Access Control**: Connector-level (API credentials) + +**Purpose**: Synchronizes data with external ticket systems (Jira, ClickUp) by transforming tickets to/from list format for Excel-like operations. + +**Why External Connector**: Ticket systems are external services with their own APIs. The interface provides bidirectional synchronization, field mapping, and data transformation between the application's list format and ticket system formats. + +**Operations**: + +```mermaid +graph TB + subgraph "Factory Method" + Factory[createTicketInterfaceByType - connectorType, params] + end + + subgraph "Export Operations" + Export[exportTicketsAsList - read from external] + end + + subgraph "Import Operations" + Import[importListToTickets - write to external] + end + + subgraph "Internal Transformation" + Transform[_transformTicketRecords - field mapping] + Extract[_extractFieldValue - path-based extraction] + FormatDate[_formatDateForExcel - date formatting] + FilterEmpty[_filterEmptyRecords - validation] + end +``` + +**Complete Operations List**: + +**Factory**: +- `createTicketInterfaceByType(taskSyncDefinition: dict, connectorType: str, connectorParams: dict) → TicketInterface` (creates interface with appropriate connector) + +**Export**: +- `exportTicketsAsList() → list[dict]` (reads tickets from external system, transforms to list format) + +**Import**: +- `importListToTickets(records: list[dict]) → None` (transforms list format, writes to external system) + +**Internal Methods**: +- `_transformTicketRecords(tasks: list[dict], includePut: bool) → list[dict]` (transforms according to task_sync_definition) +- `_extractFieldValue(issue_data: dict, field_path: list[str], field_name: str) → Any` (extracts value using path) +- `_formatDateForExcel(date_value: Any) → Optional[str]` (formats dates for Excel compatibility) +- `_isDateField(field_name: str) → bool` (detects date fields) +- `_filterEmptyRecords(records: list[dict]) → list[dict]` 
(removes invalid records) + +**Synchronization Flow**: + +```mermaid +graph TB + subgraph "Export Flow" + ExportStart[exportTicketsAsList] --> ReadConn[Read from Connector] + ReadConn --> GetTasks[Get Tasks from API] + GetTasks --> Transform[Transform Fields] + Transform --> MapFields[Map via task_sync_definition] + MapFields --> FormatDates[Format Dates] + FormatDates --> Filter[Filter Empty Records] + Filter --> ReturnList[Return List Format] + end + + subgraph "Import Flow" + ImportStart[importListToTickets] --> ReceiveList[Receive List Format] + ReceiveList --> ExtractFields[Extract Fields] + ExtractFields --> MapToTicket[Map to Ticket Format] + MapToTicket --> BuildUpdate[Build Update Payload] + BuildUpdate --> WriteConn[Write via Connector] + WriteConn --> UpdateAPI[Update External API] + end + + subgraph "Field Mapping" + SyncDef[task_sync_definition] --> Direction[Direction: get/put] + SyncDef --> Path[Field Path: nested access] + Direction --> Transform + Path --> MapFields + Path --> MapToTicket + end +``` + +**Supported External Systems**: +- **Jira**: Via `ConnectorTicketJira` (uses Jira REST API) +- **ClickUp**: Via `ConnectorTicketClickup` (uses ClickUp API) + +**Key Features**: +- Dynamic connector selection (Jira or ClickUp) +- Field mapping configuration (task_sync_definition maps fields bidirectionally) +- Path-based field extraction (supports nested JSON structures) +- Date format handling (converts various formats to Excel-compatible) +- Bidirectional sync (export to list, import from list) +- Empty record filtering (validates records have IDs) + +### Voice Interface (`interfaceVoiceObjects.py`) + +**Type**: External System Interface +**Connector**: `ConnectorGoogleSpeech` +**Access Control**: User context for settings (stored in Component database) + +**Purpose**: Provides speech-to-text, text-to-speech, and translation services using Google Cloud APIs. 
+ +**Why External Connector**: Voice operations require Google Cloud Speech-to-Text, Text-to-Speech, and Translation APIs. The interface abstracts API complexity, handles audio format conversion, and manages user voice settings (stored in database via Component interface). + +**Operations**: + +```mermaid +graph TB + subgraph "Speech-to-Text Operations" + STT[speechToText - audio to text] + STTTrans[speechToTranslatedText - audio to translated text] + end + + subgraph "Text-to-Speech Operations" + TTS[textToSpeech - text to audio] + TTSTrans[textToTranslatedSpeech - text to translated audio] + end + + subgraph "Translation Operations" + Trans[translateText - text translation] + end + + subgraph "Voice Settings" + GetSettings[getVoiceSettings - from Component DB] + CreateSettings[createVoiceSettings - to Component DB] + UpdateSettings[updateVoiceSettings - in Component DB] + GetOrCreate[getOrCreateVoiceSettings] + end + + subgraph "Metadata Operations" + GetLangs[getAvailableLanguages - from Google API] + GetVoices[getAvailableVoices - from Google API] + end +``` + +**Complete Operations List**: + +**Speech-to-Text**: +- `speechToText(audioContent: bytes, language: str, sampleRate: Optional[int], channels: Optional[int]) → Dict[str, Any]` (converts audio to text) +- `speechToTranslatedText(audioContent: bytes, fromLanguage: str, toLanguage: str, sampleRate: Optional[int], channels: Optional[int]) → Dict[str, Any]` (converts audio to translated text) + +**Text-to-Speech**: +- `textToSpeech(text: str, language: Optional[str], voice: Optional[str]) → Dict[str, Any]` (converts text to audio) +- `textToTranslatedSpeech(text: str, fromLanguage: str, toLanguage: str, voice: Optional[str]) → Dict[str, Any]` (converts text to translated audio) + +**Translation**: +- `translateText(text: str, sourceLanguage: str, targetLanguage: str) → Dict[str, Any]` (translates text) + +**Voice Settings** (delegates to Component interface): +- `getVoiceSettings(userId: str) → 
Optional[VoiceSettings]` +- `createVoiceSettings(settingsData: Dict) → Optional[VoiceSettings]` +- `updateVoiceSettings(userId: str, settingsData: Dict) → Optional[VoiceSettings]` +- `getOrCreateVoiceSettings(userId: str) → Optional[VoiceSettings]` + +**Metadata**: +- `getAvailableLanguages() → Dict[str, Any]` (lists supported languages from Google API) +- `getAvailableVoices(languageCode: Optional[str]) → Dict[str, Any]` (lists available voices, optionally filtered by language) + +**Operation Flow**: + +```mermaid +graph TB + Request[Voice Operation Request] --> Voice[VoiceObjects] + Voice --> CheckSettings{Need Settings?} + + CheckSettings -->|Yes| GetSettings[Get from Component DB] + CheckSettings -->|No| Connector[Get Connector] + + GetSettings --> Connector + Connector --> GoogleAPI[Google Cloud API] + + GoogleAPI --> STT{Operation Type?} + STT -->|Speech-to-Text| STTAPI[Speech-to-Text API] + STT -->|Text-to-Speech| TTSAPI[Text-to-Speech API] + STT -->|Translation| TransAPI[Translation API] + + STTAPI --> Process[Process Response] + TTSAPI --> Process + TransAPI --> Process + + Process --> Format[Format Response] + Format --> Return[Return Result] +``` + +**Supported External Systems**: +- **Google Cloud Speech-to-Text**: Audio transcription +- **Google Cloud Text-to-Speech**: Audio synthesis +- **Google Cloud Translation**: Text translation + +**Key Features**: +- Multi-language support (detects and supports many languages) +- Audio format handling (auto-detects sample rate, channels) +- Combined operations (speech-to-translated-text, text-to-translated-speech) +- User voice preferences (stored in Component database) +- Language and voice discovery (queries Google API for available options) +- Error handling with detailed error messages + +### Access Control Summary + +**Privilege Levels**: +1. **SYSADMIN**: Full system access, all mandates +2. **ADMIN**: Full access within mandate +3. 
**USER**: Access to own records only + +**Access Methods**: +- `uam()`: Filters recordsets by privilege, adds `_hideView`, `_hideEdit`, `_hideDelete` flags +- `canModify()`: Checks if user can create/update/delete records based on ownership and privilege + +**Singleton Pattern**: Interfaces use factory functions that cache instances per user context for efficient memory usage. + +**User Context**: All database interfaces require user context (User object with mandateId, userId, privilege) for access control and data filtering. + +--- \ No newline at end of file diff --git a/docs/code-documentation/features-component.md b/docs/code-documentation/features-component.md new file mode 100644 index 00000000..a84b138d --- /dev/null +++ b/docs/code-documentation/features-component.md @@ -0,0 +1,981 @@ +# Features Component Documentation + +Comprehensive documentation of the Features layer in the Gateway application, explaining the architecture, patterns, and implementation details of all feature modules and their relationship to connectors, services, and workflows. + +## Table of Contents + +1. [Overview](#overview) +2. [What is a Feature?](#what-is-a-feature) +3. [Features vs Services vs Workflows](#features-vs-services-vs-workflows) +4. [Feature Architecture](#feature-architecture) +5. [Feature Lifecycle Management](#feature-lifecycle-management) +6. [Connectors in the Architecture](#connectors-in-the-architecture) +7. [Individual Features](#individual-features) +8. [Feature Patterns and Best Practices](#feature-patterns-and-best-practices) + +--- + +## Overview + +The **Features Layer** is a domain-specific business logic layer that implements core functionality for specific use cases. Features serve as **temporary solutions** that bridge the gap between initial requirements and full service implementation or workflow integration. They provide rapid prototyping capabilities while maintaining clean architectural boundaries. 
+ +```mermaid +graph TB + subgraph "Application Layers" + Routes[Routes Layer
API Endpoints] + Features[Features Layer
Domain-Specific Logic] + Services[Services Layer
Reusable Components] + Workflows[Workflows Layer
Orchestration Engine] + Interfaces[Interfaces Layer
Data Access] + Connectors[Connectors Layer
External Systems] + end + + Routes --> Features + Routes --> Services + Routes --> Workflows + Features --> Services + Features --> Interfaces + Workflows --> Services + Services --> Interfaces + Interfaces --> Connectors + + style Features fill:#e8f5e9,stroke:#1b5e20,stroke-width:3px + style Connectors fill:#fff3e0,stroke:#e65100,stroke-width:2px +``` + +### Key Characteristics + +- **Domain-Specific**: Each feature addresses a specific business domain or use case +- **Temporary by Design**: Features are intended to be migrated to services or workflows over time +- **Stateless**: Features operate without maintaining session state +- **Service-Dependent**: Features leverage services for cross-cutting functionality +- **Interface-Dependent**: Features use interfaces to access data through connectors +- **Lifecycle-Managed**: Background features are managed through the Features Lifecycle system + +--- + +## What is a Feature? + +A **Feature** is a domain-specific business logic module that implements functionality for a particular use case. Features are designed to: + +1. **Rapid Prototyping**: Enable quick implementation of new functionality without full service architecture +2. **Domain Encapsulation**: Group related business logic for a specific domain (e.g., Real Estate, Chat, Data Synchronization) +3. **Temporary Solutions**: Serve as interim implementations before migration to services or workflows +4. **Orchestration**: Coordinate between services, interfaces, and external systems to fulfill business requirements +5. 
**Background Processing**: Support scheduled tasks, event-driven operations, and background managers + +### Feature Lifecycle Philosophy + +Features follow a natural evolution path: + +```mermaid +graph LR + A[Initial Requirement] --> B[Feature Implementation] + B --> C{Stability & Usage} + C -->|Mature| D[Service Migration] + C -->|Complex Workflow| E[Workflow Integration] + C -->|Still Experimental| B + + D --> F[Production Service] + E --> G[Workflow Component] + + style B fill:#fff3e0,stroke:#e65100 + style D fill:#e8f5e9,stroke:#1b5e20 + style E fill:#e1f5ff,stroke:#01579b +``` + +**When to Use Features:** +- New functionality that needs rapid development +- Domain-specific logic that may not be reusable +- Experimental or proof-of-concept implementations +- Background tasks requiring scheduled execution +- Integrations that are still being refined + +**When to Migrate to Services:** +- Functionality becomes reusable across multiple domains +- The feature is stable and well-tested +- Multiple features or routes need the same functionality +- The logic should be part of the core service layer + +**When to Migrate to Workflows:** +- The feature involves complex multi-step user interactions +- Task planning and adaptive learning are required +- The feature needs workflow orchestration capabilities +- User interactions require state management and progress tracking + +--- + +## Features vs Services vs Workflows + +Understanding the distinction between Features, Services, and Workflows is crucial for architectural decisions. + +```mermaid +graph TB + subgraph "Comparison Matrix" + A[Route Request] --> B{What Type?} + B -->|Domain-Specific
Single Use Case| C[Feature] + B -->|Reusable
Cross-Cutting| D[Service] + B -->|Complex Multi-Step
User Interaction| E[Workflow] + + C --> F[Uses Services] + C --> G[Uses Interfaces] + D --> H[Uses Other Services] + D --> G + E --> D + E --> I[Uses Methods] + + G --> J[Uses Connectors] + end + + style C fill:#fff3e0,stroke:#e65100 + style D fill:#e8f5e9,stroke:#1b5e20 + style E fill:#e1f5ff,stroke:#01579b + style J fill:#fce4ec,stroke:#880e4f +``` + +| Aspect | Feature | Service | Workflow | +|--------|---------|---------|----------| +| **Purpose** | Domain-specific business logic | Cross-cutting, reusable functionality | Complex multi-step orchestration | +| **Scope** | Single use case or domain | Multiple use cases | User interaction flows | +| **Reusability** | Low (domain-specific) | High (cross-domain) | Medium (workflow patterns) | +| **State Management** | Stateless | Stateless | Stateful (workflow state) | +| **Dependencies** | Uses services and interfaces | Uses other services and interfaces | Uses services and methods | +| **Lifecycle** | Temporary, may migrate | Permanent core component | Permanent orchestration engine | +| **Examples** | Real Estate queries, Chat Althaus scheduler | AI processing, Document extraction | Chat workflows, Task planning | + +### Decision Flow + +```mermaid +flowchart TD + Start[New Functionality Required] --> Q1{Is it reusable
across domains?} + Q1 -->|Yes| Service[Implement as Service] + Q1 -->|No| Q2{Does it require
complex multi-step
user interaction?} + Q2 -->|Yes| Workflow[Implement as Workflow] + Q2 -->|No| Q3{Is it domain-specific
or experimental?} + Q3 -->|Yes| Feature[Implement as Feature] + Q3 -->|No| Service + + Feature --> Q4{Feature Matures} + Q4 -->|Stable & Reusable| Service + Q4 -->|Complex Interactions| Workflow + Q4 -->|Still Experimental| Feature + + style Feature fill:#fff3e0,stroke:#e65100 + style Service fill:#e8f5e9,stroke:#1b5e20 + style Workflow fill:#e1f5ff,stroke:#01579b +``` + +--- + +## Feature Architecture + +### High-Level Architecture + +```mermaid +graph TB + subgraph "Entry Point" + App[app.py
FastAPI Application] + Lifecycle[Features Lifecycle
featuresLifecycle.py] + end + + subgraph "API Layer" + Routes[Routes
routeRealEstate.py
routeChatPlayground.py
routeDataNeutralization.py] + end + + subgraph "Feature Layer" + RE[Real Estate Feature
mainRealEstate.py] + CA[Chat Althaus Feature
mainChatAlthaus.py] + SD[Sync Delta Feature
mainSyncDelta.py] + CP[Chat Playground Feature
mainChatPlayground.py] + NP[Neutralize Playground Feature
mainNeutralizePlayground.py] + end + + subgraph "Service Layer" + Services[Services Container
AI, Chat, SharePoint, etc.] + end + + subgraph "Interface Layer" + Interfaces[Interfaces
Database, Ticket, etc.] + end + + subgraph "Connector Layer" + DBConn[Database Connector
connectorDbPostgre.py] + TicketConn[Ticket Connectors
connectorTicketsJira.py
connectorTicketsClickup.py] + VoiceConn[Voice Connector
connectorVoiceGoogle.py] + JsonConn[JSON Connector
connectorDbJson.py] + end + + subgraph "External Systems" + DB[(PostgreSQL Database)] + Jira[Jira API] + ClickUp[ClickUp API] + SharePoint[SharePoint API] + GoogleVoice[Google Voice API] + end + + App --> Lifecycle + App --> Routes + Lifecycle --> CA + Lifecycle --> SD + Routes --> RE + Routes --> CP + Routes --> NP + + RE --> Services + CA --> Services + SD --> Services + CP --> Services + NP --> Services + + Services --> Interfaces + Interfaces --> DBConn + Interfaces --> TicketConn + Interfaces --> VoiceConn + Interfaces --> JsonConn + + DBConn --> DB + TicketConn --> Jira + TicketConn --> ClickUp + VoiceConn --> GoogleVoice + Services --> SharePoint + + style Features fill:#fff3e0,stroke:#e65100 + style Connectors fill:#fce4ec,stroke:#880e4f +``` + +### Feature Request Flow + +```mermaid +sequenceDiagram + participant Client + participant Route as Route
routeRealEstate.py + participant Feature as Feature
mainRealEstate.py + participant Service as Service
Services Container + participant Interface as Interface
interfaceDbRealEstateObjects.py + participant Connector as Connector
connectorDbPostgre.py + participant DB as Database
PostgreSQL + + Client->>Route: HTTP Request + Route->>Route: Validate Request Data + Route->>Feature: Call Feature Function + Feature->>Service: Use Service (e.g., AI Service) + Service-->>Feature: Service Result + Feature->>Interface: Request Data Access + Interface->>Connector: Execute Query + Connector->>DB: SQL Query + DB-->>Connector: Raw Data + Connector-->>Interface: Raw Data + Interface->>Interface: Transform to Domain Objects + Interface-->>Feature: Domain Objects + Feature->>Feature: Process Business Logic + Feature-->>Route: Processed Result + Route->>Route: Serialize Response + Route-->>Client: HTTP Response +``` + +--- + +## Feature Lifecycle Management + +Features that require background processing, scheduled tasks, or event-driven operations are managed through the **Features Lifecycle** system. + +### Lifecycle Architecture + +```mermaid +graph TB + subgraph "Application Startup" + App[app.py
FastAPI Application] + Lifespan[Lifespan Context Manager] + end + + subgraph "Features Lifecycle" + Lifecycle[featuresLifecycle.py] + Start[start function] + Stop[stop function] + end + + subgraph "Background Features" + SyncDelta[SyncDelta Manager
startSyncManager] + ChatAlthaus[ChatAlthaus Manager
startDataScheduler] + AutomationEvents[Automation Events
syncAutomationEvents] + end + + App --> Lifespan + Lifespan -->|On Startup| Lifecycle + Lifecycle --> Start + Start --> SyncDelta + Start --> ChatAlthaus + Start --> AutomationEvents + + Lifespan -->|On Shutdown| Lifecycle + Lifecycle --> Stop + Stop --> SyncDelta + Stop --> ChatAlthaus + + style Lifecycle fill:#e8f5e9,stroke:#1b5e20 + style SyncDelta fill:#fff3e0,stroke:#e65100 + style ChatAlthaus fill:#fff3e0,stroke:#e65100 +``` + +### Lifecycle Sequence + +```mermaid +sequenceDiagram + participant App as app.py + participant Lifespan as Lifespan Manager + participant Lifecycle as featuresLifecycle + participant EventUser as Event User + participant SyncDelta as SyncDelta Manager + participant ChatAlthaus as ChatAlthaus Manager + + App->>Lifespan: Application Startup + Lifespan->>Lifecycle: start() + Lifecycle->>EventUser: getRootInterface().getUserByUsername("event") + EventUser-->>Lifecycle: Event User Object + + Lifecycle->>ChatAlthaus: syncAutomationEvents() + ChatAlthaus-->>Lifecycle: Events Synced + + Lifecycle->>SyncDelta: startSyncManager(eventUser) + SyncDelta->>SyncDelta: Initialize Background Thread + SyncDelta-->>Lifecycle: Manager Started + + Lifecycle->>ChatAlthaus: startDataScheduler(eventUser) + ChatAlthaus->>ChatAlthaus: Initialize Scheduler + ChatAlthaus-->>Lifecycle: Scheduler Started + + Lifecycle->>ChatAlthaus: performDataUpdate(eventUser) + ChatAlthaus-->>Lifecycle: Initial Update Complete + + Lifecycle-->>Lifespan: Startup Complete + Lifespan-->>App: Application Ready + + Note over App: Application Running... + + App->>Lifespan: Application Shutdown + Lifespan->>Lifecycle: stop() + Lifecycle->>SyncDelta: Stop Manager + Lifecycle->>ChatAlthaus: Stop Scheduler + Lifecycle-->>Lifespan: Shutdown Complete +``` + +### Lifecycle-Managed Features + +Features managed through the lifecycle system include: + +1. **SyncDelta**: Background synchronization manager for ticket synchronization +2. 
**ChatAlthaus**: Scheduled data updates for Althaus preprocessing service +3. **Automation Events**: Event synchronization for chat automation + +These features run continuously in the background and require proper initialization and cleanup during application startup and shutdown. + +--- + +## Connectors in the Architecture + +Connectors are the lowest-level abstraction for communicating with external systems. They provide concrete implementations for database connections, API integrations, and external service communication. + +### Connector Architecture + +```mermaid +graph TB + subgraph "Connector Types" + DBConn[Database Connectors
connectorDbPostgre.py
connectorDbJson.py] + TicketConn[Ticket Connectors
connectorTicketsJira.py
connectorTicketsClickup.py] + VoiceConn[Voice Connectors
connectorVoiceGoogle.py] + end + + subgraph "Connector Responsibilities" + Connection[Connection Management
Establish & Maintain Connections] + Query[Query Execution
Execute Queries & API Calls] + Transform[Data Transformation
Raw Data ↔ Application Format] + Error[Error Handling
Connection Errors & Retries] + end + + subgraph "External Systems" + PostgreSQL[(PostgreSQL Database)] + JSONFile[JSON Files] + JiraAPI[Jira API] + ClickUpAPI[ClickUp API] + GoogleVoiceAPI[Google Voice API] + end + + DBConn --> Connection + TicketConn --> Connection + VoiceConn --> Connection + + Connection --> Query + Query --> Transform + Transform --> Error + + DBConn --> PostgreSQL + DBConn --> JSONFile + TicketConn --> JiraAPI + TicketConn --> ClickUpAPI + VoiceConn --> GoogleVoiceAPI + + style DBConn fill:#fce4ec,stroke:#880e4f + style TicketConn fill:#fce4ec,stroke:#880e4f + style VoiceConn fill:#fce4ec,stroke:#880e4f +``` + +### Connector Usage Flow + +```mermaid +sequenceDiagram + participant Feature as Feature + participant Service as Service + participant Interface as Interface + participant Connector as Connector + participant External as External System + + Feature->>Service: Use Service + Service->>Interface: Request Data Access + Interface->>Connector: Initialize Connection + Connector->>External: Establish Connection + External-->>Connector: Connection Established + + Interface->>Connector: Execute Query/API Call + Connector->>Connector: Format Request + Connector->>External: Send Request + External-->>Connector: Raw Response + Connector->>Connector: Parse Response + Connector-->>Interface: Formatted Data + Interface->>Interface: Transform to Domain Objects + Interface-->>Service: Domain Objects + Service-->>Feature: Processed Result +``` + +### Available Connectors + +#### Database Connectors + +**connectorDbPostgre.py** - PostgreSQL Database Connector +- Manages PostgreSQL database connections +- Executes SQL queries with parameterization +- Handles JSONB column types +- Provides transaction support +- Used by: Real Estate interfaces, Chat interfaces, Application interfaces + +**connectorDbJson.py** - JSON File Database Connector +- Provides file-based data storage using JSON +- Useful for development and testing +- Lightweight alternative to 
PostgreSQL +- Used by: Development environments, Testing scenarios + +#### Ticket Connectors + +**connectorTicketsJira.py** - Jira Ticket Connector +- Integrates with Jira REST API +- Manages Jira tickets, issues, and projects +- Handles field mapping and synchronization +- Used by: SyncDelta feature, Ticket interfaces + +**connectorTicketsClickup.py** - ClickUp Ticket Connector +- Integrates with ClickUp API +- Manages ClickUp tasks and lists +- Handles task synchronization +- Used by: Ticket interfaces + +#### Voice Connectors + +**connectorVoiceGoogle.py** - Google Voice Connector +- Integrates with Google Voice API +- Handles voice transcription and processing +- Manages voice data and audio files +- Used by: Voice-related services and features + +### Connector Integration Pattern + +Connectors are never directly accessed by features. Instead, they follow this integration pattern: + +```mermaid +graph LR + A[Feature] --> B[Service] + B --> C[Interface] + C --> D[Connector] + D --> E[External System] + + style A fill:#fff3e0,stroke:#e65100 + style B fill:#e8f5e9,stroke:#1b5e20 + style C fill:#e1f5ff,stroke:#01579b + style D fill:#fce4ec,stroke:#880e4f + style E fill:#f5f5f5,stroke:#424242 +``` + +**Why This Pattern?** +- **Abstraction**: Interfaces hide connector implementation details +- **Flexibility**: Connectors can be swapped without affecting features +- **Testability**: Interfaces can be mocked for testing +- **Consistency**: All data access follows the same pattern +- **User Context**: Interfaces handle user context and access control + +--- + +## Individual Features + +### Real Estate Feature + +**Location**: `modules/features/realEstate/mainRealEstate.py` + +**Purpose**: Provides AI-powered natural language processing for Real Estate database operations. Enables users to interact with Real Estate data using natural language commands that are translated into CRUD operations. 
+ +**Architecture**: + +```mermaid +graph TB + subgraph "Real Estate Feature" + Route[routeRealEstate.py] + Feature[mainRealEstate.py] + Intent[Intent Analysis
analyzeUserIntent] + CRUD[CRUD Operations
executeIntentBasedOperation] + Query[Direct Queries
executeDirectQuery] + end + + subgraph "Dependencies" + AIService[AI Service
Intent Recognition] + REInterface[Real Estate Interface
interfaceDbRealEstateObjects.py] + DBConnector[Database Connector
connectorDbPostgre.py] + end + + subgraph "Data Models" + REModels[Real Estate Models
Projekt, Parzelle, etc.] + end + + Route --> Feature + Feature --> Intent + Feature --> CRUD + Feature --> Query + + Intent --> AIService + CRUD --> REInterface + Query --> REInterface + + REInterface --> DBConnector + REInterface --> REModels + + style Feature fill:#fff3e0,stroke:#e65100 +``` + +**Key Functions**: +- `processNaturalLanguageCommand()`: Main entry point for natural language processing +- `analyzeUserIntent()`: Uses AI to analyze user input and extract intent, entity, and parameters +- `executeIntentBasedOperation()`: Executes CRUD operations based on analyzed intent +- `executeDirectQuery()`: Executes direct SQL queries without AI processing + +**Connector Usage**: +- Uses **Database Connector** (`connectorDbPostgre.py`) through the Real Estate Interface +- Accesses PostgreSQL database for Real Estate data +- Handles CRUD operations on entities like Projekt, Parzelle, Dokument + +**Service Integration**: +- Uses **AI Service** for intent recognition and natural language understanding +- Leverages AI planning capabilities to analyze user commands + +**Migration Path**: +- May evolve into a **Service** if Real Estate operations become reusable across domains +- Could integrate with **Workflows** for complex multi-step Real Estate processes + +--- + +### Chat Althaus Feature + +**Location**: `modules/features/chatAlthaus/mainChatAlthaus.py` + +**Purpose**: Manages scheduled data updates for the Althaus preprocessing service. Triggers daily updates to synchronize database configuration with external preprocessing service. + +**Architecture**: + +```mermaid +graph TB + subgraph "Chat Althaus Feature" + Lifecycle[featuresLifecycle.py] + Manager[ManagerChatAlthaus] + Scheduler[Data Scheduler] + Updater[Data Updater
updateDatabaseWithConfig] + end + + subgraph "Dependencies" + Services[Services Container] + HTTPClient[HTTP Client
aiohttp] + Config[Configuration
APP_CONFIG] + end + + subgraph "External System" + AlthausAPI[Althaus Preprocessing API
Azure Function] + end + + Lifecycle --> Manager + Manager --> Scheduler + Manager --> Updater + + Updater --> Services + Updater --> HTTPClient + Updater --> Config + + Updater --> AlthausAPI + + style Manager fill:#fff3e0,stroke:#e65100 +``` + +**Key Functions**: +- `startDataScheduler()`: Initializes and starts the scheduled data update manager +- `performDataUpdate()`: Executes immediate data update +- `updateDatabaseWithConfig()`: Sends configuration to Althaus preprocessing service + +**Scheduling**: +- Runs daily at 01:00 UTC +- Uses background scheduler for automated execution +- Managed through Features Lifecycle system + +**Connector Usage**: +- Uses **HTTP Client** (aiohttp) for API communication +- No database connector (uses external API) + +**Service Integration**: +- Uses **Services Container** for configuration access +- Leverages shared configuration utilities + +**Migration Path**: +- Could become a **Service** if data synchronization becomes a core capability +- May integrate with **Workflows** for complex data processing pipelines + +--- + +### Sync Delta Feature + +**Location**: `modules/features/syncDelta/mainSyncDelta.py` + +**Purpose**: Synchronizes tickets between Jira and SharePoint. Manages bidirectional synchronization of ticket data, supporting both CSV and Excel file formats. + +**Architecture**: + +```mermaid +graph TB + subgraph "Sync Delta Feature" + Lifecycle[featuresLifecycle.py] + Manager[ManagerSyncDelta] + Sync[syncTicketsOverSharepoint] + Merge[Data Merging Logic] + Audit[Audit Logging] + end + + subgraph "Dependencies" + TicketService[Ticket Service] + SharePointService[SharePoint Service] + JiraConnector[Jira Connector
connectorTicketsJira.py] + end + + subgraph "External Systems" + Jira[Jira API] + SharePoint[SharePoint API] + end + + Lifecycle --> Manager + Manager --> Sync + Sync --> Merge + Sync --> Audit + + Sync --> TicketService + Sync --> SharePointService + + TicketService --> JiraConnector + JiraConnector --> Jira + SharePointService --> SharePoint + + style Manager fill:#fff3e0,stroke:#e65100 +``` + +**Key Functions**: +- `startSyncManager()`: Initializes background synchronization manager +- `syncTicketsOverSharepoint()`: Performs synchronization between Jira and SharePoint +- `initializeInterface()`: Sets up connectors and validates connections +- `_logAuditEvent()`: Logs synchronization events for auditing + +**Synchronization Modes**: +- **CSV Mode**: Uses CSV files for data exchange +- **Excel Mode**: Uses Excel (.xlsx) files for data exchange + +**Connector Usage**: +- Uses **Jira Connector** (`connectorTicketsJira.py`) through Ticket Service +- Uses **SharePoint Service** for file operations +- Manages field mapping between Jira and SharePoint formats + +**Service Integration**: +- Uses **Ticket Service** for ticket interface creation +- Uses **SharePoint Service** for file upload/download +- Leverages **Services Container** for configuration and utilities + +**Migration Path**: +- Likely candidate for **Service** migration as ticket synchronization becomes core functionality +- Could integrate with **Workflows** for complex synchronization scenarios + +--- + +### Chat Playground Feature + +**Location**: `modules/features/chatPlayground/mainChatPlayground.py` + +**Purpose**: Provides entry point for chat workflow functionality. Acts as a thin wrapper around the WorkflowManager for chat-based interactions. 
+ +**Architecture**: + +```mermaid +graph TB + subgraph "Chat Playground Feature" + Route[routeChatPlayground.py] + Feature[mainChatPlayground.py] + Start[chatStart] + Stop[chatStop] + end + + subgraph "Workflow System" + WorkflowManager[WorkflowManager] + WorkflowProcessor[WorkflowProcessor] + Methods[Workflow Methods] + end + + subgraph "Dependencies" + Services[Services Container] + end + + Route --> Feature + Feature --> Start + Feature --> Stop + + Start --> WorkflowManager + Stop --> WorkflowManager + + WorkflowManager --> WorkflowProcessor + WorkflowProcessor --> Methods + + WorkflowManager --> Services + + style Feature fill:#fff3e0,stroke:#e65100 + style WorkflowManager fill:#e1f5ff,stroke:#01579b +``` + +**Key Functions**: +- `chatStart()`: Starts a new chat workflow or continues an existing one +- `chatStop()`: Stops a running chat workflow + +**Relationship to Workflows**: +- This feature is a **bridge** between routes and the workflow system +- Delegates all processing to WorkflowManager +- Demonstrates how features can integrate with workflows + +**Connector Usage**: +- No direct connector usage (delegates to workflows) +- Workflows use connectors through services and methods + +**Service Integration**: +- Uses **Services Container** for workflow management +- Leverages workflow services for chat operations + +**Migration Path**: +- Already integrated with **Workflows** system +- May be simplified or removed as workflows become the primary interface + +--- + +### Neutralize Playground Feature + +**Location**: `modules/features/neutralizePlayground/mainNeutralizePlayground.py` + +**Purpose**: Provides a playground interface for data neutralization functionality. Wraps the Neutralization Service for testing and experimentation. 
+ +**Architecture**: + +```mermaid +graph TB + subgraph "Neutralize Playground Feature" + Route[routeDataNeutralization.py] + Feature[NeutralizationPlayground] + ProcessText[processText] + ProcessFiles[processFiles] + CleanAttributes[cleanAttributes] + Stats[getStats] + Config[getConfig/saveConfig] + end + + subgraph "Dependencies" + NeutralizationService[Neutralization Service] + end + + Route --> Feature + Feature --> ProcessText + Feature --> ProcessFiles + Feature --> CleanAttributes + Feature --> Stats + Feature --> Config + + ProcessText --> NeutralizationService + ProcessFiles --> NeutralizationService + CleanAttributes --> NeutralizationService + Stats --> NeutralizationService + Config --> NeutralizationService + + style Feature fill:#fff3e0,stroke:#e65100 + style NeutralizationService fill:#e8f5e9,stroke:#1b5e20 +``` + +**Key Functions**: +- `processText()`: Processes text for data neutralization +- `processFiles()`: Processes files for data neutralization +- `cleanAttributes()`: Cleans neutralization attributes +- `getStats()`: Retrieves neutralization statistics +- `getConfig()` / `saveConfig()`: Manages neutralization configuration + +**Purpose as Playground**: +- Provides testing interface for neutralization functionality +- Allows experimentation with neutralization patterns +- Demonstrates service usage patterns + +**Connector Usage**: +- No direct connector usage (uses Neutralization Service) +- Service handles all data access internally + +**Service Integration**: +- Wraps **Neutralization Service** for easy access +- Provides playground-specific functionality + +**Migration Path**: +- May be removed once neutralization is fully integrated +- Functionality may move directly to routes using the service + +--- + +## Feature Patterns and Best Practices + +### Pattern 1: Stateless Feature Design + +Features should be stateless and operate without session management. Each request should be independent and self-contained. 
+ +```mermaid +graph LR + A[Request] --> B[Feature Function] + B --> C[Process Request] + C --> D[Return Result] + + style B fill:#fff3e0,stroke:#e65100 +``` + +**Benefits**: +- Simpler implementation +- Better scalability +- Easier testing +- No state management overhead + +### Pattern 2: Service Delegation + +Features should delegate cross-cutting functionality to services rather than implementing it directly. + +```mermaid +graph TB + A[Feature] --> B{Needs Functionality} + B -->|AI Processing| C[AI Service] + B -->|Data Access| D[Interface] + B -->|File Operations| E[File Service] + B -->|Other| F[Other Services] + + style A fill:#fff3e0,stroke:#e65100 + style C fill:#e8f5e9,stroke:#1b5e20 + style D fill:#e1f5ff,stroke:#01579b +``` + +**Benefits**: +- Code reuse +- Consistent behavior +- Easier maintenance +- Better separation of concerns + +### Pattern 3: Interface Abstraction + +Features should never directly access connectors. All data access should go through interfaces. + +```mermaid +graph LR + A[Feature] --> B[Interface] + B --> C[Connector] + C --> D[External System] + + style A fill:#fff3e0,stroke:#e65100 + style B fill:#e1f5ff,stroke:#01579b + style C fill:#fce4ec,stroke:#880e4f +``` + +**Benefits**: +- Abstraction of implementation details +- Flexibility to change connectors +- Consistent data access patterns +- User context handling + +### Pattern 4: Background Processing + +Features requiring background processing should use the Features Lifecycle system. + +```mermaid +sequenceDiagram + participant App as Application + participant Lifecycle as Features Lifecycle + participant Feature as Background Feature + participant Scheduler as Scheduler + + App->>Lifecycle: Startup + Lifecycle->>Feature: Initialize Manager + Feature->>Scheduler: Start Background Task + Scheduler-->>Feature: Task Running + + Note over Feature,Scheduler: Background Processing... 
+ + App->>Lifecycle: Shutdown + Lifecycle->>Feature: Stop Manager + Feature->>Scheduler: Stop Background Task + Scheduler-->>Feature: Task Stopped +``` + +**Benefits**: +- Proper lifecycle management +- Clean startup/shutdown +- Resource management +- Error handling + +### Pattern 5: Migration Planning + +Features should be designed with migration in mind. Consider future migration to services or workflows during design. + +```mermaid +graph LR + A[Feature Design] --> B{Consider Migration} + B -->|Reusable Logic| C[Design as Service] + B -->|Complex Flow| D[Design for Workflow] + B -->|Temporary| E[Keep as Feature] + + style A fill:#fff3e0,stroke:#e65100 + style C fill:#e8f5e9,stroke:#1b5e20 + style D fill:#e1f5ff,stroke:#01579b +``` + +**Best Practices**: +- Document migration path +- Keep dependencies minimal +- Use standard patterns +- Plan for refactoring + +--- + +## Summary + +The Features component provides a flexible, domain-specific business logic layer that enables rapid development while maintaining architectural boundaries. Features serve as temporary solutions that bridge the gap between initial requirements and full service or workflow implementation. + +**Key Takeaways**: + +1. **Features are Temporary**: Designed to be migrated to services or workflows as they mature +2. **Domain-Specific**: Each feature addresses a specific business domain or use case +3. **Service-Dependent**: Features leverage services for cross-cutting functionality +4. **Interface-Abstracted**: Features access data through interfaces, never directly through connectors +5. **Lifecycle-Managed**: Background features are managed through the Features Lifecycle system +6. **Connector Integration**: Connectors are accessed through interfaces, providing abstraction and flexibility + +The architecture supports a natural evolution path from features to services or workflows, ensuring that the codebase remains maintainable and scalable as functionality matures. 
+ diff --git a/docs/code-documentation/gateway-development-framework.md b/docs/code-documentation/gateway-development-framework.md new file mode 100644 index 00000000..9ce651a0 --- /dev/null +++ b/docs/code-documentation/gateway-development-framework.md @@ -0,0 +1,2281 @@ +# Gateway Development Framework: Connectors → Interfaces → Services → Workflows + +This document explains the gateway's code logic and development framework to build market customer journey features. It focuses on how connectors, interfaces, services, and workflows compose a standardized services landscape that can be consumed by routes, features, and agent models to perform tasks and actions. + +--- + +## Purpose + +- **Unify external tools**: Combine many third‑party APIs and utilities behind a consistent interface. + +- **Standardize service design**: Model capabilities as reusable services with clear contracts. + +- **Enable workflow automation**: Let agent models orchestrate multi‑step tasks using the centralized services. + +- **Abstract complexity**: Hide implementation details behind clean, well-defined APIs. + +- **Enforce security and governance**: Apply consistent access control, audit trails, and data isolation across all layers. + +--- + +## High‑Level Architecture + +The Gateway follows a layered architecture pattern with clear separation of concerns: + +1. **Connectors**: Vendor-specific adapters for external systems (databases, APIs, cloud services) handling auth, transport, retries, and basic mapping. + +2. **Interfaces**: Normalization layer exposing common contracts independent of any single vendor. Provides CRUD operations, access control, and data transformation. + +3. **Services**: Business‑level capabilities built on interfaces, composed into feature‑ready functions. Orchestrate multiple interfaces and apply business rules. + +4. 
**Service Center**: Central registry/factory (`Services` class) that instantiates and exposes services with consistent configuration, user context, and lifecycle management. + +5. **Workflows & Methods**: Orchestration engine that calls services to perform tasks/actions. Methods provide extensible, plugin-like actions that workflows can invoke. + +**Data/control flow**: Client or Workflow → Service Center → Service → Interface → Connector → External Tool/Database + +--- + +## Directory Overview (gateway) + +``` +gateway/ +├── modules/ +│ ├── connectors/ # Vendor-specific adapters +│ │ ├── connectorDbPostgre.py # PostgreSQL database +│ │ ├── connectorDbJson.py # JSON file-based database +│ │ ├── connectorVoiceGoogle.py # Google Cloud Speech services +│ │ ├── connectorTicketsJira.py # JIRA integration +│ │ └── connectorTicketsClickup.py # ClickUp integration +│ │ +│ ├── datamodels/ # Pydantic models defining data structures +│ │ ├── datamodelRealEstate.py +│ │ ├── datamodelChat.py +│ │ ├── datamodelAi.py +│ │ ├── datamodelUam.py # User & Mandate models +│ │ └── ... 
+│ │ +│ ├── interfaces/ # Data access layer +│ │ ├── interfaceDbRealEstateObjects.py # CRUD operations +│ │ ├── interfaceDbRealEstateAccess.py # Access control +│ │ ├── interfaceDbChatObjects.py +│ │ ├── interfaceDbChatAccess.py +│ │ ├── interfaceDbAppObjects.py +│ │ ├── interfaceDbComponentObjects.py +│ │ ├── interfaceAiObjects.py # AI operations +│ │ ├── interfaceTicketObjects.py # Ticket systems +│ │ └── interfaceVoiceObjects.py # Voice operations +│ │ +│ ├── services/ # Business-level capabilities +│ │ ├── __init__.py # Services container (Service Center) +│ │ ├── serviceAi/ # AI operations +│ │ ├── serviceChat/ # Workflow & document management +│ │ ├── serviceExtraction/ # Content extraction +│ │ ├── serviceGeneration/ # Document generation +│ │ ├── serviceNeutralization/ # Data anonymization +│ │ ├── serviceSharepoint/ # SharePoint integration +│ │ ├── serviceTicket/ # Ticket system integration +│ │ └── serviceUtils/ # Common utilities +│ │ +│ ├── workflows/ # Orchestration engine +│ │ ├── workflowManager.py # Main orchestration controller +│ │ ├── processing/ # Processing logic +│ │ │ ├── workflowProcessor.py +│ │ │ ├── core/ # Core components +│ │ │ ├── modes/ # Execution modes +│ │ │ └── shared/ # Shared utilities +│ │ └── methods/ # Extensible action methods +│ │ ├── methodBase.py +│ │ ├── methodAi.py +│ │ └── ... +│ │ +│ ├── routes/ # HTTP endpoints exposing capabilities +│ │ ├── routeChatPlayground.py +│ │ ├── routeWorkflows.py +│ │ └── ... +│ │ +│ ├── features/ # Domain-specific business logic +│ │ └── mainChatPlayground.py +│ │ +│ ├── security/ # Authentication, authorization, token management +│ │ ├── auth.py +│ │ ├── jwtService.py +│ │ ├── tokenManager.py +│ │ └── ... +│ │ +│ └── shared/ # Cross-cutting utilities +│ ├── config.py +│ ├── logging.py +│ └── ... +``` + +--- + +## 1) Connectors: Many External Tools, One Adapter Shape + +**Role**: Provide the lowest-level integration with external systems (databases, APIs, SDKs, auth, retries). 
+ +**Responsibility**: + +- **Authentication and credential handling**: Manage API keys, OAuth tokens, database credentials +- **Transport**: HTTP/WebSocket clients, connection pooling, retry logic, circuit breaking +- **Response normalization**: Map vendor-specific responses to minimal internal shapes +- **Error handling**: Transform external errors into consistent internal error structures + +**Output**: Vendor‑flavored data mapped to connector models, not directly used by workflows or services. + +**Key Guidelines**: + +- Keep connectors vendor‑specific and replaceable (e.g., `connectorDbPostgre.py` vs `connectorDbJson.py`) +- No business logic; only integration concerns and basic mapping +- Use duck typing (no formal interfaces) for flexibility +- Handle retries, timeouts, and connection management internally +- Return structured error responses, never raise exceptions to application layer + +**Example Connector Types**: + +- **Database Connectors**: PostgreSQL (`connectorDbPostgre.py`), JSON file-based (`connectorDbJson.py`) +- **Voice Connectors**: Google Cloud Speech (`connectorVoiceGoogle.py`) +- **Ticket Connectors**: JIRA (`connectorTicketsJira.py`), ClickUp (`connectorTicketsClickup.py`) + +--- + +## 2) Interfaces: Stable Contracts Over Connectors + +**Role**: Define capability‑oriented contracts (e.g., `ChatObjects`, `AppObjects`, `AiObjects`) and map connector outputs into interface DTOs. 
+ +**Responsibility**: + +- **Normalize differing vendors**: Convert vendor-specific data into consistent domain objects +- **Hide vendor peculiarities**: Abstract away implementation details behind clean, typed DTOs +- **Provide CRUD operations**: Create, Read, Update, Delete methods for domain entities +- **Enforce access control**: Apply user privilege checks and mandate-based filtering +- **Offer capability toggles**: Sensible defaults and configuration options + +**Output**: Clean, stable methods used by services (e.g., `getWorkflow()`, `createMessage()`, `call()`). + +**Interface Structure**: + +Interfaces are split into two file types: + +- **Objects Files** (`interface*Objects.py`): CRUD operations and business logic +- **Access Files** (`interface*Access.py`): Permission checking and data filtering + +**Key Guidelines**: + +- Prefer capability names over vendor names (e.g., `ChatObjects` not `PostgreChatObjects`) +- Keep interfaces small, cohesive, and testable with mocks +- Always require user context for database interfaces (enables access control) +- Use Pydantic models (datamodels) for type safety +- Apply Unified Access Management (UAM) for all database queries + +**Example Interface Types**: + +- **Database Interfaces**: `interfaceDbChatObjects`, `interfaceDbAppObjects`, `interfaceDbRealEstateObjects` +- **External System Interfaces**: `interfaceAiObjects`, `interfaceTicketObjects`, `interfaceVoiceObjects` + +--- + +## 3) Services: Business‑Level Capabilities + +**Role**: Compose one or more interfaces to implement feature‑ready operations (e.g., "answer question with web grounding", "extract and analyze documents"). 
+ +**Responsibility**: + +- **Apply business rules**: Validation, guardrails, transformations, data enrichment +- **Orchestrate multiple interfaces**: Coordinate between interfaces and other services +- **Emit domain events/metrics**: Track operations, costs, performance +- **Enforce security policies**: Apply additional security checks beyond interface layer +- **Handle complex workflows**: Multi-step operations with error recovery + +**Output**: High‑level operations that workflows and routes can call atomically. + +**Service Container Pattern**: + +All services are initialized through the `Services` container. Initialize with user context using `Services(user=current_user, workflow=current_workflow)`, then access services via `services.ai.callAiDocuments()`, `services.chat.storeMessageWithDocuments()`, etc. + +**Key Guidelines**: + +- Services depend on interfaces, not connectors directly +- Keep input/output DTOs explicit and versioned when necessary +- Services can call other services via `self.services` +- Use `PublicService` wrapper to expose only public methods +- Keep services stateless (no session state, use database for persistence) + +**Core Services**: + +- **AI Service**: AI model operations, planning, document processing +- **Chat Service**: Workflow management, message handling, document resolution +- **Extraction Service**: Multi-format document extraction and processing +- **Generation Service**: Document rendering in various formats +- **Neutralization Service**: Data anonymization for GDPR compliance +- **SharePoint Service**: SharePoint integration +- **Ticket Service**: Ticket system integration (Jira, ClickUp) +- **Utils Service**: Common utilities (config, events, time, debug) + +--- + +## 4) Centralized Service Center + +**Role**: A registry/factory (`Services` class) that instantiates and exposes services with consistent configuration and lifecycle. 
+ +**Responsibility**: + +- **Discoverability**: List/get services by capability key (e.g., `services.ai`, `services.chat`) +- **Configuration**: Environment, credentials, routing to specific vendors +- **Cross‑cutting**: User context, workflow context, interface access +- **Lifecycle management**: Initialize services with proper dependencies +- **Access control**: Provide user context to all services and interfaces + +**Usage Pattern**: + +1. Route receives request with authenticated user (via `getCurrentUser` dependency) +2. Create Services container with user context using `Services(user=currentUser)` +3. Call service method with typed input (e.g., `services.ai.callAiDocuments()`) +4. Receive typed output + +**Service Center Structure**: + +The `Services` class initializes with user and optional workflow context. It initializes interfaces via `getChatInterface()`, `getAppInterface()`, `getComponentInterface()`, and wraps all services in `PublicService` wrappers (e.g., `PublicService(AiService(self))`). + +**Key Features**: + +- **User Context**: Every service has access to `self.services.user` for access control +- **Workflow Context**: Services can access `self.services.workflow` for workflow-aware operations +- **Interface Access**: Services access interfaces via `self.services.interfaceDbChat`, etc. +- **Service Composition**: Services call other services via `self.services.otherService.method()` + +--- + +## 5) Workflows & Agent Models + +**Role**: Coordinate tasks and actions by invoking services in sequence/branches/loops. 
+ +**Responsibility**: + +- **Maintain execution state**: Track workflow progress, round/task/action counters +- **Choose actions**: Use agent models (AI) or predefined plans to determine next steps +- **Handle retries/compensation**: Retry failed tasks with improvements, rollback on failure +- **Record audit logs**: Track all workflow steps, decisions, and outcomes +- **Manage document flow**: Resolve document references, track document lineage + +**Typical Pattern**: + +1. **Ingest user intent/context**: Analyze user input, extract documents, detect language +2. **Plan next action**: Use AI to generate task plan or follow predefined JSON plan +3. **Call services via Service Center**: Invoke services to perform operations +4. **Persist outputs**: Store results, update state, decide next step +5. **Generate feedback**: Create completion messages, summarize results + +**Workflow Modes**: + +- **Actionplan Mode**: Batch planning with quality review and intelligent retry +- **Dynamic Mode**: Iterative, just-in-time action generation +- **Automation Mode**: Predefined JSON-based deterministic execution + +**Method System**: + +Workflows invoke actions through an extensible method system: + +- **Methods**: Plugin-like classes that expose actions via `@action` decorator +- **Actions**: Async methods that perform specific operations (e.g., `methodAi.process()`, `methodSharepoint.search()`) +- **Automatic Discovery**: Methods are discovered at runtime via introspection +- **Signature Generation**: Action signatures are generated for AI prompt generation + +--- + +## Standardized Interface Example (Actual Implementation) + +Interfaces like `ChatObjects` provide methods such as `getWorkflow()` and `createMessage()`. The `AiObjects` interface provides `call()` for AI model operations. Vendors like OpenAI/Anthropic implement `AiObjects` through connectors; database connectors implement `ChatObjects`. Services compose these interfaces. 
+ +--- + +## Example Service Composition (Actual Implementation) + +The `AiService.callAiDocuments()` method demonstrates service composition: + +**Steps**: +1. `ExtractionService.extractContent()` → extracts content from documents +2. `AiObjects.call()` → processes with AI model +3. `ChatService.storeWorkflowStat()` → records statistics + +**Outputs**: AI-generated content, processing statistics, cost tracking + +--- + +## Adding a New Capability + +### Step 1: Create Connector (if needed) + +Add vendor adapter in `modules/connectors/` (e.g., `connectorNewVendor.py`). The connector class should initialize with configuration, handle API calls, and return structured responses with `{"success": True/False, "data": ...}` format. + +### Step 2: Create Interface + +Implement capability contract in `modules/interfaces/` (e.g., `interfaceNewCapabilityObjects.py`). The interface class should initialize with user context, use the connector, and provide normalized methods like `performOperation()` that return domain objects. + +### Step 3: Create Service + +Compose the interface in `modules/services/serviceNewCapability/mainServiceNewCapability.py`. The service class should initialize with the services container, access the interface, and provide business-level methods like `performBusinessOperation()` that apply validation, call the interface, and enrich results. + +### Step 4: Register in Service Center + +Wire into `Services` class in `modules/services/__init__.py`. Import the service class and wrap it in `PublicService()` (e.g., `self.newCapability = PublicService(NewCapabilityService(self))`). + +### Step 5: Expose via Route (if needed) + +Add HTTP endpoint in `modules/routes/routeNewCapability.py`. Create a route handler that uses `getCurrentUser` dependency, creates a `Services` instance, calls the service method, and returns the result. + +### Step 6: Use in Workflows (if needed) + +Create method action in `modules/workflows/methods/methodNewCapability.py`. 
Inherit from `MethodBase`, use the `@action` decorator on async methods, and return `ActionResult` objects with success status and documents. + +--- + +## Adding a New Database Domain + +Adding a completely new database domain (like Real Estate, Projects, Inventory) requires creating datamodels, database interfaces, and access control. This section covers creating a new domain from scratch. + +### Overview + +A new database domain consists of: +1. **Datamodels**: Pydantic models defining data structures +2. **Database Interface Objects**: CRUD operations for domain entities +3. **Database Interface Access**: Access control and permission checking +4. **Database Configuration**: Connection settings for the new database +5. **Service Integration**: Optional service layer for business logic + +### Step 1: Create Datamodels + +Create a new datamodel file in `modules/datamodels/datamodel[Domain].py` (e.g., `datamodelRealEstate.py`, `datamodelProject.py`). + +**Structure**: +- Define Pydantic models inheriting from `BaseModel` +- Include enums for status fields and categories +- Add helper models for complex nested structures +- Use Field() with frontend metadata for UI generation +- Include standard fields: `id`, `mandateId`, `_createdBy`, `_createdAt`, `_modifiedBy`, `_modifiedAt` + +**Example Structure**: +``` +datamodel[Domain].py +├── Enums (StatusEnum, CategoryEnum, etc.) +├── Helper Models (GeoPoint, Address, etc.) 
+├── Main Entity Models
+│ ├── Entity1 (id, mandateId, fields, timestamps)
+│ ├── Entity2 (id, mandateId, fields, timestamps)
+│ └── Entity3 (id, mandateId, fields, timestamps)
+└── Relationship Models (if needed)
+```
+
+**Key Requirements**:
+- All main entities must have `id: str` (UUID)
+- All main entities must have `mandateId: str` for multi-tenant isolation
+- Include audit fields: `_createdBy`, `_createdAt`, `_modifiedBy`, `_modifiedAt`
+- Use `Field()` with `frontend_type`, `frontend_readonly`, `frontend_required` for UI metadata
+- Define relationships using ForwardRef if models reference each other
+
+**Example**:
+```
+from typing import Optional
+from pydantic import BaseModel, Field
+from enum import Enum
+import uuid
+
+class StatusEnum(str, Enum):
+    ACTIVE = "active"
+    INACTIVE = "inactive"
+    ARCHIVED = "archived"
+
+class Project(BaseModel):
+    id: str = Field(default_factory=lambda: str(uuid.uuid4()))
+    mandateId: str
+    name: str = Field(..., frontend_type="text", frontend_required=True)
+    status: StatusEnum = Field(..., frontend_type="select")
+    description: Optional[str] = Field(None, frontend_type="textarea")
+    _createdBy: Optional[str] = None
+    _createdAt: Optional[int] = None
+    _modifiedBy: Optional[str] = None
+    _modifiedAt: Optional[int] = None
+```
+
+### Step 2: Create Database Interface Objects
+
+Create `modules/interfaces/interfaceDb[Domain]Objects.py` (e.g., `interfaceDbRealEstateObjects.py`).
+
+**Structure**:
+- `[Domain]Objects` class that initializes database connector
+- CRUD methods for each entity: `create[Entity]()`, `get[Entity]()`, `get[Entities]()`, `update[Entity]()`, `delete[Entity]()`
+- Query execution method: `executeQuery()` for custom SQL queries
+- User context management: `setUserContext()`
+
+**Key Components**:
+
+**1. 
Database Initialization**:
+```
+def _initializeDatabase(self):
+    dbHost = APP_CONFIG.get("DB_[DOMAIN]_HOST", "localhost")
+    dbDatabase = APP_CONFIG.get("DB_[DOMAIN]_DATABASE", "poweron_[domain]")
+    dbUser = APP_CONFIG.get("DB_[DOMAIN]_USER")
+    dbPassword = APP_CONFIG.get("DB_[DOMAIN]_PASSWORD_SECRET")
+    dbPort = int(APP_CONFIG.get("DB_[DOMAIN]_PORT", 5432))
+
+    self.db = DatabaseConnector(
+        dbHost=dbHost,
+        dbDatabase=dbDatabase,
+        dbUser=dbUser,
+        dbPassword=dbPassword,
+        dbPort=dbPort,
+        userId=self.userId if self.userId else None,
+    )
+
+    self.db.initDbSystem()
+```
+
+**2. CRUD Pattern**:
+```
+def create[Entity](self, entity: [Entity]) -> [Entity]:
+    # Ensure mandateId is set
+    if not entity.mandateId:
+        entity.mandateId = self.mandateId
+
+    # Apply access control
+    self.access.uam([Entity], [])
+
+    # Save to database
+    self.db.recordCreate([Entity], entity.model_dump())
+
+    return entity
+
+def get[Entity](self, entityId: str) -> Optional[[Entity]]:
+    records = self.db.getRecordset(
+        [Entity],
+        recordFilter={"id": entityId}
+    )
+
+    if not records:
+        return None
+
+    # Apply access control
+    filtered = self.access.uam([Entity], records)
+
+    if not filtered:
+        return None
+
+    return [Entity](**filtered[0])
+
+def get[Entities](self, filters: Optional[Dict] = None) -> List[[Entity]]:
+    records = self.db.getRecordset([Entity], recordFilter=filters or {})
+    filtered = self.access.uam([Entity], records)
+    return [[Entity](**r) for r in filtered]
+
+def update[Entity](self, entityId: str, updates: Dict) -> Optional[[Entity]]:
+    # Check access control
+    self.access.canModify([Entity], entityId)
+
+    # Update in database
+    self.db.recordUpdate([Entity], entityId, updates)
+
+    # Return updated entity
+    return self.get[Entity](entityId)
+
+def delete[Entity](self, entityId: str) -> bool:
+    # Check access control
+    self.access.canModify([Entity], entityId)
+
+    # Delete from database
+    self.db.recordDelete([Entity], entityId)
+
+    return True
+```
+
+**3. 
Singleton Factory Pattern**: +``` +_[domain]Interfaces = {} + +def get[Domain]Interface(currentUser: Optional[User] = None) -> [Domain]Objects: + """Factory function to get or create interface instance.""" + userId = currentUser.id if currentUser else None + if userId not in _[domain]Interfaces: + _[domain]Interfaces[userId] = [Domain]Objects(currentUser) + return _[domain]Interfaces[userId] +``` + +### Step 3: Create Database Interface Access + +Create `modules/interfaces/interfaceDb[Domain]Access.py` (e.g., `interfaceDbRealEstateAccess.py`). + +**Structure**: +- `[Domain]Access` class that handles permission checking +- `uam()` method for Unified Access Management (filtering and flagging) +- `canModify()` method for write permission checking + +**Key Components**: + +**1. Access Control Class**: +``` +class [Domain]Access: + def __init__(self, currentUser: User, db: DatabaseConnector): + self.currentUser = currentUser + self.userId = currentUser.id + self.mandateId = currentUser.mandateId + self.userRole = currentUser.role + self.db = db + + def uam(self, model: Type[BaseModel], records: List[Dict]) -> List[Dict]: + """Unified Access Management: Filter records and add access flags.""" + if self.userRole == "SYSADMIN": + # SYSADMIN sees all records + filtered = records + elif self.userRole == "ADMIN": + # ADMIN sees all records in their mandate + filtered = [r for r in records if r.get("mandateId") == self.mandateId] + else: + # USER sees only their own records + filtered = [r for r in records if r.get("_createdBy") == self.userId] + + # Add access flags + for record in filtered: + record["_hideView"] = False + record["_hideEdit"] = not self.canModify(model, record.get("id")) + record["_hideDelete"] = not self.canModify(model, record.get("id")) + + return filtered + + def canModify(self, model: Type[BaseModel], recordId: str) -> bool: + """Check if user can modify a record.""" + if self.userRole == "SYSADMIN": + return True + + # Get record to check ownership + 
records = self.db.getRecordset(model, recordFilter={"id": recordId}) + if not records: + return False + + record = records[0] + + if self.userRole == "ADMIN": + # ADMIN can modify records in their mandate + return record.get("mandateId") == self.mandateId + else: + # USER can only modify their own records + return record.get("_createdBy") == self.userId +``` + +**2. Import in Objects File**: +``` +from modules.interfaces.interfaceDb[Domain]Access import [Domain]Access +``` + +### Step 4: Configure Database Connection + +Add database configuration to `config.ini`: + +``` +[Database] +DB_[DOMAIN]_HOST=localhost +DB_[DOMAIN]_DATABASE=poweron_[domain] +DB_[DOMAIN]_USER=postgres +DB_[DOMAIN]_PASSWORD_SECRET=your_password +DB_[DOMAIN]_PORT=5432 +``` + +**Database Creation**: +- The `DatabaseConnector.initDbSystem()` method automatically creates the database if it doesn't exist +- Tables are created on-demand when first accessed via `_ensureTableExists()` +- No manual database schema creation needed + +### Step 5: Register Interface in Services (Optional) + +If you need business logic, create a service that uses the interface: + +**Create Service** (`modules/services/service[Domain]/mainService[Domain].py`): +``` +class [Domain]Service: + def __init__(self, services: 'Services'): + self.services = services + + def get[Domain]Interface(self) -> [Domain]Objects: + """Get interface instance with current user context.""" + return get[Domain]Interface(self.services.workflow.currentUser) + + def performBusinessOperation(self, ...): + """Business-level method that uses interface.""" + interface = self.get[Domain]Interface() + # Apply business logic + # Call interface methods + # Return enriched results +``` + +**Register in Service Center** (`modules/services/__init__.py`): +``` +from modules.services.service[Domain].mainService[Domain] import [Domain]Service + +class Services: + def __init__(self, ...): + ... 
+ self.[domain] = PublicService([Domain]Service(self)) +``` + +### Step 6: Create Routes (Optional) + +If you need HTTP endpoints, create `modules/routes/route[Domain].py`: + +``` +from fastapi import APIRouter, Depends +from modules.features.shared.dependencies import getCurrentUser +from modules.datamodels.datamodelUam import User +from modules.services import Services +from modules.interfaces.interfaceDb[Domain]Objects import get[Domain]Interface + +router = APIRouter() + +@router.get("/[domain]/entities") +async def getEntities( + currentUser: User = Depends(getCurrentUser) +): + """Get all entities.""" + interface = get[Domain]Interface(currentUser) + entities = interface.get[Entities]() + return {"success": True, "data": [e.model_dump() for e in entities]} +``` + +### Step 7: Use in Workflows (Optional) + +If you need workflow actions, create `modules/workflows/methods/method[Domain].py`: + +``` +from modules.workflows.methods.methodBase import MethodBase +from modules.workflows.methods.methodBase import action +from modules.workflows.methods.methodBase import ActionResult + +class Method[Domain](MethodBase): + name = "[domain]" + description = "[Domain] operations" + + def __init__(self, services): + super().__init__(services) + + @action + async def performOperation(self, parameters: Dict[str, Any]) -> ActionResult: + """Perform domain operation.""" + interface = get[Domain]Interface(self.services.workflow.currentUser) + # Perform operation + # Return ActionResult +``` + +### Complete Example: Real Estate Domain + +**Datamodels** (`datamodelRealEstate.py`): +- `Projekt`, `Parzelle`, `Dokument`, `Kanton`, `Gemeinde`, `Land` +- `GeoPunkt`, `GeoPolylinie` (geographic data) +- `Kontext` (context/notes) +- Enums: `StatusProzess`, `DokumentTyp`, `GeoTag` + +**Interface Objects** (`interfaceDbRealEstateObjects.py`): +- `RealEstateObjects` class +- CRUD methods for all entities +- `executeQuery()` for custom SQL +- Database initialization with `DB_REALESTATE_*` 
config + +**Interface Access** (`interfaceDbRealEstateAccess.py`): +- `RealEstateAccess` class +- `uam()` method for filtering +- `canModify()` method for permissions + +**Configuration**: +``` +DB_REALESTATE_HOST=localhost +DB_REALESTATE_DATABASE=poweron_realestate +DB_REALESTATE_USER=postgres +DB_REALESTATE_PASSWORD_SECRET=... +DB_REALESTATE_PORT=5432 +``` + +### Best Practices + +**1. Naming Conventions**: +- Datamodel file: `datamodel[Domain].py` (PascalCase domain name) +- Interface Objects: `interfaceDb[Domain]Objects.py` +- Interface Access: `interfaceDb[Domain]Access.py` +- Database config: `DB_[DOMAIN]_*` (uppercase with underscores) + +**2. Mandate Isolation**: +- Always set `mandateId` on create operations +- Filter by `mandateId` in access control +- Never expose data across mandates + +**3. Access Control**: +- Always call `self.access.uam()` before returning records +- Always call `self.access.canModify()` before write operations +- Respect role hierarchy: SYSADMIN > ADMIN > USER + +**4. Error Handling**: +- Validate user context before operations +- Handle missing records gracefully (return None, not raise) +- Log errors with context (user ID, mandate ID, operation) + +**5. Database Management**: +- Let `DatabaseConnector` handle table creation automatically +- Use `_ensureTableExists()` for supporting tables with foreign keys +- Don't manually create database schemas + +**6. 
Testing**: +- Test CRUD operations with different user roles +- Test mandate isolation (users can't see other mandates' data) +- Test access control (users can't modify others' records) + +### Common Patterns + +**Pattern 1: Simple Domain (Single Entity)** +- One main entity model +- Basic CRUD operations +- Standard access control + +**Pattern 2: Hierarchical Domain (Parent-Child)** +- Multiple related entities +- Foreign key relationships +- Cascade operations (delete children when parent deleted) + +**Pattern 3: Complex Domain (Multiple Entities + Relationships)** +- Multiple entities with relationships +- Supporting tables (lookup tables, reference data) +- Custom query methods for complex operations + +--- + +## Security & Governance + +### Access Control + +- **RBAC**: Role-based access control enforced at Interface layer (`interface*Access.py`) + - **SYSADMIN**: Full system access, all mandates + - **ADMIN**: Full access within mandate + - **USER**: Access to own records only +- **UAM**: Unified Access Management filters recordsets by privilege and adds access flags (`_hideView`, `_hideEdit`, `_hideDelete`) + +### Secrets Management + +- **Centralized Configuration**: Credentials stored in `config.ini` with encryption +- **Interface-Level Access**: Connectors receive credentials through interfaces, not directly +- **No Leakage**: Credentials never exposed to workflows or services + +### Audit + +- **Automatic Tracking**: All database operations include `_createdBy`, `_modifiedBy`, `_createdAt`, `_modifiedAt` +- **Workflow Logging**: Workflow steps logged via `ChatService.storeLog()` +- **Security Events**: Authentication events logged via `auditLogger.logSecurityEvent()` + +### Quotas + +- **Rate Limiting**: Applied at route level using `slowapi.Limiter` +- **Token Refresh Limits**: OAuth token refresh limited to 3 attempts per hour per connection +- **Cost Tracking**: AI operations track costs via `ChatService.storeWorkflowStat()` + +--- + +## Observability 
+ +### Structured Logging + +- **Layer-Specific Loggers**: Each layer uses module-specific loggers (e.g., `logging.getLogger("modules.services.serviceAi")`) +- **Context Information**: Logs include user ID, workflow ID, operation context +- **Error Details**: Exceptions logged with full stack traces and context + +### Tracing + +- **Operation IDs**: Long-running operations use unique operation IDs for tracking +- **Progress Logging**: `ChatService.progressLogStart()`, `progressLogUpdate()`, `progressLogFinish()` +- **Workflow State**: Workflow state persisted to database for debugging + +### Metrics + +- **Per-Capability Tracking**: Services track operation counts, costs, processing time +- **Workflow Statistics**: `ChatStat` records track bytes sent/received, error counts, prices +- **Performance Monitoring**: Processing time tracked for all AI calls and service operations + +--- + +## Minimal Request Lifecycle + +```mermaid +sequenceDiagram + participant Client + participant Route + participant Services as Service Center + participant Service + participant Interface + participant Connector + participant External as External System/DB + + Client->>Route: HTTP Request + Route->>Route: Authenticate (getCurrentUser) + Route->>Services: Create(user, workflow) + Services->>Services: Initialize interfaces + Services->>Services: Initialize services + Services-->>Route: services instance + + Route->>Service: services.capability.operation() + Service->>Interface: interface.method(params) + Interface->>Interface: Apply access control (UAM) + Interface->>Connector: connector.operation(params) + Connector->>External: API call / DB query + External-->>Connector: Response + Connector-->>Interface: Normalized data + Interface-->>Service: Domain object + Service-->>Services: Business result + Services-->>Route: Result + Route-->>Client: HTTP Response +``` + +**Steps**: +1. Route receives request or workflow triggers an action +2. 
Service Center resolves service instance and validates user context +3. Service executes using interfaces; interfaces call connectors +4. Results propagate back; logs/metrics recorded; workflow advances state + +--- + +## Benefits + +- **Replace vendors without breaking services**: Interfaces shield changes (e.g., swap PostgreSQL for JSON connector) +- **Accelerate feature delivery**: Services are reusable building blocks +- **Improve reliability and security**: Centralized policies and observability +- **Empower workflows/agents**: Perform complex tasks with simple, typed calls +- **Type safety**: Pydantic models ensure data consistency +- **Testability**: Clear boundaries enable mocking and unit testing +- **Maintainability**: Separation of concerns makes code easier to understand and modify + +--- + +## Quick Map to Code (for orientation) + +- `gateway/modules/connectors/` → Vendor adapters (e.g., `connectorDbPostgre.py`, `connectorVoiceGoogle.py`) +- `gateway/modules/interfaces/` → Capability contracts (e.g., `interfaceDbChatObjects.py`, `interfaceAiObjects.py`) +- `gateway/modules/services/` → Composed capabilities (e.g., `serviceAi/mainServiceAi.py`, `serviceChat/mainServiceChat.py`) +- `gateway/modules/workflows/` → Orchestrations/agents (e.g., `workflowManager.py`, `methods/methodAi.py`) +- `gateway/modules/routes/` → HTTP endpoints (e.g., `routeChatPlayground.py`, `routeWorkflows.py`) + +This framework is the backbone for market customer journey features: build once as services, reuse everywhere in workflows. 
+ +--- + +## Visuals + +### Layered Architecture + +```mermaid +flowchart TB + subgraph ClientOrWorkflow[Client / Workflow Engine] + C[Feature or Agent Task] + end + + subgraph ServiceCenter[Service Center] + SC[Services Container\nUser Context, Interfaces, Services] + end + + subgraph Services[Services] + S1[AI Service] + S2[Chat Service] + S3[Extraction Service] + S4[Generation Service] + end + + subgraph Interfaces[Interfaces] + I1[ChatObjects] + I2[AppObjects] + I3[AiObjects] + I4[ComponentObjects] + end + + subgraph Connectors[Connectors] + K1[PostgreSQL Connector] + K2[JSON Connector] + K3[Google Speech Connector] + K4[AI Provider Connectors] + end + + subgraph External[External Systems] + E1[(PostgreSQL Database)] + E2[Google Cloud APIs] + E3[AI APIs\nOpenAI, Anthropic] + end + + C --> SC --> S1 & S2 & S3 & S4 + S1 --> I3 + S2 --> I1 + S3 --> I4 + S4 --> I1 & I4 + + I1 --> K1 + I2 --> K1 + I3 --> K4 + I4 --> K1 + + K1 --> E1 + K2 --> E1 + K3 --> E2 + K4 --> E3 +``` + +### Request / Action Sequence + +```mermaid +sequenceDiagram + participant Client as Client / Workflow + participant SC as Service Center + participant S as Service + participant I as Interface + participant AC as Access Control + participant K as Connector + participant EXT as External Tool/DB + + Client->>SC: Request capability (e.g., services.ai.callAiDocuments) + SC->>SC: Initialize with user context + SC->>S: Get service instance + S->>I: Call normalized method (e.g., aiObjects.call) + I->>AC: Check permissions (UAM) + AC-->>I: Permission granted + I->>K: Prepare vendor-specific request + K->>EXT: API/DB call (auth, retries) + EXT-->>K: Response + K-->>I: Map to normalized DTO + I-->>S: Return normalized result + S->>S: Apply business logic + S-->>SC: Business output (validated, enriched) + SC-->>Client: Typed response, telemetry recorded +``` + +### Service Center Components + +```mermaid +graph LR + subgraph SC[Service Center - Services Class] + REG[Service Registry] + CTX[User Context] 
+ WF[Workflow Context] + INT[Interface Access] + FAC[Service Factory] + end + + REG --> FAC + CTX --> FAC + WF --> FAC + INT --> FAC + + FAC -->|builds| Svc[(Service Instances)] + + subgraph Layers[Below Services] + IF[Interfaces] + CON[Connectors] + end + + Svc --> IF --> CON + + subgraph Services[Services] + AI[AI Service] + Chat[Chat Service] + Extract[Extraction Service] + Gen[Generation Service] + end + + Svc --> AI & Chat & Extract & Gen +``` + +### Workflow State Machine (Conceptual) + +```mermaid +stateDiagram-v2 + [*] --> Plan + + Plan: Decide next action (AI or rules) + Plan --> CallService: needs external capability + Plan --> Done: no more steps + + CallService: Invoke via Service Center + CallService --> HandleResult + + HandleResult: Persist, evaluate, log + HandleResult --> Plan: more work + HandleResult --> Done: goal achieved + + Done --> [*] +``` + +### Interface Access Control Flow + +```mermaid +sequenceDiagram + participant Service + participant Interface as Interface Objects + participant Access as Access Control + participant Connector + participant DB as Database + + Service->>Interface: CRUD Operation + Interface->>Access: Check permissions (uam) + Access->>Access: Check user privilege + Access->>Access: Filter by mandateId + Access->>Access: Check ownership (_createdBy) + Access->>Access: Add access flags + Access-->>Interface: Filtered data + flags + Interface->>Connector: Execute query + Connector->>DB: SQL Query + DB-->>Connector: Results + Connector-->>Interface: Raw data + Interface->>Interface: Transform to datamodel + Interface-->>Service: Domain objects with access flags +``` + +--- + +## Development Best Practices + +### 1. Always Use Service Center + +✅ **GOOD**: Use Service Center via `Services(user=current_user)` and call `services.ai.callAiDocuments()`, `services.chat.storeMessageWithDocuments()`, etc. 
+ +❌ **BAD**: Direct interface access bypasses the service layer (e.g., calling `getChatInterface(user).getWorkflow()` directly). + +### 2. Keep Services Stateless + +✅ **GOOD**: Stateless services use the database for persistence (e.g., `self.services.interfaceDbApp.getCache()`). + +❌ **BAD**: Stateful services store data in instance variables (e.g., `self.cache = {}`). + +### 3. Use Datamodels for Type Safety + +✅ **GOOD**: Use Pydantic models like `ChatWorkflow`, `ChatMessage` from `modules.datamodels.datamodelChat`. Create instances with `ChatWorkflow(**data)` and return typed results. + +❌ **BAD**: Use raw dictionaries without type safety. + +### 4. Apply Access Control + +✅ **GOOD**: Interfaces apply UAM automatically (e.g., `self.interfaceDbChat.getWorkflows()` filters by user privilege). + +❌ **BAD**: Bypass access control by calling connectors directly (e.g., `self.connector.getRecordset()` has no filtering). + +### 5. Handle Errors Gracefully + +✅ **GOOD**: Return structured errors with `{"success": True/False, "data": ..., "error": ...}` format. Log exceptions with context. + +❌ **BAD**: Let exceptions propagate to callers without handling. + +--- + +## Workflow Engineering + +Workflow engineering is the process of designing, building, and maintaining workflows that orchestrate multi-step tasks using the gateway's service layer. Workflows transform user requests into structured execution plans, coordinate action execution, and manage state throughout the process. + +### Understanding Workflow Architecture + +Workflows operate at the highest level of the gateway architecture, orchestrating services to accomplish complex goals. 
They provide: + +- **Intelligent Planning**: AI-powered task breakdown and action generation +- **State Management**: Track progress, maintain context, and handle errors +- **Document Flow**: Manage document references and lineage throughout execution +- **Adaptive Execution**: Retry failed tasks, learn from results, improve over time +- **Multi-Mode Support**: Different execution strategies for different use cases + +### Workflow Components + +**WorkflowManager**: Main orchestration controller that manages workflow lifecycle (`workflowStart()`, `workflowStop()`, `_workflowProcess()`) + +**WorkflowProcessor**: Delegates to mode-specific implementations (Actionplan, Dynamic, Automation) + +**TaskPlanner**: Generates structured task plans from user input using AI + +**ActionExecutor**: Executes individual actions by invoking methods from the global methods catalog + +**MessageCreator**: Creates and persists workflow messages with document associations + +**Method System**: Extensible plugin framework for defining reusable actions + +### Workflow Execution Pipeline + +Every workflow follows a four-stage pipeline: + +1. **Send First Message**: Analyze user intent, extract documents, detect language, normalize request +2. **Plan Tasks**: Generate structured task plan with objectives, success criteria, and dependencies +3. **Execute Tasks**: Execute each task sequentially, maintaining context between tasks +4. 
**Process Results**: Generate feedback, create completion message, update workflow status + +--- + +## Workflow Modes + +The gateway supports three distinct workflow modes, each optimized for different use cases: + +### Actionplan Mode + +**Strategy**: Batch planning with quality review and intelligent retry + +**Characteristics**: +- Plans all actions upfront before execution begins +- Reviews results against success criteria after execution +- Retries failed tasks up to 3 times with cumulative improvements +- Best for complex multi-step workflows with specific requirements + +**Use Cases**: Data processing pipelines, document analysis with requirements, complex transformations + +**Execution Flow**: +1. Generate complete action plan for entire task +2. Execute all actions sequentially +3. Review results against success criteria +4. Retry with improvements if criteria not met +5. Return final result + +### Dynamic Mode + +**Strategy**: Iterative, just-in-time action generation + +**Characteristics**: +- Generates one action at a time based on current state +- Each action's result influences the next action +- Workflow path emerges organically based on findings +- Limited by `maxSteps` (default: 5) to prevent infinite loops + +**Use Cases**: Research workflows, exploratory data analysis, iterative problem solving, uncertain paths + +**Execution Flow**: +1. Generate single next action based on current context +2. Execute action immediately +3. Evaluate if task objective is met +4. Continue if objective not met and under max steps +5. 
Return result when objective met or max steps reached + +### Automation Mode + +**Strategy**: Predefined JSON-based deterministic execution + +**Characteristics**: +- No AI planning or action generation +- User provides complete task and action plan in JSON format +- Deterministic execution (same input always produces same sequence) +- Fastest execution time (no planning overhead) + +**Use Cases**: Repeated workflows, automated jobs, batch processing, template execution, routine operations + +**Execution Flow**: +1. Parse predefined JSON plan from user input +2. Execute actions in order specified in JSON +3. Collect results without review +4. Return execution summary + +--- + +## Building New Workflows + +New workflows are typically built using Actionplan or Dynamic modes, where AI generates the execution plan based on user input. This section covers how to create workflows that adapt to user requests. + +### Starting a New Workflow + +**Entry Point**: `WorkflowManager.workflowStart()` + +**Required Parameters**: +- `userInput`: UserInputRequest containing prompt, file IDs, and language +- `workflowMode`: WorkflowModeEnum (WORKFLOW_ACTIONPLAN, WORKFLOW_DYNAMIC, or WORKFLOW_AUTOMATION) +- `workflowId`: Optional ID to continue existing workflow + +**Process**: +1. Create or load `ChatWorkflow` record in database +2. Initialize workflow state (status="running", currentRound=1, counters=0) +3. Discover and update method instances with current services +4. Launch asynchronous processing pipeline +5. 
Return workflow object immediately (non-blocking) + +**Example Flow**: +``` +Route → chatStart() → WorkflowManager.workflowStart() → _workflowProcess() +``` + +### Workflow Input Processing + +The first stage (`_sendFirstMessage()`) processes user input: + +**Intent Analysis**: AI analyzes user input to extract: +- Detected language (ISO 639-1 code) +- Normalized request (full, explicit restatement) +- Core intent (primary goals and requirements) +- Bulky context items (large data blocks extracted as separate documents) + +**Document Management**: +- Processes user-uploaded files (converts file IDs to ChatDocument objects) +- Extracts large content blocks from prompt (code snippets, tables, lists) +- Creates document records in component database +- Applies neutralization if enabled in user settings +- Associates documents with labels (e.g., "round1_usercontext") + +**Message Creation**: Creates first message with role="user", status="first", and all associated documents + +### Task Planning + +The second stage (`_planTasks()`) generates structured task plans: + +**Planning Process**: +1. Uses cleaned user intent from previous stage +2. Calls `WorkflowProcessor.generateTaskPlan()` which delegates to mode-specific implementation +3. For Actionplan/Dynamic modes: Uses `TaskPlanner.generateTaskPlan()` with AI +4. 
For Automation mode: Parses predefined JSON plan from user input + +**TaskPlan Structure**: +- `overview`: High-level description of the plan +- `tasks`: Array of TaskStep objects +- `userMessage`: Original user request + +**TaskStep Structure**: +- `id`: Unique task identifier +- `objective`: What the task should accomplish +- `dependencies`: Array of task IDs this task depends on +- `successCriteria`: Array of measurable criteria for task completion +- `estimatedComplexity`: Complexity estimate (simple, medium, complex) +- `userMessage`: User-facing description of the task + +**AI Planning**: Uses `services.ai.callAiPlanning()` with quality settings to generate detailed task breakdown. The AI receives: +- User prompt and normalized intent +- Available methods and actions (from method discovery) +- Available documents and connections +- Workflow context and history + +### Task Execution + +The third stage (`_executeTasks()`) executes each task sequentially: + +**For Each Task**: +1. Build `TaskContext` containing: + - Task details (objective, success criteria, dependencies) + - Workflow state (current round, task, action numbers) + - Available documents (from current and previous rounds) + - Available connections (user's OAuth connections) + - Previous task results (for context and dependencies) + +2. Call `WorkflowProcessor.executeTask()` which delegates to mode-specific execution + +3. Receive `TaskResult` with: + - `success`: Boolean indicating task completion status + - `feedback`: Human-readable summary of what was accomplished + - `documents`: List of ChatDocument objects created during task execution + - `reviewResult`: Optional ReviewResult if quality review was performed + +4. Prepare task handover data for subsequent tasks + +5. 
Accumulate results for use by dependent tasks + +**Mode-Specific Execution**: + +**Actionplan Mode**: +- Generates complete action plan for entire task upfront +- Executes all actions sequentially +- Reviews results against success criteria +- Retries with improvements if criteria not met (max 3 attempts) + +**Dynamic Mode**: +- Generates single next action based on current state +- Executes action immediately +- Evaluates if task objective is met +- Continues generating actions until objective met or max steps reached + +**Automation Mode**: +- Uses predefined action list from JSON plan +- Executes actions in order specified +- No retry logic or quality review + +### Action Execution + +Actions are executed by `ActionExecutor.executeSingleAction()`: + +**Process**: +1. Resolve parameters (document references, connections, etc.) +2. Look up method in global methods catalog +3. Validate action exists within method +4. Invoke action method with parameters +5. Extract result text from ActionDocument objects +6. Convert ActionDocuments to ChatDocuments for persistence +7. Create action completion message +8. Return ActionResult with success status and documents + +**Action Invocation**: Actions are invoked using compound names (e.g., "ai.process", "sharepoint.search") or separate method/action names. + +**Document References**: Actions receive document references in three formats: +- `docItem::`: Single document by ID +- `docList: