From bacf2a96867060cf12ab8d282d89a76f82fb99bc Mon Sep 17 00:00:00 2001
From: Christopher Gondek
Date: Wed, 3 Sep 2025 12:19:07 +0200
Subject: [PATCH 01/27] feat: add TicketInterface; add CRUD connector JIRA

---
 modules/connectors/connectorTicketJira.py    | 240 +++++++++++++++++++
 modules/interfaces/interfaceTicketModel.py   |  26 ++
 modules/interfaces/interfaceTicketObjects.py |  10 +
 3 files changed, 276 insertions(+)
 create mode 100644 modules/connectors/connectorTicketJira.py
 create mode 100644 modules/interfaces/interfaceTicketModel.py
 create mode 100644 modules/interfaces/interfaceTicketObjects.py

diff --git a/modules/connectors/connectorTicketJira.py b/modules/connectors/connectorTicketJira.py
new file mode 100644
index 00000000..f2edd200
--- /dev/null
+++ b/modules/connectors/connectorTicketJira.py
@@ -0,0 +1,240 @@
+"""Jira connector for CRUD operations."""
+
+from dataclasses import dataclass
+import logging
+import aiohttp
+import json
+from typing import Optional
+
+from modules.interfaces.interfaceTicketModel import (
+    TicketBase,
+    TicketFieldAttribute,
+    Task,
+)
+
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class ConnectorTicketJira(TicketBase):
+    jira_username: str
+    jira_api_token: str
+    jira_url: str
+    project_code: str
+    issue_type: str
+
+    @classmethod
+    async def create(
+        cls,
+        *,
+        jira_username: str,
+        jira_api_token: str,
+        jira_url: str,
+        project_code: str,
+        issue_type: str,
+    ):
+        return ConnectorTicketJira(
+            jira_username=jira_username,
+            jira_api_token=jira_api_token,
+            jira_url=jira_url,
+            project_code=project_code,
+            issue_type=issue_type,
+        )
+
+    async def read_attributes(self) -> list[TicketFieldAttribute]:
+        """
+        Read field attributes from Jira by querying for a single issue
+        and extracting the field mappings.
+ + Returns: + list[TicketFieldAttribute]: List of field attributes with names and IDs + """ + jql_query = f"project={self.project_code} AND issuetype={self.issue_type}" + + # Prepare the request URL and parameters + url = f"{self.jira_url}/rest/api/2/search" + params = {"jql": jql_query, "maxResults": 1, "expand": "names"} + + # Prepare authentication + auth = aiohttp.BasicAuth(self.jira_username, self.jira_api_token) + + try: + async with aiohttp.ClientSession() as session: + async with session.get(url, params=params, auth=auth) as response: + if response.status != 200: + error_text = await response.text() + logger.error( + f"Jira API request failed with status {response.status}: {error_text}" + ) + raise Exception( + f"Jira API request failed with status {response.status}" + ) + + data = await response.json() + + # Extract issues and field names + issues = data.get("issues", []) + field_names = data.get("names", {}) + + if not issues: + logger.warning(f"No issues found for query: {jql_query}") + return [] + + # Extract field attributes from the first issue + attributes = [] + issue = issues[0] + fields = issue.get("fields", {}) + + for field_id, value in fields.items(): + field_name = field_names.get(field_id, field_id) + attributes.append( + TicketFieldAttribute(field_name=field_name, field=field_id) + ) + + logger.info( + f"Successfully retrieved {len(attributes)} field attributes from Jira" + ) + return attributes + + except aiohttp.ClientError as e: + logger.error(f"HTTP client error while fetching Jira attributes: {str(e)}") + raise Exception(f"Failed to connect to Jira: {str(e)}") + except json.JSONDecodeError as e: + logger.error(f"Failed to parse Jira API response: {str(e)}") + raise Exception(f"Invalid response from Jira API: {str(e)}") + except Exception as e: + logger.error(f"Unexpected error while fetching Jira attributes: {str(e)}") + raise + + async def read_tasks(self, *, limit: int = 0) -> list[Task]: + """ + Read tasks from Jira with pagination support. + + Args: + limit: Maximum number of tasks to retrieve. 0 means no limit. + + Returns: + list[Task]: List of tasks with their data + """ + jql_query = f"project={self.project_code} AND issuetype={self.issue_type}" + + # Initialize variables for pagination + start_at = 0 + max_results = 50 + total = 1 # Initialize with a value greater than 0 to enter the loop + tasks = [] + + # Prepare authentication + auth = aiohttp.BasicAuth(self.jira_username, self.jira_api_token) + url = f"{self.jira_url}/rest/api/2/search" + + try: + async with aiohttp.ClientSession() as session: + while start_at < total and (limit == 0 or len(tasks) < limit): + # Prepare request parameters + params = { + "jql": jql_query, + "startAt": start_at, + "maxResults": max_results, + } + + headers = {"Content-Type": "application/json"} + + async with session.get( + url, params=params, auth=auth, headers=headers + ) as response: + if response.status != 200: + error_text = await response.text() + logger.error( + f"Failed to fetch tasks from Jira. 
Status code: {response.status}, Response: {error_text}" + ) + break + + data = await response.json() + issues = data.get("issues", []) + total = data.get("total", 0) + + for issue in issues: + # Create task with all issue data + task_data = { + "id": issue.get("id"), + "key": issue.get("key"), + "fields": issue.get("fields", {}), + "self": issue.get("self"), + "expand": issue.get("expand", ""), + } + + task = Task(data=task_data) + tasks.append(task) + + # Check limit + if limit > 0 and len(tasks) >= limit: + break + + start_at += max_results + logger.debug(f"Issues packages reading: {len(tasks)}") + + logger.info(f"JIRA issues read: {len(tasks)}") + return tasks + + except aiohttp.ClientError as e: + logger.error(f"HTTP client error while fetching Jira tasks: {str(e)}") + raise Exception(f"Failed to connect to Jira: {str(e)}") + except json.JSONDecodeError as e: + logger.error(f"Failed to parse Jira API response: {str(e)}") + raise Exception(f"Invalid response from Jira API: {str(e)}") + except Exception as e: + logger.error(f"Unexpected error while fetching Jira tasks: {str(e)}") + raise + + async def write_tasks(self, tasklist: list[Task]) -> None: + """ + Write/update tasks to Jira. + + Args: + tasklist: List of Task objects containing task data to update + """ + headers = {"Accept": "application/json", "Content-Type": "application/json"} + auth = aiohttp.BasicAuth(self.jira_username, self.jira_api_token) + + try: + async with aiohttp.ClientSession() as session: + for task in tasklist: + task_data = task.data + task_id = task_data.get("id") or task_data.get("key") + + if not task_id: + logger.warning("Task missing ID or key, skipping update") + continue + + # Extract fields to update from task data + fields = task_data.get("fields", {}) + + if not fields: + logger.debug(f"No fields to update for task {task_id}") + continue + + # Prepare update data + update_data = {"fields": fields} + + # Make the update request + url = f"{self.jira_url}/rest/api/2/issue/{task_id}" + + async with session.put( + url, json=update_data, headers=headers, auth=auth + ) as response: + if response.status == 204: + logger.info(f"JIRA task {task_id} updated successfully.") + else: + error_text = await response.text() + logger.error( + f"JIRA failed to update task {task_id}: {response.status} - {error_text}" + ) + + except aiohttp.ClientError as e: + logger.error(f"HTTP client error while updating Jira tasks: {str(e)}") + raise Exception(f"Failed to connect to Jira: {str(e)}") + except Exception as e: + logger.error(f"Unexpected error while updating Jira tasks: {str(e)}") + raise diff --git a/modules/interfaces/interfaceTicketModel.py b/modules/interfaces/interfaceTicketModel.py new file mode 100644 index 00000000..151eabac --- /dev/null +++ b/modules/interfaces/interfaceTicketModel.py @@ -0,0 +1,26 @@ +"""Base class for ticket classes.""" + +from typing import Any, Dict +from pydantic import BaseModel, Field +from abc import ABC, abstractmethod + + +class TicketFieldAttribute(BaseModel): + field_name: str = Field(description="Human-readable field name") + field: str = Field(description="JIRA field ID/key") + + +class Task(BaseModel): + # A very flexible approach for now. Might want to be more strict in the future. + data: Dict[str, Any] = Field(default_factory=dict, description="Task data") + + +class TicketBase(ABC): + @abstractmethod + async def read_attributes(self) -> list[TicketFieldAttribute]: ... + + @abstractmethod + async def read_tasks(self) -> list[Task]: ... 
+ + @abstractmethod + async def write_tasks(self, tasklist: list[Task]) -> None: ... diff --git a/modules/interfaces/interfaceTicketObjects.py b/modules/interfaces/interfaceTicketObjects.py new file mode 100644 index 00000000..e1ac75a8 --- /dev/null +++ b/modules/interfaces/interfaceTicketObjects.py @@ -0,0 +1,10 @@ +from dataclasses import dataclass + + +SUPPORTED_SYSTEMS = ["jira"] + + +@dataclass(slots=True) +class TicketInterface: + # TODO: user must create instance of Ticket connector + ticketConnector = None From f1f6bd210b8fc9ebbbfadec41b445ea2c063522e Mon Sep 17 00:00:00 2001 From: Christopher Gondek Date: Wed, 3 Sep 2025 17:16:42 +0200 Subject: [PATCH 02/27] feat: add sharepoint; jira connections --- modules/connectors/connectorSharepoint.py | 55 +++++++++++++++++++ modules/interfaces/interfaceTicketObjects.py | 46 +++++++++++++++- modules/routes/routeJira.py | 58 ++++++++++++++++++++ requirements.txt | 1 + 4 files changed, 157 insertions(+), 3 deletions(-) create mode 100644 modules/connectors/connectorSharepoint.py create mode 100644 modules/routes/routeJira.py diff --git a/modules/connectors/connectorSharepoint.py b/modules/connectors/connectorSharepoint.py new file mode 100644 index 00000000..33e220a0 --- /dev/null +++ b/modules/connectors/connectorSharepoint.py @@ -0,0 +1,55 @@ +"""Connector for CRUD sharepoint operations.""" + +from dataclasses import dataclass +from office365.sharepoint.client_context import ClientContext + + +@dataclass +class ConnectorSharepoint: + ctx: ClientContext + + @classmethod + async def create(cls, ctx: ClientContext) -> "ConnectorSharepoint": + """Creates an instance of the Sharepoint connector. + + Params: + ctx: The ClientContext instance. + + Returns: + ConnectorSharepoint: An instance of the Sharepoint connector. + """ + return cls(ctx=ctx) + + @classmethod + def get_client_context_from_username_password( + cls, site_url: str, username: str, password: str + ) -> ClientContext: + """Creates a ClientContext instance from username and password. + + Params: + site_url: The URL of the SharePoint site. + username: The username for authentication. + password: The password for authentication. + + Returns: + ClientContext: An instance of the ClientContext. + """ + return ClientContext(site_url).with_user_credentials(username, password) + + @classmethod + def get_client_context_from_app( + cls, site_url: str, client_id: str, client_secret: str + ) -> ClientContext: + """Creates a ClientContext instance from client ID and client secret. + + Params: + site_url: The URL of the SharePoint site. + client_id: The client ID for authentication. + client_secret: The client secret for authentication. + + Returns: + ClientContext: An instance of the ClientContext. 
+ """ + return ClientContext(site_url).with_client_credentials( + client_id=client_id, client_secret=client_secret + ) diff --git a/modules/interfaces/interfaceTicketObjects.py b/modules/interfaces/interfaceTicketObjects.py index e1ac75a8..3eb7a2bd 100644 --- a/modules/interfaces/interfaceTicketObjects.py +++ b/modules/interfaces/interfaceTicketObjects.py @@ -1,10 +1,50 @@ from dataclasses import dataclass +from shareplum import Site +from shareplum import Office365 +from shareplum.site import Version + +from modules.interfaces.interfaceTicketModel import TicketBase SUPPORTED_SYSTEMS = ["jira"] @dataclass(slots=True) -class TicketInterface: - # TODO: user must create instance of Ticket connector - ticketConnector = None +class TicketSharepointSyncInterface: + ticketConnector: TicketBase + task_sync_definition: dict + + # TODO: shareplum instance + + @classmethod + async def create( + cls, + ticket_connector: TicketBase, + ) -> "TicketSharepointSyncInterface": + instance = cls() + instance.ticketConnector = ticket_connector + return instance + + # TODO: 1. Read JIRA tickets + # TODO: 2. Transform tasks according to task_sync_definition (get_task_object) l. 79ff + + # TODO: 3. Create export file: Save transformed tasks to a timestamped export file in sharepoint + # - maybe not needed? + + # TODO: 4. Backup current main sync file + + # TODO: 5. Compare JIRA data (export file) with current main sync file and update line by line + # - update GET only + # - important so that we don't overwrite the changes from SELISE in the main sync file + + # TODO: 6. Take PUT changes from the main sync file and write it back to JIRA. + + # TODO: Write file to sharepoint folder + # TODO: Remove file from sharepoint folder + # TODO: Rename file in sharepoint folder + + # Next steps: + # - Complete connectorSharepoint CRUD-ish + # - pytest sharepoint connector + # - pytest JIRA connector + # - connect logic here... 
diff --git a/modules/routes/routeJira.py b/modules/routes/routeJira.py new file mode 100644 index 00000000..0a1b8195 --- /dev/null +++ b/modules/routes/routeJira.py @@ -0,0 +1,58 @@ +# Configure logger +import logging +from fastapi import APIRouter + +from modules.connectors.connectorTicketJira import ConnectorTicketJira + + +logger = logging.getLogger(__name__) + +router = APIRouter( + prefix="/api/users", + tags=["Manage Users"], +) + + +@router.post("/sync/delta-group") +async def sync_jira(): + logger.info("Syncing Jira issues...") + # Implement synchronization logic here + + jira_username = None + jira_api_token = None + sharepoint_client_id = None + sharepoint_client_secret = None + jira_url = "https://deltasecurity.atlassian.net" + project_code = "DCS" + issue_type = "Task" + task_sync_definition = { + # key=excel-header, [get:jira>excel | put: excel>jira, jira-xml-field-list] + "ID": ["get", ["key"]], + "Module Category": ["get", ["fields", "customfield_10058", "value"]], + "Summary": ["get", ["fields", "summary"]], + "Description": ["get", ["fields", "description"]], + "References": ["get", ["fields", "customfield_10066"]], + "Priority": ["get", ["fields", "priority", "name"]], + "Issue Status": ["get", ["fields", "customfield_10062"]], + "Assignee": ["get", ["fields", "assignee", "displayName"]], + "Issue Created": ["get", ["fields", "created"]], + "Due Date": ["get", ["fields", "duedate"]], + "DELTA Comments": ["get", ["fields", "customfield_10060"]], + "SELISE Ticket References": ["put", ["fields", "customfield_10067"]], + "SELISE Status Values": ["put", ["fields", "customfield_10065"]], + "SELISE Comments": ["put", ["fields", "customfield_10064"]], + } + + # Create the jira connector instance + jira_connector = ConnectorTicketJira( + jira_username=jira_username, + jira_api_token=jira_api_token, + jira_url=jira_url, + project_code=project_code, + issue_type=issue_type, + ) + + # Read the JIRA tickets + jira_attributes = await jira_connector.read_tasks(limit=0) + + return {"message": "Jira issues synchronized successfully"} diff --git a/requirements.txt b/requirements.txt index 783db728..75f2d078 100644 --- a/requirements.txt +++ b/requirements.txt @@ -43,6 +43,7 @@ chardet>=5.0.0 # Für Zeichensatzerkennung bei Webinhalten aiohttp>=3.8.0 # Required for SharePoint operations (async HTTP) selenium>=4.15.0 # Required for web automation and JavaScript-heavy pages tavily-python==0.7.11 # Tavily SDK +Office365-REST-Python-Client==2.6.2 # Easy Sharepoint integration ## Image Processing Pillow>=10.0.0 # Für Bildverarbeitung (als PIL importiert) From 1a7ca4fa13a445ff26ae5e78a9f696011c04cfb1 Mon Sep 17 00:00:00 2001 From: Christopher Gondek Date: Fri, 5 Sep 2025 11:58:25 +0200 Subject: [PATCH 03/27] feat: finish routejira implementation (untested) --- modules/connectors/connectorSharepoint.py | 126 +++++ modules/connectors/connectorTicketJira.py | 21 +- modules/interfaces/interfaceTicketModel.py | 2 +- modules/interfaces/interfaceTicketObjects.py | 551 ++++++++++++++++++- modules/routes/routeJira.py | 65 ++- 5 files changed, 712 insertions(+), 53 deletions(-) diff --git a/modules/connectors/connectorSharepoint.py b/modules/connectors/connectorSharepoint.py index 33e220a0..0b1c8370 100644 --- a/modules/connectors/connectorSharepoint.py +++ b/modules/connectors/connectorSharepoint.py @@ -1,7 +1,13 @@ """Connector for CRUD sharepoint operations.""" +import asyncio +from concurrent.futures import ThreadPoolExecutor from dataclasses import dataclass +from datetime import datetime +from io 
import BytesIO +from typing import Optional from office365.sharepoint.client_context import ClientContext +from office365.sharepoint.files.file import File @dataclass @@ -53,3 +59,123 @@ class ConnectorSharepoint: return ClientContext(site_url).with_client_credentials( client_id=client_id, client_secret=client_secret ) + + def copy_file( + self, *, source_folder: str, source_file: str, dest_folder: str, dest_file: str + ) -> bool: + """Copy a file from one SharePoint location to another. + + Params: + source_folder: Source folder path (server-relative) + source_file: Source file name + dest_folder: Destination folder path (server-relative) + dest_file: Destination file name + + Returns: + bool: True if successful, False otherwise + """ + source_path = f"{source_folder.rstrip('/')}/{source_file}" + dest_path = f"{dest_folder.rstrip('/')}/{dest_file}" + + source_file_obj = self.ctx.web.get_file_by_server_relative_url(source_path) + source_file_obj.copyto(dest_path).execute_query() + return True + + async def copy_file_async( + self, *, source_folder: str, source_file: str, dest_folder: str, dest_file: str + ) -> bool: + """Copy a file from one SharePoint location to another (async version). + + Params: + source_folder: Source folder path (server-relative) + source_file: Source file name + dest_folder: Destination folder path (server-relative) + dest_file: Destination file name + + Returns: + bool: True if successful, False otherwise + """ + loop = asyncio.get_event_loop() + with ThreadPoolExecutor() as executor: + return await loop.run_in_executor( + executor, + lambda: self.copy_file( + source_folder=source_folder, + source_file=source_file, + dest_folder=dest_folder, + dest_file=dest_file, + ), + ) + + def read_file(self, *, folder_path: str, file_name: str) -> bytes: + """Read a file from SharePoint and return its content as bytes. + + Params: + folder_path: Folder path (server-relative) + file_name: File name + + Returns: + bytes: File content as bytes + """ + file_path = f"{folder_path.rstrip('/')}/{file_name}" + response = File.open_binary(self.ctx, file_path) + return response.content + + async def read_file_async(self, *, folder_path: str, file_name: str) -> bytes: + """Read a file from SharePoint and return its content as bytes (async version). + + Params: + folder_path: Folder path (server-relative) + file_name: File name + + Returns: + bytes: File content as bytes + """ + loop = asyncio.get_event_loop() + with ThreadPoolExecutor() as executor: + return await loop.run_in_executor( + executor, + lambda: self.read_file(folder_path=folder_path, file_name=file_name), + ) + + def overwrite_file( + self, *, folder_path: str, file_name: str, content: bytes + ) -> bool: + """Write content to a SharePoint file, overwriting if it exists. + + Params: + folder_path: Target folder path (server-relative) + file_name: Target file name + content: File content as bytes + + Returns: + bool: True if successful, False otherwise + """ + target_folder = self.ctx.web.get_folder_by_server_relative_url(folder_path) + buffer = BytesIO(content) + target_folder.files.upload(buffer, file_name).execute_query() + return True + + async def overwrite_file_async( + self, *, folder_path: str, file_name: str, content: bytes + ) -> bool: + """Write content to a SharePoint file, overwriting if it exists (async version). 
+ + Params: + folder_path: Target folder path (server-relative) + file_name: Target file name + content: File content as bytes + + Returns: + bool: True if successful, False otherwise + """ + loop = asyncio.get_event_loop() + with ThreadPoolExecutor() as executor: + return await loop.run_in_executor( + executor, + lambda: self.overwrite_file( + folder_path=folder_path, + file_name=file_name, + content=content, + ), + ) diff --git a/modules/connectors/connectorTicketJira.py b/modules/connectors/connectorTicketJira.py index f2edd200..93020f2c 100644 --- a/modules/connectors/connectorTicketJira.py +++ b/modules/connectors/connectorTicketJira.py @@ -4,7 +4,6 @@ from dataclasses import dataclass import logging import aiohttp import json -from typing import Optional from modules.interfaces.interfaceTicketModel import ( TicketBase, @@ -156,16 +155,9 @@ class ConnectorTicketJira(TicketBase): total = data.get("total", 0) for issue in issues: - # Create task with all issue data - task_data = { - "id": issue.get("id"), - "key": issue.get("key"), - "fields": issue.get("fields", {}), - "self": issue.get("self"), - "expand": issue.get("expand", ""), - } - - task = Task(data=task_data) + # Store the raw JIRA issue data directly + # This matches what the reference implementation expects + task = Task(data=issue) tasks.append(task) # Check limit @@ -202,13 +194,18 @@ class ConnectorTicketJira(TicketBase): async with aiohttp.ClientSession() as session: for task in tasklist: task_data = task.data - task_id = task_data.get("id") or task_data.get("key") + task_id = ( + task_data.get("ID") + or task_data.get("id") + or task_data.get("key") + ) if not task_id: logger.warning("Task missing ID or key, skipping update") continue # Extract fields to update from task data + # The task data should contain the field updates in a "fields" key fields = task_data.get("fields", {}) if not fields: diff --git a/modules/interfaces/interfaceTicketModel.py b/modules/interfaces/interfaceTicketModel.py index 151eabac..98329a7b 100644 --- a/modules/interfaces/interfaceTicketModel.py +++ b/modules/interfaces/interfaceTicketModel.py @@ -20,7 +20,7 @@ class TicketBase(ABC): async def read_attributes(self) -> list[TicketFieldAttribute]: ... @abstractmethod - async def read_tasks(self) -> list[Task]: ... + async def read_tasks(self, limit: int = 0) -> list[Task]: ... @abstractmethod async def write_tasks(self, tasklist: list[Task]) -> None: ... 
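
To illustrate the field-path convention used by task_sync_definition (each entry
maps a CSV column to the list of keys to walk inside the raw Jira issue payload
that a Task now carries), here is a minimal, self-contained sketch with a made-up
issue; the real lookup is the _extract_field_value helper added in the interface
below, which additionally unwraps option objects that carry a "value" key:

    # Hypothetical, heavily truncated issue payload (shape as returned by /rest/api/2/search).
    issue = {
        "key": "ABC-1",
        "fields": {
            "summary": "Example summary",
            "priority": {"name": "High"},
        },
    }

    def walk(data, path):
        # Follow the key path, returning None as soon as a level is missing.
        for key in path:
            if not isinstance(data, dict) or key not in data:
                return None
            data = data[key]
        return data

    print(walk(issue, ["key"]))                         # ABC-1
    print(walk(issue, ["fields", "priority", "name"]))  # High
    print(walk(issue, ["fields", "duedate"]))           # None (field not present)
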
diff --git a/modules/interfaces/interfaceTicketObjects.py b/modules/interfaces/interfaceTicketObjects.py index 3eb7a2bd..cebb043f 100644 --- a/modules/interfaces/interfaceTicketObjects.py +++ b/modules/interfaces/interfaceTicketObjects.py @@ -1,50 +1,541 @@ from dataclasses import dataclass -from shareplum import Site -from shareplum import Office365 -from shareplum.site import Version +from io import BytesIO +from typing import Any +import pandas as pd +from modules.shared.timezoneUtils import get_utc_now + +from modules.connectors.connectorSharepoint import ConnectorSharepoint from modules.interfaces.interfaceTicketModel import TicketBase - - -SUPPORTED_SYSTEMS = ["jira"] +from modules.interfaces.interfaceTicketModel import Task @dataclass(slots=True) class TicketSharepointSyncInterface: - ticketConnector: TicketBase + connector_ticket: TicketBase + connector_sharepoint: ConnectorSharepoint task_sync_definition: dict - - # TODO: shareplum instance + sync_folder: str + sync_file: str + backup_folder: str + audit_folder: str @classmethod async def create( cls, - ticket_connector: TicketBase, + connector_ticket: TicketBase, + connector_sharepoint: ConnectorSharepoint, + task_sync_definition: dict, + sync_folder: str, + sync_file: str, + backup_folder: str, + audit_folder: str, ) -> "TicketSharepointSyncInterface": - instance = cls() - instance.ticketConnector = ticket_connector - return instance + return cls( + connector_ticket=connector_ticket, + connector_sharepoint=connector_sharepoint, + task_sync_definition=task_sync_definition, + sync_folder=sync_folder, + sync_file=sync_file, + backup_folder=backup_folder, + audit_folder=audit_folder, + ) - # TODO: 1. Read JIRA tickets - # TODO: 2. Transform tasks according to task_sync_definition (get_task_object) l. 79ff + async def create_backup(self): + """Creates a backup of the current sync file in the backup folder.""" + timestamp = get_utc_now().strftime("%Y%m%d_%H%M%S") + backup_filename = f"backup_{timestamp}_{self.sync_file}" - # TODO: 3. Create export file: Save transformed tasks to a timestamped export file in sharepoint - # - maybe not needed? + await self.connector_sharepoint.copy_file_async( + source_folder=self.sync_folder, + source_file=self.sync_file, + dest_folder=self.backup_folder, + dest_file=backup_filename, + ) - # TODO: 4. Backup current main sync file + async def sync_from_jira_to_csv(self): + """Syncs tasks from JIRA to a CSV file in SharePoint.""" + start_time = get_utc_now() + audit_log = [] - # TODO: 5. Compare JIRA data (export file) with current main sync file and update line by line - # - update GET only - # - important so that we don't overwrite the changes from SELISE in the main sync file + audit_log.append("=== JIRA TO CSV SYNC STARTED ===") + audit_log.append(f"Start Time: {start_time.strftime('%Y-%m-%d %H:%M:%S')}") + audit_log.append(f"Sync File: {self.sync_file}") + audit_log.append(f"Sync Folder: {self.sync_folder}") + audit_log.append("") - # TODO: 6. Take PUT changes from the main sync file and write it back to JIRA. + try: + # 1. Read JIRA tickets + audit_log.append("Step 1: Reading JIRA tickets...") + tickets = await self.connector_ticket.read_tasks(limit=0) + audit_log.append(f"JIRA issues read: {len(tickets)}") + audit_log.append("") - # TODO: Write file to sharepoint folder - # TODO: Remove file from sharepoint folder - # TODO: Rename file in sharepoint folder + # 2. 
Transform tasks according to task_sync_definition + audit_log.append("Step 2: Transforming JIRA data...") + transformed_tasks = self._transform_tasks(tickets) + jira_data = [task.data for task in transformed_tasks] + audit_log.append(f"JIRA issues transformed: {len(jira_data)}") + audit_log.append("") - # Next steps: - # - Complete connectorSharepoint CRUD-ish - # - pytest sharepoint connector - # - pytest JIRA connector - # - connect logic here... + # 3. Create JIRA export file in audit folder + audit_log.append("Step 3: Creating JIRA export file...") + try: + timestamp = get_utc_now().strftime("%Y%m%d_%H%M%S") + jira_export_filename = f"jira_export_{timestamp}.csv" + jira_export_content = self._create_csv_content(jira_data) + await self.connector_sharepoint.overwrite_file_async( + folder_path=self.audit_folder, + file_name=jira_export_filename, + content=jira_export_content, + ) + audit_log.append(f"JIRA export file created: {jira_export_filename}") + except Exception as e: + audit_log.append(f"Failed to create JIRA export file: {str(e)}") + audit_log.append("") + + # 4. Create backup of existing sync file (if it exists) + audit_log.append("Step 4: Creating backup...") + backup_created = False + try: + await self.create_backup() + backup_created = True + audit_log.append("Backup created successfully") + except Exception as e: + audit_log.append( + f"Backup creation failed (file might not exist): {str(e)}" + ) + audit_log.append("") + + # 5. Try to read existing CSV file from SharePoint + audit_log.append("Step 5: Reading existing CSV file...") + existing_data = [] + existing_file_found = False + try: + csv_content = await self.connector_sharepoint.read_file_async( + folder_path=self.sync_folder, file_name=self.sync_file + ) + df_existing = pd.read_csv( + BytesIO(csv_content), skiprows=2 + ) # Skip header rows + existing_data = df_existing.to_dict("records") + existing_file_found = True + audit_log.append( + f"Existing CSV file found with {len(existing_data)} records" + ) + except Exception as e: + audit_log.append(f"No existing CSV file found or read error: {str(e)}") + audit_log.append("") + + # 6. Merge JIRA data with existing data and track changes + audit_log.append("Step 6: Merging JIRA data with existing data...") + merged_data, change_details = self._merge_jira_with_existing_detailed( + jira_data, existing_data + ) + + # Log detailed changes + audit_log.append(f"Total records after merge: {len(merged_data)}") + audit_log.append(f"Records updated: {change_details['updated']}") + audit_log.append(f"Records added: {change_details['added']}") + audit_log.append(f"Records unchanged: {change_details['unchanged']}") + audit_log.append("") + + # Log individual changes + if change_details["changes"]: + audit_log.append("DETAILED CHANGES:") + for change in change_details["changes"]: + audit_log.append(f"- {change}") + audit_log.append("") + + # 7. 
Create CSV with 4-row structure and write to SharePoint + audit_log.append("Step 7: Writing updated CSV to SharePoint...") + csv_content = self._create_csv_content(merged_data) + await self.connector_sharepoint.overwrite_file_async( + folder_path=self.sync_folder, + file_name=self.sync_file, + content=csv_content, + ) + audit_log.append("CSV file successfully written to SharePoint") + audit_log.append("") + + # Success summary + end_time = get_utc_now() + duration = (end_time - start_time).total_seconds() + audit_log.append("=== SYNC COMPLETED SUCCESSFULLY ===") + audit_log.append(f"End Time: {end_time.strftime('%Y-%m-%d %H:%M:%S')}") + audit_log.append(f"Duration: {duration:.2f} seconds") + audit_log.append(f"Total JIRA issues processed: {len(jira_data)}") + audit_log.append(f"Total records in final CSV: {len(merged_data)}") + + except Exception as e: + # Error handling + end_time = get_utc_now() + duration = (end_time - start_time).total_seconds() + audit_log.append("") + audit_log.append("=== SYNC FAILED ===") + audit_log.append(f"Error Time: {end_time.strftime('%Y-%m-%d %H:%M:%S')}") + audit_log.append(f"Duration before failure: {duration:.2f} seconds") + audit_log.append(f"Error: {str(e)}") + raise + finally: + # Write audit log to SharePoint + await self._write_audit_log(audit_log, "jira_to_csv") + + async def sync_from_csv_to_jira(self): + """Syncs tasks from a CSV file in SharePoint to JIRA.""" + start_time = get_utc_now() + audit_log = [] + + audit_log.append("=== CSV TO JIRA SYNC STARTED ===") + audit_log.append(f"Start Time: {start_time.strftime('%Y-%m-%d %H:%M:%S')}") + audit_log.append(f"Sync File: {self.sync_file}") + audit_log.append(f"Sync Folder: {self.sync_folder}") + audit_log.append("") + + try: + # 1. Read CSV file from SharePoint + audit_log.append("Step 1: Reading CSV file from SharePoint...") + try: + csv_content = await self.connector_sharepoint.read_file_async( + folder_path=self.sync_folder, file_name=self.sync_file + ) + df = pd.read_csv(BytesIO(csv_content), skiprows=2) # Skip header rows + csv_data = df.to_dict("records") + audit_log.append( + f"CSV file read successfully with {len(csv_data)} records" + ) + except Exception as e: + audit_log.append(f"Failed to read CSV file: {str(e)}") + audit_log.append("CSV to JIRA sync aborted - no file to process") + return + audit_log.append("") + + # 2. Read current JIRA data for comparison + audit_log.append("Step 2: Reading current JIRA data for comparison...") + try: + current_jira_tasks = await self.connector_ticket.read_tasks(limit=0) + current_jira_data = self._transform_tasks(current_jira_tasks) + jira_lookup = { + task.data.get("ID"): task.data for task in current_jira_data + } + audit_log.append(f"Current JIRA data read: {len(jira_lookup)} tasks") + except Exception as e: + audit_log.append(f"Failed to read current JIRA data: {str(e)}") + raise + audit_log.append("") + + # 3. 
Detect actual changes in "put" fields + audit_log.append("Step 3: Detecting changes in 'put' fields...") + actual_changes = {} + records_with_changes = 0 + total_changes = 0 + + for row in csv_data: + task_id = row.get("ID") + if not task_id or task_id not in jira_lookup: + continue + + current_jira_task = jira_lookup[task_id] + task_changes = {} + + for field_name, field_config in self.task_sync_definition.items(): + if field_config[0] == "put": # Only process "put" fields + csv_value = row.get(field_name, "") + jira_value = current_jira_task.get(field_name, "") + + # Convert None to empty string for comparison + csv_value = "" if csv_value is None else str(csv_value).strip() + jira_value = ( + "" if jira_value is None else str(jira_value).strip() + ) + + # Only include if values are different and CSV has non-empty value + if csv_value != jira_value and csv_value: + task_changes[field_name] = csv_value + + if task_changes: + actual_changes[task_id] = task_changes + records_with_changes += 1 + total_changes += len(task_changes) + + audit_log.append(f"Records with actual changes: {records_with_changes}") + audit_log.append(f"Total field changes detected: {total_changes}") + audit_log.append("") + + # Log detailed changes + if actual_changes: + audit_log.append("DETAILED CHANGES TO APPLY TO JIRA:") + for task_id, changes in actual_changes.items(): + change_list = [ + f"{field}: '{value}'" for field, value in changes.items() + ] + audit_log.append(f"- Task ID {task_id}: {', '.join(change_list)}") + audit_log.append("") + + # 4. Update JIRA tasks with actual changes + if actual_changes: + audit_log.append("Step 4: Updating JIRA tasks...") + + # Convert to Task objects for the connector + tasks_to_update = [] + for task_id, changes in actual_changes.items(): + # Create task data structure expected by JIRA connector + # Build the nested fields structure that JIRA expects + fields = {} + for field_name, new_value in changes.items(): + # Map back to JIRA field structure using task_sync_definition + field_config = self.task_sync_definition[field_name] + field_path = field_config[1] + + # Extract the JIRA field ID from the path + # For "put" fields, the path is like ['fields', 'customfield_10067'] + if len(field_path) >= 2 and field_path[0] == "fields": + jira_field_id = field_path[1] + fields[jira_field_id] = new_value + + if fields: + task_data = {"ID": task_id, "fields": fields} + task = Task(data=task_data) + tasks_to_update.append(task) + + # Write tasks back to JIRA + try: + await self.connector_ticket.write_tasks(tasks_to_update) + audit_log.append( + f"Successfully updated {len(tasks_to_update)} JIRA tasks" + ) + except Exception as e: + audit_log.append(f"Failed to update JIRA tasks: {str(e)}") + raise + else: + audit_log.append("Step 4: No changes to apply to JIRA") + audit_log.append("") + + # Success summary + end_time = get_utc_now() + duration = (end_time - start_time).total_seconds() + audit_log.append("=== SYNC COMPLETED SUCCESSFULLY ===") + audit_log.append(f"End Time: {end_time.strftime('%Y-%m-%d %H:%M:%S')}") + audit_log.append(f"Duration: {duration:.2f} seconds") + audit_log.append(f"Total CSV records processed: {len(csv_data)}") + audit_log.append(f"Records with actual changes: {records_with_changes}") + audit_log.append(f"JIRA tasks updated: {len(actual_changes)}") + + except Exception as e: + # Error handling + end_time = get_utc_now() + duration = (end_time - start_time).total_seconds() + audit_log.append("") + audit_log.append("=== SYNC FAILED ===") + 
audit_log.append(f"Error Time: {end_time.strftime('%Y-%m-%d %H:%M:%S')}") + audit_log.append(f"Duration before failure: {duration:.2f} seconds") + audit_log.append(f"Error: {str(e)}") + raise + finally: + # Write audit log to SharePoint + await self._write_audit_log(audit_log, "csv_to_jira") + + def _transform_tasks(self, tasks: list[Task]) -> list[Task]: + """Transforms tasks according to the task_sync_definition.""" + transformed_tasks = [] + + for task in tasks: + transformed_data = {} + + # Process each field in the sync definition + for field_name, field_config in self.task_sync_definition.items(): + direction = field_config[0] # "get" or "put" + field_path = field_config[1] # List of keys to navigate + + # Only process "get" fields (JIRA → CSV) + if direction == "get": + # Extract value using the field path + value = self._extract_field_value(task.data, field_path) + transformed_data[field_name] = value + + # Create new Task with transformed data + transformed_task = Task(data=transformed_data) + transformed_tasks.append(transformed_task) + + return transformed_tasks + + def _extract_field_value(self, issue_data: dict, field_path: list[str]) -> Any: + """Extract field value from JIRA issue data using field path.""" + value = issue_data + try: + for key in field_path: + if value is not None: + value = value[key] + + if value is None: + return None + + # Handle complex objects that have a 'value' field (like custom field options) + if isinstance(value, dict) and "value" in value: + value = value["value"] + # Handle lists of objects with 'value' fields + elif ( + isinstance(value, list) + and len(value) > 0 + and isinstance(value[0], dict) + and "value" in value[0] + ): + value = value[0]["value"] + + return value + except (KeyError, TypeError): + return None + + def _merge_jira_with_existing( + self, jira_data: list[dict], existing_data: list[dict] + ) -> list[dict]: + """Merge JIRA data with existing CSV data, updating only 'get' fields.""" + # Create a lookup for existing data by ID + existing_lookup = {row.get("ID"): row for row in existing_data if row.get("ID")} + + merged_data = [] + for jira_row in jira_data: + jira_id = jira_row.get("ID") + if jira_id and jira_id in existing_lookup: + # Update existing row with JIRA data (only 'get' fields) + existing_row = existing_lookup[jira_id].copy() + for field_name, field_config in self.task_sync_definition.items(): + if field_config[0] == "get": # Only update 'get' fields + existing_row[field_name] = jira_row.get(field_name) + merged_data.append(existing_row) + # Remove from lookup to track processed items + del existing_lookup[jira_id] + else: + # New row from JIRA + merged_data.append(jira_row) + + # Add any remaining existing rows that weren't in JIRA data + merged_data.extend(existing_lookup.values()) + + return merged_data + + def _merge_jira_with_existing_detailed( + self, jira_data: list[dict], existing_data: list[dict] + ) -> tuple[list[dict], dict]: + """Merge JIRA data with existing CSV data and track detailed changes.""" + # Create a lookup for existing data by ID + existing_lookup = {row.get("ID"): row for row in existing_data if row.get("ID")} + + merged_data = [] + changes = [] + updated_count = 0 + added_count = 0 + unchanged_count = 0 + + for jira_row in jira_data: + jira_id = jira_row.get("ID") + if jira_id and jira_id in existing_lookup: + # Update existing row with JIRA data (only 'get' fields) + existing_row = existing_lookup[jira_id].copy() + row_changes = [] + + for field_name, field_config in 
self.task_sync_definition.items(): + if field_config[0] == "get": # Only update 'get' fields + old_value = existing_row.get(field_name, "") + new_value = jira_row.get(field_name, "") + + # Convert None to empty string for comparison + old_value = "" if old_value is None else str(old_value) + new_value = "" if new_value is None else str(new_value) + + if old_value != new_value: + row_changes.append( + f"{field_name}: '{old_value}' → '{new_value}'" + ) + + existing_row[field_name] = jira_row.get(field_name) + + merged_data.append(existing_row) + + if row_changes: + updated_count += 1 + changes.append( + f"Row ID {jira_id} updated: {', '.join(row_changes)}" + ) + else: + unchanged_count += 1 + + # Remove from lookup to track processed items + del existing_lookup[jira_id] + else: + # New row from JIRA + merged_data.append(jira_row) + added_count += 1 + changes.append(f"Row ID {jira_id} added as new record") + + # Add any remaining existing rows that weren't in JIRA data + for remaining_row in existing_lookup.values(): + merged_data.append(remaining_row) + unchanged_count += 1 + + change_details = { + "updated": updated_count, + "added": added_count, + "unchanged": unchanged_count, + "changes": changes, + } + + return merged_data, change_details + + async def _write_audit_log(self, audit_log: list[str], operation_type: str): + """Write audit log to SharePoint.""" + try: + timestamp = get_utc_now().strftime("%Y%m%d_%H%M%S") + audit_filename = f"audit_{operation_type}_{timestamp}.log" + + # Convert audit log to bytes + audit_content = "\n".join(audit_log).encode("utf-8") + + # Write to SharePoint + await self.connector_sharepoint.overwrite_file_async( + folder_path=self.audit_folder, + file_name=audit_filename, + content=audit_content, + ) + except Exception as e: + # If audit logging fails, we don't want to break the main sync process + # Just log the error (this could be enhanced with fallback logging) + print(f"Failed to write audit log: {str(e)}") + + def _create_csv_content(self, data: list[dict]) -> bytes: + """Create CSV content with 4-row structure matching reference code.""" + if not data: + return b"" + + # Create DataFrame from data + df = pd.DataFrame(data) + + # Force all columns to be object (string) type to preserve empty cells + for column in df.columns: + df[column] = df[column].astype("object") + df[column] = df[column].fillna("") + + # Create the 4-row structure + # Row 1: Static header row 1 + header_row1 = pd.DataFrame( + [["Header 1"] + [""] * (len(df.columns) - 1)], columns=df.columns + ) + + # Row 2: Static header row 2 with timestamp + timestamp = get_utc_now().strftime("%Y-%m-%d %H:%M:%S") + header_row2 = pd.DataFrame( + [[f"{timestamp}"] + [""] * (len(df.columns) - 1)], columns=df.columns + ) + + # Row 3: Table headers (column names) + table_headers = pd.DataFrame([df.columns.tolist()], columns=df.columns) + + # Concatenate all rows: header1 + header2 + table_headers + data + final_df = pd.concat( + [header_row1, header_row2, table_headers, df], ignore_index=True + ) + + # Convert to CSV bytes + csv_buffer = BytesIO() + final_df.to_csv(csv_buffer, index=False, header=False) + return csv_buffer.getvalue() diff --git a/modules/routes/routeJira.py b/modules/routes/routeJira.py index 0a1b8195..66564e55 100644 --- a/modules/routes/routeJira.py +++ b/modules/routes/routeJira.py @@ -3,28 +3,41 @@ import logging from fastapi import APIRouter from modules.connectors.connectorTicketJira import ConnectorTicketJira - +from modules.connectors.connectorSharepoint import 
ConnectorSharepoint +from modules.interfaces.interfaceTicketObjects import TicketSharepointSyncInterface logger = logging.getLogger(__name__) router = APIRouter( - prefix="/api/users", - tags=["Manage Users"], + prefix="/api/jira", + tags=["JIRA Sync"], ) @router.post("/sync/delta-group") async def sync_jira(): logger.info("Syncing Jira issues...") - # Implement synchronization logic here - jira_username = None - jira_api_token = None + # Sharepoint connection parameters sharepoint_client_id = None sharepoint_client_secret = None + sharepoint_site_url = None + + # Jira connection parameters + jira_username = None + jira_api_token = None jira_url = "https://deltasecurity.atlassian.net" project_code = "DCS" issue_type = "Task" + + # Basic validation (credentials will be added later) + if not all([sharepoint_client_id, sharepoint_client_secret, sharepoint_site_url]): + logger.warning("SharePoint credentials not configured - sync will fail") + + if not all([jira_username, jira_api_token]): + logger.warning("JIRA credentials not configured - sync will fail") + + # Define the task sync definition task_sync_definition = { # key=excel-header, [get:jira>excel | put: excel>jira, jira-xml-field-list] "ID": ["get", ["key"]], @@ -43,8 +56,14 @@ async def sync_jira(): "SELISE Comments": ["put", ["fields", "customfield_10064"]], } + # SharePoint file configuration + sync_folder = "Shared Documents/TicketSync" + sync_file = "delta_group_selise_ticket_exchange_list.csv" + backup_folder = "Shared Documents/TicketSync/Backups" + audit_folder = "Shared Documents/TicketSync/AuditLogs" + # Create the jira connector instance - jira_connector = ConnectorTicketJira( + jira_connector = await ConnectorTicketJira.create( jira_username=jira_username, jira_api_token=jira_api_token, jira_url=jira_url, @@ -52,7 +71,33 @@ async def sync_jira(): issue_type=issue_type, ) - # Read the JIRA tickets - jira_attributes = await jira_connector.read_tasks(limit=0) + # Create the sharepoint connector instance + ctx = ConnectorSharepoint.get_client_context_from_app( + site_url=sharepoint_site_url, + client_id=sharepoint_client_id, + client_secret=sharepoint_client_secret, + ) + sharepoint_connector = await ConnectorSharepoint.create(ctx=ctx) - return {"message": "Jira issues synchronized successfully"} + # Create the sync interface instance + sync_interface = await TicketSharepointSyncInterface.create( + connector_ticket=jira_connector, + connector_sharepoint=sharepoint_connector, + task_sync_definition=task_sync_definition, + sync_folder=sync_folder, + sync_file=sync_file, + backup_folder=backup_folder, + audit_folder=audit_folder, + ) + + # Create a backup of the current sync file + await sync_interface.create_backup() + + # Sync from JIRA to CSV in Sharepoint + await sync_interface.sync_from_jira_to_csv() + + # Sync from CSV in Sharepoint to JIRA + await sync_interface.sync_from_csv_to_jira() + + # Return a response + return {"status": "Sync completed"} From fe459731cdb31077b6295271d0a9137843a1de01 Mon Sep 17 00:00:00 2001 From: Christopher Gondek Date: Fri, 5 Sep 2025 14:23:18 +0200 Subject: [PATCH 04/27] fix: include put for csv -> jira --- modules/interfaces/interfaceTicketObjects.py | 12 ++++++++---- modules/routes/routeJira.py | 6 +++--- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/modules/interfaces/interfaceTicketObjects.py b/modules/interfaces/interfaceTicketObjects.py index cebb043f..d7bed987 100644 --- a/modules/interfaces/interfaceTicketObjects.py +++ b/modules/interfaces/interfaceTicketObjects.py @@ 
-214,7 +214,9 @@ class TicketSharepointSyncInterface: audit_log.append("Step 2: Reading current JIRA data for comparison...") try: current_jira_tasks = await self.connector_ticket.read_tasks(limit=0) - current_jira_data = self._transform_tasks(current_jira_tasks) + current_jira_data = self._transform_tasks( + current_jira_tasks, include_put=True + ) jira_lookup = { task.data.get("ID"): task.data for task in current_jira_data } @@ -335,7 +337,9 @@ class TicketSharepointSyncInterface: # Write audit log to SharePoint await self._write_audit_log(audit_log, "csv_to_jira") - def _transform_tasks(self, tasks: list[Task]) -> list[Task]: + def _transform_tasks( + self, tasks: list[Task], include_put: bool = False + ) -> list[Task]: """Transforms tasks according to the task_sync_definition.""" transformed_tasks = [] @@ -347,8 +351,8 @@ class TicketSharepointSyncInterface: direction = field_config[0] # "get" or "put" field_path = field_config[1] # List of keys to navigate - # Only process "get" fields (JIRA → CSV) - if direction == "get": + # Get the right fields + if direction == "get" or include_put: # Extract value using the field path value = self._extract_field_value(task.data, field_path) transformed_data[field_name] = value diff --git a/modules/routes/routeJira.py b/modules/routes/routeJira.py index 66564e55..f812da7e 100644 --- a/modules/routes/routeJira.py +++ b/modules/routes/routeJira.py @@ -57,10 +57,10 @@ async def sync_jira(): } # SharePoint file configuration - sync_folder = "Shared Documents/TicketSync" + sync_folder = "/sites//Shared Documents/TicketSync" sync_file = "delta_group_selise_ticket_exchange_list.csv" - backup_folder = "Shared Documents/TicketSync/Backups" - audit_folder = "Shared Documents/TicketSync/AuditLogs" + backup_folder = "/sites//Shared Documents/TicketSync/Backups" + audit_folder = "/sites//Shared Documents/TicketSync/AuditLogs" # Create the jira connector instance jira_connector = await ConnectorTicketJira.create( From 9837bc1a19ed429605d5347bbeae050a75624189 Mon Sep 17 00:00:00 2001 From: Christopher Gondek Date: Fri, 5 Sep 2025 14:41:05 +0200 Subject: [PATCH 05/27] fix: remove redundant backup --- modules/routes/routeJira.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/modules/routes/routeJira.py b/modules/routes/routeJira.py index f812da7e..847010ec 100644 --- a/modules/routes/routeJira.py +++ b/modules/routes/routeJira.py @@ -90,9 +90,6 @@ async def sync_jira(): audit_folder=audit_folder, ) - # Create a backup of the current sync file - await sync_interface.create_backup() - # Sync from JIRA to CSV in Sharepoint await sync_interface.sync_from_jira_to_csv() From e1618c9ffb6d84b32afa3fa3b6bb4455d8ad6425 Mon Sep 17 00:00:00 2001 From: Christopher Gondek Date: Fri, 5 Sep 2025 14:48:23 +0200 Subject: [PATCH 06/27] fix: row 2 formatting --- modules/interfaces/interfaceTicketObjects.py | 5 ++--- modules/routes/routeJira.py | 17 ++++++++++++----- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/modules/interfaces/interfaceTicketObjects.py b/modules/interfaces/interfaceTicketObjects.py index d7bed987..d55daad3 100644 --- a/modules/interfaces/interfaceTicketObjects.py +++ b/modules/interfaces/interfaceTicketObjects.py @@ -525,10 +525,9 @@ class TicketSharepointSyncInterface: [["Header 1"] + [""] * (len(df.columns) - 1)], columns=df.columns ) - # Row 2: Static header row 2 with timestamp - timestamp = get_utc_now().strftime("%Y-%m-%d %H:%M:%S") + # Row 2: Static header row 2 with strict compatibility header_row2 = pd.DataFrame( - [[f"{timestamp}"] 
+ [""] * (len(df.columns) - 1)], columns=df.columns + [["Header 2"] + [""] * (len(df.columns) - 1)], columns=df.columns ) # Row 3: Table headers (column names) diff --git a/modules/routes/routeJira.py b/modules/routes/routeJira.py index 847010ec..31bcbe73 100644 --- a/modules/routes/routeJira.py +++ b/modules/routes/routeJira.py @@ -15,8 +15,18 @@ router = APIRouter( @router.post("/sync/delta-group") -async def sync_jira(): - logger.info("Syncing Jira issues...") +async def sync_jira_delta_group(): + """Endpoint to trigger JIRA-SharePoint sync for Delta Group project.""" + + logger.info("Received request to sync JIRA Delta Group project") + await perform_sync_jira_delta_group() + + # Return a response + return {"status": "Sync completed"} + + +async def perform_sync_jira_delta_group(): + logger.info("Syncing Jira issues for Delta Group...") # Sharepoint connection parameters sharepoint_client_id = None @@ -95,6 +105,3 @@ async def sync_jira(): # Sync from CSV in Sharepoint to JIRA await sync_interface.sync_from_csv_to_jira() - - # Return a response - return {"status": "Sync completed"} From 949a3c97aee16ee5762474b06d611252661d33c8 Mon Sep 17 00:00:00 2001 From: Christopher Gondek Date: Fri, 5 Sep 2025 14:57:13 +0200 Subject: [PATCH 07/27] fix: fail fast; include put in jira -> CSV --- modules/interfaces/interfaceTicketObjects.py | 2 +- modules/routes/routeJira.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/interfaces/interfaceTicketObjects.py b/modules/interfaces/interfaceTicketObjects.py index d55daad3..94cf8427 100644 --- a/modules/interfaces/interfaceTicketObjects.py +++ b/modules/interfaces/interfaceTicketObjects.py @@ -73,7 +73,7 @@ class TicketSharepointSyncInterface: # 2. Transform tasks according to task_sync_definition audit_log.append("Step 2: Transforming JIRA data...") - transformed_tasks = self._transform_tasks(tickets) + transformed_tasks = self._transform_tasks(tickets, include_put=False) jira_data = [task.data for task in transformed_tasks] audit_log.append(f"JIRA issues transformed: {len(jira_data)}") audit_log.append("") diff --git a/modules/routes/routeJira.py b/modules/routes/routeJira.py index 31bcbe73..e49c3815 100644 --- a/modules/routes/routeJira.py +++ b/modules/routes/routeJira.py @@ -42,10 +42,10 @@ async def perform_sync_jira_delta_group(): # Basic validation (credentials will be added later) if not all([sharepoint_client_id, sharepoint_client_secret, sharepoint_site_url]): - logger.warning("SharePoint credentials not configured - sync will fail") + raise ValueError("SharePoint credentials not configured") if not all([jira_username, jira_api_token]): - logger.warning("JIRA credentials not configured - sync will fail") + raise ValueError("JIRA credentials not configured") # Define the task sync definition task_sync_definition = { From b4481dc92fcc55bfb74282a95679f42328a5543d Mon Sep 17 00:00:00 2001 From: Christopher Gondek Date: Fri, 5 Sep 2025 15:05:20 +0200 Subject: [PATCH 08/27] fix: populate put columns on first run --- modules/interfaces/interfaceTicketObjects.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/interfaces/interfaceTicketObjects.py b/modules/interfaces/interfaceTicketObjects.py index 94cf8427..4ae4051e 100644 --- a/modules/interfaces/interfaceTicketObjects.py +++ b/modules/interfaces/interfaceTicketObjects.py @@ -73,7 +73,7 @@ class TicketSharepointSyncInterface: # 2. 
Transform tasks according to task_sync_definition audit_log.append("Step 2: Transforming JIRA data...") - transformed_tasks = self._transform_tasks(tickets, include_put=False) + transformed_tasks = self._transform_tasks(tickets, include_put=True) jira_data = [task.data for task in transformed_tasks] audit_log.append(f"JIRA issues transformed: {len(jira_data)}") audit_log.append("") From 4b9b80563236e295c860ea649e451cde6eb85dc9 Mon Sep 17 00:00:00 2001 From: Christopher Gondek Date: Fri, 5 Sep 2025 15:26:33 +0200 Subject: [PATCH 09/27] fix: minor fixes --- modules/connectors/connectorSharepoint.py | 3 +-- modules/interfaces/interfaceTicketObjects.py | 14 +++++++------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/modules/connectors/connectorSharepoint.py b/modules/connectors/connectorSharepoint.py index 0b1c8370..b5eaa703 100644 --- a/modules/connectors/connectorSharepoint.py +++ b/modules/connectors/connectorSharepoint.py @@ -152,8 +152,7 @@ class ConnectorSharepoint: bool: True if successful, False otherwise """ target_folder = self.ctx.web.get_folder_by_server_relative_url(folder_path) - buffer = BytesIO(content) - target_folder.files.upload(buffer, file_name).execute_query() + target_folder.upload_file(file_name, content).execute_query() return True async def overwrite_file_async( diff --git a/modules/interfaces/interfaceTicketObjects.py b/modules/interfaces/interfaceTicketObjects.py index 4ae4051e..d65e3457 100644 --- a/modules/interfaces/interfaceTicketObjects.py +++ b/modules/interfaces/interfaceTicketObjects.py @@ -1,5 +1,5 @@ from dataclasses import dataclass -from io import BytesIO +from io import BytesIO, StringIO from typing import Any import pandas as pd from modules.shared.timezoneUtils import get_utc_now @@ -251,8 +251,8 @@ class TicketSharepointSyncInterface: "" if jira_value is None else str(jira_value).strip() ) - # Only include if values are different and CSV has non-empty value - if csv_value != jira_value and csv_value: + # Include if values are different (allow empty strings to clear fields like the reference does) + if csv_value != jira_value: task_changes[field_name] = csv_value if task_changes: @@ -538,7 +538,7 @@ class TicketSharepointSyncInterface: [header_row1, header_row2, table_headers, df], ignore_index=True ) - # Convert to CSV bytes - csv_buffer = BytesIO() - final_df.to_csv(csv_buffer, index=False, header=False) - return csv_buffer.getvalue() + # Convert to CSV bytes (write text, then encode) + csv_text = StringIO() + final_df.to_csv(csv_text, index=False, header=False) + return csv_text.getvalue().encode("utf-8") From e02b250a5123de2285639b2c8d720dfb75df0483 Mon Sep 17 00:00:00 2001 From: Christopher Gondek Date: Fri, 5 Sep 2025 20:51:10 +0200 Subject: [PATCH 10/27] feat: connect router; add hourly scheduling --- app.py | 3 +++ modules/routes/routeJira.py | 36 +++++++++++++++++++++++++++++++++++- 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/app.py b/app.py index 4740357b..7622d20e 100644 --- a/app.py +++ b/app.py @@ -211,3 +211,6 @@ app.include_router(msftRouter) from modules.routes.routeSecurityGoogle import router as googleRouter app.include_router(googleRouter) + +from modules.routes.routeJira import router as jiraRouter +app.include_router(jiraRouter) \ No newline at end of file diff --git a/modules/routes/routeJira.py b/modules/routes/routeJira.py index e49c3815..7874b181 100644 --- a/modules/routes/routeJira.py +++ b/modules/routes/routeJira.py @@ -1,16 +1,50 @@ # Configure logger import logging -from fastapi import 
APIRouter +from fastapi import APIRouter, FastAPI +from contextlib import asynccontextmanager +from zoneinfo import ZoneInfo + from modules.connectors.connectorTicketJira import ConnectorTicketJira from modules.connectors.connectorSharepoint import ConnectorSharepoint from modules.interfaces.interfaceTicketObjects import TicketSharepointSyncInterface +from apscheduler.schedulers.asyncio import AsyncIOScheduler +from apscheduler.triggers.cron import CronTrigger + + logger = logging.getLogger(__name__) + +scheduler = AsyncIOScheduler(timezone=ZoneInfo("Europe/Zurich")) + + +@asynccontextmanager +async def router_lifespan(app: FastAPI): + # start scheduler when this router is mounted + scheduler.add_job( + perform_sync_jira_delta_group, + CronTrigger(minute="0"), # run at the top of every hour + id="jira_delta_group_sync", + replace_existing=True, + coalesce=True, + max_instances=1, + misfire_grace_time=1800, + ) + scheduler.start() + logger.info("APScheduler started (jira_delta_group_sync hourly)") + try: + yield + finally: + if scheduler.running: + scheduler.shutdown(wait=False) + logger.info("APScheduler stopped") + + router = APIRouter( prefix="/api/jira", tags=["JIRA Sync"], + lifespan=router_lifespan, ) From 24f2e7718b2d3851526bcd71f0bfd8b9bd857731 Mon Sep 17 00:00:00 2001 From: Christopher Gondek Date: Fri, 5 Sep 2025 21:06:19 +0200 Subject: [PATCH 11/27] chore: updated requirements --- requirements.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/requirements.txt b/requirements.txt index 75f2d078..385a652e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -74,6 +74,9 @@ chardet>=4.0.0 # For encoding detection pytest>=8.0.0 pytest-asyncio>=0.21.0 +## For Scheduling / Repeated Tasks +APScheduler==3.11.0 + ## Missing Dependencies for IPython and other tools decorator>=5.0.0 jedi>=0.16 From 2f0f87ea8cccbb7e2f475c0a5d7ab7701e1d74f1 Mon Sep 17 00:00:00 2001 From: Christopher Gondek Date: Fri, 5 Sep 2025 21:20:00 +0200 Subject: [PATCH 12/27] feat: harden against empty jira --- modules/interfaces/interfaceTicketObjects.py | 23 +++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/modules/interfaces/interfaceTicketObjects.py b/modules/interfaces/interfaceTicketObjects.py index d65e3457..3df6464f 100644 --- a/modules/interfaces/interfaceTicketObjects.py +++ b/modules/interfaces/interfaceTicketObjects.py @@ -509,7 +509,28 @@ class TicketSharepointSyncInterface: def _create_csv_content(self, data: list[dict]) -> bytes: """Create CSV content with 4-row structure matching reference code.""" if not data: - return b"" + # Build an empty table with the expected columns from schema + cols = list(self.task_sync_definition.keys()) + + df = pd.DataFrame(columns=cols) + + # Row 1 & 2: keep your current banner lines + header_row1 = pd.DataFrame( + [["Header 1"] + [""] * (len(cols) - 1)], columns=cols + ) + header_row2 = pd.DataFrame( + [["Header 2"] + [""] * (len(cols) - 1)], columns=cols + ) + + # Row 3: table headers + table_headers = pd.DataFrame([cols], columns=cols) + + final_df = pd.concat( + [header_row1, header_row2, table_headers, df], ignore_index=True + ) + csv_text = StringIO() + final_df.to_csv(csv_text, index=False, header=False) + return csv_text.getvalue().encode("utf-8") # Create DataFrame from data df = pd.DataFrame(data) From 98a4323b363d4ebae1d4f698321ed75825d032b2 Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Fri, 5 Sep 2025 23:35:01 +0200 Subject: [PATCH 13/27] database user session separation --- modules/connectors/connectorDbJson.py | 160 
+++++++----- modules/connectors/connectorPool.py | 178 +++++++++++++ modules/interfaces/interfaceAppObjects.py | 19 +- modules/interfaces/interfaceChatObjects.py | 14 +- .../interfaces/interfaceComponentObjects.py | 14 +- notes/changelog.txt | 4 +- test_concurrency_fixes.py | 237 ++++++++++++++++++ 7 files changed, 555 insertions(+), 71 deletions(-) create mode 100644 modules/connectors/connectorPool.py create mode 100644 test_concurrency_fixes.py diff --git a/modules/connectors/connectorDbJson.py b/modules/connectors/connectorDbJson.py index 5ecb88dd..56111ad3 100644 --- a/modules/connectors/connectorDbJson.py +++ b/modules/connectors/connectorDbJson.py @@ -131,6 +131,11 @@ class DatabaseConnector: return lock + def _get_table_lock(self, table: str, timeout_seconds: int = 30): + """Get table-level lock for metadata operations""" + table_lock_key = f"table_{table}" + return self._get_file_lock(table_lock_key, timeout_seconds) + def _ensureTableDirectory(self, table: str) -> bool: """Ensures the table directory exists.""" if table == self._systemTableName: @@ -145,7 +150,9 @@ class DatabaseConnector: return False def _loadTableMetadata(self, table: str) -> Dict[str, Any]: - """Loads table metadata (list of record IDs) without loading actual records.""" + """Loads table metadata (list of record IDs) without loading actual records. + NOTE: This method is safe to call without additional locking. + """ if table in self._tableMetadataCache: return self._tableMetadataCache[table] @@ -159,7 +166,7 @@ class DatabaseConnector: try: if os.path.exists(tablePath): for fileName in os.listdir(tablePath): - if fileName.endswith('.json'): + if fileName.endswith('.json') and fileName != '_metadata.json': recordId = fileName[:-5] # Remove .json extension metadata["recordIds"].append(recordId) @@ -183,17 +190,23 @@ class DatabaseConnector: return None def _saveRecord(self, table: str, recordId: str, record: Dict[str, Any]) -> bool: - """Saves a single record to the table.""" + """Saves a single record to the table with atomic metadata operations.""" recordPath = self._getRecordPath(table, recordId) - lock = self._get_file_lock(recordPath) + record_lock = self._get_file_lock(recordPath) + table_lock = self._get_table_lock(table) try: - # Acquire lock with timeout - if not lock.acquire(timeout=30): # 30 second timeout - raise TimeoutError(f"Could not acquire lock for {recordPath} within 30 seconds") + # Acquire both locks with timeout - record lock first, then table lock + if not record_lock.acquire(timeout=30): + raise TimeoutError(f"Could not acquire record lock for {recordPath} within 30 seconds") + + if not table_lock.acquire(timeout=30): + record_lock.release() + raise TimeoutError(f"Could not acquire table lock for {table} within 30 seconds") # Record lock acquisition time self._lock_timeouts[recordPath] = time.time() + self._lock_timeouts[f"table_{table}"] = time.time() # Ensure table directory exists if not self._ensureTableDirectory(table): @@ -239,14 +252,14 @@ class DatabaseConnector: # Atomic move from temp to final location os.replace(tempPath, recordPath) - # Update metadata + # ATOMIC: Update metadata while holding both locks metadata = self._loadTableMetadata(table) if recordId not in metadata["recordIds"]: metadata["recordIds"].append(recordId) metadata["recordIds"].sort() self._saveTableMetadata(table, metadata) - # Update cache if it exists + # Update cache if it exists (also protected by table lock) if table in self._tablesCache: # Find and update existing record or append new one found = 
False @@ -272,14 +285,22 @@ class DatabaseConnector: return False finally: - # ALWAYS release lock, even on error + # ALWAYS release both locks, even on error try: - if lock.locked(): - lock.release() + if table_lock.locked(): + table_lock.release() + if f"table_{table}" in self._lock_timeouts: + del self._lock_timeouts[f"table_{table}"] + except Exception as release_error: + logger.error(f"Error releasing table lock for {table}: {release_error}") + + try: + if record_lock.locked(): + record_lock.release() if recordPath in self._lock_timeouts: del self._lock_timeouts[recordPath] except Exception as release_error: - logger.error(f"Error releasing lock for {recordPath}: {release_error}") + logger.error(f"Error releasing record lock for {recordPath}: {release_error}") def _loadTable(self, table: str) -> List[Dict[str, Any]]: """Loads all records from a table folder.""" @@ -403,40 +424,21 @@ class DatabaseConnector: def _saveTableMetadata(self, table: str, metadata: Dict[str, Any]) -> bool: - """Saves table metadata to a metadata file.""" + """Saves table metadata to a metadata file. + NOTE: This method assumes the caller already holds the table lock. + """ try: # Create metadata file path metadataPath = os.path.join(self._getTablePath(table), "_metadata.json") - # Get lock for metadata file - lock = self._get_file_lock(metadataPath) + # Save metadata (caller should already hold table lock) + with open(metadataPath, 'w', encoding='utf-8') as f: + json.dump(metadata, f, indent=2, ensure_ascii=False) - try: - # Acquire lock with timeout - if not lock.acquire(timeout=30): - raise TimeoutError(f"Could not acquire lock for metadata {metadataPath} within 30 seconds") - - # Record lock acquisition time - self._lock_timeouts[metadataPath] = time.time() - - # Save metadata - with open(metadataPath, 'w', encoding='utf-8') as f: - json.dump(metadata, f, indent=2, ensure_ascii=False) - - # Update cache - self._tableMetadataCache[table] = metadata - - return True - - finally: - # ALWAYS release lock - try: - if lock.locked(): - lock.release() - if metadataPath in self._lock_timeouts: - del self._lock_timeouts[metadataPath] - except Exception as release_error: - logger.error(f"Error releasing metadata lock for {metadataPath}: {release_error}") + # Update cache + self._tableMetadataCache[table] = metadata + + return True except Exception as e: logger.error(f"Error saving metadata for table {table}: {e}") @@ -582,39 +584,73 @@ class DatabaseConnector: return existingRecord def recordDelete(self, table: str, recordId: str) -> bool: - """Deletes a record from the table.""" - # Load metadata - metadata = self._loadTableMetadata(table) - - if recordId not in metadata["recordIds"]: - return False - - # Check if it's an initial record - initialId = self.getInitialId(table) - if initialId is not None and initialId == recordId: - self._removeInitialId(table) - logger.info(f"Initial ID {recordId} for table {table} has been removed from the system table") - - # Delete the record file + """Deletes a record from the table with atomic metadata operations.""" recordPath = self._getRecordPath(table, recordId) + record_lock = self._get_file_lock(recordPath) + table_lock = self._get_table_lock(table) + try: + # Acquire both locks with timeout - record lock first, then table lock + if not record_lock.acquire(timeout=30): + raise TimeoutError(f"Could not acquire record lock for {recordPath} within 30 seconds") + + if not table_lock.acquire(timeout=30): + record_lock.release() + raise TimeoutError(f"Could not acquire table lock 
for {table} within 30 seconds") + + # Record lock acquisition time + self._lock_timeouts[recordPath] = time.time() + self._lock_timeouts[f"table_{table}"] = time.time() + + # Load metadata + metadata = self._loadTableMetadata(table) + + if recordId not in metadata["recordIds"]: + return False + + # Check if it's an initial record + initialId = self.getInitialId(table) + if initialId is not None and initialId == recordId: + self._removeInitialId(table) + logger.info(f"Initial ID {recordId} for table {table} has been removed from the system table") + + # Delete the record file if os.path.exists(recordPath): os.remove(recordPath) - # Update metadata cache + # ATOMIC: Update metadata while holding both locks metadata["recordIds"].remove(recordId) - self._tableMetadataCache[table] = metadata + self._saveTableMetadata(table, metadata) - # Update table cache if it exists + # Update table cache if it exists (also protected by table lock) if table in self._tablesCache: self._tablesCache[table] = [r for r in self._tablesCache[table] if r.get("id") != recordId] return True + else: + return False + except Exception as e: - logger.error(f"Error deleting record file {recordPath}: {e}") + logger.error(f"Error deleting record {recordId} from table {table}: {e}") return False - - return False + + finally: + # ALWAYS release both locks, even on error + try: + if table_lock.locked(): + table_lock.release() + if f"table_{table}" in self._lock_timeouts: + del self._lock_timeouts[f"table_{table}"] + except Exception as release_error: + logger.error(f"Error releasing table lock for {table}: {release_error}") + + try: + if record_lock.locked(): + record_lock.release() + if recordPath in self._lock_timeouts: + del self._lock_timeouts[recordPath] + except Exception as release_error: + logger.error(f"Error releasing record lock for {recordPath}: {release_error}") def getInitialId(self, table: str) -> Optional[str]: """Returns the initial ID for a table.""" diff --git a/modules/connectors/connectorPool.py b/modules/connectors/connectorPool.py new file mode 100644 index 00000000..3137c468 --- /dev/null +++ b/modules/connectors/connectorPool.py @@ -0,0 +1,178 @@ +import threading +import queue +import time +import logging +from typing import Optional, Dict, Any +from .connectorDbJson import DatabaseConnector + +logger = logging.getLogger(__name__) + +class DatabaseConnectorPool: + """ + A connection pool for DatabaseConnector instances to manage resources efficiently + and ensure proper isolation between users. + """ + + def __init__(self, max_connections: int = 100, max_idle_time: int = 300): + """ + Initialize the connection pool. 
+ + Args: + max_connections: Maximum number of connections in the pool + max_idle_time: Maximum idle time in seconds before connection is considered stale + """ + self.max_connections = max_connections + self.max_idle_time = max_idle_time + self._pool = queue.Queue(maxsize=max_connections) + self._created_connections = 0 + self._lock = threading.Lock() + self._connection_times = {} # Track when connections were created + + def _create_connector(self, dbHost: str, dbDatabase: str, dbUser: str = None, + dbPassword: str = None, userId: str = None) -> DatabaseConnector: + """Create a new DatabaseConnector instance.""" + with self._lock: + if self._created_connections >= self.max_connections: + raise RuntimeError(f"Maximum connections ({self.max_connections}) exceeded") + + self._created_connections += 1 + logger.debug(f"Creating new database connector (total: {self._created_connections})") + + connector = DatabaseConnector( + dbHost=dbHost, + dbDatabase=dbDatabase, + dbUser=dbUser, + dbPassword=dbPassword, + userId=userId + ) + + # Track creation time + connector_id = id(connector) + self._connection_times[connector_id] = time.time() + + return connector + + def get_connector(self, dbHost: str, dbDatabase: str, dbUser: str = None, + dbPassword: str = None, userId: str = None) -> DatabaseConnector: + """ + Get a database connector from the pool or create a new one. + + Args: + dbHost: Database host path + dbDatabase: Database name + dbUser: Database user (optional) + dbPassword: Database password (optional) + userId: User ID for context (optional) + + Returns: + DatabaseConnector instance + """ + try: + # Try to get an existing connector from the pool + connector = self._pool.get_nowait() + + # Check if connector is stale + connector_id = id(connector) + if connector_id in self._connection_times: + idle_time = time.time() - self._connection_times[connector_id] + if idle_time > self.max_idle_time: + logger.debug(f"Connector {connector_id} is stale (idle: {idle_time}s), creating new one") + # Remove stale connector from tracking + if connector_id in self._connection_times: + del self._connection_times[connector_id] + # Create new connector + return self._create_connector(dbHost, dbDatabase, dbUser, dbPassword, userId) + + # Update user context if provided + if userId is not None: + connector.updateContext(userId) + + logger.debug(f"Reusing existing connector {connector_id}") + return connector + + except queue.Empty: + # Pool is empty, create new connector + return self._create_connector(dbHost, dbDatabase, dbUser, dbPassword, userId) + + def return_connector(self, connector: DatabaseConnector) -> None: + """ + Return a connector to the pool for reuse. + + Args: + connector: DatabaseConnector instance to return + """ + try: + # Update connection time + connector_id = id(connector) + self._connection_times[connector_id] = time.time() + + # Try to return to pool + self._pool.put_nowait(connector) + logger.debug(f"Returned connector {connector_id} to pool") + + except queue.Full: + # Pool is full, discard connector + logger.debug(f"Pool full, discarding connector {id(connector)}") + with self._lock: + self._created_connections -= 1 + if id(connector) in self._connection_times: + del self._connection_times[id(connector)] + + def cleanup_stale_connections(self) -> int: + """ + Clean up stale connections from the pool. 
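The intended calling pattern for the pool is acquire, use, return. A minimal usage sketch (host path, database name, record content and user id are placeholders, and the try/finally wrapper is illustrative rather than part of this patch):

    from modules.connectors.connectorPool import DatabaseConnectorPool

    # Illustrative only: acquire a connector for one unit of work and always hand it back.
    pool = DatabaseConnectorPool(max_connections=10)
    db = pool.get_connector(dbHost="/home/_powerondb", dbDatabase="app", userId="user_42")
    try:
        db.recordCreate("test_table", {"id": "r1", "data": "hello"})
    finally:
        pool.return_connector(db)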
+ + Returns: + Number of connections cleaned up + """ + cleaned = 0 + current_time = time.time() + + # Check all tracked connections + stale_connectors = [] + for connector_id, creation_time in list(self._connection_times.items()): + if current_time - creation_time > self.max_idle_time: + stale_connectors.append(connector_id) + + # Remove stale connections from tracking + for connector_id in stale_connectors: + if connector_id in self._connection_times: + del self._connection_times[connector_id] + cleaned += 1 + + logger.debug(f"Cleaned up {cleaned} stale connections") + return cleaned + + def get_stats(self) -> Dict[str, Any]: + """Get pool statistics.""" + with self._lock: + return { + "max_connections": self.max_connections, + "created_connections": self._created_connections, + "available_connections": self._pool.qsize(), + "tracked_connections": len(self._connection_times) + } + +# Global pool instance +_connector_pool = None +_pool_lock = threading.Lock() + +def get_connector_pool() -> DatabaseConnectorPool: + """Get the global connector pool instance.""" + global _connector_pool + if _connector_pool is None: + with _pool_lock: + if _connector_pool is None: + _connector_pool = DatabaseConnectorPool() + return _connector_pool + +def get_connector(dbHost: str, dbDatabase: str, dbUser: str = None, + dbPassword: str = None, userId: str = None) -> DatabaseConnector: + """Get a database connector from the global pool.""" + pool = get_connector_pool() + return pool.get_connector(dbHost, dbDatabase, dbUser, dbPassword, userId) + +def return_connector(connector: DatabaseConnector) -> None: + """Return a database connector to the global pool.""" + pool = get_connector_pool() + pool.return_connector(connector) diff --git a/modules/interfaces/interfaceAppObjects.py b/modules/interfaces/interfaceAppObjects.py index e9683158..25183fe2 100644 --- a/modules/interfaces/interfaceAppObjects.py +++ b/modules/interfaces/interfaceAppObjects.py @@ -13,6 +13,7 @@ from passlib.context import CryptContext import uuid from modules.connectors.connectorDbJson import DatabaseConnector +from modules.connectors.connectorPool import get_connector, return_connector from modules.shared.configuration import APP_CONFIG from modules.shared.timezoneUtils import get_utc_now, get_utc_timestamp from modules.interfaces.interfaceAppAccess import AppAccess @@ -79,8 +80,16 @@ class AppObjects: # Update database context self.db.updateContext(self.userId) + def __del__(self): + """Cleanup method to return connector to pool.""" + if hasattr(self, 'db') and self.db is not None: + try: + return_connector(self.db) + except Exception as e: + logger.error(f"Error returning connector to pool: {e}") + def _initializeDatabase(self): - """Initializes the database connection.""" + """Initializes the database connection using connection pool.""" try: # Get configuration values with defaults dbHost = APP_CONFIG.get("DB_APP_HOST", "_no_config_default_data") @@ -91,14 +100,16 @@ class AppObjects: # Ensure the database directory exists os.makedirs(dbHost, exist_ok=True) - self.db = DatabaseConnector( + # Get connector from pool with user context + self.db = get_connector( dbHost=dbHost, dbDatabase=dbDatabase, dbUser=dbUser, - dbPassword=dbPassword + dbPassword=dbPassword, + userId=self.userId ) - logger.info("Database initialized successfully") + logger.info(f"Database initialized successfully for user {self.userId}") except Exception as e: logger.error(f"Failed to initialize database: {str(e)}") raise diff --git 
a/modules/interfaces/interfaceChatObjects.py b/modules/interfaces/interfaceChatObjects.py index 95ebb3a5..239e76bd 100644 --- a/modules/interfaces/interfaceChatObjects.py +++ b/modules/interfaces/interfaceChatObjects.py @@ -19,6 +19,7 @@ from modules.interfaces.interfaceAppModel import User # DYNAMIC PART: Connectors to the Interface from modules.connectors.connectorDbJson import DatabaseConnector +from modules.connectors.connectorPool import get_connector, return_connector from modules.shared.timezoneUtils import get_utc_timestamp # Basic Configurations @@ -72,6 +73,14 @@ class ChatObjects: # Update database context self.db.updateContext(self.userId) + + def __del__(self): + """Cleanup method to return connector to pool.""" + if hasattr(self, 'db') and self.db is not None: + try: + return_connector(self.db) + except Exception as e: + logger.error(f"Error returning connector to pool: {e}") logger.debug(f"User context set: userId={self.userId}, mandateId={self.mandateId}") @@ -87,11 +96,12 @@ class ChatObjects: # Ensure the database directory exists os.makedirs(dbHost, exist_ok=True) - self.db = DatabaseConnector( + self.db = get_connector( dbHost=dbHost, dbDatabase=dbDatabase, dbUser=dbUser, - dbPassword=dbPassword + dbPassword=dbPassword, + userId=self.userId ) logger.info("Database initialized successfully") diff --git a/modules/interfaces/interfaceComponentObjects.py b/modules/interfaces/interfaceComponentObjects.py index 59b10ddf..36058cc7 100644 --- a/modules/interfaces/interfaceComponentObjects.py +++ b/modules/interfaces/interfaceComponentObjects.py @@ -18,6 +18,7 @@ from modules.interfaces.interfaceAppModel import User # DYNAMIC PART: Connectors to the Interface from modules.connectors.connectorDbJson import DatabaseConnector +from modules.connectors.connectorPool import get_connector, return_connector # Basic Configurations from modules.shared.configuration import APP_CONFIG @@ -87,6 +88,14 @@ class ComponentObjects: # Update database context self.db.updateContext(self.userId) + + def __del__(self): + """Cleanup method to return connector to pool.""" + if hasattr(self, 'db') and self.db is not None: + try: + return_connector(self.db) + except Exception as e: + logger.error(f"Error returning connector to pool: {e}") logger.debug(f"User context set: userId={self.userId}") @@ -102,11 +111,12 @@ class ComponentObjects: # Ensure the database directory exists os.makedirs(dbHost, exist_ok=True) - self.db = DatabaseConnector( + self.db = get_connector( dbHost=dbHost, dbDatabase=dbDatabase, dbUser=dbUser, - dbPassword=dbPassword + dbPassword=dbPassword, + userId=self.userId if hasattr(self, 'userId') else None ) logger.info("Database initialized successfully") diff --git a/notes/changelog.txt b/notes/changelog.txt index affd7a3d..60af5270 100644 --- a/notes/changelog.txt +++ b/notes/changelog.txt @@ -2,7 +2,9 @@ TODO # System -- sharepoint to fix +- database +- db initialization as separate function to create root mandate, then sysadmin with hashed passwords --> using the connector according to env configuration +- config page for: db reset - document handling centralized - ai handling centralized - neutralizer to activate AND put back placeholders to the returned data diff --git a/test_concurrency_fixes.py b/test_concurrency_fixes.py new file mode 100644 index 00000000..4613b999 --- /dev/null +++ b/test_concurrency_fixes.py @@ -0,0 +1,237 @@ +#!/usr/bin/env python3 +""" +Test script to verify concurrency improvements in DatabaseConnector. 
+This script simulates multiple users accessing the database simultaneously. +""" + +import os +import sys +import time +import threading +import logging +from concurrent.futures import ThreadPoolExecutor, as_completed +import tempfile +import shutil + +# Add the gateway directory to the path +sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) + +from modules.connectors.connectorDbJson import DatabaseConnector +from modules.connectors.connectorPool import get_connector, return_connector + +# Configure logging +logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') +logger = logging.getLogger(__name__) + +def test_concurrent_record_operations(): + """Test concurrent record creation, modification, and deletion.""" + + # Create temporary database directory + temp_dir = tempfile.mkdtemp() + db_host = temp_dir + db_database = "test_db" + + try: + logger.info("Starting concurrency test...") + + def user_operation(user_id: int, operation_count: int = 10): + """Simulate a user performing database operations.""" + try: + # Get a dedicated connector for this user + db = get_connector( + dbHost=db_host, + dbDatabase=db_database, + userId=f"user_{user_id}" + ) + + results = [] + + for i in range(operation_count): + # Create a record + record = { + "id": f"user_{user_id}_record_{i}", + "data": f"User {user_id} data {i}", + "timestamp": time.time() + } + + # Create record + created = db.recordCreate("test_table", record) + results.append(f"Created: {created['id']}") + + # Modify record + record["data"] = f"Modified by user {user_id} - {i}" + modified = db.recordModify("test_table", record["id"], record) + results.append(f"Modified: {modified['id']}") + + # Small delay to increase chance of race conditions + time.sleep(0.001) + + # Return connector to pool + return_connector(db) + + return results + + except Exception as e: + logger.error(f"User {user_id} error: {e}") + return [f"Error: {e}"] + + # Test with multiple concurrent users + num_users = 20 + operations_per_user = 5 + + logger.info(f"Testing with {num_users} users, {operations_per_user} operations each") + + start_time = time.time() + + with ThreadPoolExecutor(max_workers=num_users) as executor: + # Submit all user operations + futures = [ + executor.submit(user_operation, user_id, operations_per_user) + for user_id in range(num_users) + ] + + # Collect results + all_results = [] + for future in as_completed(futures): + try: + result = future.result() + all_results.extend(result) + except Exception as e: + logger.error(f"Future error: {e}") + + end_time = time.time() + + # Verify data integrity + db = get_connector(dbHost=db_host, dbDatabase=db_database, userId="verifier") + + # Check that all records exist and are consistent + all_records = db.getRecordset("test_table") + expected_count = num_users * operations_per_user + + logger.info(f"Expected records: {expected_count}") + logger.info(f"Actual records: {len(all_records)}") + logger.info(f"Test completed in {end_time - start_time:.2f} seconds") + + # Check for data consistency + record_ids = set(record["id"] for record in all_records) + expected_ids = set(f"user_{user_id}_record_{i}" for user_id in range(num_users) for i in range(operations_per_user)) + + missing_ids = expected_ids - record_ids + extra_ids = record_ids - expected_ids + + if missing_ids: + logger.error(f"Missing records: {missing_ids}") + if extra_ids: + logger.error(f"Extra records: {extra_ids}") + + # Check for data corruption (records with wrong user data) + 
corrupted_records = [] + for record in all_records: + record_id = record["id"] + user_id = int(record_id.split("_")[1]) + if f"Modified by user {user_id}" not in record.get("data", ""): + corrupted_records.append(record_id) + + if corrupted_records: + logger.error(f"Corrupted records: {corrupted_records}") + + success = len(missing_ids) == 0 and len(extra_ids) == 0 and len(corrupted_records) == 0 + + if success: + logger.info("✅ Concurrency test PASSED - No data corruption detected") + else: + logger.error("❌ Concurrency test FAILED - Data corruption detected") + + return success + + finally: + # Cleanup + try: + shutil.rmtree(temp_dir) + logger.info("Cleaned up temporary directory") + except Exception as e: + logger.error(f"Error cleaning up: {e}") + +def test_metadata_consistency(): + """Test that metadata operations are atomic.""" + + temp_dir = tempfile.mkdtemp() + db_host = temp_dir + db_database = "test_metadata" + + try: + logger.info("Testing metadata consistency...") + + def concurrent_metadata_operations(user_id: int): + """Perform concurrent metadata operations.""" + db = get_connector( + dbHost=db_host, + dbDatabase=db_database, + userId=f"user_{user_id}" + ) + + try: + # Create multiple records rapidly + for i in range(10): + record = { + "id": f"user_{user_id}_meta_{i}", + "data": f"Metadata test {user_id}-{i}" + } + db.recordCreate("metadata_test", record) + time.sleep(0.001) # Small delay + + return True + except Exception as e: + logger.error(f"Metadata test error for user {user_id}: {e}") + return False + finally: + return_connector(db) + + # Run concurrent metadata operations + with ThreadPoolExecutor(max_workers=10) as executor: + futures = [executor.submit(concurrent_metadata_operations, i) for i in range(10)] + results = [future.result() for future in as_completed(futures)] + + # Verify metadata consistency + db = get_connector(dbHost=db_host, dbDatabase=db_database, userId="verifier") + records = db.getRecordset("metadata_test") + + # Check that metadata is consistent + metadata = db._loadTableMetadata("metadata_test") + expected_count = len(records) + actual_count = len(metadata["recordIds"]) + + logger.info(f"Expected record count: {expected_count}") + logger.info(f"Metadata record count: {actual_count}") + + success = expected_count == actual_count + + if success: + logger.info("✅ Metadata consistency test PASSED") + else: + logger.error("❌ Metadata consistency test FAILED") + + return success + + finally: + try: + shutil.rmtree(temp_dir) + except Exception as e: + logger.error(f"Error cleaning up: {e}") + +if __name__ == "__main__": + logger.info("Starting concurrency tests...") + + # Test 1: Concurrent record operations + test1_passed = test_concurrent_record_operations() + + # Test 2: Metadata consistency + test2_passed = test_metadata_consistency() + + # Overall result + if test1_passed and test2_passed: + logger.info("🎉 All concurrency tests PASSED!") + sys.exit(0) + else: + logger.error("💥 Some concurrency tests FAILED!") + sys.exit(1) From 8fbbd35055be9bb4406e2e85a4ab306d02e2b7db Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Mon, 8 Sep 2025 12:45:03 +0200 Subject: [PATCH 14/27] database attached --- app.py | 7 +- env_dev.env | 69 ++ env_int.env | 48 +- env_prod.env | 48 +- modules/chat/documents/documentGeneration.py | 5 +- modules/chat/handling/handlingTasks.py | 79 +- modules/chat/serviceCenter.py | 6 +- modules/connectors/connectorDbJson.py | 25 +- modules/connectors/connectorDbPostgre.py | 840 ++++++++++++++++++ modules/connectors/connectorPool.py | 
178 ---- modules/interfaces/interfaceAppAccess.py | 49 +- modules/interfaces/interfaceAppModel.py | 89 ++ modules/interfaces/interfaceAppObjects.py | 200 ++--- modules/interfaces/interfaceChatAccess.py | 34 +- modules/interfaces/interfaceChatModel.py | 40 +- modules/interfaces/interfaceChatObjects.py | 828 +++++++++-------- .../interfaces/interfaceComponentAccess.py | 61 +- .../interfaces/interfaceComponentObjects.py | 114 ++- modules/routes/routeDataConnections.py | 17 +- modules/routes/routeSecurityGoogle.py | 12 +- modules/routes/routeSecurityLocal.py | 6 +- modules/routes/routeSecurityMsft.py | 6 +- modules/routes/routeWorkflows.py | 12 +- modules/workflow/managerWorkflow.py | 47 +- notes/changelog.txt | 3 +- notes/releasenotes.txt | 8 - query | 1 + requirements.txt | 5 +- test_concurrency_fixes.py | 237 ----- tests/connectors/__init__.py | 0 tests/connectors/test_connector_tavily.py | 108 --- .../test_graph_search.py | 0 .../test_neutralizer}/apprun.py | 0 .../test_neutralizer}/logs/log_mapping.csv | 0 .../logs/log_replacements.csv | 0 .../test_neutralizer}/neutralizer.py | 0 .../output/neutralized_Case.md | 0 .../output/neutralized_customers.csv | 0 .../output/neutralized_cv_lara_meier.txt | 0 .../output/neutralized_employees.csv | 0 .../output/neutralized_english.txt | 0 .../output/neutralized_example.json | 0 .../output/neutralized_example.xml | 0 .../output/neutralized_french.txt | 0 .../output/neutralized_german.txt | 0 .../neutralized_geschaeftsstrategie.txt | 0 .../output/neutralized_geschäfte.csv | 0 .../output/neutralized_italian.txt | 0 .../output/neutralized_kunden.csv | 0 .../output/neutralized_mitarbeiter.csv | 0 .../output/neutralized_swiss.txt | 0 .../output/neutralized_transactions.csv | 0 .../test_neutralizer}/patterns.py | 0 .../test_neutralizer}/testdata/Case.md | 0 .../test_neutralizer}/testdata/customers.csv | 0 .../testdata/cv_lara_meier.txt | 0 .../test_neutralizer}/testdata/employees.csv | 0 .../test_neutralizer}/testdata/english.txt | 0 .../test_neutralizer}/testdata/example.json | 0 .../test_neutralizer}/testdata/example.xml | 0 .../test_neutralizer}/testdata/french.txt | 0 .../test_neutralizer}/testdata/german.txt | 0 .../testdata/geschaeftsstrategie.txt | 0 .../test_neutralizer}/testdata/geschäfte.csv | 0 .../test_neutralizer}/testdata/italian.txt | 0 .../test_neutralizer}/testdata/kunden.csv | 0 .../testdata/mitarbeiter.csv | 0 .../test_neutralizer}/testdata/swiss.txt | 0 .../testdata/transactions.csv | 0 .../test_neutralizer}/zdocu.html | 0 70 files changed, 1923 insertions(+), 1259 deletions(-) create mode 100644 env_dev.env create mode 100644 modules/connectors/connectorDbPostgre.py delete mode 100644 modules/connectors/connectorPool.py delete mode 100644 notes/releasenotes.txt create mode 100644 query delete mode 100644 test_concurrency_fixes.py delete mode 100644 tests/connectors/__init__.py delete mode 100644 tests/connectors/test_connector_tavily.py rename test_graph_search.py => tests/test_graph_search.py (100%) rename {test_neutralizer => tests/test_neutralizer}/apprun.py (100%) rename {test_neutralizer => tests/test_neutralizer}/logs/log_mapping.csv (100%) rename {test_neutralizer => tests/test_neutralizer}/logs/log_replacements.csv (100%) rename {test_neutralizer => tests/test_neutralizer}/neutralizer.py (100%) rename {test_neutralizer => tests/test_neutralizer}/output/neutralized_Case.md (100%) rename {test_neutralizer => tests/test_neutralizer}/output/neutralized_customers.csv (100%) rename {test_neutralizer => 
tests/test_neutralizer}/output/neutralized_cv_lara_meier.txt (100%) rename {test_neutralizer => tests/test_neutralizer}/output/neutralized_employees.csv (100%) rename {test_neutralizer => tests/test_neutralizer}/output/neutralized_english.txt (100%) rename {test_neutralizer => tests/test_neutralizer}/output/neutralized_example.json (100%) rename {test_neutralizer => tests/test_neutralizer}/output/neutralized_example.xml (100%) rename {test_neutralizer => tests/test_neutralizer}/output/neutralized_french.txt (100%) rename {test_neutralizer => tests/test_neutralizer}/output/neutralized_german.txt (100%) rename {test_neutralizer => tests/test_neutralizer}/output/neutralized_geschaeftsstrategie.txt (100%) rename {test_neutralizer => tests/test_neutralizer}/output/neutralized_geschäfte.csv (100%) rename {test_neutralizer => tests/test_neutralizer}/output/neutralized_italian.txt (100%) rename {test_neutralizer => tests/test_neutralizer}/output/neutralized_kunden.csv (100%) rename {test_neutralizer => tests/test_neutralizer}/output/neutralized_mitarbeiter.csv (100%) rename {test_neutralizer => tests/test_neutralizer}/output/neutralized_swiss.txt (100%) rename {test_neutralizer => tests/test_neutralizer}/output/neutralized_transactions.csv (100%) rename {test_neutralizer => tests/test_neutralizer}/patterns.py (100%) rename {test_neutralizer => tests/test_neutralizer}/testdata/Case.md (100%) rename {test_neutralizer => tests/test_neutralizer}/testdata/customers.csv (100%) rename {test_neutralizer => tests/test_neutralizer}/testdata/cv_lara_meier.txt (100%) rename {test_neutralizer => tests/test_neutralizer}/testdata/employees.csv (100%) rename {test_neutralizer => tests/test_neutralizer}/testdata/english.txt (100%) rename {test_neutralizer => tests/test_neutralizer}/testdata/example.json (100%) rename {test_neutralizer => tests/test_neutralizer}/testdata/example.xml (100%) rename {test_neutralizer => tests/test_neutralizer}/testdata/french.txt (100%) rename {test_neutralizer => tests/test_neutralizer}/testdata/german.txt (100%) rename {test_neutralizer => tests/test_neutralizer}/testdata/geschaeftsstrategie.txt (100%) rename {test_neutralizer => tests/test_neutralizer}/testdata/geschäfte.csv (100%) rename {test_neutralizer => tests/test_neutralizer}/testdata/italian.txt (100%) rename {test_neutralizer => tests/test_neutralizer}/testdata/kunden.csv (100%) rename {test_neutralizer => tests/test_neutralizer}/testdata/mitarbeiter.csv (100%) rename {test_neutralizer => tests/test_neutralizer}/testdata/swiss.txt (100%) rename {test_neutralizer => tests/test_neutralizer}/testdata/transactions.csv (100%) rename {test_neutralizer => tests/test_neutralizer}/zdocu.html (100%) diff --git a/app.py b/app.py index 4740357b..8070919d 100644 --- a/app.py +++ b/app.py @@ -63,10 +63,11 @@ def initLogging(): class EmojiFilter(logging.Filter): def filter(self, record): if isinstance(record.msg, str): - # Remove emojis and other Unicode characters that might cause encoding issues + # Remove only emojis, preserve other Unicode characters like quotes import re - # Remove emojis and other Unicode symbols - record.msg = re.sub(r'[^\x00-\x7F]+', '[EMOJI]', record.msg) + import unicodedata + # Remove emoji characters specifically + record.msg = ''.join(char for char in record.msg if unicodedata.category(char) != 'So' or not (0x1F600 <= ord(char) <= 0x1F64F or 0x1F300 <= ord(char) <= 0x1F5FF or 0x1F680 <= ord(char) <= 0x1F6FF or 0x1F1E0 <= ord(char) <= 0x1F1FF or 0x2600 <= ord(char) <= 0x26FF or 0x2700 <= ord(char) <= 0x27BF)) 
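            # Net effect of the expression above: a character is removed only when its Unicode
            # category is 'So' (Symbol, other) AND its code point falls in one of the listed
            # emoji/symbol blocks; umlauts, typographic quotes and other non-ASCII text pass
            # through unchanged, which is what this change intends.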
return True # Configure handlers based on config diff --git a/env_dev.env b/env_dev.env new file mode 100644 index 00000000..29c59f98 --- /dev/null +++ b/env_dev.env @@ -0,0 +1,69 @@ +# Development Environment Configuration + +# System Configuration +APP_ENV_TYPE = dev +APP_ENV_LABEL = Development Instance Patrick +APP_API_URL = http://localhost:8000 + +# Database Configuration for Application +# JSON File Storage (current) +# DB_APP_HOST=D:/Temp/_powerondb +# DB_APP_DATABASE=app +# DB_APP_USER=dev_user +# DB_APP_PASSWORD_SECRET=dev_password + +# PostgreSQL Storage (new) +DB_APP_HOST=localhost +DB_APP_DATABASE=poweron_app_dev +DB_APP_USER=poweron_dev +DB_APP_PASSWORD_SECRET=dev_password +DB_APP_PORT=5432 + +# Database Configuration Chat +# JSON File Storage (current) +# DB_CHAT_HOST=D:/Temp/_powerondb +# DB_CHAT_DATABASE=chat +# DB_CHAT_USER=dev_user +# DB_CHAT_PASSWORD_SECRET=dev_password + +# PostgreSQL Storage (new) +DB_CHAT_HOST=localhost +DB_CHAT_DATABASE=poweron_chat_dev +DB_CHAT_USER=poweron_dev +DB_CHAT_PASSWORD_SECRET=dev_password +DB_CHAT_PORT=5432 + +# Database Configuration Management +# JSON File Storage (current) +# DB_MANAGEMENT_HOST=D:/Temp/_powerondb +# DB_MANAGEMENT_DATABASE=management +# DB_MANAGEMENT_USER=dev_user +# DB_MANAGEMENT_PASSWORD_SECRET=dev_password + +# PostgreSQL Storage (new) +DB_MANAGEMENT_HOST=localhost +DB_MANAGEMENT_DATABASE=poweron_management_dev +DB_MANAGEMENT_USER=poweron_dev +DB_MANAGEMENT_PASSWORD_SECRET=dev_password +DB_MANAGEMENT_PORT=5432 + +# Security Configuration +APP_JWT_SECRET_SECRET=dev_jwt_secret_token +APP_TOKEN_EXPIRY=300 + +# CORS Configuration +APP_ALLOWED_ORIGINS=http://localhost:8080,https://playground.poweron-center.net + +# Logging configuration +APP_LOGGING_LOG_LEVEL = DEBUG +APP_LOGGING_LOG_FILE = poweron.log +APP_LOGGING_FORMAT = %(asctime)s - %(levelname)s - %(name)s - %(message)s +APP_LOGGING_DATE_FORMAT = %Y-%m-%d %H:%M:%S +APP_LOGGING_CONSOLE_ENABLED = True +APP_LOGGING_FILE_ENABLED = True +APP_LOGGING_ROTATION_SIZE = 10485760 +APP_LOGGING_BACKUP_COUNT = 5 + +# Service Redirects +Service_MSFT_REDIRECT_URI = http://localhost:8000/api/msft/auth/callback +Service_GOOGLE_REDIRECT_URI = http://localhost:8000/api/google/auth/callback \ No newline at end of file diff --git a/env_int.env b/env_int.env index 30d1afd1..18720a6a 100644 --- a/env_int.env +++ b/env_int.env @@ -6,22 +6,46 @@ APP_ENV_LABEL = Integration Instance APP_API_URL = https://gateway-int.poweron-center.net # Database Configuration Application -DB_APP_HOST=/home/_powerondb -DB_APP_DATABASE=app -DB_APP_USER=dev_user -DB_APP_PASSWORD_SECRET=dev_password +# JSON File Storage (current) +# DB_APP_HOST=/home/_powerondb +# DB_APP_DATABASE=app +# DB_APP_USER=dev_user +# DB_APP_PASSWORD_SECRET=dev_password + +# PostgreSQL Storage (new) +DB_APP_HOST=gateway-int-db.poweron-center.net +DB_APP_DATABASE=poweron_app_int +DB_APP_USER=poweron_int +DB_APP_PASSWORD_SECRET=int_password_secure +DB_APP_PORT=5432 # Database Configuration Chat -DB_CHAT_HOST=/home/_powerondb -DB_CHAT_DATABASE=chat -DB_CHAT_USER=dev_user -DB_CHAT_PASSWORD_SECRET=dev_password +# JSON File Storage (current) +# DB_CHAT_HOST=/home/_powerondb +# DB_CHAT_DATABASE=chat +# DB_CHAT_USER=dev_user +# DB_CHAT_PASSWORD_SECRET=dev_password + +# PostgreSQL Storage (new) +DB_CHAT_HOST=gateway-int-db.poweron-center.net +DB_CHAT_DATABASE=poweron_chat_int +DB_CHAT_USER=poweron_int +DB_CHAT_PASSWORD_SECRET=int_password_secure +DB_CHAT_PORT=5432 # Database Configuration Management -DB_MANAGEMENT_HOST=/home/_powerondb 
-DB_MANAGEMENT_DATABASE=management -DB_MANAGEMENT_USER=dev_user -DB_MANAGEMENT_PASSWORD_SECRET=dev_password +# JSON File Storage (current) +# DB_MANAGEMENT_HOST=/home/_powerondb +# DB_MANAGEMENT_DATABASE=management +# DB_MANAGEMENT_USER=dev_user +# DB_MANAGEMENT_PASSWORD_SECRET=dev_password + +# PostgreSQL Storage (new) +DB_MANAGEMENT_HOST=gateway-int-db.poweron-center.net +DB_MANAGEMENT_DATABASE=poweron_management_int +DB_MANAGEMENT_USER=poweron_int +DB_MANAGEMENT_PASSWORD_SECRET=int_password_secure +DB_MANAGEMENT_PORT=5432 # Security Configuration APP_JWT_SECRET_SECRET=dev_jwt_secret_token diff --git a/env_prod.env b/env_prod.env index 3a95b367..66123d6b 100644 --- a/env_prod.env +++ b/env_prod.env @@ -6,22 +6,46 @@ APP_ENV_LABEL = Production Instance APP_API_URL = https://gateway.poweron-center.net # Database Configuration Application -DB_APP_HOST=/home/_powerondb -DB_APP_DATABASE=app -DB_APP_USER=dev_user -DB_APP_PASSWORD_SECRET=dev_password +# JSON File Storage (current) +# DB_APP_HOST=/home/_powerondb +# DB_APP_DATABASE=app +# DB_APP_USER=dev_user +# DB_APP_PASSWORD_SECRET=dev_password + +# PostgreSQL Storage (new) +DB_APP_HOST=gateway-prod-server.postgres.database.azure.com +DB_APP_DATABASE=gateway-app +DB_APP_USER=gzxxmcrdhn +DB_APP_PASSWORD_SECRET=prod_password_very_secure.2025 +DB_APP_PORT=5432 # Database Configuration Chat -DB_CHAT_HOST=/home/_powerondb -DB_CHAT_DATABASE=chat -DB_CHAT_USER=dev_user -DB_CHAT_PASSWORD_SECRET=dev_password +# JSON File Storage (current) +# DB_CHAT_HOST=/home/_powerondb +# DB_CHAT_DATABASE=chat +# DB_CHAT_USER=gzxxmcrdhn +# DB_CHAT_PASSWORD_SECRET=dev_password + +# PostgreSQL Storage (new) +DB_CHAT_HOST=gateway-prod-server.postgres.database.azure.com +DB_CHAT_DATABASE=gateway-chat +DB_CHAT_USER=poweron_prod +DB_CHAT_PASSWORD_SECRET=prod_password_very_secure.2025 +DB_CHAT_PORT=5432 # Database Configuration Management -DB_MANAGEMENT_HOST=/home/_powerondb -DB_MANAGEMENT_DATABASE=management -DB_MANAGEMENT_USER=dev_user -DB_MANAGEMENT_PASSWORD_SECRET=dev_password +# JSON File Storage (current) +# DB_MANAGEMENT_HOST=/home/_powerondb +# DB_MANAGEMENT_DATABASE=gateway-management +# DB_MANAGEMENT_USER=gzxxmcrdhn +# DB_MANAGEMENT_PASSWORD_SECRET=dev_password + +# PostgreSQL Storage (new) +DB_MANAGEMENT_HOST=gateway-prod-server.postgres.database.azure.com +DB_MANAGEMENT_DATABASE=gateway-management +DB_MANAGEMENT_USER=poweron_prod +DB_MANAGEMENT_PASSWORD_SECRET=prod_password_very_secure.2025 +DB_MANAGEMENT_PORT=5432 # Security Configuration APP_JWT_SECRET_SECRET=dev_jwt_secret_token diff --git a/modules/chat/documents/documentGeneration.py b/modules/chat/documents/documentGeneration.py index dfe10918..a5a9ae59 100644 --- a/modules/chat/documents/documentGeneration.py +++ b/modules/chat/documents/documentGeneration.py @@ -66,7 +66,7 @@ class DocumentGenerator: logger.error(f"Error processing single document: {str(e)}") return None - def createDocumentsFromActionResult(self, action_result, action, workflow) -> List[Any]: + def createDocumentsFromActionResult(self, action_result, action, workflow, message_id=None) -> List[Any]: """ Create actual document objects from action result and store them in the system. Returns a list of created document objects with proper workflow context. 
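The new message_id parameter exists so that generated documents can be linked back to the chat message they belong to. A minimal sketch of the intended flow, using the names introduced in this patch series (the interface and generator objects are passed in as parameters here; treat this as illustrative rather than the final call sites):

    def attach_documents(chat_interface, document_generator, message_data, result, action, workflow):
        # Illustrative only: create the message first, then create documents that carry its id.
        message = chat_interface.createMessage(message_data)
        if not message:
            return None
        created = document_generator.createDocumentsFromActionResult(result, action, workflow, message.id)
        if created:
            message.documents = created
            chat_interface.updateMessage(message.id, {"documents": [doc.to_dict() for doc in created]})
        return message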
@@ -103,7 +103,8 @@ class DocumentGenerator: fileName=document_name, mimeType=mime_type, content=content, - base64encoded=False + base64encoded=False, + messageId=message_id ) if document: # Set workflow context on the document if possible diff --git a/modules/chat/handling/handlingTasks.py b/modules/chat/handling/handlingTasks.py index feb7b335..20cfbe13 100644 --- a/modules/chat/handling/handlingTasks.py +++ b/modules/chat/handling/handlingTasks.py @@ -250,7 +250,7 @@ class HandlingTasks: "taskProgress": "pending" } - message = self.chatInterface.createWorkflowMessage(message_data) + message = self.chatInterface.createMessage(message_data) if message: workflow.messages.append(message) @@ -492,7 +492,7 @@ class HandlingTasks: if task_step.userMessage: task_start_message["message"] += f"\n\n💬 {task_step.userMessage}" - message = self.chatInterface.createWorkflowMessage(task_start_message) + message = self.chatInterface.createMessage(task_start_message) if message: workflow.messages.append(message) logger.info(f"Task start message created for task {task_index}") @@ -569,7 +569,7 @@ class HandlingTasks: "actionNumber": action_number }) - message = self.chatInterface.createWorkflowMessage(action_start_message) + message = self.chatInterface.createMessage(action_start_message) if message: workflow.messages.append(message) logger.info(f"Action start message created for action {action_number}") @@ -623,7 +623,7 @@ class HandlingTasks: "taskProgress": "success" } - message = self.chatInterface.createWorkflowMessage(task_completion_message) + message = self.chatInterface.createMessage(task_completion_message) if message: workflow.messages.append(message) logger.info(f"Task completion message created for task {task_index}") @@ -715,7 +715,7 @@ class HandlingTasks: "taskProgress": "retry" } - message = self.chatInterface.createWorkflowMessage(retry_message) + message = self.chatInterface.createMessage(retry_message) if message: workflow.messages.append(message) @@ -768,7 +768,7 @@ class HandlingTasks: } try: - message = self.chatInterface.createWorkflowMessage(message_data) + message = self.chatInterface.createMessage(message_data) if message: workflow.messages.append(message) logger.info(f"Created user-facing retry message for failed task: {task_step.objective}") @@ -822,7 +822,7 @@ class HandlingTasks: } try: - message = self.chatInterface.createWorkflowMessage(message_data) + message = self.chatInterface.createMessage(message_data) if message: workflow.messages.append(message) logger.info(f"Created user-facing error message for failed task: {task_step.objective}") @@ -1030,8 +1030,11 @@ class HandlingTasks: if "execParameters" not in actionData: actionData["execParameters"] = {} + # Use generic field separation based on TaskAction model + simple_fields, object_fields = self.chatInterface._separate_object_fields(TaskAction, actionData) + # Create action in database - createdAction = self.chatInterface.db.recordCreate("taskActions", actionData) + createdAction = self.chatInterface.db.recordCreate(TaskAction, simple_fields) # Convert to TaskAction model return TaskAction( @@ -1095,27 +1098,36 @@ class HandlingTasks: ) result_label = action.execResultLabel - # Process documents from the action result - created_documents = [] - if result.success: - created_documents = self.documentGenerator.createDocumentsFromActionResult(result, action, workflow) - action.setSuccess() - # Extract result text from documents if available, otherwise use empty string - action.result = "" - if result.documents and 
len(result.documents) > 0: - # Try to get text content from the first document - first_doc = result.documents[0] - if isinstance(first_doc.documentData, dict): - action.result = first_doc.documentData.get("result", "") - elif isinstance(first_doc.documentData, str): - action.result = first_doc.documentData - # Preserve the action's execResultLabel for document routing - # Action methods should NOT return resultLabel - this is managed by the action handler - if not action.execResultLabel: - logger.warning(f"Action {action.execMethod}.{action.execAction} has no execResultLabel set") - # Always use the action's execResultLabel for message creation to ensure proper document routing - message_result_label = action.execResultLabel - await self.createActionMessage(action, result, workflow, message_result_label, created_documents, task_step, task_index) + # Process documents from the action result + created_documents = [] + if result.success: + action.setSuccess() + # Extract result text from documents if available, otherwise use empty string + action.result = "" + if result.documents and len(result.documents) > 0: + # Try to get text content from the first document + first_doc = result.documents[0] + if isinstance(first_doc.documentData, dict): + action.result = first_doc.documentData.get("result", "") + elif isinstance(first_doc.documentData, str): + action.result = first_doc.documentData + # Preserve the action's execResultLabel for document routing + # Action methods should NOT return resultLabel - this is managed by the action handler + if not action.execResultLabel: + logger.warning(f"Action {action.execMethod}.{action.execAction} has no execResultLabel set") + # Always use the action's execResultLabel for message creation to ensure proper document routing + message_result_label = action.execResultLabel + + # Create message first to get messageId, then create documents with messageId + message = await self.createActionMessage(action, result, workflow, message_result_label, [], task_step, task_index) + if message: + # Now create documents with the messageId + created_documents = self.documentGenerator.createDocumentsFromActionResult(result, action, workflow, message.id) + # Update the message with the created documents + if created_documents: + message.documents = created_documents + # Update the message in the database + self.chatInterface.updateMessage(message.id, {"documents": [doc.to_dict() for doc in created_documents]}) # Log action results logger.info(f"Action completed successfully") @@ -1138,10 +1150,10 @@ class HandlingTasks: logger.error(f"Action failed: {result.error}") # ⚠️ IMPORTANT: Create error message for failed actions so user can see what went wrong - await self.createActionMessage(action, result, workflow, result_label, [], task_step, task_index) + message = await self.createActionMessage(action, result, workflow, result_label, [], task_step, task_index) # Create database log entry for action failure - self.chatInterface.createWorkflowLog({ + self.chatInterface.createLog({ "workflowId": workflow.id, "message": f"❌ **Task {task_num}**\n\n❌ **Action {action_num}/{total_actions}** failed: {result.error}", "type": "error" @@ -1237,14 +1249,17 @@ class HandlingTasks: logger.info(f"Creating ERROR message: {message_text}") logger.info(f"Message data: {message_data}") - message = self.chatInterface.createWorkflowMessage(message_data) + message = self.chatInterface.createMessage(message_data) if message: workflow.messages.append(message) logger.info(f"Message created: 
{action.execMethod}.{action.execAction}") + return message else: logger.error(f"Failed to create workflow message for action {action.execMethod}.{action.execAction}") + return None except Exception as e: logger.error(f"Error creating action message: {str(e)}") + return None # --- Helper validation methods --- diff --git a/modules/chat/serviceCenter.py b/modules/chat/serviceCenter.py index cef1555b..9160a3ae 100644 --- a/modules/chat/serviceCenter.py +++ b/modules/chat/serviceCenter.py @@ -920,7 +920,7 @@ Please provide a comprehensive summary of this conversation.""" logger.error(f"Error during document access recovery for {document.id}: {str(e)}") return False - def createDocument(self, fileName: str, mimeType: str, content: str, base64encoded: bool = True) -> ChatDocument: + def createDocument(self, fileName: str, mimeType: str, content: str, base64encoded: bool = True, messageId: str = None) -> ChatDocument: """Create document with file in one step - handles file creation internally""" # Convert content to bytes based on base64 flag if base64encoded: @@ -948,6 +948,7 @@ Please provide a comprehensive summary of this conversation.""" # Create document with all file attributes copied document = ChatDocument( id=str(uuid.uuid4()), + messageId=messageId or "", # Use provided messageId or empty string as fallback fileId=file_item.id, fileName=file_info.get("fileName", fileName), fileSize=file_info.get("size", 0), @@ -1060,7 +1061,7 @@ Please provide a comprehensive summary of this conversation.""" logger.error(f"Error executing method {methodName}.{actionName}: {str(e)}") raise - async def processFileIds(self, fileIds: List[str]) -> List[ChatDocument]: + async def processFileIds(self, fileIds: List[str], messageId: str = None) -> List[ChatDocument]: """Process file IDs from existing files and return ChatDocument objects""" documents = [] for fileId in fileIds: @@ -1071,6 +1072,7 @@ Please provide a comprehensive summary of this conversation.""" # Create document directly with all file attributes document = ChatDocument( id=str(uuid.uuid4()), + messageId=messageId or "", # Use provided messageId or empty string as fallback fileId=fileId, fileName=fileInfo.get("fileName", "unknown"), fileSize=fileInfo.get("size", 0), diff --git a/modules/connectors/connectorDbJson.py b/modules/connectors/connectorDbJson.py index 56111ad3..b0b6b586 100644 --- a/modules/connectors/connectorDbJson.py +++ b/modules/connectors/connectorDbJson.py @@ -33,9 +33,11 @@ class DatabaseConnector: # Set userId (default to empty string if None) self.userId = userId if userId is not None else "" - # Ensure the database directory exists + # Initialize database system + self.initDbSystem() + + # Set up database folder path self.dbFolder = os.path.join(self.dbHost, self.dbDatabase) - os.makedirs(self.dbFolder, exist_ok=True) # Cache for loaded data self._tablesCache: Dict[str, List[Dict[str, Any]]] = {} @@ -52,6 +54,17 @@ class DatabaseConnector: logger.debug(f"Context: userId={self.userId}") + def initDbSystem(self): + """Initialize the database system - creates necessary directories and structure.""" + try: + # Ensure the database directory exists + self.dbFolder = os.path.join(self.dbHost, self.dbDatabase) + os.makedirs(self.dbFolder, exist_ok=True) + logger.info(f"Database system initialized: {self.dbFolder}") + except Exception as e: + logger.error(f"Error initializing database system: {e}") + raise + def _initializeSystemTable(self): """Initializes the system table if it doesn't exist yet.""" systemTablePath = 
self._getTablePath(self._systemTableName) @@ -652,8 +665,14 @@ class DatabaseConnector: except Exception as release_error: logger.error(f"Error releasing record lock for {recordPath}: {release_error}") - def getInitialId(self, table: str) -> Optional[str]: + def getInitialId(self, table_or_model) -> Optional[str]: """Returns the initial ID for a table.""" + # Handle both string table names (legacy) and model classes (new) + if isinstance(table_or_model, str): + table = table_or_model + else: + table = table_or_model.__name__ + systemData = self._loadSystemTable() initialId = systemData.get(table) logger.debug(f"Initial ID for table '{table}': {initialId}") diff --git a/modules/connectors/connectorDbPostgre.py b/modules/connectors/connectorDbPostgre.py new file mode 100644 index 00000000..eea519e8 --- /dev/null +++ b/modules/connectors/connectorDbPostgre.py @@ -0,0 +1,840 @@ +import psycopg2 +import psycopg2.extras +import json +import os +import logging +from typing import List, Dict, Any, Optional, Union, get_origin, get_args +from datetime import datetime +import uuid +from pydantic import BaseModel +import threading +import time + +from modules.shared.attributeUtils import to_dict +from modules.shared.timezoneUtils import get_utc_timestamp +from modules.shared.configuration import APP_CONFIG +from modules.interfaces.interfaceAppModel import SystemTable + +logger = logging.getLogger(__name__) + +# No mapping needed - table name = Pydantic model name exactly + +def _get_model_fields(model_class) -> Dict[str, str]: + """Get all fields from Pydantic model and map to SQL types.""" + if not hasattr(model_class, '__fields__'): + return {} + + fields = {} + for field_name, field_info in model_class.__fields__.items(): + field_type = field_info.type_ + + # Check for JSONB fields (Dict, List, or complex types) + if (field_type == dict or + field_type == list or + (hasattr(field_type, '__origin__') and field_type.__origin__ in (dict, list)) or + field_name in ['execParameters', 'expectedDocumentFormats', 'resultDocuments', 'logs', 'messages', 'stats', 'tasks']): + fields[field_name] = 'JSONB' + # Simple type mapping + elif field_type in (str, type(None)) or (get_origin(field_type) is Union and type(None) in get_args(field_type)): + fields[field_name] = 'TEXT' + elif field_type == int: + fields[field_name] = 'INTEGER' + elif field_type == float: + fields[field_name] = 'REAL' + elif field_type == bool: + fields[field_name] = 'BOOLEAN' + else: + fields[field_name] = 'TEXT' # Default to TEXT + + return fields + +# No caching needed with proper database + +class DatabaseConnector: + """ + A connector for PostgreSQL-based data storage. + Provides generic database operations without user/mandate filtering. + Uses PostgreSQL with JSONB columns for flexible data storage. 
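To make the Pydantic-to-SQL type mapping concrete, a hypothetical model (illustrative only, not part of the code base) would resolve roughly as follows:

    from pydantic import BaseModel

    class DemoTask(BaseModel):
        id: str
        retryCount: int
        active: bool
        execParameters: dict

    # _get_model_fields(DemoTask) would yield approximately:
    # {"id": "TEXT", "retryCount": "INTEGER", "active": "BOOLEAN", "execParameters": "JSONB"}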
+ """ + def __init__(self, dbHost: str, dbDatabase: str, dbUser: str = None, dbPassword: str = None, dbPort: int = None, userId: str = None): + # Store the input parameters + self.dbHost = dbHost + self.dbDatabase = dbDatabase + self.dbUser = dbUser + self.dbPassword = dbPassword + self.dbPort = dbPort + + # Set userId (default to empty string if None) + self.userId = userId if userId is not None else "" + + # Initialize database system first (creates database if needed) + self.connection = None + self.initDbSystem() + + # No caching needed with proper database - PostgreSQL handles performance + + # Thread safety + self._lock = threading.Lock() + + # Initialize system table + self._systemTableName = "_system" + self._initializeSystemTable() + + logger.debug(f"Context: userId={self.userId}") + + def initDbSystem(self): + """Initialize the database system - creates database and tables.""" + try: + # Create database if it doesn't exist + self._create_database_if_not_exists() + + # Create tables + self._create_tables() + + # Establish connection to the database + self._connect() + + logger.info("PostgreSQL database system initialized successfully") + except Exception as e: + logger.error(f"FATAL ERROR: Database system initialization failed: {e}") + raise + + def _create_database_if_not_exists(self): + """Create the database if it doesn't exist.""" + try: + # Use the configured user for database creation + conn = psycopg2.connect( + host=self.dbHost, + port=self.dbPort, + database="postgres", + user=self.dbUser, + password=self.dbPassword, + client_encoding='utf8' + ) + conn.autocommit = True + + with conn.cursor() as cursor: + # Check if database exists + cursor.execute("SELECT 1 FROM pg_database WHERE datname = %s", (self.dbDatabase,)) + exists = cursor.fetchone() + + if not exists: + # Create database + cursor.execute(f"CREATE DATABASE {self.dbDatabase}") + logger.info(f"Created database: {self.dbDatabase}") + else: + logger.info(f"Database {self.dbDatabase} already exists") + + conn.close() + + except Exception as e: + logger.error(f"FATAL ERROR: Cannot create database: {e}") + logger.error("Database connection failed - application cannot start") + raise RuntimeError(f"FATAL ERROR: Cannot create database '{self.dbDatabase}': {e}") + + + def _create_tables(self): + """Create only the system table - application tables are created by interfaces.""" + try: + # Use the configured user for table creation + conn = psycopg2.connect( + host=self.dbHost, + port=self.dbPort, + database=self.dbDatabase, + user=self.dbUser, + password=self.dbPassword, + client_encoding='utf8' + ) + conn.autocommit = True + + with conn.cursor() as cursor: + # Create only the system table + cursor.execute(""" + CREATE TABLE IF NOT EXISTS _system ( + id SERIAL PRIMARY KEY, + table_name VARCHAR(255) UNIQUE NOT NULL, + initial_id VARCHAR(255) NOT NULL, + _createdAt TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + _modifiedAt TIMESTAMP DEFAULT CURRENT_TIMESTAMP + ) + """) + + logger.info("System table created successfully") + + conn.close() + + except Exception as e: + logger.error(f"FATAL ERROR: Cannot create system table: {e}") + logger.error("Database system table creation failed - application cannot start") + raise RuntimeError(f"FATAL ERROR: Cannot create system table: {e}") + + def _connect(self): + """Establish connection to PostgreSQL database.""" + try: + # Use configured user for main connection with proper parameter handling + self.connection = psycopg2.connect( + host=self.dbHost, + port=self.dbPort, + 
database=self.dbDatabase, + user=self.dbUser, + password=self.dbPassword, + client_encoding='utf8', + cursor_factory=psycopg2.extras.RealDictCursor + ) + self.connection.autocommit = False # Use transactions + logger.info(f"Connected to PostgreSQL database: {self.dbDatabase}") + except Exception as e: + logger.error(f"Failed to connect to PostgreSQL: {e}") + raise + + def _ensure_connection(self): + """Ensure database connection is alive, reconnect if necessary.""" + try: + if self.connection is None or self.connection.closed: + self._connect() + else: + # Test connection with a simple query + with self.connection.cursor() as cursor: + cursor.execute("SELECT 1") + except Exception as e: + logger.warning(f"Connection lost, reconnecting: {e}") + self._connect() + + def _initializeSystemTable(self): + """Initializes the system table if it doesn't exist yet.""" + try: + # First ensure the system table exists + self._ensureTableExists(SystemTable) + + with self.connection.cursor() as cursor: + # Check if system table has any data + cursor.execute('SELECT COUNT(*) FROM "_system"') + row = cursor.fetchone() + count = row['count'] if row else 0 + + self.connection.commit() + except Exception as e: + logger.error(f"Error initializing system table: {e}") + self.connection.rollback() + raise + + def _loadSystemTable(self) -> Dict[str, str]: + """Loads the system table with the initial IDs.""" + try: + with self.connection.cursor() as cursor: + cursor.execute('SELECT "table_name", "initial_id" FROM "_system"') + rows = cursor.fetchall() + + system_data = {} + for row in rows: + system_data[row['table_name']] = row['initial_id'] + + return system_data + except Exception as e: + logger.error(f"Error loading system table: {e}") + return {} + + def _saveSystemTable(self, data: Dict[str, str]) -> bool: + """Saves the system table with the initial IDs.""" + try: + with self.connection.cursor() as cursor: + # Clear existing data + cursor.execute('DELETE FROM "_system"') + + # Insert new data + for table_name, initial_id in data.items(): + cursor.execute(""" + INSERT INTO "_system" ("table_name", "initial_id", "_modifiedAt") + VALUES (%s, %s, CURRENT_TIMESTAMP) + """, (table_name, initial_id)) + + self.connection.commit() + return True + except Exception as e: + logger.error(f"Error saving system table: {e}") + self.connection.rollback() + return False + + def _ensureSystemTableExists(self) -> bool: + """Ensures the system table exists, creates it if it doesn't.""" + try: + self._ensure_connection() + + with self.connection.cursor() as cursor: + # Check if system table exists + cursor.execute("SELECT COUNT(*) FROM pg_stat_user_tables WHERE relname = %s", (self._systemTableName,)) + exists = cursor.fetchone()['count'] > 0 + + if not exists: + # Create system table + cursor.execute(f""" + CREATE TABLE "{self._systemTableName}" ( + "table_name" VARCHAR(255) PRIMARY KEY, + "initial_id" VARCHAR(255), + "_createdAt" TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + "_modifiedAt" TIMESTAMP DEFAULT CURRENT_TIMESTAMP + ) + """) + logger.info("System table created successfully") + else: + # Check if we need to add missing columns to existing table + cursor.execute(""" + SELECT column_name FROM information_schema.columns + WHERE table_name = %s AND table_schema = 'public' + """, (self._systemTableName,)) + existing_columns = [row['column_name'] for row in cursor.fetchall()] + + if '_modifiedAt' not in existing_columns: + cursor.execute(f'ALTER TABLE "{self._systemTableName}" ADD COLUMN "_modifiedAt" TIMESTAMP DEFAULT 
CURRENT_TIMESTAMP') + logger.info("Added _modifiedAt column to existing system table") + + logger.debug("System table already exists") + + return True + except Exception as e: + logger.error(f"Error ensuring system table exists: {e}") + return False + + def _ensureTableExists(self, model_class: type) -> bool: + """Ensures a table exists, creates it if it doesn't.""" + table = model_class.__name__ + + if table == "SystemTable": + # Handle system table specially - it uses _system as the actual table name + return self._ensureSystemTableExists() + + try: + self._ensure_connection() + + with self.connection.cursor() as cursor: + # Check if table exists by querying information_schema with case-insensitive search + cursor.execute(''' + SELECT COUNT(*) FROM information_schema.tables + WHERE LOWER(table_name) = LOWER(%s) AND table_schema = 'public' + ''', (table,)) + exists = cursor.fetchone()['count'] > 0 + logger.debug(f"Table {table} exists check: {exists}") + + if not exists: + # Create table from Pydantic model + logger.debug(f"Creating table {table} with model {model_class}") + self._create_table_from_model(cursor, table, model_class) + logger.info(f"Created table '{table}' with columns from Pydantic model") + + self.connection.commit() + return True + except Exception as e: + logger.error(f"Error ensuring table {table} exists: {e}") + if hasattr(self, 'connection') and self.connection: + self.connection.rollback() + return False + + + def _create_table_from_model(self, cursor, table: str, model_class: type) -> None: + """Create table with columns matching Pydantic model fields.""" + fields = _get_model_fields(model_class) + logger.debug(f"Creating table {table} with fields: {fields}") + + # Build column definitions with quoted identifiers to preserve exact case + columns = ['"id" VARCHAR(255) PRIMARY KEY'] + for field_name, sql_type in fields.items(): + if field_name != 'id': # Skip id, already defined + columns.append(f'"{field_name}" {sql_type}') + + # Add metadata columns + columns.extend([ + '"_createdAt" TIMESTAMP DEFAULT CURRENT_TIMESTAMP', + '"_modifiedAt" TIMESTAMP DEFAULT CURRENT_TIMESTAMP', + '"_createdBy" VARCHAR(255)', + '"_modifiedBy" VARCHAR(255)' + ]) + + # Create table + sql = f'CREATE TABLE IF NOT EXISTS "{table}" ({", ".join(columns)})' + logger.debug(f"Executing SQL: {sql}") + cursor.execute(sql) + + # Create indexes for foreign keys + for field_name in fields: + if field_name.endswith('Id') and field_name != 'id': + cursor.execute(f'CREATE INDEX IF NOT EXISTS "idx_{table}_{field_name}" ON "{table}" ("{field_name}")') + + + def _save_record(self, cursor, table: str, recordId: str, record: Dict[str, Any], model_class: type) -> None: + """Save record to normalized table with explicit columns.""" + # Get columns from Pydantic model instead of database schema + fields = _get_model_fields(model_class) + columns = ['id'] + [field for field in fields.keys() if field != 'id'] + ['_createdAt', '_createdBy', '_modifiedAt', '_modifiedBy'] + + logger.debug(f"Table {table} columns: {columns}") + logger.debug(f"Record data: {record}") + + if not columns: + logger.error(f"No columns found for table {table}") + return + + # Filter record data to only include columns that exist in the table + filtered_record = {k: v for k, v in record.items() if k in columns} + + # Ensure id is set + filtered_record['id'] = recordId + + # Prepare values in the correct order + values = [] + for col in columns: + value = filtered_record.get(col) + + # Convert timestamp fields to proper PostgreSQL format + 
if col in ['_createdAt', '_modifiedAt'] and value is not None: + if isinstance(value, (int, float)): + # Convert Unix timestamp to PostgreSQL timestamp + from datetime import datetime + value = datetime.fromtimestamp(value) + elif isinstance(value, str): + # If it's already a string, try to parse it + try: + from datetime import datetime + value = datetime.fromtimestamp(float(value)) + except: + pass # Keep as string if parsing fails + + # Convert enum values to their string representation + elif hasattr(value, 'value'): + value = value.value + + # Handle JSONB fields - ensure proper JSON format for PostgreSQL + elif col in fields and fields[col] == 'JSONB' and value is not None: + import json + if isinstance(value, (dict, list)): + # Convert Python objects to JSON string for PostgreSQL JSONB + value = json.dumps(value) + elif isinstance(value, str): + # Validate that it's valid JSON, if not, try to parse and re-serialize + try: + # Test if it's already valid JSON + json.loads(value) + # If successful, keep as is + pass + except (json.JSONDecodeError, TypeError): + # If not valid JSON, convert to JSON string + value = json.dumps(value) + else: + # Convert other types to JSON + value = json.dumps(value) + + values.append(value) + + logger.debug(f"Values to insert: {values}") + + # Build INSERT/UPDATE with quoted identifiers + col_names = ', '.join([f'"{col}"' for col in columns]) + placeholders = ', '.join(['%s'] * len(columns)) + updates = ', '.join([f'"{col}" = EXCLUDED."{col}"' for col in columns[1:] if col not in ['_createdAt', '_createdBy']]) + + sql = f'INSERT INTO "{table}" ({col_names}) VALUES ({placeholders}) ON CONFLICT ("id") DO UPDATE SET {updates}' + logger.debug(f"SQL: {sql}") + + cursor.execute(sql, values) + + def _loadRecord(self, model_class: type, recordId: str) -> Optional[Dict[str, Any]]: + """Loads a single record from the normalized table.""" + table = model_class.__name__ + + try: + if not self._ensureTableExists(model_class): + return None + + with self.connection.cursor() as cursor: + cursor.execute(f'SELECT * FROM "{table}" WHERE "id" = %s', (recordId,)) + row = cursor.fetchone() + if not row: + return None + + # Convert row to dict and handle JSONB fields + record = dict(row) + fields = _get_model_fields(model_class) + + # Parse JSONB fields back to Python objects + for field_name, field_type in fields.items(): + if field_type == 'JSONB' and field_name in record and record[field_name] is not None: + import json + try: + if isinstance(record[field_name], str): + # Parse JSON string back to Python object + record[field_name] = json.loads(record[field_name]) + elif isinstance(record[field_name], (dict, list)): + # Already a Python object, keep as is + pass + else: + # Try to parse as JSON + record[field_name] = json.loads(str(record[field_name])) + except (json.JSONDecodeError, TypeError, ValueError): + # If parsing fails, keep as string + logger.warning(f"Could not parse JSONB field {field_name}, keeping as string: {record[field_name]}") + pass + + return record + except Exception as e: + logger.error(f"Error loading record {recordId} from table {table}: {e}") + return None + + def _saveRecord(self, model_class: type, recordId: str, record: Dict[str, Any]) -> bool: + """Saves a single record to the table.""" + table = model_class.__name__ + + try: + if not self._ensureTableExists(model_class): + return False + + recordId = str(recordId) + if "id" in record and str(record["id"]) != recordId: + raise ValueError(f"Record ID mismatch: {recordId} != {record['id']}") + + 
# Add metadata + currentTime = get_utc_timestamp() + if "_createdAt" not in record: + record["_createdAt"] = currentTime + record["_createdBy"] = self.userId + record["_modifiedAt"] = currentTime + record["_modifiedBy"] = self.userId + + with self.connection.cursor() as cursor: + self._save_record(cursor, table, recordId, record, model_class) + + self.connection.commit() + return True + except Exception as e: + logger.error(f"Error saving record {recordId} to table {table}: {e}") + self.connection.rollback() + return False + + def _loadTable(self, model_class: type) -> List[Dict[str, Any]]: + """Loads all records from a normalized table.""" + table = model_class.__name__ + + if table == self._systemTableName: + return self._loadSystemTable() + + try: + if not self._ensureTableExists(model_class): + return [] + + with self.connection.cursor() as cursor: + cursor.execute(f'SELECT * FROM "{table}" ORDER BY "id"') + records = [dict(row) for row in cursor.fetchall()] + + # Handle JSONB fields for all records + fields = _get_model_fields(model_class) + for record in records: + for field_name, field_type in fields.items(): + if field_type == 'JSONB' and field_name in record and record[field_name] is not None: + import json + try: + if isinstance(record[field_name], str): + # Parse JSON string back to Python object + record[field_name] = json.loads(record[field_name]) + elif isinstance(record[field_name], (dict, list)): + # Already a Python object, keep as is + pass + else: + # Try to parse as JSON + record[field_name] = json.loads(str(record[field_name])) + except (json.JSONDecodeError, TypeError, ValueError): + # If parsing fails, keep as string + logger.warning(f"Could not parse JSONB field {field_name}, keeping as string: {record[field_name]}") + pass + + return records + except Exception as e: + logger.error(f"Error loading table {table}: {e}") + return [] + + + def _applyRecordFilter(self, records: List[Dict[str, Any]], recordFilter: Dict[str, Any] = None) -> List[Dict[str, Any]]: + """Applies a record filter to the records""" + if not recordFilter: + return records + + filteredRecords = [] + + for record in records: + match = True + + for field, value in recordFilter.items(): + # Check if the field exists + if field not in record: + match = False + break + + # Convert both values to strings for comparison + recordValue = str(record[field]) + filterValue = str(value) + + # Direct string comparison + if recordValue != filterValue: + match = False + break + + if match: + filteredRecords.append(record) + + return filteredRecords + + def _registerInitialId(self, table: str, initialId: str) -> bool: + """Registers the initial ID for a table.""" + try: + systemData = self._loadSystemTable() + + if table not in systemData: + systemData[table] = initialId + success = self._saveSystemTable(systemData) + if success: + logger.info(f"Initial ID {initialId} for table {table} registered") + return success + else: + # Check if the existing initial ID still exists in the table + existingInitialId = systemData[table] + records = self.getRecordset(model_class, recordFilter={"id": existingInitialId}) + if not records: + # The initial record no longer exists, update to the new one + systemData[table] = initialId + success = self._saveSystemTable(systemData) + if success: + logger.info(f"Initial ID updated from {existingInitialId} to {initialId} for table {table}") + return success + else: + logger.debug(f"Initial ID {existingInitialId} for table {table} already exists and is valid") + return True + except 
Exception as e:
+            logger.error(f"Error registering the initial ID for table {table}: {e}")
+            return False
+
+    def _removeInitialId(self, table: str) -> bool:
+        """Removes the initial ID for a table from the system table."""
+        try:
+            systemData = self._loadSystemTable()
+
+            if table in systemData:
+                del systemData[table]
+                success = self._saveSystemTable(systemData)
+                if success:
+                    logger.info(f"Initial ID for table {table} removed from system table")
+                return success
+            return True  # If not present, this is not an error
+        except Exception as e:
+            logger.error(f"Error removing initial ID for table {table}: {e}")
+            return False
+
+    def updateContext(self, userId: str) -> None:
+        """Updates the context of the database connector."""
+        if userId is None:
+            raise ValueError("userId must be provided")
+
+        self.userId = userId
+        logger.info(f"Updated database context: userId={self.userId}")
+
+        # No cache to clear - database handles data consistency
+
+    def clearTableCache(self, model_class: type) -> None:
+        """No-op: Database handles data consistency automatically."""
+        # No caching with proper database - PostgreSQL handles consistency
+        pass
+
+    # Public API
+
+    def getTables(self) -> List[str]:
+        """Returns a list of all available tables."""
+        tables = []
+
+        try:
+            with self.connection.cursor() as cursor:
+                # Exclude internal tables such as "_system"; "_" is a LIKE wildcard,
+                # so it must be escaped to match a literal leading underscore
+                cursor.execute("""
+                    SELECT table_name
+                    FROM information_schema.tables
+                    WHERE table_schema = 'public'
+                    AND table_name NOT LIKE '\\_%'
+                    ORDER BY table_name
+                """)
+                rows = cursor.fetchall()
+                tables = [row['table_name'] for row in rows]
+        except Exception as e:
+            logger.error(f"Error reading the database: {e}")
+
+        return tables
+
+    def getFields(self, model_class: type) -> List[str]:
+        """Returns a list of all fields in a table."""
+        data = self._loadTable(model_class)
+
+        if not data:
+            return []
+
+        fields = list(data[0].keys()) if data else []
+
+        return fields
+
+    def getSchema(self, model_class: type, language: str = None) -> Dict[str, Dict[str, Any]]:
+        """Returns a schema object for a table with data types and labels."""
+        data = self._loadTable(model_class)
+
+        schema = {}
+
+        if not data:
+            return schema
+
+        firstRecord = data[0]
+
+        for field, value in firstRecord.items():
+            dataType = type(value).__name__
+            label = field
+
+            schema[field] = {
+                "type": dataType,
+                "label": label
+            }
+
+        return schema
+
+    def getRecordset(self, model_class: type, fieldFilter: List[str] = None, recordFilter: Dict[str, Any] = None) -> List[Dict[str, Any]]:
+        """Returns a list of records from a table, filtered by criteria."""
+        table = model_class.__name__
+
+        # If we have specific record IDs in the filter, only load those records
+        if recordFilter and "id" in recordFilter:
+            recordId = recordFilter["id"]
+            record = self._loadRecord(model_class, recordId)
+            if record:
+                records = [record]
+            else:
+                return []
+        else:
+            # Load all records if no specific ID filter
+            records = self._loadTable(model_class)
+
+        # Apply recordFilter if available
+        if recordFilter:
+            records = self._applyRecordFilter(records, recordFilter)
+
+        # If fieldFilter is available, reduce the fields
+        if fieldFilter and isinstance(fieldFilter, list):
+            result = []
+            for record in records:
+                filteredRecord = {}
+                for field in fieldFilter:
+                    if field in record:
+                        filteredRecord[field] = record[field]
+                result.append(filteredRecord)
+            return result
+
+        return records
+
+    def recordCreate(self, model_class: type, record: Union[Dict[str, Any], BaseModel]) -> Dict[str, Any]:
+        """Creates a new record in a table based on Pydantic model 
class.""" + # If record is a Pydantic model, convert to dict + if isinstance(record, BaseModel): + record = to_dict(record) + elif isinstance(record, dict): + record = record.copy() + else: + raise ValueError("Record must be a Pydantic model or dictionary") + + # Ensure record has an ID + if "id" not in record: + record["id"] = str(uuid.uuid4()) + + # Save record + self._saveRecord(model_class, record["id"], record) + + # Check if this is the first record in the table and register as initial ID + table = model_class.__name__ + existingInitialId = self.getInitialId(model_class) + if existingInitialId is None: + # This is the first record, register it as the initial ID + self._registerInitialId(table, record["id"]) + logger.info(f"Registered initial ID {record['id']} for table {table}") + + return record + + def recordModify(self, model_class: type, recordId: str, record: Union[Dict[str, Any], BaseModel]) -> Dict[str, Any]: + """Modifies an existing record in a table based on Pydantic model class.""" + # Load existing record + existingRecord = self._loadRecord(model_class, recordId) + if not existingRecord: + table = model_class.__name__ + raise ValueError(f"Record {recordId} not found in table {table}") + + # If record is a Pydantic model, convert to dict + if isinstance(record, BaseModel): + record = to_dict(record) + elif isinstance(record, dict): + record = record.copy() + else: + raise ValueError("Record must be a Pydantic model or dictionary") + + # CRITICAL: Ensure we never modify the ID + if "id" in record and str(record["id"]) != recordId: + logger.error(f"Attempted to modify record ID from {recordId} to {record['id']}") + raise ValueError("Cannot modify record ID - it must match the provided recordId") + + # Update existing record with new data + existingRecord.update(record) + + # Save updated record + self._saveRecord(model_class, recordId, existingRecord) + return existingRecord + + def recordDelete(self, model_class: type, recordId: str) -> bool: + """Deletes a record from the table based on Pydantic model class.""" + table = model_class.__name__ + + try: + if not self._ensureTableExists(model_class): + return False + + with self.connection.cursor() as cursor: + # Check if record exists + cursor.execute(f"SELECT id FROM {table} WHERE id = %s", (recordId,)) + if not cursor.fetchone(): + return False + + # Check if it's an initial record + initialId = self.getInitialId(model_class) + if initialId is not None and initialId == recordId: + self._removeInitialId(table) + logger.info(f"Initial ID {recordId} for table {table} has been removed from the system table") + + # Delete the record + cursor.execute(f"DELETE FROM {table} WHERE id = %s", (recordId,)) + + # No cache to update - database handles consistency + + self.connection.commit() + return True + + except Exception as e: + logger.error(f"Error deleting record {recordId} from table {table}: {e}") + self.connection.rollback() + return False + + + def getInitialId(self, model_class: type) -> Optional[str]: + """Returns the initial ID for a table.""" + table = model_class.__name__ + systemData = self._loadSystemTable() + initialId = systemData.get(table) + logger.debug(f"Initial ID for table '{table}': {initialId}") + return initialId + + def close(self): + """Close the database connection.""" + if hasattr(self, 'connection') and self.connection and not self.connection.closed: + self.connection.close() + logger.debug("Database connection closed") + + def __del__(self): + """Cleanup method to close connection.""" + try: + 
self.close() + except Exception: + # Ignore errors during cleanup + pass diff --git a/modules/connectors/connectorPool.py b/modules/connectors/connectorPool.py deleted file mode 100644 index 3137c468..00000000 --- a/modules/connectors/connectorPool.py +++ /dev/null @@ -1,178 +0,0 @@ -import threading -import queue -import time -import logging -from typing import Optional, Dict, Any -from .connectorDbJson import DatabaseConnector - -logger = logging.getLogger(__name__) - -class DatabaseConnectorPool: - """ - A connection pool for DatabaseConnector instances to manage resources efficiently - and ensure proper isolation between users. - """ - - def __init__(self, max_connections: int = 100, max_idle_time: int = 300): - """ - Initialize the connection pool. - - Args: - max_connections: Maximum number of connections in the pool - max_idle_time: Maximum idle time in seconds before connection is considered stale - """ - self.max_connections = max_connections - self.max_idle_time = max_idle_time - self._pool = queue.Queue(maxsize=max_connections) - self._created_connections = 0 - self._lock = threading.Lock() - self._connection_times = {} # Track when connections were created - - def _create_connector(self, dbHost: str, dbDatabase: str, dbUser: str = None, - dbPassword: str = None, userId: str = None) -> DatabaseConnector: - """Create a new DatabaseConnector instance.""" - with self._lock: - if self._created_connections >= self.max_connections: - raise RuntimeError(f"Maximum connections ({self.max_connections}) exceeded") - - self._created_connections += 1 - logger.debug(f"Creating new database connector (total: {self._created_connections})") - - connector = DatabaseConnector( - dbHost=dbHost, - dbDatabase=dbDatabase, - dbUser=dbUser, - dbPassword=dbPassword, - userId=userId - ) - - # Track creation time - connector_id = id(connector) - self._connection_times[connector_id] = time.time() - - return connector - - def get_connector(self, dbHost: str, dbDatabase: str, dbUser: str = None, - dbPassword: str = None, userId: str = None) -> DatabaseConnector: - """ - Get a database connector from the pool or create a new one. - - Args: - dbHost: Database host path - dbDatabase: Database name - dbUser: Database user (optional) - dbPassword: Database password (optional) - userId: User ID for context (optional) - - Returns: - DatabaseConnector instance - """ - try: - # Try to get an existing connector from the pool - connector = self._pool.get_nowait() - - # Check if connector is stale - connector_id = id(connector) - if connector_id in self._connection_times: - idle_time = time.time() - self._connection_times[connector_id] - if idle_time > self.max_idle_time: - logger.debug(f"Connector {connector_id} is stale (idle: {idle_time}s), creating new one") - # Remove stale connector from tracking - if connector_id in self._connection_times: - del self._connection_times[connector_id] - # Create new connector - return self._create_connector(dbHost, dbDatabase, dbUser, dbPassword, userId) - - # Update user context if provided - if userId is not None: - connector.updateContext(userId) - - logger.debug(f"Reusing existing connector {connector_id}") - return connector - - except queue.Empty: - # Pool is empty, create new connector - return self._create_connector(dbHost, dbDatabase, dbUser, dbPassword, userId) - - def return_connector(self, connector: DatabaseConnector) -> None: - """ - Return a connector to the pool for reuse. 
-
-        Args:
-            connector: DatabaseConnector instance to return
-        """
-        try:
-            # Update connection time
-            connector_id = id(connector)
-            self._connection_times[connector_id] = time.time()
-
-            # Try to return to pool
-            self._pool.put_nowait(connector)
-            logger.debug(f"Returned connector {connector_id} to pool")
-
-        except queue.Full:
-            # Pool is full, discard connector
-            logger.debug(f"Pool full, discarding connector {id(connector)}")
-            with self._lock:
-                self._created_connections -= 1
-                if id(connector) in self._connection_times:
-                    del self._connection_times[id(connector)]
-
-    def cleanup_stale_connections(self) -> int:
-        """
-        Clean up stale connections from the pool.
-
-        Returns:
-            Number of connections cleaned up
-        """
-        cleaned = 0
-        current_time = time.time()
-
-        # Check all tracked connections
-        stale_connectors = []
-        for connector_id, creation_time in list(self._connection_times.items()):
-            if current_time - creation_time > self.max_idle_time:
-                stale_connectors.append(connector_id)
-
-        # Remove stale connections from tracking
-        for connector_id in stale_connectors:
-            if connector_id in self._connection_times:
-                del self._connection_times[connector_id]
-                cleaned += 1
-
-        logger.debug(f"Cleaned up {cleaned} stale connections")
-        return cleaned
-
-    def get_stats(self) -> Dict[str, Any]:
-        """Get pool statistics."""
-        with self._lock:
-            return {
-                "max_connections": self.max_connections,
-                "created_connections": self._created_connections,
-                "available_connections": self._pool.qsize(),
-                "tracked_connections": len(self._connection_times)
-            }
-
-# Global pool instance
-_connector_pool = None
-_pool_lock = threading.Lock()
-
-def get_connector_pool() -> DatabaseConnectorPool:
-    """Get the global connector pool instance."""
-    global _connector_pool
-    if _connector_pool is None:
-        with _pool_lock:
-            if _connector_pool is None:
-                _connector_pool = DatabaseConnectorPool()
-    return _connector_pool
-
-def get_connector(dbHost: str, dbDatabase: str, dbUser: str = None,
-                 dbPassword: str = None, userId: str = None) -> DatabaseConnector:
-    """Get a database connector from the global pool."""
-    pool = get_connector_pool()
-    return pool.get_connector(dbHost, dbDatabase, dbUser, dbPassword, userId)
-
-def return_connector(connector: DatabaseConnector) -> None:
-    """Return a database connector to the global pool."""
-    pool = get_connector_pool()
-    pool.return_connector(connector)
diff --git a/modules/interfaces/interfaceAppAccess.py b/modules/interfaces/interfaceAppAccess.py
index 7277d853..f04d8968 100644
--- a/modules/interfaces/interfaceAppAccess.py
+++ b/modules/interfaces/interfaceAppAccess.py
@@ -5,7 +5,7 @@ Access control for the Application.
 import logging
 from typing import Dict, Any, List, Optional
 from datetime import datetime
-from modules.interfaces.interfaceAppModel import UserPrivilege, User
+from modules.interfaces.interfaceAppModel import UserPrivilege, User, UserInDB, AuthEvent, Mandate
 from modules.shared.timezoneUtils import get_utc_now
 
 # Configure logger
@@ -29,28 +29,29 @@ class AppAccess:
 
         self.db = db
 
-    def uam(self, table: str, recordset: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    def uam(self, model_class: type, recordset: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
         """
         Unified user access management function that filters data based on user privileges
         and adds access control attributes.

Args: - table: Name of the table + model_class: Pydantic model class for the table recordset: Recordset to filter based on access rules Returns: Filtered recordset with access control attributes """ filtered_records = [] + table_name = model_class.__name__ # Only SYSADMIN can see mandates - if table == "mandates": + if table_name == "Mandate": if self.privilege == UserPrivilege.SYSADMIN: filtered_records = recordset else: filtered_records = [] # Special handling for users table - elif table == "users": + elif table_name == "UserInDB": if self.privilege == UserPrivilege.SYSADMIN: # SysAdmin sees all users filtered_records = recordset @@ -61,13 +62,13 @@ class AppAccess: # Regular users only see themselves filtered_records = [r for r in recordset if r.get("id") == self.userId] # Special handling for connections table - elif table == "connections": + elif table_name == "UserConnection": if self.privilege == UserPrivilege.SYSADMIN: # SysAdmin sees all connections filtered_records = recordset elif self.privilege == UserPrivilege.ADMIN: # Admin sees connections for users in their mandate - users: List[Dict[str, Any]] = self.db.getRecordset("users", recordFilter={"mandateId": self.mandateId}) + users: List[Dict[str, Any]] = self.db.getRecordset(UserInDB, recordFilter={"mandateId": self.mandateId}) user_ids: List[str] = [str(u["id"]) for u in users] filtered_records = [r for r in recordset if r.get("userId") in user_ids] else: @@ -89,11 +90,11 @@ class AppAccess: record_id = record.get("id") # Set access control flags based on user permissions - if table == "mandates": + if table_name == "Mandate": record["_hideView"] = False # SYSADMIN can view - record["_hideEdit"] = not self.canModify("mandates", record_id) - record["_hideDelete"] = not self.canModify("mandates", record_id) - elif table == "users": + record["_hideEdit"] = not self.canModify(Mandate, record_id) + record["_hideDelete"] = not self.canModify(Mandate, record_id) + elif table_name == "UserInDB": record["_hideView"] = False # Everyone can view users they have access to # SysAdmin can edit/delete any user if self.privilege == UserPrivilege.SYSADMIN: @@ -107,7 +108,7 @@ class AppAccess: else: record["_hideEdit"] = record.get("id") != self.userId record["_hideDelete"] = True # Regular users cannot delete users - elif table == "connections": + elif table_name == "UserConnection": # Everyone can view connections they have access to record["_hideView"] = False # SysAdmin can edit/delete any connection @@ -116,7 +117,7 @@ class AppAccess: record["_hideDelete"] = False # Admin can edit/delete connections for users in their mandate elif self.privilege == UserPrivilege.ADMIN: - users: List[Dict[str, Any]] = self.db.getRecordset("users", recordFilter={"mandateId": self.mandateId}) + users: List[Dict[str, Any]] = self.db.getRecordset(UserInDB, recordFilter={"mandateId": self.mandateId}) user_ids: List[str] = [str(u["id"]) for u in users] record["_hideEdit"] = record.get("userId") not in user_ids record["_hideDelete"] = record.get("userId") not in user_ids @@ -125,35 +126,37 @@ class AppAccess: record["_hideEdit"] = record.get("userId") != self.userId record["_hideDelete"] = record.get("userId") != self.userId - elif table == "auth_events": + elif table_name == "AuthEvent": # Only show auth events for the current user or if admin if self.privilege in [UserPrivilege.SYSADMIN, UserPrivilege.ADMIN]: record["_hideView"] = False else: record["_hideView"] = record.get("userId") != self.userId record["_hideEdit"] = True # Auth events can't be edited - 
record["_hideDelete"] = not self.canModify("auth_events", record_id) + record["_hideDelete"] = not self.canModify(AuthEvent, record_id) else: # Default access control for other tables record["_hideView"] = False - record["_hideEdit"] = not self.canModify(table, record_id) - record["_hideDelete"] = not self.canModify(table, record_id) + record["_hideEdit"] = not self.canModify(model_class, record_id) + record["_hideDelete"] = not self.canModify(model_class, record_id) return filtered_records - def canModify(self, table: str, recordId: Optional[str] = None) -> bool: + def canModify(self, model_class: type, recordId: Optional[str] = None) -> bool: """ Checks if the current user can modify (create/update/delete) records in a table. Args: - table: Name of the table + model_class: Pydantic model class for the table recordId: Optional record ID for specific record check Returns: Boolean indicating permission """ + table_name = model_class.__name__ + # For mandates, only SYSADMIN can modify - if table == "mandates": + if table_name == "Mandate": return self.privilege == UserPrivilege.SYSADMIN # System admins can modify anything else @@ -163,17 +166,17 @@ class AppAccess: # Check specific record permissions if recordId is not None: # Get the record to check ownership - records: List[Dict[str, Any]] = self.db.getRecordset(table, recordFilter={"id": str(recordId)}) + records: List[Dict[str, Any]] = self.db.getRecordset(model_class, recordFilter={"id": str(recordId)}) if not records: return False record = records[0] # Special handling for connections - if table == "connections": + if table_name == "UserConnection": # Admin can modify connections for users in their mandate if self.privilege == UserPrivilege.ADMIN: - users: List[Dict[str, Any]] = self.db.getRecordset("users", recordFilter={"mandateId": self.mandateId}) + users: List[Dict[str, Any]] = self.db.getRecordset(UserInDB, recordFilter={"mandateId": self.mandateId}) user_ids: List[str] = [str(u["id"]) for u in users] return record.get("userId") in user_ids # Users can only modify their own connections diff --git a/modules/interfaces/interfaceAppModel.py b/modules/interfaces/interfaceAppModel.py index ec95aaf5..73d8d146 100644 --- a/modules/interfaces/interfaceAppModel.py +++ b/modules/interfaces/interfaceAppModel.py @@ -353,4 +353,93 @@ class GoogleToken(Token): class MsftToken(Token): """Microsoft OAuth token model""" pass + +class AuthEvent(BaseModel, ModelMixin): + """Data model for authentication events""" + id: str = Field( + default_factory=lambda: str(uuid.uuid4()), + description="Unique ID of the auth event", + frontend_type="text", + frontend_readonly=True, + frontend_required=False + ) + userId: str = Field( + description="ID of the user this event belongs to", + frontend_type="text", + frontend_readonly=True, + frontend_required=True + ) + eventType: str = Field( + description="Type of authentication event (e.g., 'login', 'logout', 'token_refresh')", + frontend_type="text", + frontend_readonly=True, + frontend_required=True + ) + timestamp: float = Field( + default_factory=get_utc_timestamp, + description="Unix timestamp when the event occurred", + frontend_type="datetime", + frontend_readonly=True, + frontend_required=True + ) + ipAddress: Optional[str] = Field( + default=None, + description="IP address from which the event originated", + frontend_type="text", + frontend_readonly=True, + frontend_required=False + ) + userAgent: Optional[str] = Field( + default=None, + description="User agent string from the request", + 
frontend_type="text", + frontend_readonly=True, + frontend_required=False + ) + success: bool = Field( + default=True, + description="Whether the authentication event was successful", + frontend_type="boolean", + frontend_readonly=True, + frontend_required=True + ) + details: Optional[str] = Field( + default=None, + description="Additional details about the event", + frontend_type="text", + frontend_readonly=True, + frontend_required=False + ) + +# Register labels for AuthEvent +register_model_labels( + "AuthEvent", + {"en": "Authentication Event", "fr": "Événement d'authentification"}, + { + "id": {"en": "ID", "fr": "ID"}, + "userId": {"en": "User ID", "fr": "ID utilisateur"}, + "eventType": {"en": "Event Type", "fr": "Type d'événement"}, + "timestamp": {"en": "Timestamp", "fr": "Horodatage"}, + "ipAddress": {"en": "IP Address", "fr": "Adresse IP"}, + "userAgent": {"en": "User Agent", "fr": "Agent utilisateur"}, + "success": {"en": "Success", "fr": "Succès"}, + "details": {"en": "Details", "fr": "Détails"} + } +) + +class SystemTable(BaseModel, ModelMixin): + """Data model for system table entries""" + table_name: str = Field( + description="Name of the table", + frontend_type="text", + frontend_readonly=True, + frontend_required=True + ) + initial_id: Optional[str] = Field( + default=None, + description="Initial ID for the table", + frontend_type="text", + frontend_readonly=True, + frontend_required=False + ) \ No newline at end of file diff --git a/modules/interfaces/interfaceAppObjects.py b/modules/interfaces/interfaceAppObjects.py index 25183fe2..75af8878 100644 --- a/modules/interfaces/interfaceAppObjects.py +++ b/modules/interfaces/interfaceAppObjects.py @@ -12,15 +12,14 @@ import json from passlib.context import CryptContext import uuid -from modules.connectors.connectorDbJson import DatabaseConnector -from modules.connectors.connectorPool import get_connector, return_connector +from modules.connectors.connectorDbPostgre import DatabaseConnector from modules.shared.configuration import APP_CONFIG from modules.shared.timezoneUtils import get_utc_now, get_utc_timestamp from modules.interfaces.interfaceAppAccess import AppAccess from modules.interfaces.interfaceAppModel import ( User, Mandate, UserInDB, UserConnection, AuthAuthority, UserPrivilege, - ConnectionStatus, Token + ConnectionStatus, Token, AuthEvent ) logger = logging.getLogger(__name__) @@ -81,34 +80,36 @@ class AppObjects: self.db.updateContext(self.userId) def __del__(self): - """Cleanup method to return connector to pool.""" + """Cleanup method to close database connection.""" if hasattr(self, 'db') and self.db is not None: try: - return_connector(self.db) + self.db.close() except Exception as e: - logger.error(f"Error returning connector to pool: {e}") + logger.error(f"Error closing database connection: {e}") def _initializeDatabase(self): - """Initializes the database connection using connection pool.""" + """Initializes the database connection directly.""" try: # Get configuration values with defaults dbHost = APP_CONFIG.get("DB_APP_HOST", "_no_config_default_data") dbDatabase = APP_CONFIG.get("DB_APP_DATABASE", "app") dbUser = APP_CONFIG.get("DB_APP_USER") dbPassword = APP_CONFIG.get("DB_APP_PASSWORD_SECRET") + dbPort = int(APP_CONFIG.get("DB_APP_PORT", 5432)) - # Ensure the database directory exists - os.makedirs(dbHost, exist_ok=True) - - # Get connector from pool with user context - self.db = get_connector( + # Create database connector directly + self.db = DatabaseConnector( dbHost=dbHost, dbDatabase=dbDatabase, 
dbUser=dbUser, dbPassword=dbPassword, + dbPort=dbPort, userId=self.userId ) + # Initialize database system + self.db.initDbSystem() + logger.info(f"Database initialized successfully for user {self.userId}") except Exception as e: logger.error(f"Failed to initialize database: {str(e)}") @@ -121,8 +122,8 @@ class AppObjects: def _initRootMandate(self): """Creates the Root mandate if it doesn't exist.""" - existingMandateId = self.getInitialId("mandates") - mandates = self.db.getRecordset("mandates") + existingMandateId = self.getInitialId(Mandate) + mandates = self.db.getRecordset(Mandate) if existingMandateId is None or not mandates: logger.info("Creating Root mandate") rootMandate = Mandate( @@ -130,23 +131,20 @@ class AppObjects: language="en", enabled=True ) - createdMandate = self.db.recordCreate("mandates", rootMandate.to_dict()) + createdMandate = self.db.recordCreate(Mandate, rootMandate) logger.info(f"Root mandate created with ID {createdMandate['id']}") - # Register the initial ID - self.db._registerInitialId("mandates", createdMandate['id']) - # Update mandate context self.mandateId = createdMandate['id'] def _initAdminUser(self): """Creates the Admin user if it doesn't exist.""" - existingUserId = self.getInitialId("users") - users = self.db.getRecordset("users") + existingUserId = self.getInitialId(UserInDB) + users = self.db.getRecordset(UserInDB) if existingUserId is None or not users: logger.info("Creating Admin user") adminUser = UserInDB( - mandateId=self.getInitialId("mandates"), + mandateId=self.getInitialId(Mandate), username="admin", email="admin@example.com", fullName="Administrator", @@ -157,30 +155,27 @@ class AppObjects: hashedPassword=self._getPasswordHash("The 1st Poweron Admin"), # Use a secure password in production! connections=[] ) - createdUser = self.db.recordCreate("users", adminUser.to_dict()) + createdUser = self.db.recordCreate(UserInDB, adminUser) logger.info(f"Admin user created with ID {createdUser['id']}") - # Register the initial ID - self.db._registerInitialId("users", createdUser['id']) - # Update user context self.currentUser = createdUser self.userId = createdUser.get("id") - def _uam(self, table: str, recordset: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + def _uam(self, model_class: type, recordset: List[Dict[str, Any]]) -> List[Dict[str, Any]]: """ Unified user access management function that filters data based on user privileges and adds access control attributes. Args: - table: Name of the table + model_class: Pydantic model class for the table recordset: Recordset to filter based on access rules Returns: Filtered recordset with access control attributes """ # First apply access control - filteredRecords = self.access.uam(table, recordset) + filteredRecords = self.access.uam(model_class, recordset) # Then filter out database-specific fields cleanedRecords = [] @@ -191,26 +186,23 @@ class AppObjects: return cleanedRecords - def _canModify(self, table: str, recordId: Optional[str] = None) -> bool: + def _canModify(self, model_class: type, recordId: Optional[str] = None) -> bool: """ Checks if the current user can modify (create/update/delete) records in a table. 
Args: - table: Name of the table + model_class: Pydantic model class for the table recordId: Optional record ID for specific record check Returns: Boolean indicating permission """ - return self.access.canModify(table, recordId) + return self.access.canModify(model_class, recordId) - def _clearTableCache(self, table: str) -> None: - """Clears the cache for a specific table to ensure fresh data.""" - self.db.clearTableCache(table) - def getInitialId(self, table: str) -> Optional[str]: + def getInitialId(self, model_class: type) -> Optional[str]: """Returns the initial ID for a table.""" - return self.db.getInitialId(table) + return self.db.getInitialId(model_class) def _getPasswordHash(self, password: str) -> str: """Creates a hash for a password.""" @@ -225,8 +217,8 @@ class AppObjects: def getUsersByMandate(self, mandateId: str) -> List[User]: """Returns users for a specific mandate if user has access.""" # Get users for this mandate - users = self.db.getRecordset("users", recordFilter={"mandateId": mandateId}) - filteredUsers = self._uam("users", users) + users = self.db.getRecordset(UserInDB, recordFilter={"mandateId": mandateId}) + filteredUsers = self._uam(UserInDB, users) # Convert to User models return [User.from_dict(user) for user in filteredUsers] @@ -235,7 +227,7 @@ class AppObjects: """Returns a user by username.""" try: # Get users table - users = self.db.getRecordset("users") + users = self.db.getRecordset(UserInDB) if not users: return None @@ -255,7 +247,7 @@ class AppObjects: """Returns a user by ID if user has access.""" try: # Get all users - users = self.db.getRecordset("users") + users = self.db.getRecordset(UserInDB) if not users: return None @@ -263,7 +255,7 @@ class AppObjects: for user_dict in users: if user_dict.get("id") == userId: # Apply access control - filteredUsers = self._uam("users", [user_dict]) + filteredUsers = self._uam(UserInDB, [user_dict]) if filteredUsers: return User.from_dict(filteredUsers[0]) return None @@ -278,7 +270,7 @@ class AppObjects: """Returns all connections for a user.""" try: # Get connections for this user - connections = self.db.getRecordset("connections", recordFilter={"userId": userId}) + connections = self.db.getRecordset(UserConnection, recordFilter={"userId": userId}) # Convert to UserConnection objects result = [] @@ -345,10 +337,8 @@ class AppObjects: ) # Save to connections table - self.db.recordCreate("connections", connection.to_dict()) + self.db.recordCreate(UserConnection, connection) - # Clear cache to ensure fresh data - self._clearTableCache("connections") return connection @@ -360,7 +350,7 @@ class AppObjects: """Remove a connection to an external service""" try: # Get connection - connections = self.db.getRecordset("connections", recordFilter={ + connections = self.db.getRecordset(UserConnection, recordFilter={ "id": connectionId }) @@ -368,10 +358,8 @@ class AppObjects: raise ValueError(f"Connection {connectionId} not found") # Delete connection - self.db.recordDelete("connections", connectionId) + self.db.recordDelete(UserConnection, connectionId) - # Clear cache to ensure fresh data - self._clearTableCache("connections") except Exception as e: logger.error(f"Error removing user connection: {str(e)}") @@ -380,7 +368,6 @@ class AppObjects: def authenticateLocalUser(self, username: str, password: str) -> Optional[User]: """Authenticates a user by username and password using local authentication.""" # Clear the users table from cache and reload it - self._clearTableCache("users") # Get user by username user = 
self.getUserByUsername(username) @@ -397,7 +384,7 @@ class AppObjects: raise ValueError("User does not have local authentication enabled") # Get the full user record with password hash for verification - userRecord = self.db.getRecordset("users", recordFilter={"id": user.id})[0] + userRecord = self.db.getRecordset(UserInDB, recordFilter={"id": user.id})[0] if not userRecord.get("hashedPassword"): raise ValueError("User has no password set") @@ -441,12 +428,10 @@ class AppObjects: ) # Create user record - createdRecord = self.db.recordCreate("users", userData.to_dict()) + createdRecord = self.db.recordCreate(UserInDB, userData) if not createdRecord or not createdRecord.get("id"): raise ValueError("Failed to create user record") - # Clear cache to ensure fresh data - self._clearTableCache("users") # Add external connection if provided if externalId and externalUsername: @@ -459,12 +444,11 @@ class AppObjects: ) # Get created user using the returned ID - createdUser = self.db.getRecordset("users", recordFilter={"id": createdRecord["id"]}) + createdUser = self.db.getRecordset(UserInDB, recordFilter={"id": createdRecord["id"]}) if not createdUser or len(createdUser) == 0: raise ValueError("Failed to retrieve created user") # Clear cache to ensure fresh data (already done above) - # No need for additional cache clearing since _clearTableCache("users") was called return User.from_dict(createdUser[0]) @@ -489,10 +473,8 @@ class AppObjects: updatedUser = User.from_dict(updatedData) # Update user record - self.db.recordModify("users", userId, updatedUser.to_dict()) + self.db.recordModify(UserInDB, userId, updatedUser) - # Clear cache to ensure fresh data - self._clearTableCache("users") # Get updated user updatedUser = self.getUser(userId) @@ -519,20 +501,20 @@ class AppObjects: # Delete user auth events - events = self.db.getRecordset("auth_events", recordFilter={"userId": userId}) + events = self.db.getRecordset(AuthEvent, recordFilter={"userId": userId}) for event in events: - self.db.recordDelete("auth_events", event["id"]) + self.db.recordDelete(AuthEvent, event["id"]) # Delete user tokens - tokens = self.db.getRecordset("tokens", recordFilter={"userId": userId}) + tokens = self.db.getRecordset(Token, recordFilter={"userId": userId}) for token in tokens: - self.db.recordDelete("tokens", token["id"]) + self.db.recordDelete(Token, token["id"]) # Delete user connections - connections = self.db.getRecordset("connections", recordFilter={"userId": userId}) + connections = self.db.getRecordset(UserConnection, recordFilter={"userId": userId}) for conn in connections: - self.db.recordDelete("connections", conn["id"]) + self.db.recordDelete(UserConnection, conn["id"]) logger.info(f"All referenced data for user {userId} has been deleted") @@ -548,19 +530,17 @@ class AppObjects: if not user: raise ValueError(f"User {userId} not found") - if not self._canModify("users", userId): + if not self._canModify(UserInDB, userId): raise PermissionError(f"No permission to delete user {userId}") # Delete all referenced data first self._deleteUserReferencedData(userId) # Delete user record - success = self.db.recordDelete("users", userId) + success = self.db.recordDelete(UserInDB, userId) if not success: raise ValueError(f"Failed to delete user {userId}") - # Clear cache to ensure fresh data - self._clearTableCache("users") logger.info(f"User {userId} successfully deleted") return True @@ -573,17 +553,17 @@ class AppObjects: def getAllMandates(self) -> List[Mandate]: """Returns all mandates based on user access 
level.""" - allMandates = self.db.getRecordset("mandates") - filteredMandates = self._uam("mandates", allMandates) + allMandates = self.db.getRecordset(Mandate) + filteredMandates = self._uam(Mandate, allMandates) return [Mandate.from_dict(mandate) for mandate in filteredMandates] def getMandate(self, mandateId: str) -> Optional[Mandate]: """Returns a mandate by ID if user has access.""" - mandates = self.db.getRecordset("mandates", recordFilter={"id": mandateId}) + mandates = self.db.getRecordset(Mandate, recordFilter={"id": mandateId}) if not mandates: return None - filteredMandates = self._uam("mandates", mandates) + filteredMandates = self._uam(Mandate, mandates) if not filteredMandates: return None @@ -591,7 +571,7 @@ class AppObjects: def createMandate(self, name: str, language: str = "en") -> Mandate: """Creates a new mandate if user has permission.""" - if not self._canModify("mandates"): + if not self._canModify(Mandate): raise PermissionError("No permission to create mandates") # Create mandate data using model @@ -601,12 +581,10 @@ class AppObjects: ) # Create mandate record - createdRecord = self.db.recordCreate("mandates", mandateData.to_dict()) + createdRecord = self.db.recordCreate(Mandate, mandateData) if not createdRecord or not createdRecord.get("id"): raise ValueError("Failed to create mandate record") - # Clear cache to ensure fresh data - self._clearTableCache("mandates") return Mandate.from_dict(createdRecord) @@ -614,7 +592,7 @@ class AppObjects: """Updates a mandate if user has access.""" try: # First check if user has permission to modify mandates - if not self._canModify("mandates", mandateId): + if not self._canModify(Mandate, mandateId): raise PermissionError(f"No permission to update mandate {mandateId}") # Get mandate with access control @@ -628,10 +606,9 @@ class AppObjects: updatedMandate = Mandate.from_dict(updatedData) # Update mandate record - self.db.recordModify("mandates", mandateId, updatedMandate.to_dict()) + self.db.recordModify(Mandate, mandateId, updatedMandate) # Clear cache to ensure fresh data - self._clearTableCache("mandates") # Get updated mandate updatedMandate = self.getMandate(mandateId) @@ -652,7 +629,7 @@ class AppObjects: if not mandate: return False - if not self._canModify("mandates", mandateId): + if not self._canModify(Mandate, mandateId): raise PermissionError(f"No permission to delete mandate {mandateId}") # Check if mandate has users @@ -661,10 +638,9 @@ class AppObjects: raise ValueError(f"Cannot delete mandate {mandateId} with existing users") # Delete mandate - success = self.db.recordDelete("mandates", mandateId) + success = self.db.recordDelete(Mandate, mandateId) # Clear cache to ensure fresh data - self._clearTableCache("mandates") return success @@ -675,11 +651,11 @@ class AppObjects: def _getInitialUser(self) -> Optional[Dict[str, Any]]: """Get the initial user record directly from database without access control.""" try: - initialUserId = self.db.getInitialId("users") + initialUserId = self.getInitialId(UserInDB) if not initialUserId: return None - users = self.db.getRecordset("users", recordFilter={"id": initialUserId}) + users = self.db.getRecordset(UserInDB, recordFilter={"id": initialUserId}) return users[0] if users else None except Exception as e: logger.error(f"Error getting initial user: {str(e)}") @@ -742,7 +718,7 @@ class AppObjects: # If replace_existing is True, delete old access tokens for this user and authority first if replace_existing: try: - old_tokens = self.db.getRecordset("tokens", recordFilter={ + 
old_tokens = self.db.getRecordset(Token, recordFilter={ "userId": self.currentUser.id, "authority": token.authority, "connectionId": None # Ensure we only delete access tokens @@ -750,7 +726,7 @@ class AppObjects: deleted_count = 0 for old_token in old_tokens: if old_token["id"] != token.id: # Don't delete the new token if it already exists - self.db.recordDelete("tokens", old_token["id"]) + self.db.recordDelete(Token, old_token["id"]) deleted_count += 1 logger.debug(f"Deleted old access token {old_token['id']} for user {self.currentUser.id} and authority {token.authority}") @@ -767,10 +743,8 @@ class AppObjects: token_dict["userId"] = self.currentUser.id # Save to database - self.db.recordCreate("tokens", token_dict) + self.db.recordCreate(Token, token_dict) - # Clear cache to ensure fresh data - self._clearTableCache("tokens") except Exception as e: logger.error(f"Error saving access token: {str(e)}") @@ -799,13 +773,13 @@ class AppObjects: # If replace_existing is True, delete old tokens for this connectionId first if replace_existing: try: - old_tokens = self.db.getRecordset("tokens", recordFilter={ + old_tokens = self.db.getRecordset(Token, recordFilter={ "connectionId": token.connectionId }) deleted_count = 0 for old_token in old_tokens: if old_token["id"] != token.id: # Don't delete the new token if it already exists - self.db.recordDelete("tokens", old_token["id"]) + self.db.recordDelete(Token, old_token["id"]) deleted_count += 1 logger.debug(f"Deleted old token {old_token['id']} for connectionId {token.connectionId}") @@ -822,10 +796,8 @@ class AppObjects: token_dict["userId"] = self.currentUser.id # Save to database - self.db.recordCreate("tokens", token_dict) + self.db.recordCreate(Token, token_dict) - # Clear cache to ensure fresh data - self._clearTableCache("tokens") except Exception as e: logger.error(f"Error saving connection token: {str(e)}") @@ -839,7 +811,7 @@ class AppObjects: raise ValueError("No valid user context available for token retrieval") # Get access tokens for this user and authority (must NOT have connectionId) - tokens = self.db.getRecordset("tokens", recordFilter={ + tokens = self.db.getRecordset(Token, recordFilter={ "userId": self.currentUser.id, "authority": authority, "connectionId": None # Ensure we only get access tokens @@ -888,7 +860,7 @@ class AppObjects: # Get token for this specific connection # Query for specific connection - tokens = self.db.getRecordset("tokens", recordFilter={ + tokens = self.db.getRecordset(Token, recordFilter={ "connectionId": connectionId }) @@ -899,7 +871,7 @@ class AppObjects: logger.debug(f"getConnectionToken: Token {i}: id={token.get('id')}, expiresAt={token.get('expiresAt')}, createdAt={token.get('createdAt')}") else: # Debug: Check if there are any tokens at all in the database - all_tokens = self.db.getRecordset("tokens", recordFilter={}) + all_tokens = self.db.getRecordset(Token, recordFilter={}) logger.debug(f"getConnectionToken: No tokens found for connectionId {connectionId}. 
Total tokens in database: {len(all_tokens)}") if all_tokens: logger.debug(f"getConnectionToken: Sample tokens: {[{'id': t.get('id'), 'connectionId': t.get('connectionId'), 'authority': t.get('authority')} for t in all_tokens[:3]]}") @@ -956,7 +928,7 @@ class AppObjects: raise ValueError("No valid user context available for token deletion") # Get access tokens to delete (must NOT have connectionId) - tokens = self.db.getRecordset("tokens", recordFilter={ + tokens = self.db.getRecordset(Token, recordFilter={ "userId": self.currentUser.id, "authority": authority, "connectionId": None # Ensure we only delete access tokens @@ -964,10 +936,8 @@ class AppObjects: # Delete each token for token in tokens: - self.db.recordDelete("tokens", token["id"]) + self.db.recordDelete(Token, token["id"]) - # Clear cache to ensure fresh data - self._clearTableCache("tokens") except Exception as e: logger.error(f"Error deleting access token: {str(e)}") @@ -981,16 +951,14 @@ class AppObjects: raise ValueError("connectionId is required for deleteConnectionTokenByConnectionId") # Get connection tokens to delete - tokens = self.db.getRecordset("tokens", recordFilter={ + tokens = self.db.getRecordset(Token, recordFilter={ "connectionId": connectionId }) # Delete each token for token in tokens: - self.db.recordDelete("tokens", token["id"]) + self.db.recordDelete(Token, token["id"]) - # Clear cache to ensure fresh data - self._clearTableCache("tokens") except Exception as e: logger.error(f"Error deleting connection token for connectionId {connectionId}: {str(e)}") @@ -1005,17 +973,16 @@ class AppObjects: cleaned_count = 0 # Get all tokens - all_tokens = self.db.getRecordset("tokens", recordFilter={}) + all_tokens = self.db.getRecordset(Token, recordFilter={}) for token_data in all_tokens: if token_data.get("expiresAt") and token_data.get("expiresAt") < current_time: # Token is expired, delete it - self.db.recordDelete("tokens", token_data["id"]) + self.db.recordDelete(Token, token_data["id"]) cleaned_count += 1 # Clear cache to ensure fresh data if cleaned_count > 0: - self._clearTableCache("tokens") logger.info(f"Cleaned up {cleaned_count} expired tokens") return cleaned_count @@ -1072,16 +1039,23 @@ def getRootUser() -> User: tempInterface = AppObjects() # Get the initial user directly - initialUserId = tempInterface.db.getInitialId("users") + initialUserId = tempInterface.getInitialId(UserInDB) if not initialUserId: raise ValueError("No initial user ID found in database") - users = tempInterface.db.getRecordset("users", recordFilter={"id": initialUserId}) + users = tempInterface.db.getRecordset(UserInDB, recordFilter={"id": initialUserId}) if not users: raise ValueError("Initial user not found in database") - + + logger.debug(f"Retrieved user data: {users[0]}") + # Convert to User model and return the model instance - return User.from_dict(users[0]) + user_data = users[0] + logger.debug(f"User data keys: {list(user_data.keys())}") + logger.debug(f"User id: {user_data.get('id')}") + logger.debug(f"User mandateId: {user_data.get('mandateId')}") + + return User.parse_obj(user_data) except Exception as e: logger.error(f"Error getting root user: {str(e)}") diff --git a/modules/interfaces/interfaceChatAccess.py b/modules/interfaces/interfaceChatAccess.py index 22961874..0b4055dc 100644 --- a/modules/interfaces/interfaceChatAccess.py +++ b/modules/interfaces/interfaceChatAccess.py @@ -5,6 +5,7 @@ Handles user access management and permission checks. 
from typing import Dict, Any, List, Optional from modules.interfaces.interfaceAppModel import User, UserPrivilege +from modules.interfaces.interfaceChatModel import ChatWorkflow, ChatMessage, ChatLog, ChatStat, ChatDocument class ChatAccess: """ @@ -23,19 +24,20 @@ class ChatAccess: self.db = db - def uam(self, table: str, recordset: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + def uam(self, model_class: type, recordset: List[Dict[str, Any]]) -> List[Dict[str, Any]]: """ Unified user access management function that filters data based on user privileges and adds access control attributes. Args: - table: Name of the table + model_class: Pydantic model class for the table recordset: Recordset to filter based on access rules Returns: Filtered recordset with access control attributes """ userPrivilege = self.currentUser.privilege + table_name = model_class.__name__ filtered_records = [] # Apply filtering based on privilege @@ -54,32 +56,32 @@ class ChatAccess: record_id = record.get("id") # Set access control flags based on user permissions - if table == "workflows": + if table_name == "ChatWorkflow": record["_hideView"] = False # Everyone can view - record["_hideEdit"] = not self.canModify("workflows", record_id) - record["_hideDelete"] = not self.canModify("workflows", record_id) - elif table == "workflowMessages": + record["_hideEdit"] = not self.canModify(ChatWorkflow, record_id) + record["_hideDelete"] = not self.canModify(ChatWorkflow, record_id) + elif table_name == "ChatMessage": record["_hideView"] = False # Everyone can view - record["_hideEdit"] = not self.canModify("workflows", record.get("workflowId")) - record["_hideDelete"] = not self.canModify("workflows", record.get("workflowId")) - elif table == "workflowLogs": + record["_hideEdit"] = not self.canModify(ChatWorkflow, record.get("workflowId")) + record["_hideDelete"] = not self.canModify(ChatWorkflow, record.get("workflowId")) + elif table_name == "ChatLog": record["_hideView"] = False # Everyone can view - record["_hideEdit"] = not self.canModify("workflows", record.get("workflowId")) - record["_hideDelete"] = not self.canModify("workflows", record.get("workflowId")) + record["_hideEdit"] = not self.canModify(ChatWorkflow, record.get("workflowId")) + record["_hideDelete"] = not self.canModify(ChatWorkflow, record.get("workflowId")) else: # Default access control for other tables record["_hideView"] = False - record["_hideEdit"] = not self.canModify(table, record_id) - record["_hideDelete"] = not self.canModify(table, record_id) + record["_hideEdit"] = not self.canModify(model_class, record_id) + record["_hideDelete"] = not self.canModify(model_class, record_id) return filtered_records - def canModify(self, table: str, recordId: Optional[str] = None) -> bool: + def canModify(self, model_class: type, recordId: Optional[str] = None) -> bool: """ Checks if the current user can modify (create/update/delete) records in a table. 
Args: - table: Name of the table + model_class: Pydantic model class for the table recordId: Optional record ID for specific record check Returns: @@ -94,7 +96,7 @@ class ChatAccess: # For regular users and admins, check specific cases if recordId is not None: # Get the record to check ownership - records: List[Dict[str, Any]] = self.db.getRecordset(table, recordFilter={"id": recordId}) + records: List[Dict[str, Any]] = self.db.getRecordset(model_class, recordFilter={"id": recordId}) if not records: return False diff --git a/modules/interfaces/interfaceChatModel.py b/modules/interfaces/interfaceChatModel.py index 629b59ee..5633e8f8 100644 --- a/modules/interfaces/interfaceChatModel.py +++ b/modules/interfaces/interfaceChatModel.py @@ -174,6 +174,7 @@ register_model_labels( class ChatDocument(BaseModel, ModelMixin): """Data model for a chat document""" id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Primary key") + messageId: str = Field(description="Foreign key to message") fileId: str = Field(description="Foreign key to file") # Direct file attributes (copied from file object) @@ -197,6 +198,7 @@ register_model_labels( {"en": "Chat Document", "fr": "Document de chat"}, { "id": {"en": "ID", "fr": "ID"}, + "messageId": {"en": "Message ID", "fr": "ID du message"}, "fileId": {"en": "File ID", "fr": "ID du fichier"}, "roundNumber": {"en": "Round Number", "fr": "Numéro de tour"}, "taskNumber": {"en": "Task Number", "fr": "Numéro de tâche"}, @@ -400,6 +402,8 @@ register_model_labels( class ChatStat(BaseModel, ModelMixin): """Data model for chat statistics - ONLY statistics, not workflow progress""" id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Primary key") + workflowId: Optional[str] = Field(None, description="Foreign key to workflow (for workflow stats)") + messageId: Optional[str] = Field(None, description="Foreign key to message (for message stats)") processingTime: Optional[float] = Field(None, description="Processing time in seconds") tokenCount: Optional[int] = Field(None, description="Number of tokens processed") bytesSent: Optional[int] = Field(None, description="Number of bytes sent") @@ -413,6 +417,8 @@ register_model_labels( {"en": "Chat Statistics", "fr": "Statistiques de chat"}, { "id": {"en": "ID", "fr": "ID"}, + "workflowId": {"en": "Workflow ID", "fr": "ID du workflow"}, + "messageId": {"en": "Message ID", "fr": "ID du message"}, "processingTime": {"en": "Processing Time", "fr": "Temps de traitement"}, "tokenCount": {"en": "Token Count", "fr": "Nombre de tokens"}, "bytesSent": {"en": "Bytes Sent", "fr": "Octets envoyés"}, @@ -650,8 +656,8 @@ register_model_labels( class TaskStep(BaseModel, ModelMixin): id: str objective: str - dependencies: Optional[list[str]] = [] - success_criteria: Optional[list[str]] = [] + dependencies: Optional[list[str]] = Field(default_factory=list) + success_criteria: Optional[list[str]] = Field(default_factory=list) estimated_complexity: Optional[str] = None userMessage: Optional[str] = Field(None, description="User-friendly message in user's language") @@ -733,23 +739,23 @@ class TaskContext(BaseModel, ModelMixin): # Available resources available_documents: Optional[str] = "No documents available" - available_connections: Optional[list[str]] = [] + available_connections: Optional[list[str]] = Field(default_factory=list) # Previous execution state - previous_results: Optional[list[str]] = [] + previous_results: Optional[list[str]] = Field(default_factory=list) previous_handover: Optional[TaskHandover] = 
None # Current execution state - improvements: Optional[list[str]] = [] + improvements: Optional[list[str]] = Field(default_factory=list) retry_count: Optional[int] = 0 - previous_action_results: Optional[list] = [] + previous_action_results: Optional[list] = Field(default_factory=list) previous_review_result: Optional[dict] = None is_regeneration: Optional[bool] = False # Failure analysis - failure_patterns: Optional[list[str]] = [] - failed_actions: Optional[list] = [] - successful_actions: Optional[list] = [] + failure_patterns: Optional[list[str]] = Field(default_factory=list) + failed_actions: Optional[list] = Field(default_factory=list) + successful_actions: Optional[list] = Field(default_factory=list) # Criteria progress tracking for retries criteria_progress: Optional[dict] = None @@ -771,20 +777,20 @@ class TaskContext(BaseModel, ModelMixin): class ReviewContext(BaseModel, ModelMixin): task_step: TaskStep - task_actions: Optional[list] = [] - action_results: Optional[list] = [] - step_result: Optional[dict] = {} + task_actions: Optional[list] = Field(default_factory=list) + action_results: Optional[list] = Field(default_factory=list) + step_result: Optional[dict] = Field(default_factory=dict) workflow_id: Optional[str] = None - previous_results: Optional[list[str]] = [] + previous_results: Optional[list[str]] = Field(default_factory=list) class ReviewResult(BaseModel, ModelMixin): status: str reason: Optional[str] = None - improvements: Optional[list[str]] = [] + improvements: Optional[list[str]] = Field(default_factory=list) quality_score: Optional[int] = 5 - missing_outputs: Optional[list[str]] = [] - met_criteria: Optional[list[str]] = [] - unmet_criteria: Optional[list[str]] = [] + missing_outputs: Optional[list[str]] = Field(default_factory=list) + met_criteria: Optional[list[str]] = Field(default_factory=list) + unmet_criteria: Optional[list[str]] = Field(default_factory=list) confidence: Optional[float] = 0.5 userMessage: Optional[str] = Field(None, description="User-friendly message in user's language") diff --git a/modules/interfaces/interfaceChatObjects.py b/modules/interfaces/interfaceChatObjects.py index 239e76bd..8aca736a 100644 --- a/modules/interfaces/interfaceChatObjects.py +++ b/modules/interfaces/interfaceChatObjects.py @@ -7,7 +7,7 @@ import os import logging import uuid from datetime import datetime, UTC, timezone -from typing import Dict, Any, List, Optional, Union +from typing import Dict, Any, List, Optional, Union, get_origin, get_args import asyncio @@ -18,8 +18,7 @@ from modules.interfaces.interfaceChatModel import ( from modules.interfaces.interfaceAppModel import User # DYNAMIC PART: Connectors to the Interface -from modules.connectors.connectorDbJson import DatabaseConnector -from modules.connectors.connectorPool import get_connector, return_connector +from modules.connectors.connectorDbPostgre import DatabaseConnector from modules.shared.timezoneUtils import get_utc_timestamp # Basic Configurations @@ -53,6 +52,55 @@ class ChatObjects: if currentUser: self.setUserContext(currentUser) + # ===== Generic Utility Methods ===== + + def _is_object_field(self, field_type) -> bool: + """Check if a field type represents a complex object (not a simple type).""" + # Simple scalar types + if field_type in (str, int, float, bool, type(None)): + return False + + # Everything else is an object + return True + + def _separate_object_fields(self, model_class, data: Dict[str, Any]) -> tuple[Dict[str, Any], Dict[str, Any]]: + """Separate simple fields from object fields 
based on Pydantic model structure.""" + simple_fields = {} + object_fields = {} + + # Get field information from the Pydantic model + model_fields = {} + if hasattr(model_class, '__fields__'): + model_fields = model_class.__fields__ + + for field_name, value in data.items(): + # Check if this field should be stored as JSONB in the database + if field_name in model_fields: + field_info = model_fields[field_name] + field_type = field_info.type_ + + # Check if this is a JSONB field (Dict, List, or complex types) + if (field_type == dict or + field_type == list or + (hasattr(field_type, '__origin__') and field_type.__origin__ in (dict, list)) or + field_name in ['execParameters', 'expectedDocumentFormats', 'resultDocuments']): + # Store as JSONB - include in simple_fields for database storage + simple_fields[field_name] = value + elif isinstance(value, (str, int, float, bool, type(None))): + # Simple scalar types + simple_fields[field_name] = value + else: + # Complex objects that should be filtered out + object_fields[field_name] = value + else: + # Field not in model - treat as scalar if simple, otherwise filter out + if isinstance(value, (str, int, float, bool, type(None))): + simple_fields[field_name] = value + else: + object_fields[field_name] = value + + return simple_fields, object_fields + def _initializeServices(self): pass @@ -75,35 +123,38 @@ class ChatObjects: self.db.updateContext(self.userId) def __del__(self): - """Cleanup method to return connector to pool.""" + """Cleanup method to close database connection.""" if hasattr(self, 'db') and self.db is not None: try: - return_connector(self.db) + self.db.close() except Exception as e: - logger.error(f"Error returning connector to pool: {e}") + logger.error(f"Error closing database connection: {e}") logger.debug(f"User context set: userId={self.userId}, mandateId={self.mandateId}") def _initializeDatabase(self): - """Initializes the database connection.""" + """Initializes the database connection directly.""" try: # Get configuration values with defaults dbHost = APP_CONFIG.get("DB_CHAT_HOST", "_no_config_default_data") dbDatabase = APP_CONFIG.get("DB_CHAT_DATABASE", "chat") dbUser = APP_CONFIG.get("DB_CHAT_USER") dbPassword = APP_CONFIG.get("DB_CHAT_PASSWORD_SECRET") + dbPort = int(APP_CONFIG.get("DB_CHAT_PORT", 5432)) - # Ensure the database directory exists - os.makedirs(dbHost, exist_ok=True) - - self.db = get_connector( + # Create database connector directly + self.db = DatabaseConnector( dbHost=dbHost, dbDatabase=dbDatabase, dbUser=dbUser, dbPassword=dbPassword, + dbPort=dbPort, userId=self.userId ) + # Initialize database system + self.db.initDbSystem() + logger.info("Database initialized successfully") except Exception as e: logger.error(f"Failed to initialize database: {str(e)}") @@ -113,10 +164,10 @@ class ChatObjects: """Initializes standard records in the database if they don't exist.""" pass - def _uam(self, table: str, recordset: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + def _uam(self, model_class: type, recordset: List[Dict[str, Any]]) -> List[Dict[str, Any]]: """Delegate to access control module.""" # First apply access control - filteredRecords = self.access.uam(table, recordset) + filteredRecords = self.access.uam(model_class, recordset) # Then filter out database-specific fields cleanedRecords = [] @@ -127,56 +178,58 @@ class ChatObjects: return cleanedRecords - def _canModify(self, table: str, recordId: Optional[str] = None) -> bool: + def _canModify(self, model_class: type, recordId: Optional[str] = 
None) -> bool: """Delegate to access control module.""" - return self.access.canModify(table, recordId) + return self.access.canModify(model_class, recordId) - def _clearTableCache(self, table: str) -> None: - """Clears the cache for a specific table to ensure fresh data.""" - self.db.clearTableCache(table) # Utilities - def getInitialId(self, table: str) -> Optional[str]: + def getInitialId(self, model_class: type) -> Optional[str]: """Returns the initial ID for a table.""" - return self.db.getInitialId(table) + return self.db.getInitialId(model_class) # Workflow methods - - def getAllWorkflows(self) -> List[Dict[str, Any]]: + + def getWorkflows(self) -> List[Dict[str, Any]]: """Returns workflows based on user access level.""" - allWorkflows = self.db.getRecordset("workflows") - return self._uam("workflows", allWorkflows) + allWorkflows = self.db.getRecordset(ChatWorkflow) + return self._uam(ChatWorkflow, allWorkflows) def getWorkflow(self, workflowId: str) -> Optional[ChatWorkflow]: """Returns a workflow by ID if user has access.""" - workflows = self.db.getRecordset("workflows", recordFilter={"id": workflowId}) + workflows = self.db.getRecordset(ChatWorkflow, recordFilter={"id": workflowId}) if not workflows: return None - filteredWorkflows = self._uam("workflows", workflows) + filteredWorkflows = self._uam(ChatWorkflow, workflows) if not filteredWorkflows: return None workflow = filteredWorkflows[0] try: + # Load related data from normalized tables + logs = self.getLogs(workflowId) + messages = self.getMessages(workflowId) + stats = self.getWorkflowStats(workflowId) + # Validate workflow data against ChatWorkflow model return ChatWorkflow( id=workflow["id"], status=workflow.get("status", "running"), name=workflow.get("name"), - currentRound=workflow.get("currentRound", 0), # Default value + currentRound=workflow.get("currentRound", 0), currentTask=workflow.get("currentTask", 0), currentAction=workflow.get("currentAction", 0), totalTasks=workflow.get("totalTasks", 0), totalActions=workflow.get("totalActions", 0), lastActivity=workflow.get("lastActivity", get_utc_timestamp()), startedAt=workflow.get("startedAt", get_utc_timestamp()), - logs=[ChatLog(**log) for log in workflow.get("logs", [])], - messages=[ChatMessage(**msg) for msg in workflow.get("messages", [])], - stats=ChatStat(**workflow.get("stats", {})) if workflow.get("stats") else None, + logs=logs, + messages=messages, + stats=stats, mandateId=workflow.get("mandateId", self.currentUser.mandateId) ) except Exception as e: @@ -185,7 +238,7 @@ class ChatObjects: def createWorkflow(self, workflowData: Dict[str, Any]) -> ChatWorkflow: """Creates a new workflow if user has permission.""" - if not self._canModify("workflows"): + if not self._canModify(ChatWorkflow): raise PermissionError("No permission to create workflows") # Set timestamp if not present @@ -195,19 +248,20 @@ class ChatObjects: if "lastActivity" not in workflowData: workflowData["lastActivity"] = currentTime + + # Use generic field separation based on ChatWorkflow model + simple_fields, object_fields = self._separate_object_fields(ChatWorkflow, workflowData) # Create workflow in database - created = self.db.recordCreate("workflows", workflowData) + created = self.db.recordCreate(ChatWorkflow, simple_fields) - # Clear cache to ensure fresh data - self._clearTableCache("workflows") - # Convert to ChatWorkflow model + # Convert to ChatWorkflow model (empty related data for new workflow) return ChatWorkflow( id=created["id"], status=created.get("status", "running"), 
name=created.get("name"), - currentRound=created.get("currentRound", 0), # Default value + currentRound=created.get("currentRound", 0), currentTask=created.get("currentTask", 0), currentAction=created.get("currentAction", 0), totalTasks=created.get("totalTasks", 0), @@ -216,7 +270,7 @@ class ChatObjects: startedAt=created.get("startedAt", currentTime), logs=[], messages=[], - stats=ChatStat(**created.get("stats", {})) if created.get("stats") else None, + stats=None, mandateId=created.get("mandateId", self.currentUser.mandateId) ) @@ -227,17 +281,64 @@ class ChatObjects: if not workflow: return None - if not self._canModify("workflows", workflowId): + if not self._canModify(ChatWorkflow, workflowId): raise PermissionError(f"No permission to update workflow {workflowId}") - # Set update time - workflowData["lastActivity"] = get_utc_timestamp() + # Use generic field separation based on ChatWorkflow model + simple_fields, object_fields = self._separate_object_fields(ChatWorkflow, workflowData) - # Update workflow in database - updated = self.db.recordModify("workflows", workflowId, workflowData) + # Set update time for main workflow + simple_fields["lastActivity"] = get_utc_timestamp() - # Clear cache to ensure fresh data - self._clearTableCache("workflows") + # Update main workflow in database + updated = self.db.recordModify(ChatWorkflow, workflowId, simple_fields) + + + # Handle object field updates (inline to avoid helper dependency) + if 'logs' in object_fields: + logs_data = object_fields['logs'] + try: + for log_data in logs_data: + if hasattr(log_data, 'dict'): + log_dict = log_data.dict() + elif hasattr(log_data, 'to_dict'): + log_dict = log_data.to_dict() + else: + log_dict = log_data + log_dict["workflowId"] = workflowId + self.createLog(log_dict) + logger.debug(f"Updated {len(logs_data)} logs for workflow {workflowId}") + except Exception as e: + logger.error(f"Error updating workflow logs: {str(e)}") + if 'messages' in object_fields: + messages_data = object_fields['messages'] + try: + for message_data in messages_data: + if hasattr(message_data, 'dict'): + msg_dict = message_data.dict() + elif hasattr(message_data, 'to_dict'): + msg_dict = message_data.to_dict() + else: + msg_dict = message_data + msg_dict["workflowId"] = workflowId + self.updateMessage(msg_dict.get("id"), msg_dict) + logger.debug(f"Updated {len(messages_data)} messages for workflow {workflowId}") + except Exception as e: + logger.error(f"Error updating workflow messages: {str(e)}") + if 'stats' in object_fields: + stats_data = object_fields['stats'] + try: + if stats_data: + stats_data["workflowId"] = workflowId + self.db.recordCreate(ChatStat, stats_data) + logger.debug(f"Updated stats for workflow {workflowId}") + except Exception as e: + logger.error(f"Error updating workflow stats: {str(e)}") + + # Load fresh data from normalized tables + logs = self.getLogs(workflowId) + messages = self.getMessages(workflowId) + stats = self.getWorkflowStats(workflowId) # Convert to ChatWorkflow model return ChatWorkflow( @@ -251,70 +352,118 @@ class ChatObjects: totalActions=updated.get("totalActions", workflow.totalActions), lastActivity=updated.get("lastActivity", workflow.lastActivity), startedAt=updated.get("startedAt", workflow.startedAt), - logs=[ChatLog(**log) for log in updated.get("logs", workflow.logs)], - messages=[ChatMessage(**msg) for msg in updated.get("messages", workflow.messages)], - stats=ChatStat(**updated.get("stats", workflow.stats.dict() if workflow.stats else {})) if updated.get("stats") or 
workflow.stats else None, + logs=logs, + messages=messages, + stats=stats, mandateId=updated.get("mandateId", workflow.mandateId) ) def deleteWorkflow(self, workflowId: str) -> bool: - """Deletes a workflow if user has access.""" - # Check if the workflow exists and user has access - workflow = self.getWorkflow(workflowId) - if not workflow: - return False + """Deletes a workflow and all related data if user has access.""" + try: + # Check if the workflow exists and user has access + workflow = self.getWorkflow(workflowId) + if not workflow: + return False + + if not self._canModify(ChatWorkflow, workflowId): + raise PermissionError(f"No permission to delete workflow {workflowId}") - if not self._canModify("workflows", workflowId): - raise PermissionError(f"No permission to delete workflow {workflowId}") - - # Delete workflow - success = self.db.recordDelete("workflows", workflowId) - - # Clear cache to ensure fresh data - self._clearTableCache("workflows") - - return success + # CASCADE DELETE: Delete all related data first + + # 1. Delete all workflow messages and their related data + messages = self.getMessages(workflowId) + for message in messages: + messageId = message.id + if messageId: + # Delete message stats + existing_stats = self.db.getRecordset(ChatStat, recordFilter={"messageId": messageId}) + for stat in existing_stats: + self.db.recordDelete(ChatStat, stat["id"]) + + # Delete message documents (but NOT the files!) + existing_docs = self.db.getRecordset(ChatDocument, recordFilter={"messageId": messageId}) + for doc in existing_docs: + self.db.recordDelete(ChatDocument, doc["id"]) + + # Delete the message itself + self.db.recordDelete(ChatMessage, messageId) + + # 2. Delete workflow stats + existing_stats = self.db.getRecordset(ChatStat, recordFilter={"workflowId": workflowId}) + for stat in existing_stats: + self.db.recordDelete(ChatStat, stat["id"]) + + # 3. Delete workflow logs + existing_logs = self.db.getRecordset(ChatLog, recordFilter={"workflowId": workflowId}) + for log in existing_logs: + self.db.recordDelete(ChatLog, log["id"]) + + # 4. 
Finally delete the workflow itself + success = self.db.recordDelete(ChatWorkflow, workflowId) + + logger.debug(f"Successfully deleted workflow {workflowId} and all related data") + return success + + except Exception as e: + logger.error(f"Error deleting workflow {workflowId}: {str(e)}") + return False - # Workflow Messages - def getWorkflowMessages(self, workflowId: str) -> List[ChatMessage]: + # Message methods + + def getMessages(self, workflowId: str) -> List[ChatMessage]: """Returns messages for a workflow if user has access to the workflow.""" - # Check workflow access first - workflow = self.getWorkflow(workflowId) - if not workflow: + # Check workflow access first (without calling getWorkflow to avoid circular reference) + workflows = self.db.getRecordset(ChatWorkflow, recordFilter={"id": workflowId}) + if not workflows: return [] - # Get messages for this workflow - messages = self.db.getRecordset("workflowMessages", recordFilter={"workflowId": workflowId}) + filteredWorkflows = self._uam(ChatWorkflow, workflows) + if not filteredWorkflows: + return [] + + # Get messages for this workflow from normalized table + messages = self.db.getRecordset(ChatMessage, recordFilter={"workflowId": workflowId}) # Sort messages by publishedAt timestamp to ensure chronological order messages.sort(key=lambda x: x.get("publishedAt", x.get("timestamp", "0"))) - # Convert messages to ChatMessage objects with proper document handling + # Convert messages to ChatMessage objects and load documents chat_messages = [] for msg in messages: - # Ensure documents field is properly converted to ChatDocument objects - if "documents" in msg and msg["documents"]: - try: - # Convert each document back to ChatDocument object - documents = [] - for doc in msg["documents"]: - if isinstance(doc, dict): - documents.append(ChatDocument(**doc)) - else: - documents.append(doc) - msg["documents"] = documents - except Exception as e: - logger.warning(f"Error converting documents for message {msg.get('id', 'unknown')}: {e}") - msg["documents"] = [] - else: - msg["documents"] = [] + # Load documents from normalized documents table + documents = self.getDocuments(msg["id"]) - chat_messages.append(ChatMessage(**msg)) + # Create ChatMessage object with loaded documents + chat_message = ChatMessage( + id=msg["id"], + workflowId=msg["workflowId"], + parentMessageId=msg.get("parentMessageId"), + documents=documents, + documentsLabel=msg.get("documentsLabel"), + message=msg.get("message"), + role=msg.get("role", "assistant"), + status=msg.get("status", "step"), + sequenceNr=msg.get("sequenceNr", 0), + publishedAt=msg.get("publishedAt", get_utc_timestamp()), + stats=self.getMessageStats(msg["id"]), + success=msg.get("success"), + actionId=msg.get("actionId"), + actionMethod=msg.get("actionMethod"), + actionName=msg.get("actionName"), + roundNumber=msg.get("roundNumber"), + taskNumber=msg.get("taskNumber"), + actionNumber=msg.get("actionNumber"), + taskProgress=msg.get("taskProgress"), + actionProgress=msg.get("actionProgress") + ) + + chat_messages.append(chat_message) return chat_messages - def createWorkflowMessage(self, messageData: Dict[str, Any]) -> ChatMessage: + def createMessage(self, messageData: Dict[str, Any]) -> ChatMessage: """Creates a message for a workflow if user has access.""" try: # Ensure ID is present @@ -333,7 +482,7 @@ class ChatObjects: if not workflow: raise PermissionError(f"No access to workflow {workflowId}") - if not self._canModify("workflows", workflowId): + if not self._canModify(ChatWorkflow, 
workflowId): raise PermissionError(f"No permission to modify workflow {workflowId}") # Validate that ID is not None @@ -366,23 +515,31 @@ class ChatObjects: messageData["actionNumber"] = workflow.currentAction logger.debug(f"Auto-setting actionNumber to {workflow.currentAction} for message {messageData['id']}") - # Convert ChatDocument objects to dictionaries for database storage - if "documents" in messageData and messageData["documents"]: - documents_for_db = [] - for doc in messageData["documents"]: - if isinstance(doc, ChatDocument): - # Convert ChatDocument to dictionary - documents_for_db.append(doc.dict()) - else: - # Already a dictionary - documents_for_db.append(doc) - messageData["documents"] = documents_for_db + # Use generic field separation based on ChatMessage model + simple_fields, object_fields = self._separate_object_fields(ChatMessage, messageData) - # Create message in database - createdMessage = self.db.recordCreate("workflowMessages", messageData) + # Handle documents separately - they will be stored in normalized documents table + documents_to_create = object_fields.get("documents", []) - # Clear cache to ensure fresh data - self._clearTableCache("workflowMessages") + # Create message in normalized table using only simple fields + createdMessage = self.db.recordCreate(ChatMessage, simple_fields) + + + # Create documents in normalized documents table + created_documents = [] + for doc_data in documents_to_create: + # Convert to dict if it's a Pydantic object + if hasattr(doc_data, 'dict'): + doc_dict = doc_data.dict() + elif hasattr(doc_data, 'to_dict'): + doc_dict = doc_data.to_dict() + else: + doc_dict = doc_data + + doc_dict["messageId"] = createdMessage["id"] + created_doc = self.createDocument(doc_dict) + if created_doc: + created_documents.append(created_doc) # Convert to ChatMessage model return ChatMessage( @@ -390,15 +547,14 @@ class ChatObjects: workflowId=createdMessage["workflowId"], parentMessageId=createdMessage.get("parentMessageId"), agentName=createdMessage.get("agentName"), - documents=[ChatDocument(**doc) for doc in createdMessage.get("documents", [])], - documentsLabel=createdMessage.get("documentsLabel"), # <-- FIX: ensure label is set + documents=created_documents, + documentsLabel=createdMessage.get("documentsLabel"), message=createdMessage.get("message"), role=createdMessage.get("role", "assistant"), status=createdMessage.get("status", "step"), sequenceNr=len(workflow.messages) + 1, # Use messages list length for sequence number publishedAt=createdMessage.get("publishedAt", get_utc_timestamp()), - stats=ChatStat(**createdMessage.get("stats", {})) if createdMessage.get("stats") else None, - # CRITICAL FIX: Include the progress fields in the ChatMessage object + stats=object_fields.get("stats"), # Use stats from object_fields roundNumber=createdMessage.get("roundNumber"), taskNumber=createdMessage.get("taskNumber"), actionNumber=createdMessage.get("actionNumber"), @@ -412,18 +568,18 @@ class ChatObjects: logger.error(f"Error creating workflow message: {str(e)}") return None - def updateWorkflowMessage(self, messageId: str, messageData: Dict[str, Any]) -> Dict[str, Any]: + def updateMessage(self, messageId: str, messageData: Dict[str, Any]) -> Dict[str, Any]: """Updates a workflow message if user has access to the workflow.""" try: logger.debug(f"Updating message {messageId} in database") # Ensure messageId is provided if not messageId: - logger.error("No messageId provided for updateWorkflowMessage") + logger.error("No messageId provided for 
updateMessage") raise ValueError("messageId cannot be empty") # Check if message exists in database - messages = self.db.getRecordset("workflowMessages", recordFilter={"id": messageId}) + messages = self.db.getRecordset(ChatMessage, recordFilter={"id": messageId}) if not messages: logger.warning(f"Message with ID {messageId} does not exist in database") @@ -436,11 +592,11 @@ class ChatObjects: if not workflow: raise PermissionError(f"No access to workflow {workflowId}") - if not self._canModify("workflows", workflowId): + if not self._canModify(ChatWorkflow, workflowId): raise PermissionError(f"No permission to modify workflow {workflowId}") logger.info(f"Creating new message with ID {messageId} for workflow {workflowId}") - return self.db.recordCreate("workflowMessages", messageData) + return self.db.recordCreate(ChatMessage, messageData) else: logger.error(f"Workflow ID missing for new message {messageId}") return None @@ -454,30 +610,56 @@ class ChatObjects: if not workflow: raise PermissionError(f"No access to workflow {workflowId}") - if not self._canModify("workflows", workflowId): + if not self._canModify(ChatWorkflow, workflowId): raise PermissionError(f"No permission to modify workflow {workflowId}") + # Use generic field separation based on ChatMessage model + simple_fields, object_fields = self._separate_object_fields(ChatMessage, messageData) + # Ensure required fields present for key in ["role", "agentName"]: - if key not in messageData and key not in existingMessage: - messageData[key] = "assistant" if key == "role" else "" + if key not in simple_fields and key not in existingMessage: + simple_fields[key] = "assistant" if key == "role" else "" # Ensure ID is in the dataset - if 'id' not in messageData: - messageData['id'] = messageId + if 'id' not in simple_fields: + simple_fields['id'] = messageId # Convert createdAt to startedAt if needed - if "createdAt" in messageData and "startedAt" not in messageData: - messageData["startedAt"] = messageData["createdAt"] - del messageData["createdAt"] + if "createdAt" in simple_fields and "startedAt" not in simple_fields: + simple_fields["startedAt"] = simple_fields["createdAt"] + del simple_fields["createdAt"] - # Update the message - updatedMessage = self.db.recordModify("workflowMessages", messageId, messageData) + # Update the message with simple fields only + updatedMessage = self.db.recordModify(ChatMessage, messageId, simple_fields) + + # Handle object field updates (documents, stats) inline + if 'documents' in object_fields: + documents_data = object_fields['documents'] + try: + for doc_data in documents_data: + if hasattr(doc_data, 'dict'): + doc_dict = doc_data.dict() + elif hasattr(doc_data, 'to_dict'): + doc_dict = doc_data.to_dict() + else: + doc_dict = doc_data + doc_dict["messageId"] = messageId + self.createDocument(doc_dict) + logger.debug(f"Updated {len(documents_data)} documents for message {messageId}") + except Exception as e: + logger.error(f"Error updating message documents: {str(e)}") + if 'stats' in object_fields: + stats_data = object_fields['stats'] + try: + if stats_data: + stats_data["messageId"] = messageId + self.db.recordCreate(ChatStat, stats_data) + logger.debug(f"Updated stats for message {messageId}") + except Exception as e: + logger.error(f"Error updating message stats: {str(e)}") if updatedMessage: logger.debug(f"Message {messageId} updated successfully") - - # Clear cache to ensure fresh data - self._clearTableCache("workflowMessages") else: logger.warning(f"Failed to update message {messageId}") 
@@ -486,8 +668,8 @@ class ChatObjects: logger.error(f"Error updating message {messageId}: {str(e)}", exc_info=True) raise ValueError(f"Error updating message {messageId}: {str(e)}") - def deleteWorkflowMessage(self, workflowId: str, messageId: str) -> bool: - """Deletes a workflow message if user has access to the workflow.""" + def deleteMessage(self, workflowId: str, messageId: str) -> bool: + """Deletes a workflow message and all related data if user has access to the workflow.""" try: # Check workflow access workflow = self.getWorkflow(workflowId) @@ -495,24 +677,35 @@ class ChatObjects: logger.warning(f"No access to workflow {workflowId}") return False - if not self._canModify("workflows", workflowId): + if not self._canModify(ChatWorkflow, workflowId): raise PermissionError(f"No permission to modify workflow {workflowId}") # Check if the message exists - messages = self.getWorkflowMessages(workflowId) + messages = self.getMessages(workflowId) message = next((m for m in messages if m.get("id") == messageId), None) if not message: logger.warning(f"Message {messageId} for workflow {workflowId} not found") return False - # Delete the message from the database - success = self.db.recordDelete("workflowMessages", messageId) + # CASCADE DELETE: Delete all related data first - # Clear cache to ensure fresh data - self._clearTableCache("workflowMessages") + # 1. Delete message stats + existing_stats = self.db.getRecordset(ChatStat, recordFilter={"messageId": messageId}) + for stat in existing_stats: + self.db.recordDelete(ChatStat, stat["id"]) + # 2. Delete message documents (but NOT the files!) + existing_docs = self.db.getRecordset(ChatDocument, recordFilter={"messageId": messageId}) + for doc in existing_docs: + self.db.recordDelete(ChatDocument, doc["id"]) + + # 3. 
Finally delete the message itself + success = self.db.recordDelete(ChatMessage, messageId) + + logger.debug(f"Successfully deleted message {messageId} and all related data") return success + except Exception as e: logger.error(f"Error deleting message {messageId}: {str(e)}") return False @@ -526,51 +719,20 @@ class ChatObjects: logger.warning(f"No access to workflow {workflowId}") return False - if not self._canModify("workflows", workflowId): + if not self._canModify(ChatWorkflow, workflowId): raise PermissionError(f"No permission to modify workflow {workflowId}") logger.debug(f"Removing file {fileId} from message {messageId} in workflow {workflowId}") - # Get all workflow messages - allMessages = self.getWorkflowMessages(workflowId) - logger.debug(f"Workflow {workflowId} has {len(allMessages)} messages") + # Get documents for this message from normalized table + documents = self.db.getRecordset(ChatDocument, recordFilter={"messageId": messageId}) - # Try different approaches to find the message - message = None - - # Exact match - message = next((m for m in allMessages if m.get("id") == messageId), None) - - # Case-insensitive match - if not message and isinstance(messageId, str): - message = next((m for m in allMessages - if isinstance(m.get("id"), str) and m.get("id").lower() == messageId.lower()), None) - - # Partial match (starts with) - if not message and isinstance(messageId, str): - message = next((m for m in allMessages - if isinstance(m.get("id"), str) and m.get("id").startswith(messageId)), None) - - if not message: - logger.warning(f"Message {messageId} not found in workflow {workflowId}") + if not documents: + logger.warning(f"No documents found for message {messageId}") return False - # Log the found message - logger.debug(f"Found message: {message.get('id')}") - - # Check if message has documents - if "documents" not in message or not message["documents"]: - logger.warning(f"No documents in message {messageId}") - return False - - # Log existing documents - documents = message.get("documents", []) - logger.debug(f"Message has {len(documents)} documents") - - # Create a new list of documents without the one to delete - updatedDocuments = [] + # Find and delete the specific document removed = False - for doc in documents: docId = doc.get("id") fileIdValue = doc.get("fileId") @@ -584,161 +746,80 @@ class ChatObjects: ) if shouldRemove: - removed = True - logger.debug(f"Found file to remove: docId={docId}, fileId={fileIdValue}") - else: - updatedDocuments.append(doc) + # Delete the document from normalized table + success = self.db.recordDelete(ChatDocument, docId) + if success: + removed = True + logger.debug(f"Successfully removed document {docId} (fileId: {fileIdValue})") + else: + logger.warning(f"Failed to delete document {docId}") if not removed: logger.warning(f"No matching file {fileId} found in message {messageId}") return False - # Update message with modified documents array - messageUpdate = { - "documents": updatedDocuments - } - - # Apply the update directly to the database - updated = self.db.recordModify("workflowMessages", message["id"], messageUpdate) - - if updated: logger.debug(f"Successfully removed file {fileId} from message {messageId}") return True - else: - logger.warning(f"Failed to update message {messageId} in database") - return False except Exception as e: logger.error(f"Error removing file {fileId} from message {messageId}: {str(e)}") return False - # Workflow Logs + + # Document methods - def getWorkflowLogs(self, workflowId: str) -> 
List[ChatLog]: + def getDocuments(self, messageId: str) -> List[ChatDocument]: + """Returns documents for a message from normalized table.""" + try: + documents = self.db.getRecordset(ChatDocument, recordFilter={"messageId": messageId}) + return [ChatDocument(**doc) for doc in documents] + except Exception as e: + logger.error(f"Error getting message documents: {str(e)}") + return [] + + def createDocument(self, documentData: Dict[str, Any]) -> ChatDocument: + """Creates a document for a message in normalized table.""" + try: + # Validate document data + document = ChatDocument(**documentData) + + # Create document in normalized table + created = self.db.recordCreate(ChatDocument, document) + + + return ChatDocument(**created) + except Exception as e: + logger.error(f"Error creating message document: {str(e)}") + return None + + + # Log methods + + def getLogs(self, workflowId: str) -> List[ChatLog]: """Returns logs for a workflow if user has access to the workflow.""" - # Check workflow access first - workflow = self.getWorkflow(workflowId) - if not workflow: + # Check workflow access first (without calling getWorkflow to avoid circular reference) + workflows = self.db.getRecordset(ChatWorkflow, recordFilter={"id": workflowId}) + if not workflows: return [] - # Get logs for this workflow - logs = self.db.getRecordset("workflowLogs", recordFilter={"workflowId": workflowId}) + filteredWorkflows = self._uam(ChatWorkflow, workflows) + if not filteredWorkflows: + return [] + + # Get logs for this workflow from normalized table + logs = self.db.getRecordset(ChatLog, recordFilter={"workflowId": workflowId}) # Sort logs by timestamp (Unix timestamps) logs.sort(key=lambda x: float(x.get("timestamp", 0))) return [ChatLog(**log) for log in logs] - def updateWorkflowStats(self, workflowId: str, bytesSent: int = 0, bytesReceived: int = 0) -> bool: - """Updates workflow statistics during execution with incremental values.""" - try: - # Get current workflow - workflow = self.getWorkflow(workflowId) - if not workflow: - logger.error(f"Workflow {workflowId} not found for stats update") - return False - - if not self._canModify("workflows", workflowId): - logger.error(f"No permission to update workflow {workflowId} stats") - return False - - # Get current stats - ensure we have proper defaults - if workflow.stats: - currentStats = workflow.stats.dict() - # Ensure all required fields exist - currentStats.setdefault("bytesSent", 0) - currentStats.setdefault("bytesReceived", 0) - currentStats.setdefault("tokenCount", 0) - currentStats.setdefault("processingTime", 0) - else: - currentStats = { - "bytesSent": 0, - "bytesReceived": 0, - "tokenCount": 0, - "processingTime": 0 - } - - # Calculate processing time as duration since workflow start using Unix timestamps - workflow = self.getWorkflow(workflowId) - if workflow and workflow.startedAt: - try: - # Parse start time as Unix timestamp (handle both old ISO format and new Unix format) - start_time_str = workflow.startedAt - try: - # Try to parse as Unix timestamp first - start_time = int(float(start_time_str)) - except ValueError: - # If that fails, try to parse as ISO format and convert to Unix - try: - # Handle ISO format timestamps (for backward compatibility) - if start_time_str.endswith('Z'): - start_time_str = start_time_str.replace('Z', '+00:00') - dt = datetime.fromisoformat(start_time_str) - start_time = int(dt.timestamp()) - except: - # If all parsing fails, use current time - logger.warning(f"Could not parse start time: {start_time_str}, using 
current time") - start_time = int(get_utc_timestamp()) - - current_time = int(get_utc_timestamp()) - processing_time = current_time - start_time - - # Ensure processing time is reasonable (not negative or extremely large) - if processing_time < 0: - logger.warning(f"Negative processing time calculated: {processing_time}, using 0") - processing_time = 0 - elif processing_time > 86400 * 365: # More than 1 year - logger.warning(f"Unreasonably large processing time: {processing_time}, using 0") - processing_time = 0 - - except Exception as e: - logger.warning(f"Error calculating processing time: {str(e)}") - processing_time = currentStats.get("processingTime", 0) or 0 - else: - # Fallback to existing processing time or 0 - processing_time = currentStats.get("processingTime", 0) or 0 - - # Update stats with incremental values - ensure no None values - current_bytes_sent = currentStats.get("bytesSent", 0) or 0 - current_bytes_received = currentStats.get("bytesReceived", 0) or 0 - - currentStats["bytesSent"] = current_bytes_sent + bytesSent - currentStats["bytesReceived"] = current_bytes_received + bytesReceived - currentStats["tokenCount"] = currentStats["bytesSent"] + currentStats["bytesReceived"] - currentStats["processingTime"] = processing_time - - # Update workflow in database - self.db.recordModify("workflows", workflowId, { - "stats": currentStats - }) - - # Log to stats table - stats_record = { - "timestamp": get_utc_timestamp(), - "workflowId": workflowId, - "bytesSent": bytesSent, - "bytesReceived": bytesReceived, - "tokenCount": bytesSent + bytesReceived, - "processingTime": processing_time - } - - # Create stats record in database - self.db.recordCreate("stats", stats_record) - - # logger.debug(f"Updated workflow {workflowId} stats: {currentStats}") - # logger.debug(f"Logged stats record: {stats_record}") - return True - - except Exception as e: - logger.error(f"Error updating workflow stats: {str(e)}") - return False - - def createWorkflowLog(self, logData: Dict[str, Any]) -> ChatLog: + def createLog(self, logData: Dict[str, Any]) -> ChatLog: """Creates a log entry for a workflow if user has access.""" # Check workflow access workflowId = logData.get("workflowId") if not workflowId: - logger.error("No workflowId provided for createWorkflowLog") + logger.error("No workflowId provided for createLog") return None workflow = self.getWorkflow(workflowId) @@ -746,7 +827,7 @@ class ChatObjects: logger.warning(f"No access to workflow {workflowId}") return None - if not self._canModify("workflows", workflowId): + if not self._canModify(ChatWorkflow, workflowId): logger.warning(f"No permission to modify workflow {workflowId}") return None @@ -778,62 +859,117 @@ class ChatObjects: logger.error(f"Invalid log data: {str(e)}") return None - # Create log in database - createdLog = self.db.recordCreate("workflowLogs", log_model.to_dict()) - - # Clear cache to ensure fresh data - self._clearTableCache("workflowLogs") + # Create log in normalized table + createdLog = self.db.recordCreate(ChatLog, log_model) # Return validated ChatLog instance return ChatLog(**createdLog) + # Stats methods - - - def loadWorkflowState(self, workflowId: str) -> Optional[ChatWorkflow]: - """Loads workflow state if user has access.""" + def getMessageStats(self, messageId: str) -> Optional[ChatStat]: + """Returns statistics for a message from normalized table.""" try: - # Check workflow access + stats = self.db.getRecordset(ChatStat, recordFilter={"messageId": messageId}) + if not stats: + return None + # Return the most 
recent stats record + stats.sort(key=lambda x: x.get("created_at", ""), reverse=True) + return ChatStat(**stats[0]) + except Exception as e: + logger.error(f"Error getting message stats: {str(e)}") + return None + + def getWorkflowStats(self, workflowId: str) -> Optional[ChatStat]: + """Returns statistics for a workflow if user has access.""" + # Check workflow access first (without calling getWorkflow to avoid circular reference) + workflows = self.db.getRecordset(ChatWorkflow, recordFilter={"id": workflowId}) + if not workflows: + return None + + filteredWorkflows = self._uam(ChatWorkflow, workflows) + if not filteredWorkflows: + return None + + # Get stats for this workflow from normalized table + stats = self.db.getRecordset(ChatStat, recordFilter={"workflowId": workflowId}) + + if not stats: + return None + + # Return the most recent stats record + stats.sort(key=lambda x: x.get("created_at", ""), reverse=True) + return ChatStat(**stats[0]) + + def updateWorkflowStats(self, workflowId: str, bytesSent: int = 0, bytesReceived: int = 0) -> bool: + """Updates workflow statistics during execution with incremental values.""" + try: + # Get current workflow workflow = self.getWorkflow(workflowId) if not workflow: - return None + logger.error(f"Workflow {workflowId} not found for stats update") + return False + + if not self._canModify(ChatWorkflow, workflowId): + logger.error(f"No permission to update workflow {workflowId} stats") + return False - logger.debug(f"Loaded base workflow {workflowId} from database") + # Get current stats from normalized table + currentStats = self.getWorkflowStats(workflowId) + if currentStats: + current_bytes_sent = currentStats.bytesSent or 0 + current_bytes_received = currentStats.bytesReceived or 0 + current_processing_time = currentStats.processingTime or 0 + else: + current_bytes_sent = 0 + current_bytes_received = 0 + current_processing_time = 0 - # Load messages - messages = self.getWorkflowMessages(workflowId) - # Messages are already sorted by publishedAt in getWorkflowMessages + # Calculate processing time as duration since workflow start + if workflow and workflow.startedAt: + try: + start_time = int(float(workflow.startedAt)) + current_time = int(get_utc_timestamp()) + processing_time = current_time - start_time + + # Ensure processing time is reasonable + if processing_time < 0: + processing_time = 0 + elif processing_time > 86400 * 365: # More than 1 year + processing_time = 0 + except Exception as e: + logger.warning(f"Error calculating processing time: {str(e)}") + processing_time = current_processing_time + else: + processing_time = current_processing_time - messageCount = len(messages) - logger.debug(f"Loaded {messageCount} messages for workflow {workflowId}") + # Update stats with incremental values + new_bytes_sent = current_bytes_sent + bytesSent + new_bytes_received = current_bytes_received + bytesReceived + new_token_count = new_bytes_sent + new_bytes_received - # Log document counts for each message - for msg in messages: - docCount = len(msg.documents) if hasattr(msg, 'documents') else 0 - if docCount > 0: - logger.debug(f"Message {msg.id} has {docCount} documents loaded from database") + # Create or update stats record in normalized table + stats_record = { + "workflowId": workflowId, + "processingTime": processing_time, + "tokenCount": new_token_count, + "bytesSent": new_bytes_sent, + "bytesReceived": new_bytes_received, + "successRate": None, + "errorCount": None + } - # Load logs - logs = self.getWorkflowLogs(workflowId) - # Logs are 
already sorted by timestamp in getWorkflowLogs + # Create new stats record + self.db.recordCreate(ChatStat, stats_record) + + + return True - # Create a new ChatWorkflow object with loaded messages and logs - return ChatWorkflow( - id=workflow.id, - status=workflow.status, - name=workflow.name, - currentRound=workflow.currentRound, - lastActivity=workflow.lastActivity, - startedAt=workflow.startedAt, - logs=logs, - messages=messages, - stats=workflow.stats, - mandateId=workflow.mandateId - ) except Exception as e: - logger.error(f"Error loading workflow state: {str(e)}") - return None + logger.error(f"Error updating workflow stats: {str(e)}") + return False + # Workflow Actions @@ -854,7 +990,7 @@ class ChatObjects: if workflowId: # Continue existing workflow - load complete state including messages - workflow = self.loadWorkflowState(workflowId) + workflow = self.getWorkflow(workflowId) if not workflow: raise ValueError(f"Workflow {workflowId} not found") @@ -871,7 +1007,7 @@ class ChatObjects: }) # Add log entry for workflow stop - self.createWorkflowLog({ + self.createLog({ "workflowId": workflowId, "message": "Workflow stopped for new prompt", "type": "info", @@ -891,12 +1027,12 @@ class ChatObjects: }) # Reload workflow object to get updated currentRound from database - workflow = self.loadWorkflowState(workflowId) + workflow = self.getWorkflow(workflowId) if not workflow: raise ValueError(f"Failed to reload workflow {workflowId} after update") # Add log entry for workflow resumption - self.createWorkflowLog({ + self.createLog({ "workflowId": workflowId, "message": f"Workflow resumed (round {workflow.currentRound})", "type": "info", @@ -981,7 +1117,7 @@ class ChatObjects: }) # Add log entry - self.createWorkflowLog({ + self.createLog({ "workflowId": workflowId, "message": "Workflow stopped", "type": "warning", diff --git a/modules/interfaces/interfaceComponentAccess.py b/modules/interfaces/interfaceComponentAccess.py index 6db839af..ca5201bb 100644 --- a/modules/interfaces/interfaceComponentAccess.py +++ b/modules/interfaces/interfaceComponentAccess.py @@ -5,7 +5,9 @@ Handles user access management and permission checks. import logging from typing import Dict, Any, List, Optional -from modules.interfaces.interfaceAppModel import User +from modules.interfaces.interfaceAppModel import User, UserInDB +from modules.interfaces.interfaceComponentModel import Prompt, FileItem, FileData +from modules.interfaces.interfaceChatModel import ChatWorkflow, ChatMessage, ChatLog # Configure logger logger = logging.getLogger(__name__) @@ -47,19 +49,20 @@ class ComponentAccess: return True - def uam(self, table: str, recordset: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + def uam(self, model_class: type, recordset: List[Dict[str, Any]]) -> List[Dict[str, Any]]: """ Unified user access management function that filters data based on user privileges and adds access control attributes. 
Args: - table: Name of the table + model_class: Pydantic model class for the table recordset: Recordset to filter based on access rules Returns: Filtered recordset with access control attributes """ userPrivilege = self.privilege + table_name = model_class.__name__ filtered_records = [] @@ -73,9 +76,9 @@ class ComponentAccess: filtered_records = [r for r in recordset if r.get("mandateId") == self.mandateId] else: # Regular users # For prompts, users can see all prompts from their mandate - if table == "prompts": + if table_name == "Prompt": filtered_records = [r for r in recordset if r.get("mandateId") == self.mandateId] - elif table == "users": + elif table_name == "UserInDB": # For users table, users can only see their own record filtered_records = [r for r in recordset if r.get("id") == self.userId] else: @@ -90,32 +93,32 @@ class ComponentAccess: record_id = record.get("id") # Set access control flags based on user permissions - if table == "prompts": + if table_name == "Prompt": record["_hideView"] = False # Everyone can view - record["_hideEdit"] = not self.canModify("prompts", record_id) - record["_hideDelete"] = not self.canModify("prompts", record_id) + record["_hideEdit"] = not self.canModify(Prompt, record_id) + record["_hideDelete"] = not self.canModify(Prompt, record_id) # Add attribute-level permissions for mandateId if "mandateId" in record: - record["_hideEdit_mandateId"] = not self.canModifyAttribute("prompts", "mandateId") - elif table == "files": + record["_hideEdit_mandateId"] = not self.canModifyAttribute(Prompt, "mandateId") + elif table_name == "FileItem": record["_hideView"] = False # Everyone can view - record["_hideEdit"] = not self.canModify("files", record_id) - record["_hideDelete"] = not self.canModify("files", record_id) - record["_hideDownload"] = not self.canModify("files", record_id) - elif table == "workflows": + record["_hideEdit"] = not self.canModify(FileItem, record_id) + record["_hideDelete"] = not self.canModify(FileItem, record_id) + record["_hideDownload"] = not self.canModify(FileItem, record_id) + elif table_name == "ChatWorkflow": record["_hideView"] = False # Everyone can view - record["_hideEdit"] = not self.canModify("workflows", record_id) - record["_hideDelete"] = not self.canModify("workflows", record_id) - elif table == "workflowMessages": + record["_hideEdit"] = not self.canModify(ChatWorkflow, record_id) + record["_hideDelete"] = not self.canModify(ChatWorkflow, record_id) + elif table_name == "ChatMessage": record["_hideView"] = False # Everyone can view - record["_hideEdit"] = not self.canModify("workflows", record.get("workflowId")) - record["_hideDelete"] = not self.canModify("workflows", record.get("workflowId")) - elif table == "workflowLogs": + record["_hideEdit"] = not self.canModify(ChatWorkflow, record.get("workflowId")) + record["_hideDelete"] = not self.canModify(ChatWorkflow, record.get("workflowId")) + elif table_name == "ChatLog": record["_hideView"] = False # Everyone can view - record["_hideEdit"] = not self.canModify("workflows", record.get("workflowId")) - record["_hideDelete"] = not self.canModify("workflows", record.get("workflowId")) - elif table == "users": + record["_hideEdit"] = not self.canModify(ChatWorkflow, record.get("workflowId")) + record["_hideDelete"] = not self.canModify(ChatWorkflow, record.get("workflowId")) + elif table_name == "UserInDB": # For users table, users can only modify their own connections record["_hideView"] = False record["_hideEdit"] = record_id != self.userId @@ -128,17 +131,17 @@ 
class ComponentAccess: else: # Default access control for other tables record["_hideView"] = False - record["_hideEdit"] = not self.canModify(table, record_id) - record["_hideDelete"] = not self.canModify(table, record_id) + record["_hideEdit"] = not self.canModify(model_class, record_id) + record["_hideDelete"] = not self.canModify(model_class, record_id) return filtered_records - def canModify(self, table: str, recordId: Optional[int] = None) -> bool: + def canModify(self, model_class: type, recordId: Optional[int] = None) -> bool: """ Checks if the current user can modify (create/update/delete) records in a table. Args: - table: Name of the table + model_class: Pydantic model class for the table recordId: Optional record ID for specific record check Returns: @@ -153,14 +156,14 @@ class ComponentAccess: # For regular users and admins, check specific cases if recordId is not None: # Get the record to check ownership - records: List[Dict[str, Any]] = self.db.getRecordset(table, recordFilter={"id": recordId}) + records: List[Dict[str, Any]] = self.db.getRecordset(model_class, recordFilter={"id": recordId}) if not records: return False record = records[0] # Special case for users table - users can modify their own connections - if table == "users": + if model_class.__name__ == "UserInDB": if record.get("id") == self.userId: return True return False diff --git a/modules/interfaces/interfaceComponentObjects.py b/modules/interfaces/interfaceComponentObjects.py index 36058cc7..877769f1 100644 --- a/modules/interfaces/interfaceComponentObjects.py +++ b/modules/interfaces/interfaceComponentObjects.py @@ -14,11 +14,10 @@ from modules.interfaces.interfaceComponentAccess import ComponentAccess from modules.interfaces.interfaceComponentModel import ( FilePreview, Prompt, FileItem, FileData ) -from modules.interfaces.interfaceAppModel import User +from modules.interfaces.interfaceAppModel import User, Mandate # DYNAMIC PART: Connectors to the Interface -from modules.connectors.connectorDbJson import DatabaseConnector -from modules.connectors.connectorPool import get_connector, return_connector +from modules.connectors.connectorDbPostgre import DatabaseConnector # Basic Configurations from modules.shared.configuration import APP_CONFIG @@ -90,35 +89,38 @@ class ComponentObjects: self.db.updateContext(self.userId) def __del__(self): - """Cleanup method to return connector to pool.""" + """Cleanup method to close database connection.""" if hasattr(self, 'db') and self.db is not None: try: - return_connector(self.db) + self.db.close() except Exception as e: - logger.error(f"Error returning connector to pool: {e}") + logger.error(f"Error closing database connection: {e}") logger.debug(f"User context set: userId={self.userId}") def _initializeDatabase(self): - """Initializes the database connection.""" + """Initializes the database connection directly.""" try: # Get configuration values with defaults dbHost = APP_CONFIG.get("DB_MANAGEMENT_HOST", "_no_config_default_data") dbDatabase = APP_CONFIG.get("DB_MANAGEMENT_DATABASE", "management") dbUser = APP_CONFIG.get("DB_MANAGEMENT_USER") dbPassword = APP_CONFIG.get("DB_MANAGEMENT_PASSWORD_SECRET") + dbPort = int(APP_CONFIG.get("DB_MANAGEMENT_PORT")) - # Ensure the database directory exists - os.makedirs(dbHost, exist_ok=True) - - self.db = get_connector( + # Create database connector directly + self.db = DatabaseConnector( dbHost=dbHost, dbDatabase=dbDatabase, dbUser=dbUser, dbPassword=dbPassword, + dbPort=dbPort, userId=self.userId if hasattr(self, 'userId') 
else None ) + # Initialize database system + self.db.initDbSystem() + logger.info("Database initialized successfully") except Exception as e: logger.error(f"Failed to initialize database: {str(e)}") @@ -142,7 +144,7 @@ class ComponentObjects: """Initializes standard prompts if they don't exist yet.""" try: # Check if any prompts exist - existingPrompts = self.db.getRecordset("prompts") + existingPrompts = self.db.getRecordset(Prompt) if existingPrompts: logger.info("Prompts already exist, skipping initialization") return @@ -152,7 +154,7 @@ class ComponentObjects: rootInterface = getRootInterface() # Get initial mandate ID through the root interface - mandateId = rootInterface.getInitialId("mandates") + mandateId = rootInterface.getInitialId(Mandate) if not mandateId: logger.error("No initial mandate ID found") return @@ -205,7 +207,7 @@ class ComponentObjects: # Create prompts for prompt in standardPrompts: - self.db.recordCreate("prompts", prompt.to_dict()) + self.db.recordCreate(Prompt, prompt) logger.info(f"Created standard prompt: {prompt.name}") # Restore original user context if it existed @@ -228,10 +230,10 @@ class ComponentObjects: self.access = None self.db.updateContext("") # Reset database context - def _uam(self, table: str, recordset: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + def _uam(self, model_class: type, recordset: List[Dict[str, Any]]) -> List[Dict[str, Any]]: """Delegate to access control module.""" # First apply access control - filteredRecords = self.access.uam(table, recordset) + filteredRecords = self.access.uam(model_class, recordset) # Then filter out database-specific fields cleanedRecords = [] @@ -242,19 +244,16 @@ class ComponentObjects: return cleanedRecords - def _canModify(self, table: str, recordId: Optional[str] = None) -> bool: + def _canModify(self, model_class: type, recordId: Optional[str] = None) -> bool: """Delegate to access control module.""" - return self.access.canModify(table, recordId) + return self.access.canModify(model_class, recordId) - def _clearTableCache(self, table: str) -> None: - """Clears the cache for a specific table to ensure fresh data.""" - self.db.clearTableCache(table) # Utilities - def getInitialId(self, table: str) -> Optional[str]: + def getInitialId(self, model_class: type) -> Optional[str]: """Returns the initial ID for a table.""" - return self.db.getInitialId(table) + return self.db.getInitialId(model_class) @@ -263,8 +262,8 @@ class ComponentObjects: def getAllPrompts(self) -> List[Prompt]: """Returns prompts based on user access level.""" try: - allPrompts = self.db.getRecordset("prompts") - filteredPrompts = self._uam("prompts", allPrompts) + allPrompts = self.db.getRecordset(Prompt) + filteredPrompts = self._uam(Prompt, allPrompts) # Convert to Prompt objects return [Prompt.from_dict(prompt) for prompt in filteredPrompts] @@ -275,25 +274,23 @@ class ComponentObjects: def getPrompt(self, promptId: str) -> Optional[Prompt]: """Returns a prompt by ID if user has access.""" - prompts = self.db.getRecordset("prompts", recordFilter={"id": promptId}) + prompts = self.db.getRecordset(Prompt, recordFilter={"id": promptId}) if not prompts: return None - filteredPrompts = self._uam("prompts", prompts) + filteredPrompts = self._uam(Prompt, prompts) return Prompt.from_dict(filteredPrompts[0]) if filteredPrompts else None def createPrompt(self, promptData: Dict[str, Any]) -> Dict[str, Any]: """Creates a new prompt if user has permission.""" - if not self._canModify("prompts"): + if not self._canModify(Prompt): raise 
PermissionError("No permission to create prompts") - # Create prompt record - createdRecord = self.db.recordCreate("prompts", promptData) + # Create prompt record + createdRecord = self.db.recordCreate(Prompt, promptData) if not createdRecord or not createdRecord.get("id"): raise ValueError("Failed to create prompt record") - # Clear cache to ensure fresh data - self._clearTableCache("prompts") return createdRecord @@ -306,10 +303,9 @@ class ComponentObjects: raise ValueError(f"Prompt {promptId} not found") # Update prompt record directly with the update data - self.db.recordModify("prompts", promptId, updateData) + self.db.recordModify(Prompt, promptId, updateData) # Clear cache to ensure fresh data - self._clearTableCache("prompts") # Get updated prompt updatedPrompt = self.getPrompt(promptId) @@ -329,14 +325,12 @@ class ComponentObjects: if not prompt: return False - if not self._canModify("prompts", promptId): + if not self._canModify(Prompt, promptId): raise PermissionError(f"No permission to delete prompt {promptId}") # Delete prompt - success = self.db.recordDelete("prompts", promptId) + success = self.db.recordDelete(Prompt, promptId) - # Clear cache to ensure fresh data - self._clearTableCache("prompts") return success @@ -347,12 +341,12 @@ class ComponentObjects: If fileName is provided, also checks for exact name+hash match. Only returns files the current user has access to.""" # First get all files with the hash - allFilesWithHash = self.db.getRecordset("files", recordFilter={ + allFilesWithHash = self.db.getRecordset(FileItem, recordFilter={ "fileHash": fileHash }) # Filter by user access using UAM - accessibleFiles = self._uam("files", allFilesWithHash) + accessibleFiles = self._uam(FileItem, allFilesWithHash) if not accessibleFiles: return None @@ -468,8 +462,8 @@ class ComponentObjects: def getAllFiles(self) -> List[FileItem]: """Returns files based on user access level.""" - allFiles = self.db.getRecordset("files") - filteredFiles = self._uam("files", allFiles) + allFiles = self.db.getRecordset(FileItem) + filteredFiles = self._uam(FileItem, allFiles) # Convert database records to FileItem instances fileItems = [] @@ -502,11 +496,11 @@ class ComponentObjects: def getFile(self, fileId: str) -> Optional[FileItem]: """Returns a file by ID if user has access.""" - files = self.db.getRecordset("files", recordFilter={"id": fileId}) + files = self.db.getRecordset(FileItem, recordFilter={"id": fileId}) if not files: return None - filteredFiles = self._uam("files", files) + filteredFiles = self._uam(FileItem, files) if not filteredFiles: return None @@ -534,7 +528,7 @@ class ComponentObjects: def _isfileNameUnique(self, fileName: str, excludeFileId: Optional[str] = None) -> bool: """Checks if a fileName is unique for the current user.""" # Get all files for current user - files = self.db.getRecordset("files", recordFilter={ + files = self.db.getRecordset(FileItem, recordFilter={ "_createdBy": self.currentUser.id }) @@ -566,7 +560,7 @@ class ComponentObjects: def createFile(self, name: str, mimeType: str, content: bytes) -> FileItem: """Creates a new file entry if user has permission. 
Computes fileHash and fileSize from content.""" import hashlib - if not self._canModify("files"): + if not self._canModify(FileItem): raise PermissionError("No permission to create files") # Ensure fileName is unique @@ -589,10 +583,8 @@ class ComponentObjects: ) # Store in database - self.db.recordCreate("files", fileItem.to_dict()) + self.db.recordCreate(FileItem, fileItem) - # Clear cache to ensure fresh data - self._clearTableCache("files") return fileItem @@ -603,7 +595,7 @@ class ComponentObjects: if not file: raise FileNotFoundError(f"File with ID {fileId} not found") - if not self._canModify("files", fileId): + if not self._canModify(FileItem, fileId): raise PermissionError(f"No permission to update file {fileId}") # If fileName is being updated, ensure it's unique @@ -611,10 +603,8 @@ class ComponentObjects: updateData["fileName"] = self._generateUniquefileName(updateData["fileName"], fileId) # Update file - success = self.db.recordModify("files", fileId, updateData) + success = self.db.recordModify(FileItem, fileId, updateData) - # Clear cache to ensure fresh data - self._clearTableCache("files") return success @@ -627,30 +617,29 @@ class ComponentObjects: if not file: raise FileNotFoundError(f"File with ID {fileId} not found") - if not self._canModify("files", fileId): + if not self._canModify(FileItem, fileId): raise PermissionError(f"No permission to delete file {fileId}") # Check for other references to this file (by hash) fileHash = file.fileHash if fileHash: - otherReferences = [f for f in self.db.getRecordset("files", recordFilter={"fileHash": fileHash}) + otherReferences = [f for f in self.db.getRecordset(FileItem, recordFilter={"fileHash": fileHash}) if f["id"] != fileId] # Only delete associated fileData if no other references exist if not otherReferences: try: - fileDataEntries = self.db.getRecordset("fileData", recordFilter={"id": fileId}) + fileDataEntries = self.db.getRecordset(FileData, recordFilter={"id": fileId}) if fileDataEntries: - self.db.recordDelete("fileData", fileId) + self.db.recordDelete(FileData, fileId) logger.debug(f"FileData for file {fileId} deleted") except Exception as e: logger.warning(f"Error deleting FileData for file {fileId}: {str(e)}") # Delete the FileItem entry - success = self.db.recordDelete("files", fileId) + success = self.db.recordDelete(FileItem, fileId) # Clear cache to ensure fresh data - self._clearTableCache("files") return success @@ -709,10 +698,9 @@ class ComponentObjects: "base64Encoded": base64Encoded } - self.db.recordCreate("fileData", fileDataObj) + self.db.recordCreate(FileData, fileDataObj) # Clear cache to ensure fresh data - self._clearTableCache("fileData") logger.debug(f"Successfully stored data for file {fileId} (base64Encoded: {base64Encoded})") return True @@ -730,7 +718,7 @@ class ComponentObjects: import base64 - fileDataEntries = self.db.getRecordset("fileData", recordFilter={"id": fileId}) + fileDataEntries = self.db.getRecordset(FileData, recordFilter={"id": fileId}) if not fileDataEntries: logger.warning(f"No data found for file ID {fileId}") return None @@ -830,7 +818,7 @@ class ComponentObjects: """Saves an uploaded file if user has permission.""" try: # Check file creation permission - if not self._canModify("files"): + if not self._canModify(FileItem): raise PermissionError("No permission to upload files") logger.debug(f"Starting upload process for file: {fileName}") diff --git a/modules/routes/routeDataConnections.py b/modules/routes/routeDataConnections.py index cf861e85..01452206 100644 --- 
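The hunks above consistently replace string table names ("prompts", "files", "workflows", ...) with the Pydantic model classes themselves, and drop the per-table cache clearing that the JSON connector needed; the PostgreSQL connector derives the table from model_class.__name__. A minimal sketch of the resulting call pattern, with placeholder connection values (the real ones come from APP_CONFIG) and imports taken from this patch:

    from modules.connectors.connectorDbPostgre import DatabaseConnector
    from modules.interfaces.interfaceComponentModel import Prompt, FileItem

    # Placeholder credentials for illustration only.
    db = DatabaseConnector(
        dbHost="localhost",
        dbDatabase="management",
        dbUser="app_user",
        dbPassword="app_password",
        dbPort=5432,
        userId=None,
    )
    db.initDbSystem()

    # Tables are addressed by model class, not by string name.
    prompts = db.getRecordset(Prompt, recordFilter={"mandateId": "mandate-1"})
    db.recordModify(Prompt, "prompt-1", {"name": "Updated prompt"})
    db.recordDelete(FileItem, "file-1")
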
a/modules/routes/routeDataConnections.py +++ b/modules/routes/routeDataConnections.py @@ -39,7 +39,7 @@ def get_token_status_for_connection(interface, connection_id: str) -> tuple[str, try: # Query tokens table for the latest token for this connection tokens = interface.db.getRecordset( - table="tokens", + Token, recordFilter={"connectionId": connection_id} ) @@ -93,9 +93,6 @@ async def get_connections( try: interface = getInterface(currentUser) - # Clear connections cache to ensure fresh data - interface.db.clearTableCache("connections") - # SECURITY FIX: All users (including admins) can only see their own connections # This prevents admin from seeing other users' connections and causing confusion connections = interface.getUserConnections(currentUser.id) @@ -179,10 +176,8 @@ async def create_connection( ) # Save connection record - models now handle timestamp serialization automatically - interface.db.recordModify("connections", connection.id, connection.to_dict()) + interface.db.recordModify(UserConnection, connection.id, connection.to_dict()) - # Clear cache to ensure fresh data - interface.db.clearTableCache("connections") return connection @@ -235,10 +230,8 @@ async def update_connection( connection.lastChecked = get_utc_timestamp() # Update connection - models now handle timestamp serialization automatically - interface.db.recordModify("connections", connectionId, connection.to_dict()) + interface.db.recordModify(UserConnection, connectionId, connection.to_dict()) - # Clear cache to ensure fresh data - interface.db.clearTableCache("connections") # Get token status for the updated connection token_status, token_expires_at = get_token_status_for_connection(interface, connectionId) @@ -372,10 +365,8 @@ async def disconnect_service( connection.lastChecked = get_utc_timestamp() # Update connection record - models now handle timestamp serialization automatically - interface.db.recordModify("connections", connectionId, connection.to_dict()) + interface.db.recordModify(UserConnection, connectionId, connection.to_dict()) - # Clear cache to ensure fresh data - interface.db.clearTableCache("connections") return {"message": "Service disconnected successfully"} diff --git a/modules/routes/routeSecurityGoogle.py b/modules/routes/routeSecurityGoogle.py index d3921b62..944f7c0f 100644 --- a/modules/routes/routeSecurityGoogle.py +++ b/modules/routes/routeSecurityGoogle.py @@ -173,7 +173,8 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse rootInterface = getRootInterface() # Prefer connection flow reuse; fallback to user access token if connection_id: - existing_tokens = rootInterface.db.getRecordset("tokens", recordFilter={ + from modules.interfaces.interfaceAppModel import Token + existing_tokens = rootInterface.db.getRecordset(Token, recordFilter={ "connectionId": connection_id, "authority": AuthAuthority.GOOGLE }) @@ -182,7 +183,7 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse existing_tokens.sort(key=lambda x: x.get("createdAt", 0), reverse=True) token_response["refresh_token"] = existing_tokens[0].get("tokenRefresh", "") if not token_response.get("refresh_token") and user_id: - existing_access_tokens = rootInterface.db.getRecordset("tokens", recordFilter={ + existing_access_tokens = rootInterface.db.getRecordset(Token, recordFilter={ "userId": user_id, "connectionId": None, "authority": AuthAuthority.GOOGLE @@ -358,10 +359,9 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse 
connection.externalEmail = user_info.get("email") # Update connection record directly - rootInterface.db.recordModify("connections", connection_id, connection.to_dict()) + from modules.interfaces.interfaceAppModel import UserConnection + rootInterface.db.recordModify(UserConnection, connection_id, connection.to_dict()) - # Clear cache to ensure fresh data - rootInterface.db.clearTableCache("connections") # Save token token = Token( @@ -543,7 +543,7 @@ async def refresh_token( google_connection.status = ConnectionStatus.ACTIVE # Save updated connection - appInterface.db.recordModify("connections", google_connection.id, google_connection.to_dict()) + appInterface.db.recordModify(UserConnection, google_connection.id, google_connection.to_dict()) # Calculate time until expiration current_time = get_utc_timestamp() diff --git a/modules/routes/routeSecurityLocal.py b/modules/routes/routeSecurityLocal.py index 89450a9d..03bdb566 100644 --- a/modules/routes/routeSecurityLocal.py +++ b/modules/routes/routeSecurityLocal.py @@ -52,7 +52,8 @@ async def login( rootInterface = getRootInterface() # Get default mandate ID - defaultMandateId = rootInterface.getInitialId("mandates") + from modules.interfaces.interfaceAppModel import Mandate + defaultMandateId = rootInterface.getInitialId(Mandate) if not defaultMandateId: raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, @@ -146,7 +147,8 @@ async def register_user( appInterface = getRootInterface() # Get default mandate ID - defaultMandateId = appInterface.getInitialId("mandates") + from modules.interfaces.interfaceAppModel import Mandate + defaultMandateId = appInterface.getInitialId(Mandate) if not defaultMandateId: raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, diff --git a/modules/routes/routeSecurityMsft.py b/modules/routes/routeSecurityMsft.py index 5480586d..1d4d8f10 100644 --- a/modules/routes/routeSecurityMsft.py +++ b/modules/routes/routeSecurityMsft.py @@ -309,10 +309,8 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse connection.externalEmail = user_info.get("mail") # Update connection record directly - rootInterface.db.recordModify("connections", connection_id, connection.to_dict()) + rootInterface.db.recordModify(UserConnection, connection_id, connection.to_dict()) - # Clear cache to ensure fresh data - rootInterface.db.clearTableCache("connections") # Save token @@ -524,7 +522,7 @@ async def refresh_token( msft_connection.status = ConnectionStatus.ACTIVE # Save updated connection - appInterface.db.recordModify("connections", msft_connection.id, msft_connection.to_dict()) + appInterface.db.recordModify(UserConnection, msft_connection.id, msft_connection.to_dict()) # Calculate time until expiration current_time = get_utc_timestamp() diff --git a/modules/routes/routeWorkflows.py b/modules/routes/routeWorkflows.py index 565052ce..81f48205 100644 --- a/modules/routes/routeWorkflows.py +++ b/modules/routes/routeWorkflows.py @@ -57,7 +57,7 @@ async def get_workflows( """Get all workflows for the current user.""" try: appInterface = getInterface(currentUser) - workflows_data = appInterface.getAllWorkflows() + workflows_data = appInterface.getWorkflows() # Convert raw dictionaries to ChatWorkflow objects workflows = [] @@ -136,7 +136,7 @@ async def update_workflow( workflowInterface = getInterface(currentUser) # Get raw workflow data from database to check permissions - workflows = workflowInterface.db.getRecordset("workflows", recordFilter={"id": workflowId}) + workflows 
= workflowInterface.db.getRecordset(ChatWorkflow, recordFilter={"id": workflowId}) if not workflows: raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, @@ -225,7 +225,7 @@ async def get_workflow_logs( ) # Get all logs - allLogs = interfaceChat.getWorkflowLogs(workflowId) + allLogs = interfaceChat.getLogs(workflowId) # Apply selective data transfer if logId is provided if logId: @@ -268,7 +268,7 @@ async def get_workflow_messages( ) # Get all messages - allMessages = interfaceChat.getWorkflowMessages(workflowId) + allMessages = interfaceChat.getMessages(workflowId) # Apply selective data transfer if messageId is provided if messageId: @@ -356,7 +356,7 @@ async def delete_workflow( interfaceChat = getServiceChat(currentUser) # Get raw workflow data from database to check permissions - workflows = interfaceChat.db.getRecordset("workflows", recordFilter={"id": workflowId}) + workflows = interfaceChat.db.getRecordset(ChatWorkflow, recordFilter={"id": workflowId}) if not workflows: raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, @@ -419,7 +419,7 @@ async def delete_workflow_message( ) # Delete the message - success = interfaceChat.deleteWorkflowMessage(workflowId, messageId) + success = interfaceChat.deleteMessage(workflowId, messageId) if not success: raise HTTPException( diff --git a/modules/workflow/managerWorkflow.py b/modules/workflow/managerWorkflow.py index ce7a2366..1d50b134 100644 --- a/modules/workflow/managerWorkflow.py +++ b/modules/workflow/managerWorkflow.py @@ -76,12 +76,12 @@ class WorkflowManager: "taskProgress": "pending", "actionProgress": "pending" } - message = self.chatInterface.createWorkflowMessage(stopped_message) + message = self.chatInterface.createMessage(stopped_message) if message: workflow.messages.append(message) # Add log entry - self.chatInterface.createWorkflowLog({ + self.chatInterface.createLog({ "workflowId": workflow.id, "message": "Workflow stopped by user", "type": "warning", @@ -120,12 +120,12 @@ class WorkflowManager: "taskProgress": "fail", "actionProgress": "fail" } - message = self.chatInterface.createWorkflowMessage(error_message) + message = self.chatInterface.createMessage(error_message) if message: workflow.messages.append(message) # Add error log entry - self.chatInterface.createWorkflowLog({ + self.chatInterface.createLog({ "workflowId": workflow.id, "message": f"Workflow failed: {str(e)}", "type": "error", @@ -165,16 +165,19 @@ class WorkflowManager: "actionProgress": "pending" } - # Add documents if any - if userInput.listFileId: - # Process file IDs and add to message data - documents = await self.chatManager.service.processFileIds(userInput.listFileId) - messageData["documents"] = documents - - # Create message using interface - message = self.chatInterface.createWorkflowMessage(messageData) + # Create message first to get messageId + message = self.chatInterface.createMessage(messageData) if message: workflow.messages.append(message) + + # Add documents if any, now with messageId + if userInput.listFileId: + # Process file IDs and add to message data + documents = await self.chatManager.service.processFileIds(userInput.listFileId, message.id) + message.documents = documents + # Update the message with documents in database + self.chatInterface.updateMessage(message.id, {"documents": [doc.to_dict() for doc in documents]}) + return message else: raise Exception("Failed to create first message") @@ -241,7 +244,7 @@ class WorkflowManager: } # Create message using interface - message = 
self.chatInterface.createWorkflowMessage(messageData) + message = self.chatInterface.createMessage(messageData) if message: workflow.messages.append(message) @@ -256,7 +259,7 @@ class WorkflowManager: }) # Add completion log entry - self.chatInterface.createWorkflowLog({ + self.chatInterface.createLog({ "workflowId": workflow.id, "message": "Workflow completed", "type": "success", @@ -294,7 +297,7 @@ class WorkflowManager: "taskProgress": "stopped", "actionProgress": "stopped" } - message = self.chatInterface.createWorkflowMessage(stopped_message) + message = self.chatInterface.createMessage(stopped_message) if message: workflow.messages.append(message) @@ -326,7 +329,7 @@ class WorkflowManager: "taskProgress": "stopped", "actionProgress": "stopped" } - message = self.chatInterface.createWorkflowMessage(stopped_message) + message = self.chatInterface.createMessage(stopped_message) if message: workflow.messages.append(message) @@ -341,7 +344,7 @@ class WorkflowManager: }) # Add stopped log entry - self.chatInterface.createWorkflowLog({ + self.chatInterface.createLog({ "workflowId": workflow.id, "message": "Workflow stopped by user", "type": "warning", @@ -368,7 +371,7 @@ class WorkflowManager: "taskProgress": "fail", "actionProgress": "fail" } - message = self.chatInterface.createWorkflowMessage(error_message) + message = self.chatInterface.createMessage(error_message) if message: workflow.messages.append(message) @@ -383,7 +386,7 @@ class WorkflowManager: }) # Add failed log entry - self.chatInterface.createWorkflowLog({ + self.chatInterface.createLog({ "workflowId": workflow.id, "message": f"Workflow failed: {workflow_result.error or 'Unknown error'}", "type": "error", @@ -411,7 +414,7 @@ class WorkflowManager: "actionProgress": "success" } - message = self.chatInterface.createWorkflowMessage(summary_message) + message = self.chatInterface.createMessage(summary_message) if message: workflow.messages.append(message) @@ -426,7 +429,7 @@ class WorkflowManager: }) # Add completion log entry - self.chatInterface.createWorkflowLog({ + self.chatInterface.createLog({ "workflowId": workflow.id, "message": "Workflow completed successfully", "type": "success", @@ -454,7 +457,7 @@ class WorkflowManager: "taskProgress": "fail", "actionProgress": "fail" } - message = self.chatInterface.createWorkflowMessage(error_message) + message = self.chatInterface.createMessage(error_message) if message: workflow.messages.append(message) diff --git a/notes/changelog.txt b/notes/changelog.txt index 60af5270..d4574804 100644 --- a/notes/changelog.txt +++ b/notes/changelog.txt @@ -4,7 +4,8 @@ TODO # System - database - db initialization as separate function to create root mandate, then sysadmin with hashed passwords --> using the connector according to env configuration -- config page for: db reset +- settings: UI page for: db new (delete if exists and init), then to add mandate root and sysadmin, log download --> in the api to add connector settings with the according endpoints +- access model as matrix, not as code --> to have view, add, update, delete with the rights on level table and attribute for all, my (created by me), my mandate (mandate I am in), none (no access) - document handling centralized - ai handling centralized - neutralizer to activate AND put back placeholders to the returned data diff --git a/notes/releasenotes.txt b/notes/releasenotes.txt deleted file mode 100644 index 10d5dadc..00000000 --- a/notes/releasenotes.txt +++ /dev/null @@ -1,8 +0,0 @@ -New features -- Limiter and tracking of ip 
adress access -- Sessions improved -- user and connection consequently separated -- seamless local and external authorities integration -- audit trail -- nda disclaimer in login window -- CSRF Tokens included in forms \ No newline at end of file diff --git a/query b/query new file mode 100644 index 00000000..a02a1cc7 --- /dev/null +++ b/query @@ -0,0 +1 @@ +postgresql diff --git a/requirements.txt b/requirements.txt index 783db728..695e29fb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -90,4 +90,7 @@ bokeh>=3.2.0,<3.4.0 linkify-it-py>=1.0.0 mdit-py-plugins>=0.3.0 pyviz-comms>=2.0.0 -xyzservices>=2021.09.1 \ No newline at end of file +xyzservices>=2021.09.1 + +# PostgreSQL connector dependencies +psycopg2-binary==2.9.9 diff --git a/test_concurrency_fixes.py b/test_concurrency_fixes.py deleted file mode 100644 index 4613b999..00000000 --- a/test_concurrency_fixes.py +++ /dev/null @@ -1,237 +0,0 @@ -#!/usr/bin/env python3 -""" -Test script to verify concurrency improvements in DatabaseConnector. -This script simulates multiple users accessing the database simultaneously. -""" - -import os -import sys -import time -import threading -import logging -from concurrent.futures import ThreadPoolExecutor, as_completed -import tempfile -import shutil - -# Add the gateway directory to the path -sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) - -from modules.connectors.connectorDbJson import DatabaseConnector -from modules.connectors.connectorPool import get_connector, return_connector - -# Configure logging -logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') -logger = logging.getLogger(__name__) - -def test_concurrent_record_operations(): - """Test concurrent record creation, modification, and deletion.""" - - # Create temporary database directory - temp_dir = tempfile.mkdtemp() - db_host = temp_dir - db_database = "test_db" - - try: - logger.info("Starting concurrency test...") - - def user_operation(user_id: int, operation_count: int = 10): - """Simulate a user performing database operations.""" - try: - # Get a dedicated connector for this user - db = get_connector( - dbHost=db_host, - dbDatabase=db_database, - userId=f"user_{user_id}" - ) - - results = [] - - for i in range(operation_count): - # Create a record - record = { - "id": f"user_{user_id}_record_{i}", - "data": f"User {user_id} data {i}", - "timestamp": time.time() - } - - # Create record - created = db.recordCreate("test_table", record) - results.append(f"Created: {created['id']}") - - # Modify record - record["data"] = f"Modified by user {user_id} - {i}" - modified = db.recordModify("test_table", record["id"], record) - results.append(f"Modified: {modified['id']}") - - # Small delay to increase chance of race conditions - time.sleep(0.001) - - # Return connector to pool - return_connector(db) - - return results - - except Exception as e: - logger.error(f"User {user_id} error: {e}") - return [f"Error: {e}"] - - # Test with multiple concurrent users - num_users = 20 - operations_per_user = 5 - - logger.info(f"Testing with {num_users} users, {operations_per_user} operations each") - - start_time = time.time() - - with ThreadPoolExecutor(max_workers=num_users) as executor: - # Submit all user operations - futures = [ - executor.submit(user_operation, user_id, operations_per_user) - for user_id in range(num_users) - ] - - # Collect results - all_results = [] - for future in as_completed(futures): - try: - result = future.result() - all_results.extend(result) - except Exception 
as e: - logger.error(f"Future error: {e}") - - end_time = time.time() - - # Verify data integrity - db = get_connector(dbHost=db_host, dbDatabase=db_database, userId="verifier") - - # Check that all records exist and are consistent - all_records = db.getRecordset("test_table") - expected_count = num_users * operations_per_user - - logger.info(f"Expected records: {expected_count}") - logger.info(f"Actual records: {len(all_records)}") - logger.info(f"Test completed in {end_time - start_time:.2f} seconds") - - # Check for data consistency - record_ids = set(record["id"] for record in all_records) - expected_ids = set(f"user_{user_id}_record_{i}" for user_id in range(num_users) for i in range(operations_per_user)) - - missing_ids = expected_ids - record_ids - extra_ids = record_ids - expected_ids - - if missing_ids: - logger.error(f"Missing records: {missing_ids}") - if extra_ids: - logger.error(f"Extra records: {extra_ids}") - - # Check for data corruption (records with wrong user data) - corrupted_records = [] - for record in all_records: - record_id = record["id"] - user_id = int(record_id.split("_")[1]) - if f"Modified by user {user_id}" not in record.get("data", ""): - corrupted_records.append(record_id) - - if corrupted_records: - logger.error(f"Corrupted records: {corrupted_records}") - - success = len(missing_ids) == 0 and len(extra_ids) == 0 and len(corrupted_records) == 0 - - if success: - logger.info("✅ Concurrency test PASSED - No data corruption detected") - else: - logger.error("❌ Concurrency test FAILED - Data corruption detected") - - return success - - finally: - # Cleanup - try: - shutil.rmtree(temp_dir) - logger.info("Cleaned up temporary directory") - except Exception as e: - logger.error(f"Error cleaning up: {e}") - -def test_metadata_consistency(): - """Test that metadata operations are atomic.""" - - temp_dir = tempfile.mkdtemp() - db_host = temp_dir - db_database = "test_metadata" - - try: - logger.info("Testing metadata consistency...") - - def concurrent_metadata_operations(user_id: int): - """Perform concurrent metadata operations.""" - db = get_connector( - dbHost=db_host, - dbDatabase=db_database, - userId=f"user_{user_id}" - ) - - try: - # Create multiple records rapidly - for i in range(10): - record = { - "id": f"user_{user_id}_meta_{i}", - "data": f"Metadata test {user_id}-{i}" - } - db.recordCreate("metadata_test", record) - time.sleep(0.001) # Small delay - - return True - except Exception as e: - logger.error(f"Metadata test error for user {user_id}: {e}") - return False - finally: - return_connector(db) - - # Run concurrent metadata operations - with ThreadPoolExecutor(max_workers=10) as executor: - futures = [executor.submit(concurrent_metadata_operations, i) for i in range(10)] - results = [future.result() for future in as_completed(futures)] - - # Verify metadata consistency - db = get_connector(dbHost=db_host, dbDatabase=db_database, userId="verifier") - records = db.getRecordset("metadata_test") - - # Check that metadata is consistent - metadata = db._loadTableMetadata("metadata_test") - expected_count = len(records) - actual_count = len(metadata["recordIds"]) - - logger.info(f"Expected record count: {expected_count}") - logger.info(f"Metadata record count: {actual_count}") - - success = expected_count == actual_count - - if success: - logger.info("✅ Metadata consistency test PASSED") - else: - logger.error("❌ Metadata consistency test FAILED") - - return success - - finally: - try: - shutil.rmtree(temp_dir) - except Exception as e: - 
logger.error(f"Error cleaning up: {e}") - -if __name__ == "__main__": - logger.info("Starting concurrency tests...") - - # Test 1: Concurrent record operations - test1_passed = test_concurrent_record_operations() - - # Test 2: Metadata consistency - test2_passed = test_metadata_consistency() - - # Overall result - if test1_passed and test2_passed: - logger.info("🎉 All concurrency tests PASSED!") - sys.exit(0) - else: - logger.error("💥 Some concurrency tests FAILED!") - sys.exit(1) diff --git a/tests/connectors/__init__.py b/tests/connectors/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/connectors/test_connector_tavily.py b/tests/connectors/test_connector_tavily.py deleted file mode 100644 index 23253cba..00000000 --- a/tests/connectors/test_connector_tavily.py +++ /dev/null @@ -1,108 +0,0 @@ -"""Tests for Tavliy web search.""" - -import pytest -import logging - -from modules.interfaces.interfaceChatModel import ActionResult -from gateway.modules.interfaces.interfaceWebModel import ( - WebSearchRequest, - WebCrawlRequest, - WebScrapeRequest, -) -from gateway.modules.connectors.connectorWebTavily import ConnectorTavily - -logger = logging.getLogger(__name__) - - -@pytest.mark.asyncio -@pytest.mark.expensive -async def test_tavily_connector_search_test_live_api(): - logger.info("Testing Tavliy connector search with live API calls") - - # Test request - request = WebSearchRequest(query="How old is the Earth?", max_results=5) - - # Tavily instance - connectorWebTavily = await ConnectorTavily.create() - - # Search test - action_result = await connectorWebTavily.search_urls(request=request) - - # Check results - assert isinstance(action_result, ActionResult) - - logger.info("=" * 20) - logger.info(f"Action result success status: {action_result.success}") - logger.info(f"Action result error: {action_result.error}") - logger.info(f"Action result label: {action_result.resultLabel}") - - logger.info("Documents:") - for doc in action_result.documents: - logger.info("-" * 10) - logger.info(f" - Document Name: {doc.documentName}") - logger.info(f" - Document Mime Type: {doc.mimeType}") - logger.info(f" - Document Data: {doc.documentData}") - - -@pytest.mark.asyncio -@pytest.mark.expensive -async def test_tavily_connector_crawl_test_live_api(): - logger.info("Testing Tavily connector crawl with live API calls") - - # Test request - urls = [ - "https://en.wikipedia.org/wiki/Earth", - "https://valueon.ch", - ] - request = WebCrawlRequest(urls=urls) - - # Tavily instance - connectorWebTavily = await ConnectorTavily.create() - - # Crawl test - action_result = await connectorWebTavily.crawl_urls(request=request) - - # Check results - assert isinstance(action_result, ActionResult) - - logger.info("=" * 20) - logger.info(f"Action result success status: {action_result.success}") - logger.info(f"Action result error: {action_result.error}") - logger.info(f"Action result label: {action_result.resultLabel}") - - logger.info("Documents:") - for doc in action_result.documents: - logger.info("-" * 10) - logger.info(f" - Document Name: {doc.documentName}") - logger.info(f" - Document Mime Type: {doc.mimeType}") - logger.info(f" - Document Data: {doc.documentData}") - - -@pytest.mark.asyncio -@pytest.mark.expensive -async def test_tavily_connector_scrape_test_live_api(): - logger.info("Testing Tavily connector scrape with live API calls") - - # Test request with query - request = WebScrapeRequest(query="How old is the Earth?", max_results=3) - - # Tavily instance - connectorWebTavily = 
await ConnectorTavily.create() - - # Scrape test - action_result = await connectorWebTavily.scrape(request=request) - - # Check results - assert isinstance(action_result, ActionResult) - - logger.info("=" * 20) - logger.info(f"Action result success status: {action_result.success}") - logger.info(f"Action result error: {action_result.error}") - logger.info(f"Action result label: {action_result.resultLabel}") - - logger.info("Documents:") - for doc in action_result.documents: - logger.info("-" * 10) - logger.info(f" - Document Name: {doc.documentName}") - logger.info(f" - Document Mime Type: {doc.mimeType}") - logger.info(f" - Document Data: {doc.documentData}") diff --git a/test_graph_search.py b/tests/test_graph_search.py similarity index 100% rename from test_graph_search.py rename to tests/test_graph_search.py diff --git a/test_neutralizer/apprun.py b/tests/test_neutralizer/apprun.py similarity index 100% rename from test_neutralizer/apprun.py rename to tests/test_neutralizer/apprun.py diff --git a/test_neutralizer/logs/log_mapping.csv b/tests/test_neutralizer/logs/log_mapping.csv similarity index 100% rename from test_neutralizer/logs/log_mapping.csv rename to tests/test_neutralizer/logs/log_mapping.csv diff --git a/test_neutralizer/logs/log_replacements.csv b/tests/test_neutralizer/logs/log_replacements.csv similarity index 100% rename from test_neutralizer/logs/log_replacements.csv rename to tests/test_neutralizer/logs/log_replacements.csv diff --git a/test_neutralizer/neutralizer.py b/tests/test_neutralizer/neutralizer.py similarity index 100% rename from test_neutralizer/neutralizer.py rename to tests/test_neutralizer/neutralizer.py diff --git a/test_neutralizer/output/neutralized_Case.md b/tests/test_neutralizer/output/neutralized_Case.md similarity index 100% rename from test_neutralizer/output/neutralized_Case.md rename to tests/test_neutralizer/output/neutralized_Case.md diff --git a/test_neutralizer/output/neutralized_customers.csv b/tests/test_neutralizer/output/neutralized_customers.csv similarity index 100% rename from test_neutralizer/output/neutralized_customers.csv rename to tests/test_neutralizer/output/neutralized_customers.csv diff --git a/test_neutralizer/output/neutralized_cv_lara_meier.txt b/tests/test_neutralizer/output/neutralized_cv_lara_meier.txt similarity index 100% rename from test_neutralizer/output/neutralized_cv_lara_meier.txt rename to tests/test_neutralizer/output/neutralized_cv_lara_meier.txt diff --git a/test_neutralizer/output/neutralized_employees.csv b/tests/test_neutralizer/output/neutralized_employees.csv similarity index 100% rename from test_neutralizer/output/neutralized_employees.csv rename to tests/test_neutralizer/output/neutralized_employees.csv diff --git a/test_neutralizer/output/neutralized_english.txt b/tests/test_neutralizer/output/neutralized_english.txt similarity index 100% rename from test_neutralizer/output/neutralized_english.txt rename to tests/test_neutralizer/output/neutralized_english.txt diff --git a/test_neutralizer/output/neutralized_example.json b/tests/test_neutralizer/output/neutralized_example.json similarity index 100% rename from test_neutralizer/output/neutralized_example.json rename to tests/test_neutralizer/output/neutralized_example.json diff --git a/test_neutralizer/output/neutralized_example.xml b/tests/test_neutralizer/output/neutralized_example.xml similarity index 100% rename from test_neutralizer/output/neutralized_example.xml rename to tests/test_neutralizer/output/neutralized_example.xml diff --git 
a/test_neutralizer/output/neutralized_french.txt b/tests/test_neutralizer/output/neutralized_french.txt similarity index 100% rename from test_neutralizer/output/neutralized_french.txt rename to tests/test_neutralizer/output/neutralized_french.txt diff --git a/test_neutralizer/output/neutralized_german.txt b/tests/test_neutralizer/output/neutralized_german.txt similarity index 100% rename from test_neutralizer/output/neutralized_german.txt rename to tests/test_neutralizer/output/neutralized_german.txt diff --git a/test_neutralizer/output/neutralized_geschaeftsstrategie.txt b/tests/test_neutralizer/output/neutralized_geschaeftsstrategie.txt similarity index 100% rename from test_neutralizer/output/neutralized_geschaeftsstrategie.txt rename to tests/test_neutralizer/output/neutralized_geschaeftsstrategie.txt diff --git a/test_neutralizer/output/neutralized_geschäfte.csv b/tests/test_neutralizer/output/neutralized_geschäfte.csv similarity index 100% rename from test_neutralizer/output/neutralized_geschäfte.csv rename to tests/test_neutralizer/output/neutralized_geschäfte.csv diff --git a/test_neutralizer/output/neutralized_italian.txt b/tests/test_neutralizer/output/neutralized_italian.txt similarity index 100% rename from test_neutralizer/output/neutralized_italian.txt rename to tests/test_neutralizer/output/neutralized_italian.txt diff --git a/test_neutralizer/output/neutralized_kunden.csv b/tests/test_neutralizer/output/neutralized_kunden.csv similarity index 100% rename from test_neutralizer/output/neutralized_kunden.csv rename to tests/test_neutralizer/output/neutralized_kunden.csv diff --git a/test_neutralizer/output/neutralized_mitarbeiter.csv b/tests/test_neutralizer/output/neutralized_mitarbeiter.csv similarity index 100% rename from test_neutralizer/output/neutralized_mitarbeiter.csv rename to tests/test_neutralizer/output/neutralized_mitarbeiter.csv diff --git a/test_neutralizer/output/neutralized_swiss.txt b/tests/test_neutralizer/output/neutralized_swiss.txt similarity index 100% rename from test_neutralizer/output/neutralized_swiss.txt rename to tests/test_neutralizer/output/neutralized_swiss.txt diff --git a/test_neutralizer/output/neutralized_transactions.csv b/tests/test_neutralizer/output/neutralized_transactions.csv similarity index 100% rename from test_neutralizer/output/neutralized_transactions.csv rename to tests/test_neutralizer/output/neutralized_transactions.csv diff --git a/test_neutralizer/patterns.py b/tests/test_neutralizer/patterns.py similarity index 100% rename from test_neutralizer/patterns.py rename to tests/test_neutralizer/patterns.py diff --git a/test_neutralizer/testdata/Case.md b/tests/test_neutralizer/testdata/Case.md similarity index 100% rename from test_neutralizer/testdata/Case.md rename to tests/test_neutralizer/testdata/Case.md diff --git a/test_neutralizer/testdata/customers.csv b/tests/test_neutralizer/testdata/customers.csv similarity index 100% rename from test_neutralizer/testdata/customers.csv rename to tests/test_neutralizer/testdata/customers.csv diff --git a/test_neutralizer/testdata/cv_lara_meier.txt b/tests/test_neutralizer/testdata/cv_lara_meier.txt similarity index 100% rename from test_neutralizer/testdata/cv_lara_meier.txt rename to tests/test_neutralizer/testdata/cv_lara_meier.txt diff --git a/test_neutralizer/testdata/employees.csv b/tests/test_neutralizer/testdata/employees.csv similarity index 100% rename from test_neutralizer/testdata/employees.csv rename to tests/test_neutralizer/testdata/employees.csv diff --git 
a/test_neutralizer/testdata/english.txt b/tests/test_neutralizer/testdata/english.txt similarity index 100% rename from test_neutralizer/testdata/english.txt rename to tests/test_neutralizer/testdata/english.txt diff --git a/test_neutralizer/testdata/example.json b/tests/test_neutralizer/testdata/example.json similarity index 100% rename from test_neutralizer/testdata/example.json rename to tests/test_neutralizer/testdata/example.json diff --git a/test_neutralizer/testdata/example.xml b/tests/test_neutralizer/testdata/example.xml similarity index 100% rename from test_neutralizer/testdata/example.xml rename to tests/test_neutralizer/testdata/example.xml diff --git a/test_neutralizer/testdata/french.txt b/tests/test_neutralizer/testdata/french.txt similarity index 100% rename from test_neutralizer/testdata/french.txt rename to tests/test_neutralizer/testdata/french.txt diff --git a/test_neutralizer/testdata/german.txt b/tests/test_neutralizer/testdata/german.txt similarity index 100% rename from test_neutralizer/testdata/german.txt rename to tests/test_neutralizer/testdata/german.txt diff --git a/test_neutralizer/testdata/geschaeftsstrategie.txt b/tests/test_neutralizer/testdata/geschaeftsstrategie.txt similarity index 100% rename from test_neutralizer/testdata/geschaeftsstrategie.txt rename to tests/test_neutralizer/testdata/geschaeftsstrategie.txt diff --git a/test_neutralizer/testdata/geschäfte.csv b/tests/test_neutralizer/testdata/geschäfte.csv similarity index 100% rename from test_neutralizer/testdata/geschäfte.csv rename to tests/test_neutralizer/testdata/geschäfte.csv diff --git a/test_neutralizer/testdata/italian.txt b/tests/test_neutralizer/testdata/italian.txt similarity index 100% rename from test_neutralizer/testdata/italian.txt rename to tests/test_neutralizer/testdata/italian.txt diff --git a/test_neutralizer/testdata/kunden.csv b/tests/test_neutralizer/testdata/kunden.csv similarity index 100% rename from test_neutralizer/testdata/kunden.csv rename to tests/test_neutralizer/testdata/kunden.csv diff --git a/test_neutralizer/testdata/mitarbeiter.csv b/tests/test_neutralizer/testdata/mitarbeiter.csv similarity index 100% rename from test_neutralizer/testdata/mitarbeiter.csv rename to tests/test_neutralizer/testdata/mitarbeiter.csv diff --git a/test_neutralizer/testdata/swiss.txt b/tests/test_neutralizer/testdata/swiss.txt similarity index 100% rename from test_neutralizer/testdata/swiss.txt rename to tests/test_neutralizer/testdata/swiss.txt diff --git a/test_neutralizer/testdata/transactions.csv b/tests/test_neutralizer/testdata/transactions.csv similarity index 100% rename from test_neutralizer/testdata/transactions.csv rename to tests/test_neutralizer/testdata/transactions.csv diff --git a/test_neutralizer/zdocu.html b/tests/test_neutralizer/zdocu.html similarity index 100% rename from test_neutralizer/zdocu.html rename to tests/test_neutralizer/zdocu.html From 748093b48e9dd70a3fd5f15d29400e7bc531c975 Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Mon, 8 Sep 2025 21:55:21 +0200 Subject: [PATCH 15/27] database running --- modules/chat/handling/handlingTasks.py | 60 +++--- modules/connectors/connectorDbPostgre.py | 215 ++++++++++--------- modules/interfaces/interfaceAppObjects.py | 23 +- modules/interfaces/interfaceChatModel.py | 3 + modules/interfaces/interfaceChatObjects.py | 114 ++++++++-- modules/interfaces/interfaceTicketObjects.py | 4 +- modules/routes/routeJira.py | 4 +- modules/routes/routeWorkflows.py | 67 ++++-- modules/shared/timezoneUtils.py | 7 +- 
notes/changelog.txt | 6 + 10 files changed, 302 insertions(+), 201 deletions(-) diff --git a/modules/chat/handling/handlingTasks.py b/modules/chat/handling/handlingTasks.py index 20cfbe13..cfc31401 100644 --- a/modules/chat/handling/handlingTasks.py +++ b/modules/chat/handling/handlingTasks.py @@ -1098,36 +1098,36 @@ class HandlingTasks: ) result_label = action.execResultLabel - # Process documents from the action result - created_documents = [] - if result.success: - action.setSuccess() - # Extract result text from documents if available, otherwise use empty string - action.result = "" - if result.documents and len(result.documents) > 0: - # Try to get text content from the first document - first_doc = result.documents[0] - if isinstance(first_doc.documentData, dict): - action.result = first_doc.documentData.get("result", "") - elif isinstance(first_doc.documentData, str): - action.result = first_doc.documentData - # Preserve the action's execResultLabel for document routing - # Action methods should NOT return resultLabel - this is managed by the action handler - if not action.execResultLabel: - logger.warning(f"Action {action.execMethod}.{action.execAction} has no execResultLabel set") - # Always use the action's execResultLabel for message creation to ensure proper document routing - message_result_label = action.execResultLabel - - # Create message first to get messageId, then create documents with messageId - message = await self.createActionMessage(action, result, workflow, message_result_label, [], task_step, task_index) - if message: - # Now create documents with the messageId - created_documents = self.documentGenerator.createDocumentsFromActionResult(result, action, workflow, message.id) - # Update the message with the created documents - if created_documents: - message.documents = created_documents - # Update the message in the database - self.chatInterface.updateMessage(message.id, {"documents": [doc.to_dict() for doc in created_documents]}) + # Process documents from the action result + created_documents = [] + if result.success: + action.setSuccess() + # Extract result text from documents if available, otherwise use empty string + action.result = "" + if result.documents and len(result.documents) > 0: + # Try to get text content from the first document + first_doc = result.documents[0] + if isinstance(first_doc.documentData, dict): + action.result = first_doc.documentData.get("result", "") + elif isinstance(first_doc.documentData, str): + action.result = first_doc.documentData + # Preserve the action's execResultLabel for document routing + # Action methods should NOT return resultLabel - this is managed by the action handler + if not action.execResultLabel: + logger.warning(f"Action {action.execMethod}.{action.execAction} has no execResultLabel set") + # Always use the action's execResultLabel for message creation to ensure proper document routing + message_result_label = action.execResultLabel + + # Create message first to get messageId, then create documents with messageId + message = await self.createActionMessage(action, result, workflow, message_result_label, [], task_step, task_index) + if message: + # Now create documents with the messageId + created_documents = self.documentGenerator.createDocumentsFromActionResult(result, action, workflow, message.id) + # Update the message with the created documents + if created_documents: + message.documents = created_documents + # Update the message in the database + self.chatInterface.updateMessage(message.id, {"documents": 
[doc.dict() for doc in created_documents]}) # Log action results logger.info(f"Action completed successfully") diff --git a/modules/connectors/connectorDbPostgre.py b/modules/connectors/connectorDbPostgre.py index eea519e8..dfee166a 100644 --- a/modules/connectors/connectorDbPostgre.py +++ b/modules/connectors/connectorDbPostgre.py @@ -40,7 +40,7 @@ def _get_model_fields(model_class) -> Dict[str, str]: elif field_type == int: fields[field_name] = 'INTEGER' elif field_type == float: - fields[field_name] = 'REAL' + fields[field_name] = 'DOUBLE PRECISION' elif field_type == bool: fields[field_name] = 'BOOLEAN' else: @@ -80,7 +80,6 @@ class DatabaseConnector: self._systemTableName = "_system" self._initializeSystemTable() - logger.debug(f"Context: userId={self.userId}") def initDbSystem(self): """Initialize the database system - creates database and tables.""" @@ -154,8 +153,8 @@ class DatabaseConnector: id SERIAL PRIMARY KEY, table_name VARCHAR(255) UNIQUE NOT NULL, initial_id VARCHAR(255) NOT NULL, - _createdAt TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - _modifiedAt TIMESTAMP DEFAULT CURRENT_TIMESTAMP + _createdAt DOUBLE PRECISION, + _modifiedAt DOUBLE PRECISION ) """) @@ -245,8 +244,8 @@ class DatabaseConnector: for table_name, initial_id in data.items(): cursor.execute(""" INSERT INTO "_system" ("table_name", "initial_id", "_modifiedAt") - VALUES (%s, %s, CURRENT_TIMESTAMP) - """, (table_name, initial_id)) + VALUES (%s, %s, %s) + """, (table_name, initial_id, get_utc_timestamp())) self.connection.commit() return True @@ -271,8 +270,8 @@ class DatabaseConnector: CREATE TABLE "{self._systemTableName}" ( "table_name" VARCHAR(255) PRIMARY KEY, "initial_id" VARCHAR(255), - "_createdAt" TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - "_modifiedAt" TIMESTAMP DEFAULT CURRENT_TIMESTAMP + "_createdAt" DOUBLE PRECISION, + "_modifiedAt" DOUBLE PRECISION ) """) logger.info("System table created successfully") @@ -285,10 +284,9 @@ class DatabaseConnector: existing_columns = [row['column_name'] for row in cursor.fetchall()] if '_modifiedAt' not in existing_columns: - cursor.execute(f'ALTER TABLE "{self._systemTableName}" ADD COLUMN "_modifiedAt" TIMESTAMP DEFAULT CURRENT_TIMESTAMP') + cursor.execute(f'ALTER TABLE "{self._systemTableName}" ADD COLUMN "_modifiedAt" DOUBLE PRECISION') logger.info("Added _modifiedAt column to existing system table") - logger.debug("System table already exists") return True except Exception as e: @@ -313,11 +311,9 @@ class DatabaseConnector: WHERE LOWER(table_name) = LOWER(%s) AND table_schema = 'public' ''', (table,)) exists = cursor.fetchone()['count'] > 0 - logger.debug(f"Table {table} exists check: {exists}") if not exists: # Create table from Pydantic model - logger.debug(f"Creating table {table} with model {model_class}") self._create_table_from_model(cursor, table, model_class) logger.info(f"Created table '{table}' with columns from Pydantic model") @@ -333,7 +329,6 @@ class DatabaseConnector: def _create_table_from_model(self, cursor, table: str, model_class: type) -> None: """Create table with columns matching Pydantic model fields.""" fields = _get_model_fields(model_class) - logger.debug(f"Creating table {table} with fields: {fields}") # Build column definitions with quoted identifiers to preserve exact case columns = ['"id" VARCHAR(255) PRIMARY KEY'] @@ -343,15 +338,14 @@ class DatabaseConnector: # Add metadata columns columns.extend([ - '"_createdAt" TIMESTAMP DEFAULT CURRENT_TIMESTAMP', - '"_modifiedAt" TIMESTAMP DEFAULT CURRENT_TIMESTAMP', + '"_createdAt" DOUBLE PRECISION', 
+ '"_modifiedAt" DOUBLE PRECISION', '"_createdBy" VARCHAR(255)', '"_modifiedBy" VARCHAR(255)' ]) # Create table sql = f'CREATE TABLE IF NOT EXISTS "{table}" ({", ".join(columns)})' - logger.debug(f"Executing SQL: {sql}") cursor.execute(sql) # Create indexes for foreign keys @@ -366,8 +360,6 @@ class DatabaseConnector: fields = _get_model_fields(model_class) columns = ['id'] + [field for field in fields.keys() if field != 'id'] + ['_createdAt', '_createdBy', '_modifiedAt', '_modifiedBy'] - logger.debug(f"Table {table} columns: {columns}") - logger.debug(f"Record data: {record}") if not columns: logger.error(f"No columns found for table {table}") @@ -384,17 +376,12 @@ class DatabaseConnector: for col in columns: value = filtered_record.get(col) - # Convert timestamp fields to proper PostgreSQL format + # Handle timestamp fields - store as Unix timestamps (floats) for consistency if col in ['_createdAt', '_modifiedAt'] and value is not None: - if isinstance(value, (int, float)): - # Convert Unix timestamp to PostgreSQL timestamp - from datetime import datetime - value = datetime.fromtimestamp(value) - elif isinstance(value, str): - # If it's already a string, try to parse it + if isinstance(value, str): + # Try to parse string as timestamp try: - from datetime import datetime - value = datetime.fromtimestamp(float(value)) + value = float(value) except: pass # Keep as string if parsing fails @@ -424,7 +411,6 @@ class DatabaseConnector: values.append(value) - logger.debug(f"Values to insert: {values}") # Build INSERT/UPDATE with quoted identifiers col_names = ', '.join([f'"{col}"' for col in columns]) @@ -432,7 +418,6 @@ class DatabaseConnector: updates = ', '.join([f'"{col}" = EXCLUDED."{col}"' for col in columns[1:] if col not in ['_createdAt', '_createdBy']]) sql = f'INSERT INTO "{table}" ({col_names}) VALUES ({placeholders}) ON CONFLICT ("id") DO UPDATE SET {updates}' - logger.debug(f"SQL: {sql}") cursor.execute(sql, values) @@ -454,6 +439,7 @@ class DatabaseConnector: record = dict(row) fields = _get_model_fields(model_class) + # Parse JSONB fields back to Python objects for field_name, field_type in fields.items(): if field_type == 'JSONB' and field_name in record and record[field_name] is not None: @@ -527,22 +513,31 @@ class DatabaseConnector: fields = _get_model_fields(model_class) for record in records: for field_name, field_type in fields.items(): - if field_type == 'JSONB' and field_name in record and record[field_name] is not None: - import json - try: - if isinstance(record[field_name], str): - # Parse JSON string back to Python object - record[field_name] = json.loads(record[field_name]) - elif isinstance(record[field_name], (dict, list)): - # Already a Python object, keep as is - pass + if field_type == 'JSONB' and field_name in record: + if record[field_name] is None: + # Convert None to appropriate default based on field name + if field_name in ['logs', 'messages', 'tasks', 'expectedDocumentFormats', 'resultDocuments']: + record[field_name] = [] + elif field_name in ['execParameters', 'stats']: + record[field_name] = {} else: - # Try to parse as JSON - record[field_name] = json.loads(str(record[field_name])) - except (json.JSONDecodeError, TypeError, ValueError): - # If parsing fails, keep as string - logger.warning(f"Could not parse JSONB field {field_name}, keeping as string: {record[field_name]}") - pass + record[field_name] = None + else: + import json + try: + if isinstance(record[field_name], str): + # Parse JSON string back to Python object + record[field_name] = 
json.loads(record[field_name]) + elif isinstance(record[field_name], (dict, list)): + # Already a Python object, keep as is + pass + else: + # Try to parse as JSON + record[field_name] = json.loads(str(record[field_name])) + except (json.JSONDecodeError, TypeError, ValueError): + # If parsing fails, keep as string + logger.warning(f"Could not parse JSONB field {field_name}, keeping as string: {record[field_name]}") + pass return records except Exception as e: @@ -550,35 +545,6 @@ class DatabaseConnector: return [] - def _applyRecordFilter(self, records: List[Dict[str, Any]], recordFilter: Dict[str, Any] = None) -> List[Dict[str, Any]]: - """Applies a record filter to the records""" - if not recordFilter: - return records - - filteredRecords = [] - - for record in records: - match = True - - for field, value in recordFilter.items(): - # Check if the field exists - if field not in record: - match = False - break - - # Convert both values to strings for comparison - recordValue = str(record[field]) - filterValue = str(value) - - # Direct string comparison - if recordValue != filterValue: - match = False - break - - if match: - filteredRecords.append(record) - - return filteredRecords def _registerInitialId(self, table: str, initialId: str) -> bool: """Registers the initial ID for a table.""" @@ -603,7 +569,6 @@ class DatabaseConnector: logger.info(f"Initial ID updated from {existingInitialId} to {initialId} for table {table}") return success else: - logger.debug(f"Initial ID {existingInitialId} for table {table} already exists and is valid") return True except Exception as e: logger.error(f"Error registering the initial ID for table {table}: {e}") @@ -699,34 +664,76 @@ class DatabaseConnector: """Returns a list of records from a table, filtered by criteria.""" table = model_class.__name__ - # If we have specific record IDs in the filter, only load those records - if recordFilter and "id" in recordFilter: - recordId = recordFilter["id"] - record = self._loadRecord(model_class, recordId) - if record: - records = [record] - else: + try: + if not self._ensureTableExists(model_class): return [] - else: - # Load all records if no specific ID filter - records = self._loadTable(model_class) - - # Apply recordFilter if available - if recordFilter: - records = self._applyRecordFilter(records, recordFilter) - - # If fieldFilter is available, reduce the fields - if fieldFilter and isinstance(fieldFilter, list): - result = [] - for record in records: - filteredRecord = {} - for field in fieldFilter: - if field in record: - filteredRecord[field] = record[field] - result.append(filteredRecord) - return result - - return records + + # Build WHERE clause from recordFilter + where_conditions = [] + where_values = [] + + if recordFilter: + for field, value in recordFilter.items(): + where_conditions.append(f'"{field}" = %s') + where_values.append(value) + + # Build the query + if where_conditions: + where_clause = " WHERE " + " AND ".join(where_conditions) + else: + where_clause = "" + + query = f'SELECT * FROM "{table}"{where_clause} ORDER BY "id"' + + with self.connection.cursor() as cursor: + cursor.execute(query, where_values) + records = [dict(row) for row in cursor.fetchall()] + + # Handle JSONB fields for all records + fields = _get_model_fields(model_class) + for record in records: + for field_name, field_type in fields.items(): + if field_type == 'JSONB' and field_name in record: + if record[field_name] is None: + # Convert None to appropriate default based on field name + if field_name in ['logs', 
'messages', 'tasks', 'expectedDocumentFormats', 'resultDocuments']: + record[field_name] = [] + elif field_name in ['execParameters', 'stats']: + record[field_name] = {} + else: + record[field_name] = None + else: + import json + try: + if isinstance(record[field_name], str): + # Parse JSON string back to Python object + record[field_name] = json.loads(record[field_name]) + elif isinstance(record[field_name], (dict, list)): + # Already a Python object, keep as is + pass + else: + # Try to parse as JSON + record[field_name] = json.loads(str(record[field_name])) + except (json.JSONDecodeError, TypeError, ValueError): + # If parsing fails, keep as string + logger.warning(f"Could not parse JSONB field {field_name}, keeping as string: {record[field_name]}") + pass + + # If fieldFilter is available, reduce the fields + if fieldFilter and isinstance(fieldFilter, list): + result = [] + for record in records: + filteredRecord = {} + for field in fieldFilter: + if field in record: + filteredRecord[field] = record[field] + result.append(filteredRecord) + return result + + return records + except Exception as e: + logger.error(f"Error loading records from table {table}: {e}") + return [] def recordCreate(self, model_class: type, record: Union[Dict[str, Any], BaseModel]) -> Dict[str, Any]: """Creates a new record in a table based on Pydantic model class.""" @@ -793,7 +800,7 @@ class DatabaseConnector: with self.connection.cursor() as cursor: # Check if record exists - cursor.execute(f"SELECT id FROM {table} WHERE id = %s", (recordId,)) + cursor.execute(f'SELECT "id" FROM "{table}" WHERE "id" = %s', (recordId,)) if not cursor.fetchone(): return False @@ -804,7 +811,7 @@ class DatabaseConnector: logger.info(f"Initial ID {recordId} for table {table} has been removed from the system table") # Delete the record - cursor.execute(f"DELETE FROM {table} WHERE id = %s", (recordId,)) + cursor.execute(f'DELETE FROM "{table}" WHERE "id" = %s', (recordId,)) # No cache to update - database handles consistency @@ -822,14 +829,12 @@ class DatabaseConnector: table = model_class.__name__ systemData = self._loadSystemTable() initialId = systemData.get(table) - logger.debug(f"Initial ID for table '{table}': {initialId}") return initialId def close(self): """Close the database connection.""" if hasattr(self, 'connection') and self.connection and not self.connection.closed: self.connection.close() - logger.debug("Database connection closed") def __del__(self): """Cleanup method to close connection.""" diff --git a/modules/interfaces/interfaceAppObjects.py b/modules/interfaces/interfaceAppObjects.py index 75af8878..c71e0c03 100644 --- a/modules/interfaces/interfaceAppObjects.py +++ b/modules/interfaces/interfaceAppObjects.py @@ -728,7 +728,6 @@ class AppObjects: if old_token["id"] != token.id: # Don't delete the new token if it already exists self.db.recordDelete(Token, old_token["id"]) deleted_count += 1 - logger.debug(f"Deleted old access token {old_token['id']} for user {self.currentUser.id} and authority {token.authority}") if deleted_count > 0: logger.info(f"Replaced {deleted_count} old access tokens for user {self.currentUser.id} and authority {token.authority}") @@ -781,7 +780,6 @@ class AppObjects: if old_token["id"] != token.id: # Don't delete the new token if it already exists self.db.recordDelete(Token, old_token["id"]) deleted_count += 1 - logger.debug(f"Deleted old token {old_token['id']} for connectionId {token.connectionId}") if deleted_count > 0: logger.info(f"Replaced {deleted_count} old tokens for 
connectionId {token.connectionId}") @@ -864,17 +862,6 @@ class AppObjects: "connectionId": connectionId }) - # Debug: Log what we found - logger.debug(f"getConnectionToken: Found {len(tokens)} tokens for connectionId {connectionId}") - if tokens: - for i, token in enumerate(tokens): - logger.debug(f"getConnectionToken: Token {i}: id={token.get('id')}, expiresAt={token.get('expiresAt')}, createdAt={token.get('createdAt')}") - else: - # Debug: Check if there are any tokens at all in the database - all_tokens = self.db.getRecordset(Token, recordFilter={}) - logger.debug(f"getConnectionToken: No tokens found for connectionId {connectionId}. Total tokens in database: {len(all_tokens)}") - if all_tokens: - logger.debug(f"getConnectionToken: Sample tokens: {[{'id': t.get('id'), 'connectionId': t.get('connectionId'), 'authority': t.get('authority')} for t in all_tokens[:3]]}") if not tokens: logger.warning(f"No connection token found for connectionId: {connectionId}") @@ -890,25 +877,21 @@ class AppObjects: if latest_token.expiresAt and latest_token.expiresAt < (current_time + thirty_minutes): if auto_refresh: - logger.debug(f"getConnectionToken: Token expires soon, attempting refresh. expiresAt: {latest_token.expiresAt}, current_time: {current_time}") - # Import TokenManager here to avoid circular imports from modules.security.tokenManager import TokenManager token_manager = TokenManager() # Try to refresh the token - logger.debug(f"getConnectionToken: Calling token_manager.refresh_token for token {latest_token.id}") refreshed_token = token_manager.refresh_token(latest_token) if refreshed_token: - logger.debug(f"getConnectionToken: Token refresh successful, saving new token {refreshed_token.id}") # Save the new token (which will automatically replace old ones) self.saveConnectionToken(refreshed_token) logger.info(f"Proactively refreshed connection token for connectionId {connectionId} (expired in {latest_token.expiresAt - current_time} seconds)") return refreshed_token else: - logger.warning(f"getConnectionToken: Token refresh failed for connectionId {connectionId}") + logger.warning(f"Token refresh failed for connectionId {connectionId}") return None else: logger.warning(f"Connection token for connectionId {connectionId} expires soon (expiresAt: {latest_token.expiresAt})") @@ -1047,13 +1030,9 @@ def getRootUser() -> User: if not users: raise ValueError("Initial user not found in database") - logger.debug(f"Retrieved user data: {users[0]}") # Convert to User model and return the model instance user_data = users[0] - logger.debug(f"User data keys: {list(user_data.keys())}") - logger.debug(f"User id: {user_data.get('id')}") - logger.debug(f"User mandateId: {user_data.get('mandateId')}") return User.parse_obj(user_data) diff --git a/modules/interfaces/interfaceChatModel.py b/modules/interfaces/interfaceChatModel.py index 5633e8f8..ed71963a 100644 --- a/modules/interfaces/interfaceChatModel.py +++ b/modules/interfaces/interfaceChatModel.py @@ -200,6 +200,9 @@ register_model_labels( "id": {"en": "ID", "fr": "ID"}, "messageId": {"en": "Message ID", "fr": "ID du message"}, "fileId": {"en": "File ID", "fr": "ID du fichier"}, + "fileName": {"en": "File Name", "fr": "Nom du fichier"}, + "fileSize": {"en": "File Size", "fr": "Taille du fichier"}, + "mimeType": {"en": "MIME Type", "fr": "Type MIME"}, "roundNumber": {"en": "Round Number", "fr": "Numéro de tour"}, "taskNumber": {"en": "Task Number", "fr": "Numéro de tâche"}, "actionNumber": {"en": "Action Number", "fr": "Numéro d'action"}, diff --git 
a/modules/interfaces/interfaceChatObjects.py b/modules/interfaces/interfaceChatObjects.py index 8aca736a..43ad3f97 100644 --- a/modules/interfaces/interfaceChatObjects.py +++ b/modules/interfaces/interfaceChatObjects.py @@ -130,7 +130,6 @@ class ChatObjects: except Exception as e: logger.error(f"Error closing database connection: {e}") - logger.debug(f"User context set: userId={self.userId}, mandateId={self.mandateId}") def _initializeDatabase(self): """Initializes the database connection directly.""" @@ -307,7 +306,6 @@ class ChatObjects: log_dict = log_data log_dict["workflowId"] = workflowId self.createLog(log_dict) - logger.debug(f"Updated {len(logs_data)} logs for workflow {workflowId}") except Exception as e: logger.error(f"Error updating workflow logs: {str(e)}") if 'messages' in object_fields: @@ -322,7 +320,6 @@ class ChatObjects: msg_dict = message_data msg_dict["workflowId"] = workflowId self.updateMessage(msg_dict.get("id"), msg_dict) - logger.debug(f"Updated {len(messages_data)} messages for workflow {workflowId}") except Exception as e: logger.error(f"Error updating workflow messages: {str(e)}") if 'stats' in object_fields: @@ -331,7 +328,6 @@ class ChatObjects: if stats_data: stats_data["workflowId"] = workflowId self.db.recordCreate(ChatStat, stats_data) - logger.debug(f"Updated stats for workflow {workflowId}") except Exception as e: logger.error(f"Error updating workflow stats: {str(e)}") @@ -402,7 +398,6 @@ class ChatObjects: # 4. Finally delete the workflow itself success = self.db.recordDelete(ChatWorkflow, workflowId) - logger.debug(f"Successfully deleted workflow {workflowId} and all related data") return success except Exception as e: @@ -461,6 +456,7 @@ class ChatObjects: chat_messages.append(chat_message) + return chat_messages def createMessage(self, messageData: Dict[str, Any]) -> ChatMessage: @@ -505,15 +501,12 @@ class ChatObjects: # This ensures messages have the correct progress context when workflows are continued if "roundNumber" not in messageData: messageData["roundNumber"] = workflow.currentRound - logger.debug(f"Auto-setting roundNumber to {workflow.currentRound} for message {messageData['id']}") if "taskNumber" not in messageData: messageData["taskNumber"] = workflow.currentTask - logger.debug(f"Auto-setting taskNumber to {workflow.currentTask} for message {messageData['id']}") if "actionNumber" not in messageData: messageData["actionNumber"] = workflow.currentAction - logger.debug(f"Auto-setting actionNumber to {workflow.currentAction} for message {messageData['id']}") # Use generic field separation based on ChatMessage model simple_fields, object_fields = self._separate_object_fields(ChatMessage, messageData) @@ -571,7 +564,6 @@ class ChatObjects: def updateMessage(self, messageId: str, messageData: Dict[str, Any]) -> Dict[str, Any]: """Updates a workflow message if user has access to the workflow.""" try: - logger.debug(f"Updating message {messageId} in database") # Ensure messageId is provided if not messageId: @@ -646,7 +638,6 @@ class ChatObjects: doc_dict = doc_data doc_dict["messageId"] = messageId self.createDocument(doc_dict) - logger.debug(f"Updated {len(documents_data)} documents for message {messageId}") except Exception as e: logger.error(f"Error updating message documents: {str(e)}") if 'stats' in object_fields: @@ -655,12 +646,9 @@ class ChatObjects: if stats_data: stats_data["messageId"] = messageId self.db.recordCreate(ChatStat, stats_data) - logger.debug(f"Updated stats for message {messageId}") except Exception as e: 
logger.error(f"Error updating message stats: {str(e)}") - if updatedMessage: - logger.debug(f"Message {messageId} updated successfully") - else: + if not updatedMessage: logger.warning(f"Failed to update message {messageId}") return updatedMessage @@ -703,7 +691,6 @@ class ChatObjects: # 3. Finally delete the message itself success = self.db.recordDelete(ChatMessage, messageId) - logger.debug(f"Successfully deleted message {messageId} and all related data") return success except Exception as e: @@ -722,7 +709,6 @@ class ChatObjects: if not self._canModify(ChatWorkflow, workflowId): raise PermissionError(f"No permission to modify workflow {workflowId}") - logger.debug(f"Removing file {fileId} from message {messageId} in workflow {workflowId}") # Get documents for this message from normalized table documents = self.db.getRecordset(ChatDocument, recordFilter={"messageId": messageId}) @@ -750,7 +736,6 @@ class ChatObjects: success = self.db.recordDelete(ChatDocument, docId) if success: removed = True - logger.debug(f"Successfully removed document {docId} (fileId: {fileIdValue})") else: logger.warning(f"Failed to delete document {docId}") @@ -758,7 +743,6 @@ class ChatObjects: logger.warning(f"No matching file {fileId} found in message {messageId}") return False - logger.debug(f"Successfully removed file {fileId} from message {messageId}") return True except Exception as e: @@ -902,6 +886,100 @@ class ChatObjects: stats.sort(key=lambda x: x.get("created_at", ""), reverse=True) return ChatStat(**stats[0]) + def getUnifiedChatData(self, workflowId: str, afterTimestamp: Optional[float] = None) -> Dict[str, Any]: + """ + Returns unified chat data (messages, logs, stats) for a workflow in chronological order. + Uses timestamp-based selective data transfer for efficient polling. 
+ """ + # Check workflow access first + workflows = self.db.getRecordset(ChatWorkflow, recordFilter={"id": workflowId}) + if not workflows: + return {"items": []} + + filteredWorkflows = self._uam(ChatWorkflow, workflows) + if not filteredWorkflows: + return {"items": []} + + # Get all data types and filter in Python (PostgreSQL connector doesn't support $gt operators) + items = [] + + # Get messages + messages = self.db.getRecordset(ChatMessage, recordFilter={"workflowId": workflowId}) + for msg in messages: + # Apply timestamp filtering in Python + msg_timestamp = msg.get("publishedAt", get_utc_timestamp()) + if afterTimestamp is not None and msg_timestamp <= afterTimestamp: + continue + + # Load documents for each message + documents = self.getDocuments(msg["id"]) + + # Create ChatMessage object with loaded documents + chat_message = ChatMessage( + id=msg["id"], + workflowId=msg["workflowId"], + parentMessageId=msg.get("parentMessageId"), + documents=documents, + documentsLabel=msg.get("documentsLabel"), + message=msg.get("message"), + role=msg.get("role", "assistant"), + status=msg.get("status", "step"), + sequenceNr=msg.get("sequenceNr", 0), + publishedAt=msg.get("publishedAt", get_utc_timestamp()), + stats=self.getMessageStats(msg["id"]), + success=msg.get("success"), + actionId=msg.get("actionId"), + actionMethod=msg.get("actionMethod"), + actionName=msg.get("actionName"), + roundNumber=msg.get("roundNumber"), + taskNumber=msg.get("taskNumber"), + actionNumber=msg.get("actionNumber"), + taskProgress=msg.get("taskProgress"), + actionProgress=msg.get("actionProgress") + ) + + # Use publishedAt as the timestamp for chronological ordering + items.append({ + "type": "message", + "createdAt": msg_timestamp, + "item": chat_message.dict() + }) + + # Get logs + logs = self.db.getRecordset(ChatLog, recordFilter={"workflowId": workflowId}) + for log in logs: + # Apply timestamp filtering in Python + log_timestamp = log.get("timestamp", get_utc_timestamp()) + if afterTimestamp is not None and log_timestamp <= afterTimestamp: + continue + + chat_log = ChatLog(**log) + items.append({ + "type": "log", + "createdAt": log_timestamp, + "item": chat_log.dict() + }) + + # Get stats + stats = self.db.getRecordset(ChatStat, recordFilter={"workflowId": workflowId}) + for stat in stats: + # Apply timestamp filtering in Python + stat_timestamp = stat.get("_createdAt", get_utc_timestamp()) + if afterTimestamp is not None and stat_timestamp <= afterTimestamp: + continue + + chat_stat = ChatStat(**stat) + items.append({ + "type": "stat", + "createdAt": stat_timestamp, + "item": chat_stat.dict() + }) + + # Sort all items by createdAt timestamp for chronological order + items.sort(key=lambda x: x["createdAt"]) + + return {"items": items} + def updateWorkflowStats(self, workflowId: str, bytesSent: int = 0, bytesReceived: int = 0) -> bool: """Updates workflow statistics during execution with incremental values.""" try: diff --git a/modules/interfaces/interfaceTicketObjects.py b/modules/interfaces/interfaceTicketObjects.py index 3df6464f..8d46e20f 100644 --- a/modules/interfaces/interfaceTicketObjects.py +++ b/modules/interfaces/interfaceTicketObjects.py @@ -504,7 +504,9 @@ class TicketSharepointSyncInterface: except Exception as e: # If audit logging fails, we don't want to break the main sync process # Just log the error (this could be enhanced with fallback logging) - print(f"Failed to write audit log: {str(e)}") + import logging + logger = logging.getLogger(__name__) + logger.warning(f"Failed to write audit 
log: {str(e)}") def _create_csv_content(self, data: list[dict]) -> bytes: """Create CSV content with 4-row structure matching reference code.""" diff --git a/modules/routes/routeJira.py b/modules/routes/routeJira.py index 7874b181..3e4038aa 100644 --- a/modules/routes/routeJira.py +++ b/modules/routes/routeJira.py @@ -68,8 +68,8 @@ async def perform_sync_jira_delta_group(): sharepoint_site_url = None # Jira connection parameters - jira_username = None - jira_api_token = None + jira_username = "ONHOLD - TASK - p.motsch@valueon.ch" + jira_api_token = "ATATT3xFfGF0d973nNb3R1wTDI4lesmJfJAmooS-4cYMJTyLfwYv4himrE6yyCxyX3aSMfl34NHcm2fAXeFXrLHUzJx0RQVUBonCFnlgexjLQTgS5BoCbSO7dwAVjlcHZZkArHbooCUaRwJ15n6AHkm-nwdjLQ3Z74TFnKKUZC4uhuh3Aj-MuX8=2D7124FA" jira_url = "https://deltasecurity.atlassian.net" project_code = "DCS" issue_type = "Task" diff --git a/modules/routes/routeWorkflows.py b/modules/routes/routeWorkflows.py index 81f48205..fe70e347 100644 --- a/modules/routes/routeWorkflows.py +++ b/modules/routes/routeWorkflows.py @@ -59,29 +59,16 @@ async def get_workflows( appInterface = getInterface(currentUser) workflows_data = appInterface.getWorkflows() - # Convert raw dictionaries to ChatWorkflow objects + # Convert raw dictionaries to ChatWorkflow objects by loading each workflow properly workflows = [] for workflow_data in workflows_data: try: - workflow = ChatWorkflow( - id=workflow_data["id"], - status=workflow_data.get("status", "running"), - name=workflow_data.get("name"), - currentRound=workflow_data.get("currentRound", 0), # Default value - currentTask=workflow_data.get("currentTask", 0), - currentAction=workflow_data.get("currentAction", 0), - totalTasks=workflow_data.get("totalTasks", 0), - totalActions=workflow_data.get("totalActions", 0), - lastActivity=workflow_data.get("lastActivity", get_utc_timestamp()), - startedAt=workflow_data.get("startedAt", get_utc_timestamp()), - logs=[ChatLog(**log) for log in workflow_data.get("logs", [])], - messages=[ChatMessage(**msg) for msg in workflow_data.get("messages", [])], - stats=ChatStat(**workflow_data.get("stats", {})) if workflow_data.get("stats") else None, - mandateId=workflow_data.get("mandateId", currentUser.mandateId or "") - ) - workflows.append(workflow) + # Load the workflow properly using the same method as individual workflow endpoint + workflow = appInterface.getWorkflow(workflow_data["id"]) + if workflow: + workflows.append(workflow) except Exception as e: - logger.warning(f"Error converting workflow data to ChatWorkflow object: {str(e)}") + logger.warning(f"Error loading workflow {workflow_data.get('id', 'unknown')}: {str(e)}") # Skip invalid workflows instead of failing the entire request continue @@ -276,7 +263,8 @@ async def get_workflow_messages( messageIndex = next((i for i, msg in enumerate(allMessages) if msg.id == messageId), -1) if messageIndex >= 0: # Return only messages after the specified message - return allMessages[messageIndex + 1:] + filteredMessages = allMessages[messageIndex + 1:] + return filteredMessages return allMessages except HTTPException: @@ -395,6 +383,45 @@ async def delete_workflow( ) +# Unified Chat Data Endpoint for Polling +@router.get("/{workflowId}/chatData") +@limiter.limit("120/minute") +async def get_workflow_chat_data( + request: Request, + workflowId: str = Path(..., description="ID of the workflow"), + afterTimestamp: Optional[float] = Query(None, description="Unix timestamp to get data after"), + currentUser: User = Depends(getCurrentUser) +) -> Dict[str, Any]: + """ + Get unified chat 
data (messages, logs, stats) for a workflow with timestamp-based selective data transfer. + Returns all data types in chronological order based on _createdAt timestamp. + """ + try: + # Get service center + interfaceChat = getServiceChat(currentUser) + + # Verify workflow exists + workflow = interfaceChat.getWorkflow(workflowId) + if not workflow: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f"Workflow with ID {workflowId} not found" + ) + + # Get unified chat data using the new method + chatData = interfaceChat.getUnifiedChatData(workflowId, afterTimestamp) + + return chatData + + except HTTPException: + raise + except Exception as e: + logger.error(f"Error getting unified chat data: {str(e)}", exc_info=True) + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=f"Error getting unified chat data: {str(e)}" + ) + # Document Management Endpoints @router.delete("/{workflowId}/messages/{messageId}", response_model=Dict[str, Any]) diff --git a/modules/shared/timezoneUtils.py b/modules/shared/timezoneUtils.py index a9d2260d..93011060 100644 --- a/modules/shared/timezoneUtils.py +++ b/modules/shared/timezoneUtils.py @@ -5,6 +5,7 @@ Ensures all timestamps are properly handled as UTC. from datetime import datetime, timezone, timedelta from typing import Union, Optional +import time def get_utc_now() -> datetime: """ @@ -17,12 +18,12 @@ def get_utc_now() -> datetime: def get_utc_timestamp() -> float: """ - Get current UTC timestamp (seconds since epoch). + Get current UTC timestamp (seconds since epoch with millisecond precision). Returns: - float: Current UTC timestamp in seconds + float: Current UTC timestamp in seconds with millisecond precision """ - return datetime.now(timezone.utc).timestamp() + return time.time() def to_utc_timestamp(dt: datetime) -> float: """ diff --git a/notes/changelog.txt b/notes/changelog.txt index d4574804..e10e683a 100644 --- a/notes/changelog.txt +++ b/notes/changelog.txt @@ -24,6 +24,12 @@ TODO - check zusammenfassung von 10 dokumenten >10 MB - test case bewerbung +# Ida changes gateway: +- Polling endpoint + doku dazu +- files in documents integriert --> document endpoint for files +- prompts in chat endpoint +- + # DOCUMENTATION Design principles - UI: Module classes for data management (CRUD tables & forms --> formGeneric) From bb253c18faf40936a6e771b2157349c47e77b6f9 Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Mon, 8 Sep 2025 23:31:47 +0200 Subject: [PATCH 16/27] fixed mandates --- modules/interfaces/interfaceAppAccess.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/interfaces/interfaceAppAccess.py b/modules/interfaces/interfaceAppAccess.py index f04d8968..25b318ad 100644 --- a/modules/interfaces/interfaceAppAccess.py +++ b/modules/interfaces/interfaceAppAccess.py @@ -5,7 +5,7 @@ Access control for the Application. 
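
For reference, a minimal sketch of how a client might consume the /chatData polling endpoint and the getUnifiedChatData timestamp filtering introduced above. The base URL, the /workflows route prefix and bearer-token auth are illustrative assumptions; only the afterTimestamp query parameter, the {"items": [...]} response shape and the 120/minute rate limit come from the code above.

# Illustrative polling client; base URL, route prefix and auth scheme are assumptions.
import asyncio
import aiohttp


async def poll_chat_data(base_url: str, workflow_id: str, token: str) -> None:
    """Repeatedly fetch only items newer than the last seen createdAt timestamp."""
    last_seen = 0.0
    headers = {"Authorization": f"Bearer {token}"}
    async with aiohttp.ClientSession(headers=headers) as session:
        while True:
            params = {"afterTimestamp": str(last_seen)} if last_seen else {}
            url = f"{base_url}/workflows/{workflow_id}/chatData"
            async with session.get(url, params=params) as response:
                response.raise_for_status()
                data = await response.json()
            for entry in data.get("items", []):
                # entry["type"] is "message", "log" or "stat"; entry["item"] holds the payload
                last_seen = max(last_seen, entry.get("createdAt", last_seen))
            await asyncio.sleep(2)  # ~30 requests/minute, well under the 120/minute limit


# asyncio.run(poll_chat_data("https://gateway.example", "wf-123", "access-token"))
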
import logging from typing import Dict, Any, List, Optional from datetime import datetime -from modules.interfaces.interfaceAppModel import UserPrivilege, User, UserInDB, AuthEvent +from modules.interfaces.interfaceAppModel import UserPrivilege, User, UserInDB, AuthEvent, Mandate from modules.shared.timezoneUtils import get_utc_now # Configure logger From bc63bf7a751099c31c199f68afd1446a3dbe74df Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Mon, 8 Sep 2025 23:48:48 +0200 Subject: [PATCH 17/27] cleaned handling of tasks --- modules/chat/handling/handlingTasks.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/modules/chat/handling/handlingTasks.py b/modules/chat/handling/handlingTasks.py index cfc31401..49d0b97c 100644 --- a/modules/chat/handling/handlingTasks.py +++ b/modules/chat/handling/handlingTasks.py @@ -109,9 +109,6 @@ class HandlingTasks: logger.info("=== TASK PLANNING PROMPT SENT TO AI ===") logger.info(f"User Input: {userInput}") logger.info(f"Available Documents: {available_docs}") - logger.info("=== FULL TASK PLANNING PROMPT ===") - logger.info(task_planning_prompt) - logger.info("=== END TASK PLANNING PROMPT ===") prompt = await self.service.callAiTextAdvanced(task_planning_prompt) From 6326e54a9a1b225a30e59fede6f3c5f7b516cdd3 Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Mon, 8 Sep 2025 23:55:44 +0200 Subject: [PATCH 18/27] build prod --- .../workflows/main_poweron-gateway-prod.yml | 75 +++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 .github/workflows/main_poweron-gateway-prod.yml diff --git a/.github/workflows/main_poweron-gateway-prod.yml b/.github/workflows/main_poweron-gateway-prod.yml new file mode 100644 index 00000000..9dc86510 --- /dev/null +++ b/.github/workflows/main_poweron-gateway-prod.yml @@ -0,0 +1,75 @@ +# Docs for the Azure Web Apps Deploy action: https://github.com/Azure/webapps-deploy +# More GitHub Actions for Azure: https://github.com/Azure/actions +# More info on Python, GitHub Actions, and Azure App Service: https://aka.ms/python-webapps-actions + +name: Build and deploy Python app to Azure Web App - poweron-gateway-prod + +on: + push: + branches: + - main + workflow_dispatch: + +jobs: + build: + runs-on: ubuntu-latest + permissions: + contents: read #This is required for actions/checkout + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python version + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Create and start virtual environment + run: | + python -m venv venv + source venv/bin/activate + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt --no-cache-dir + + # Optional: Add step to run tests here (PyTest, Django test suites, etc.) 
+ + - name: Zip artifact for deployment + run: zip release.zip ./* -r + + - name: Upload artifact for deployment jobs + uses: actions/upload-artifact@v4 + with: + name: python-app + path: | + release.zip + !venv/ + + deploy: + runs-on: ubuntu-latest + needs: build + environment: + name: 'Production' + url: ${{ steps.deploy-to-webapp.outputs.webapp-url }} + + steps: + - name: Download artifact from build job + uses: actions/download-artifact@v4 + with: + name: python-app + + - name: Unzip artifact for deployment + run: unzip release.zip + + - name: Set productive environment + run: cp env_prod.env .env + + - name: 'Deploy to Azure Web App' + uses: azure/webapps-deploy@v3 + id: deploy-to-webapp + with: + app-name: 'poweron-gateway-prod' + slot-name: 'Production' + publish-profile: ${{ secrets.AZUREAPPSERVICE_PUBLISHPROFILE_A0393566625E447EAD8EB1C489BA06A2 }} \ No newline at end of file From 767084aa87057739848adae18dc2a3f5ac7cd347 Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Tue, 9 Sep 2025 00:55:36 +0200 Subject: [PATCH 19/27] deploy 01 --- env_int.env | 33 ++++++--------------------------- env_prod.env | 2 +- 2 files changed, 7 insertions(+), 28 deletions(-) diff --git a/env_int.env b/env_int.env index 18720a6a..2d836e20 100644 --- a/env_int.env +++ b/env_int.env @@ -5,46 +5,25 @@ APP_ENV_TYPE = int APP_ENV_LABEL = Integration Instance APP_API_URL = https://gateway-int.poweron-center.net -# Database Configuration Application -# JSON File Storage (current) -# DB_APP_HOST=/home/_powerondb -# DB_APP_DATABASE=app -# DB_APP_USER=dev_user -# DB_APP_PASSWORD_SECRET=dev_password - # PostgreSQL Storage (new) DB_APP_HOST=gateway-int-db.poweron-center.net DB_APP_DATABASE=poweron_app_int -DB_APP_USER=poweron_int -DB_APP_PASSWORD_SECRET=int_password_secure +DB_APP_USER=heeshkdlby +DB_APP_PASSWORD_SECRET=VkAjgECESbEVQ$Tu DB_APP_PORT=5432 -# Database Configuration Chat -# JSON File Storage (current) -# DB_CHAT_HOST=/home/_powerondb -# DB_CHAT_DATABASE=chat -# DB_CHAT_USER=dev_user -# DB_CHAT_PASSWORD_SECRET=dev_password - # PostgreSQL Storage (new) DB_CHAT_HOST=gateway-int-db.poweron-center.net DB_CHAT_DATABASE=poweron_chat_int -DB_CHAT_USER=poweron_int -DB_CHAT_PASSWORD_SECRET=int_password_secure +DB_CHAT_USER=heeshkdlby +DB_CHAT_PASSWORD_SECRET=VkAjgECESbEVQ$Tu DB_CHAT_PORT=5432 -# Database Configuration Management -# JSON File Storage (current) -# DB_MANAGEMENT_HOST=/home/_powerondb -# DB_MANAGEMENT_DATABASE=management -# DB_MANAGEMENT_USER=dev_user -# DB_MANAGEMENT_PASSWORD_SECRET=dev_password - # PostgreSQL Storage (new) DB_MANAGEMENT_HOST=gateway-int-db.poweron-center.net DB_MANAGEMENT_DATABASE=poweron_management_int -DB_MANAGEMENT_USER=poweron_int -DB_MANAGEMENT_PASSWORD_SECRET=int_password_secure +DB_MANAGEMENT_USER=heeshkdlby +DB_MANAGEMENT_PASSWORD_SECRET=VkAjgECESbEVQ$Tu DB_MANAGEMENT_PORT=5432 # Security Configuration diff --git a/env_prod.env b/env_prod.env index 66123d6b..e1fff4c5 100644 --- a/env_prod.env +++ b/env_prod.env @@ -29,7 +29,7 @@ DB_APP_PORT=5432 # PostgreSQL Storage (new) DB_CHAT_HOST=gateway-prod-server.postgres.database.azure.com DB_CHAT_DATABASE=gateway-chat -DB_CHAT_USER=poweron_prod +DB_CHAT_USER=gzxxmcrdhn DB_CHAT_PASSWORD_SECRET=prod_password_very_secure.2025 DB_CHAT_PORT=5432 From 86f05901ab1523184e47e35f8ebb5002ceb2b3a6 Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Tue, 9 Sep 2025 00:56:47 +0200 Subject: [PATCH 20/27] Deploy 02 --- .../{int_poweron-gateway-int.yml => int_gateway-int.yml} | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) rename 
.github/workflows/{int_poweron-gateway-int.yml => int_gateway-int.yml} (94%) diff --git a/.github/workflows/int_poweron-gateway-int.yml b/.github/workflows/int_gateway-int.yml similarity index 94% rename from .github/workflows/int_poweron-gateway-int.yml rename to .github/workflows/int_gateway-int.yml index d092bada..fcf66c89 100644 --- a/.github/workflows/int_poweron-gateway-int.yml +++ b/.github/workflows/int_gateway-int.yml @@ -2,7 +2,7 @@ # More GitHub Actions for Azure: https://github.com/Azure/actions # More info on Python, GitHub Actions, and Azure App Service: https://aka.ms/python-webapps-actions -name: Build and deploy Python app to Azure Web App - poweron-gateway-int +name: Build and deploy Python app to Azure Web App - gateway-int on: push: @@ -75,6 +75,6 @@ jobs: uses: azure/webapps-deploy@v2 id: deploy-to-webapp with: - app-name: 'poweron-gateway-int' + app-name: 'gateway-int' slot-name: 'Production' package: . From cf9a94fd79fd577cf6c853c97001ab188aa566dd Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Tue, 9 Sep 2025 01:14:16 +0200 Subject: [PATCH 21/27] deploy profile --- .github/workflows/int_gateway-int.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/int_gateway-int.yml b/.github/workflows/int_gateway-int.yml index fcf66c89..02cbb32f 100644 --- a/.github/workflows/int_gateway-int.yml +++ b/.github/workflows/int_gateway-int.yml @@ -51,7 +51,7 @@ jobs: runs-on: ubuntu-latest needs: build environment: - name: 'Integration' + name: 'Production' url: ${{ steps.deploy-to-webapp.outputs.webapp-url }} steps: From 9b7420e9c1713930a0436f798e3812e44348d674 Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Tue, 9 Sep 2025 01:20:30 +0200 Subject: [PATCH 22/27] publish profile with g-int --- .github/workflows/int_gateway-int.yml | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/.github/workflows/int_gateway-int.yml b/.github/workflows/int_gateway-int.yml index 02cbb32f..22511db2 100644 --- a/.github/workflows/int_gateway-int.yml +++ b/.github/workflows/int_gateway-int.yml @@ -1,7 +1,3 @@ -# Docs for the Azure Web Apps Deploy action: https://github.com/Azure/webapps-deploy -# More GitHub Actions for Azure: https://github.com/Azure/actions -# More info on Python, GitHub Actions, and Azure App Service: https://aka.ms/python-webapps-actions - name: Build and deploy Python app to Azure Web App - gateway-int on: @@ -14,7 +10,7 @@ jobs: build: runs-on: ubuntu-latest permissions: - contents: read #This is required for actions/checkout + contents: read steps: - uses: actions/checkout@v4 @@ -34,8 +30,6 @@ jobs: python -m pip install --upgrade pip pip install -r requirements.txt --no-cache-dir - # Optional: Add step to run tests here (PyTest, Django test suites, etc.) 
- - name: Zip artifact for deployment run: zip release.zip ./* -r @@ -51,7 +45,7 @@ jobs: runs-on: ubuntu-latest needs: build environment: - name: 'Production' + name: 'Production' # Or change to 'Integration' if you prefer url: ${{ steps.deploy-to-webapp.outputs.webapp-url }} steps: @@ -65,16 +59,13 @@ jobs: - name: Set productive environment run: cp env_int.env .env - - - name: Login to Azure - uses: azure/login@v1 - with: - creds: '{"clientId":"${{ secrets.AZURE_CLIENT_ID }}","clientSecret":"${{ secrets.AZURE_CLIENT_SECRET }}","subscriptionId":"${{ secrets.AZURE_SUBSCRIPTION_ID }}","tenantId":"${{ secrets.AZURE_TENANT_ID }}"}' - - - name: Deploy to Azure Web App - uses: azure/webapps-deploy@v2 + + # REMOVED: Azure login step - not needed with publish profile + + - name: 'Deploy to Azure Web App' + uses: azure/webapps-deploy@v3 # Using v3 like the working one id: deploy-to-webapp with: app-name: 'gateway-int' slot-name: 'Production' - package: . + publish-profile: ${{ secrets.AZUREAPPSERVICE_PUBLISHPROFILE_GATEWAY_INT }} \ No newline at end of file From 522651b98cd364e35107bdb2bde011a3158c648f Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Tue, 9 Sep 2025 01:44:37 +0200 Subject: [PATCH 23/27] revised yaml int --- .github/workflows/int_gateway-int.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/int_gateway-int.yml b/.github/workflows/int_gateway-int.yml index 22511db2..2a7da645 100644 --- a/.github/workflows/int_gateway-int.yml +++ b/.github/workflows/int_gateway-int.yml @@ -10,7 +10,7 @@ jobs: build: runs-on: ubuntu-latest permissions: - contents: read + contents: read #This is required for actions/checkout steps: - uses: actions/checkout@v4 @@ -30,6 +30,8 @@ jobs: python -m pip install --upgrade pip pip install -r requirements.txt --no-cache-dir + # Optional: Add step to run tests here (PyTest, Django test suites, etc.) 
+ - name: Zip artifact for deployment run: zip release.zip ./* -r @@ -45,7 +47,7 @@ jobs: runs-on: ubuntu-latest needs: build environment: - name: 'Production' # Or change to 'Integration' if you prefer + name: 'Production' url: ${{ steps.deploy-to-webapp.outputs.webapp-url }} steps: @@ -60,10 +62,8 @@ jobs: - name: Set productive environment run: cp env_int.env .env - # REMOVED: Azure login step - not needed with publish profile - - name: 'Deploy to Azure Web App' - uses: azure/webapps-deploy@v3 # Using v3 like the working one + uses: azure/webapps-deploy@v3 id: deploy-to-webapp with: app-name: 'gateway-int' From 3525b72c2c26bb5189b48b27bf15ebd46ea4d1d9 Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Tue, 9 Sep 2025 08:43:10 +0200 Subject: [PATCH 24/27] az db host --- env_int.env | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/env_int.env b/env_int.env index 2d836e20..7b6cca3d 100644 --- a/env_int.env +++ b/env_int.env @@ -6,21 +6,21 @@ APP_ENV_LABEL = Integration Instance APP_API_URL = https://gateway-int.poweron-center.net # PostgreSQL Storage (new) -DB_APP_HOST=gateway-int-db.poweron-center.net +DB_APP_HOST=gateway-int-server.postgres.database.azure.com DB_APP_DATABASE=poweron_app_int DB_APP_USER=heeshkdlby DB_APP_PASSWORD_SECRET=VkAjgECESbEVQ$Tu DB_APP_PORT=5432 # PostgreSQL Storage (new) -DB_CHAT_HOST=gateway-int-db.poweron-center.net +DB_CHAT_HOST=gateway-int-server.postgres.database.azure.com DB_CHAT_DATABASE=poweron_chat_int DB_CHAT_USER=heeshkdlby DB_CHAT_PASSWORD_SECRET=VkAjgECESbEVQ$Tu DB_CHAT_PORT=5432 # PostgreSQL Storage (new) -DB_MANAGEMENT_HOST=gateway-int-db.poweron-center.net +DB_MANAGEMENT_HOST=gateway-int-server.postgres.database.azure.com DB_MANAGEMENT_DATABASE=poweron_management_int DB_MANAGEMENT_USER=heeshkdlby DB_MANAGEMENT_PASSWORD_SECRET=VkAjgECESbEVQ$Tu From b7a7ebedcb988eb4f10c3ddf1c8658352cefcaf3 Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Tue, 9 Sep 2025 13:26:12 +0200 Subject: [PATCH 25/27] ready mvp3 --- app.py | 39 +- modules/connectors/connectorSharepoint.py | 607 +++++++++++++------ modules/interfaces/interfaceTicketObjects.py | 159 +++-- modules/routes/routeJira.py | 141 ----- modules/workflow/managerSyncDelta.py | 231 +++++++ 5 files changed, 827 insertions(+), 350 deletions(-) delete mode 100644 modules/routes/routeJira.py create mode 100644 modules/workflow/managerSyncDelta.py diff --git a/app.py b/app.py index 72c94ab5..282775ad 100644 --- a/app.py +++ b/app.py @@ -4,6 +4,7 @@ os.environ["NUMEXPR_MAX_THREADS"] = "12" from fastapi import FastAPI, HTTPException, Depends, Body, status, Response from fastapi.middleware.cors import CORSMiddleware from contextlib import asynccontextmanager +from zoneinfo import ZoneInfo import logging from logging.handlers import RotatingFileHandler @@ -11,6 +12,8 @@ from datetime import timedelta import pathlib from modules.shared.configuration import APP_CONFIG +from apscheduler.schedulers.asyncio import AsyncIOScheduler +from apscheduler.triggers.cron import CronTrigger def initLogging(): """Initialize logging with configuration from APP_CONFIG""" @@ -147,10 +150,43 @@ async def lifespan(app: FastAPI): from modules.interfaces.interfaceAppObjects import getRootInterface getRootInterface() + # Setup APScheduler for JIRA sync + scheduler = AsyncIOScheduler(timezone=ZoneInfo("Europe/Zurich")) + try: + from modules.workflow.managerSyncDelta import perform_sync_jira_delta_group + # Schedule hourly sync at minute 0 + scheduler.add_job( + perform_sync_jira_delta_group, + CronTrigger(minute="0"), + 
id="jira_delta_group_sync", + replace_existing=True, + coalesce=True, + max_instances=1, + misfire_grace_time=1800, + ) + scheduler.start() + logger.info("APScheduler started (jira_delta_group_sync hourly)") + + # Run initial sync on startup (non-blocking failure) + try: + logger.info("Running initial JIRA sync on app startup...") + await perform_sync_jira_delta_group() + logger.info("Initial JIRA sync completed successfully") + except Exception as e: + logger.error(f"Initial JIRA sync failed: {str(e)}") + except Exception as e: + logger.error(f"Failed to initialize scheduler or JIRA sync: {str(e)}") + yield # Shutdown logic logger.info("Application has been shut down") + try: + if 'scheduler' in locals() and scheduler.running: + scheduler.shutdown(wait=False) + logger.info("APScheduler stopped") + except Exception as e: + logger.error(f"Error shutting down scheduler: {str(e)}") # START APP app = FastAPI( @@ -212,6 +248,3 @@ app.include_router(msftRouter) from modules.routes.routeSecurityGoogle import router as googleRouter app.include_router(googleRouter) - -from modules.routes.routeJira import router as jiraRouter -app.include_router(jiraRouter) \ No newline at end of file diff --git a/modules/connectors/connectorSharepoint.py b/modules/connectors/connectorSharepoint.py index b5eaa703..89bdffbe 100644 --- a/modules/connectors/connectorSharepoint.py +++ b/modules/connectors/connectorSharepoint.py @@ -1,180 +1,443 @@ -"""Connector for CRUD sharepoint operations.""" +"""Connector for SharePoint operations using Microsoft Graph API.""" +import logging +import json +import aiohttp import asyncio -from concurrent.futures import ThreadPoolExecutor -from dataclasses import dataclass -from datetime import datetime -from io import BytesIO -from typing import Optional -from office365.sharepoint.client_context import ClientContext -from office365.sharepoint.files.file import File +from typing import Dict, Any, List, Optional +from datetime import datetime, UTC + +logger = logging.getLogger(__name__) -@dataclass class ConnectorSharepoint: - ctx: ClientContext - - @classmethod - async def create(cls, ctx: ClientContext) -> "ConnectorSharepoint": - """Creates an instance of the Sharepoint connector. - - Params: - ctx: The ClientContext instance. - - Returns: - ConnectorSharepoint: An instance of the Sharepoint connector. + """SharePoint connector using Microsoft Graph API for reliable authentication.""" + + def __init__(self, access_token: str): + """Initialize with access token. + + Args: + access_token: Microsoft Graph access token """ - return cls(ctx=ctx) - - @classmethod - def get_client_context_from_username_password( - cls, site_url: str, username: str, password: str - ) -> ClientContext: - """Creates a ClientContext instance from username and password. - - Params: - site_url: The URL of the SharePoint site. - username: The username for authentication. - password: The password for authentication. - - Returns: - ClientContext: An instance of the ClientContext. - """ - return ClientContext(site_url).with_user_credentials(username, password) - - @classmethod - def get_client_context_from_app( - cls, site_url: str, client_id: str, client_secret: str - ) -> ClientContext: - """Creates a ClientContext instance from client ID and client secret. - - Params: - site_url: The URL of the SharePoint site. - client_id: The client ID for authentication. - client_secret: The client secret for authentication. - - Returns: - ClientContext: An instance of the ClientContext. 
- """ - return ClientContext(site_url).with_client_credentials( - client_id=client_id, client_secret=client_secret - ) - - def copy_file( - self, *, source_folder: str, source_file: str, dest_folder: str, dest_file: str - ) -> bool: - """Copy a file from one SharePoint location to another. - - Params: - source_folder: Source folder path (server-relative) - source_file: Source file name - dest_folder: Destination folder path (server-relative) - dest_file: Destination file name - - Returns: - bool: True if successful, False otherwise - """ - source_path = f"{source_folder.rstrip('/')}/{source_file}" - dest_path = f"{dest_folder.rstrip('/')}/{dest_file}" - - source_file_obj = self.ctx.web.get_file_by_server_relative_url(source_path) - source_file_obj.copyto(dest_path).execute_query() - return True - - async def copy_file_async( - self, *, source_folder: str, source_file: str, dest_folder: str, dest_file: str - ) -> bool: - """Copy a file from one SharePoint location to another (async version). - - Params: - source_folder: Source folder path (server-relative) - source_file: Source file name - dest_folder: Destination folder path (server-relative) - dest_file: Destination file name - - Returns: - bool: True if successful, False otherwise - """ - loop = asyncio.get_event_loop() - with ThreadPoolExecutor() as executor: - return await loop.run_in_executor( - executor, - lambda: self.copy_file( - source_folder=source_folder, - source_file=source_file, - dest_folder=dest_folder, - dest_file=dest_file, - ), + self.access_token = access_token + self.base_url = "https://graph.microsoft.com/v1.0" + + async def _make_graph_api_call(self, endpoint: str, method: str = "GET", data: bytes = None) -> Dict[str, Any]: + """Make a Microsoft Graph API call with proper error handling.""" + try: + headers = { + "Authorization": f"Bearer {self.access_token}", + "Content-Type": "application/json" if data and method != "PUT" else "application/octet-stream" if data else "application/json" + } + + # Remove leading slash from endpoint to avoid double slash + clean_endpoint = endpoint.lstrip('/') + url = f"{self.base_url}/{clean_endpoint}" + logger.debug(f"Making Graph API call: {method} {url}") + + timeout = aiohttp.ClientTimeout(total=30) + + async with aiohttp.ClientSession(timeout=timeout) as session: + if method == "GET": + async with session.get(url, headers=headers) as response: + if response.status == 200: + return await response.json() + else: + error_text = await response.text() + logger.error(f"Graph API call failed: {response.status} - {error_text}") + return {"error": f"API call failed: {response.status} - {error_text}"} + + elif method == "PUT": + async with session.put(url, headers=headers, data=data) as response: + if response.status in [200, 201]: + return await response.json() + else: + error_text = await response.text() + logger.error(f"Graph API call failed: {response.status} - {error_text}") + return {"error": f"API call failed: {response.status} - {error_text}"} + + elif method == "POST": + async with session.post(url, headers=headers, data=data) as response: + if response.status in [200, 201]: + return await response.json() + else: + error_text = await response.text() + logger.error(f"Graph API call failed: {response.status} - {error_text}") + return {"error": f"API call failed: {response.status} - {error_text}"} + + except asyncio.TimeoutError: + logger.error(f"Graph API call timed out after 30 seconds: {endpoint}") + return {"error": f"API call timed out after 30 seconds: {endpoint}"} + except 
Exception as e: + logger.error(f"Error making Graph API call: {str(e)}") + return {"error": f"Error making Graph API call: {str(e)}"} + + async def discover_sites(self) -> List[Dict[str, Any]]: + """Discover all SharePoint sites accessible to the user.""" + try: + result = await self._make_graph_api_call("sites?search=*") + + if "error" in result: + logger.error(f"Error discovering SharePoint sites: {result['error']}") + return [] + + sites = result.get("value", []) + logger.info(f"Discovered {len(sites)} SharePoint sites") + + processed_sites = [] + for site in sites: + site_info = { + "id": site.get("id"), + "displayName": site.get("displayName"), + "name": site.get("name"), + "webUrl": site.get("webUrl"), + "description": site.get("description"), + "createdDateTime": site.get("createdDateTime"), + "lastModifiedDateTime": site.get("lastModifiedDateTime") + } + processed_sites.append(site_info) + logger.debug(f"Site: {site_info['displayName']} - {site_info['webUrl']}") + + return processed_sites + + except Exception as e: + logger.error(f"Error discovering SharePoint sites: {str(e)}") + return [] + + async def find_site_by_name(self, site_name: str) -> Optional[Dict[str, Any]]: + """Find a specific SharePoint site by name using direct Graph API call.""" + try: + # Try to get the site directly by name using Graph API + endpoint = f"sites/{site_name}" + result = await self._make_graph_api_call(endpoint) + + if result and "error" not in result: + site_info = { + "id": result.get("id"), + "displayName": result.get("displayName"), + "name": result.get("name"), + "webUrl": result.get("webUrl"), + "description": result.get("description"), + "createdDateTime": result.get("createdDateTime"), + "lastModifiedDateTime": result.get("lastModifiedDateTime") + } + logger.info(f"Found site directly: {site_info['displayName']} - {site_info['webUrl']}") + return site_info + + except Exception as e: + logger.debug(f"Direct site lookup failed for '{site_name}': {str(e)}") + + # Fallback to discovery if direct lookup fails + logger.info(f"Direct lookup failed, trying discovery for site: {site_name}") + sites = await self.discover_sites() + if not sites: + logger.warning("No sites discovered") + return None + + logger.info(f"Discovered {len(sites)} SharePoint sites:") + for site in sites: + logger.info(f" - {site.get('displayName', 'Unknown')} (ID: {site.get('id', 'Unknown')})") + + # Try exact match first + for site in sites: + if site.get("displayName", "").strip().lower() == site_name.strip().lower(): + logger.info(f"Found exact match: {site.get('displayName')}") + return site + + # Try partial match + for site in sites: + if site_name.lower() in site.get("displayName", "").lower(): + logger.info(f"Found partial match: {site.get('displayName')}") + return site + + logger.warning(f"No site found matching: {site_name}") + return None + + async def find_site_by_web_url(self, web_url: str) -> Optional[Dict[str, Any]]: + """Find a SharePoint site using its web URL (useful for guest sites).""" + try: + # Use the web URL format: sites/{hostname}:/sites/{site-path} + # Extract hostname and site path from the web URL + if not web_url.startswith("https://"): + web_url = f"https://{web_url}" + + # Parse the URL to extract hostname and site path + from urllib.parse import urlparse + parsed = urlparse(web_url) + hostname = parsed.hostname + path_parts = parsed.path.strip('/').split('/') + + if len(path_parts) >= 2 and path_parts[0] == 'sites': + site_path = '/'.join(path_parts[1:]) # Everything after 'sites/' + else: + 
logger.error(f"Invalid SharePoint URL format: {web_url}") + return None + + endpoint = f"sites/{hostname}:/sites/{site_path}" + logger.debug(f"Trying web URL format: {endpoint}") + + result = await self._make_graph_api_call(endpoint) + + if result and "error" not in result: + site_info = { + "id": result.get("id"), + "displayName": result.get("displayName"), + "name": result.get("name"), + "webUrl": result.get("webUrl"), + "description": result.get("description"), + "createdDateTime": result.get("createdDateTime"), + "lastModifiedDateTime": result.get("lastModifiedDateTime") + } + logger.info(f"Found site by web URL: {site_info['displayName']} - {site_info['webUrl']} (ID: {site_info['id']})") + return site_info + else: + logger.warning(f"Site not found using web URL: {web_url}") + return None + + except Exception as e: + logger.error(f"Error finding site by web URL: {str(e)}") + return None + + async def find_site_by_url(self, hostname: str, site_path: str) -> Optional[Dict[str, Any]]: + """Find a SharePoint site using the site URL format.""" + try: + # For guest sites, try different URL formats + url_formats = [ + f"sites/{hostname}:/sites/{site_path}", # Standard format + f"sites/{hostname}:/sites/{site_path}/", # With trailing slash + f"sites/{hostname}:/sites/{site_path.lower()}", # Lowercase + f"sites/{hostname}:/sites/{site_path.lower()}/", # Lowercase with slash + ] + + for endpoint in url_formats: + logger.debug(f"Trying URL format: {endpoint}") + result = await self._make_graph_api_call(endpoint) + + if result and "error" not in result: + site_info = { + "id": result.get("id"), + "displayName": result.get("displayName"), + "name": result.get("name"), + "webUrl": result.get("webUrl"), + "description": result.get("description"), + "createdDateTime": result.get("createdDateTime"), + "lastModifiedDateTime": result.get("lastModifiedDateTime") + } + logger.info(f"Found site by URL: {site_info['displayName']} - {site_info['webUrl']} (ID: {site_info['id']})") + return site_info + else: + logger.debug(f"URL format failed: {endpoint} - {result.get('error', 'Unknown error')}") + + logger.warning(f"Site not found using any URL format for: {hostname}:/sites/{site_path}") + return None + + except Exception as e: + logger.error(f"Error finding site by URL: {str(e)}") + return None + + async def get_folder_by_path(self, site_id: str, folder_path: str) -> Optional[Dict[str, Any]]: + """Get folder information by path within a site.""" + try: + # Clean the path + clean_path = folder_path.lstrip('/') + endpoint = f"sites/{site_id}/drive/root:/{clean_path}" + + result = await self._make_graph_api_call(endpoint) + + if "error" in result: + logger.warning(f"Folder not found at path {folder_path}: {result['error']}") + return None + + return result + + except Exception as e: + logger.error(f"Error getting folder by path: {str(e)}") + return None + + async def upload_file(self, site_id: str, folder_path: str, file_name: str, content: bytes) -> Dict[str, Any]: + """Upload a file to SharePoint.""" + try: + # Clean the path + clean_path = folder_path.lstrip('/') + upload_path = f"{clean_path.rstrip('/')}/{file_name}" + endpoint = f"sites/{site_id}/drive/root:/{upload_path}:/content" + + logger.info(f"Uploading file to: {endpoint}") + + result = await self._make_graph_api_call(endpoint, method="PUT", data=content) + + if "error" in result: + logger.error(f"Upload failed: {result['error']}") + return result + + logger.info(f"File uploaded successfully: {file_name}") + return result + + except Exception as e: + 
logger.error(f"Error uploading file: {str(e)}") + return {"error": f"Error uploading file: {str(e)}"} + + async def download_file(self, site_id: str, file_id: str) -> Optional[bytes]: + """Download a file from SharePoint.""" + try: + endpoint = f"sites/{site_id}/drive/items/{file_id}/content" + + headers = {"Authorization": f"Bearer {self.access_token}"} + timeout = aiohttp.ClientTimeout(total=30) + + async with aiohttp.ClientSession(timeout=timeout) as session: + async with session.get(f"{self.base_url}/{endpoint}", headers=headers) as response: + if response.status == 200: + return await response.read() + else: + logger.error(f"Download failed: {response.status}") + return None + + except Exception as e: + logger.error(f"Error downloading file: {str(e)}") + return None + + async def list_folder_contents(self, site_id: str, folder_path: str = "") -> List[Dict[str, Any]]: + """List contents of a folder.""" + try: + if not folder_path or folder_path == "/": + endpoint = f"sites/{site_id}/drive/root/children" + else: + clean_path = folder_path.lstrip('/') + endpoint = f"sites/{site_id}/drive/root:/{clean_path}:/children" + + result = await self._make_graph_api_call(endpoint) + + if "error" in result: + logger.warning(f"Failed to list folder contents: {result['error']}") + return None + + items = result.get("value", []) + processed_items = [] + + for item in items: + # Determine if it's a folder or file + is_folder = 'folder' in item + + item_info = { + "id": item.get("id"), + "name": item.get("name"), + "type": "folder" if is_folder else "file", + "size": item.get("size", 0), + "createdDateTime": item.get("createdDateTime"), + "lastModifiedDateTime": item.get("lastModifiedDateTime"), + "webUrl": item.get("webUrl") + } + + if "file" in item: + item_info["mimeType"] = item["file"].get("mimeType") + item_info["downloadUrl"] = item.get("@microsoft.graph.downloadUrl") + + if "folder" in item: + item_info["childCount"] = item["folder"].get("childCount", 0) + + processed_items.append(item_info) + + return processed_items + + except Exception as e: + logger.error(f"Error listing folder contents: {str(e)}") + return [] + + async def search_files(self, site_id: str, query: str) -> List[Dict[str, Any]]: + """Search for files in a site.""" + try: + search_query = query.replace("'", "''") # Escape single quotes for OData + endpoint = f"sites/{site_id}/drive/root/search(q='{search_query}')" + + result = await self._make_graph_api_call(endpoint) + + if "error" in result: + logger.warning(f"Search failed: {result['error']}") + return [] + + items = result.get("value", []) + processed_items = [] + + for item in items: + is_folder = 'folder' in item + + item_info = { + "id": item.get("id"), + "name": item.get("name"), + "type": "folder" if is_folder else "file", + "size": item.get("size", 0), + "createdDateTime": item.get("createdDateTime"), + "lastModifiedDateTime": item.get("lastModifiedDateTime"), + "webUrl": item.get("webUrl"), + "parentPath": item.get("parentReference", {}).get("path", "") + } + + if "file" in item: + item_info["mimeType"] = item["file"].get("mimeType") + item_info["downloadUrl"] = item.get("@microsoft.graph.downloadUrl") + + processed_items.append(item_info) + + return processed_items + + except Exception as e: + logger.error(f"Error searching files: {str(e)}") + return [] + + async def copy_file_async(self, site_id: str, source_folder: str, source_file: str, dest_folder: str, dest_file: str) -> None: + """Copy a file from source to destination folder (like original synchronizer).""" + 
try: + # First, download the source file + source_path = f"{source_folder}/{source_file}" + file_content = await self.download_file_by_path(site_id=site_id, file_path=source_path) + + if not file_content: + raise Exception(f"Failed to download source file: {source_path}") + + # Upload to destination + await self.upload_file( + site_id=site_id, + folder_path=dest_folder, + file_name=dest_file, + content=file_content ) + + logger.info(f"File copied: {source_file} -> {dest_file}") + + except Exception as e: + logger.error(f"Error copying file: {str(e)}") + raise + + async def download_file_by_path(self, site_id: str, file_path: str) -> Optional[bytes]: + """Download a file by its path within a site.""" + try: + # Clean the path + clean_path = file_path.strip('/') + endpoint = f"sites/{site_id}/drive/root:/{clean_path}:/content" + + # Use direct HTTP call for file downloads (binary content) + headers = { + "Authorization": f"Bearer {self.access_token}", + } + + # Remove leading slash from endpoint to avoid double slash + clean_endpoint = endpoint.lstrip('/') + url = f"{self.base_url}/{clean_endpoint}" + logger.debug(f"Downloading file: GET {url}") + + timeout = aiohttp.ClientTimeout(total=30) + + async with aiohttp.ClientSession(timeout=timeout) as session: + async with session.get(url, headers=headers) as response: + if response.status == 200: + return await response.read() + else: + error_text = await response.text() + logger.error(f"File download failed: {response.status} - {error_text}") + return None + + except Exception as e: + logger.error(f"Error downloading file by path: {str(e)}") + return None - def read_file(self, *, folder_path: str, file_name: str) -> bytes: - """Read a file from SharePoint and return its content as bytes. - - Params: - folder_path: Folder path (server-relative) - file_name: File name - - Returns: - bytes: File content as bytes - """ - file_path = f"{folder_path.rstrip('/')}/{file_name}" - response = File.open_binary(self.ctx, file_path) - return response.content - - async def read_file_async(self, *, folder_path: str, file_name: str) -> bytes: - """Read a file from SharePoint and return its content as bytes (async version). - - Params: - folder_path: Folder path (server-relative) - file_name: File name - - Returns: - bytes: File content as bytes - """ - loop = asyncio.get_event_loop() - with ThreadPoolExecutor() as executor: - return await loop.run_in_executor( - executor, - lambda: self.read_file(folder_path=folder_path, file_name=file_name), - ) - - def overwrite_file( - self, *, folder_path: str, file_name: str, content: bytes - ) -> bool: - """Write content to a SharePoint file, overwriting if it exists. - - Params: - folder_path: Target folder path (server-relative) - file_name: Target file name - content: File content as bytes - - Returns: - bool: True if successful, False otherwise - """ - target_folder = self.ctx.web.get_folder_by_server_relative_url(folder_path) - target_folder.upload_file(file_name, content).execute_query() - return True - - async def overwrite_file_async( - self, *, folder_path: str, file_name: str, content: bytes - ) -> bool: - """Write content to a SharePoint file, overwriting if it exists (async version). 
- - Params: - folder_path: Target folder path (server-relative) - file_name: Target file name - content: File content as bytes - - Returns: - bool: True if successful, False otherwise - """ - loop = asyncio.get_event_loop() - with ThreadPoolExecutor() as executor: - return await loop.run_in_executor( - executor, - lambda: self.overwrite_file( - folder_path=folder_path, - file_name=file_name, - content=content, - ), - ) diff --git a/modules/interfaces/interfaceTicketObjects.py b/modules/interfaces/interfaceTicketObjects.py index 8d46e20f..991c9da0 100644 --- a/modules/interfaces/interfaceTicketObjects.py +++ b/modules/interfaces/interfaceTicketObjects.py @@ -6,8 +6,7 @@ from modules.shared.timezoneUtils import get_utc_now from modules.connectors.connectorSharepoint import ConnectorSharepoint -from modules.interfaces.interfaceTicketModel import TicketBase -from modules.interfaces.interfaceTicketModel import Task +from modules.interfaces.interfaceTicketModel import TicketBase, Task @dataclass(slots=True) @@ -19,6 +18,7 @@ class TicketSharepointSyncInterface: sync_file: str backup_folder: str audit_folder: str + site_id: str # Keep for compatibility but not used with REST API @classmethod async def create( @@ -30,6 +30,7 @@ class TicketSharepointSyncInterface: sync_file: str, backup_folder: str, audit_folder: str, + site_id: str, ) -> "TicketSharepointSyncInterface": return cls( connector_ticket=connector_ticket, @@ -39,6 +40,7 @@ class TicketSharepointSyncInterface: sync_file=sync_file, backup_folder=backup_folder, audit_folder=audit_folder, + site_id=site_id, ) async def create_backup(self): @@ -47,6 +49,7 @@ class TicketSharepointSyncInterface: backup_filename = f"backup_{timestamp}_{self.sync_file}" await self.connector_sharepoint.copy_file_async( + site_id=self.site_id, source_folder=self.sync_folder, source_file=self.sync_file, dest_folder=self.backup_folder, @@ -83,8 +86,10 @@ class TicketSharepointSyncInterface: try: timestamp = get_utc_now().strftime("%Y%m%d_%H%M%S") jira_export_filename = f"jira_export_{timestamp}.csv" - jira_export_content = self._create_csv_content(jira_data) - await self.connector_sharepoint.overwrite_file_async( + # Use default headers for JIRA export + jira_export_content = self._create_csv_content(jira_data, {"header1": "JIRA Export", "header2": "Raw Data"}) + await self.connector_sharepoint.upload_file( + site_id=self.site_id, folder_path=self.audit_folder, file_name=jira_export_filename, content=jira_export_content, @@ -111,18 +116,35 @@ class TicketSharepointSyncInterface: audit_log.append("Step 5: Reading existing CSV file...") existing_data = [] existing_file_found = False + existing_headers = {"header1": "", "header2": ""} try: - csv_content = await self.connector_sharepoint.read_file_async( - folder_path=self.sync_folder, file_name=self.sync_file + file_path = f"{self.sync_folder}/{self.sync_file}" + csv_content = await self.connector_sharepoint.download_file_by_path( + site_id=self.site_id, file_path=file_path ) + + # Read the first two lines to get headers + csv_lines = csv_content.decode('utf-8').split('\n') + if len(csv_lines) >= 2: + # Store the raw first two lines as headers (preserving original formatting) + existing_headers["header1"] = csv_lines[0].rstrip('\r\n') + existing_headers["header2"] = csv_lines[1].rstrip('\r\n') + + # Try to read with robust CSV parsing (skip first 2 rows) df_existing = pd.read_csv( - BytesIO(csv_content), skiprows=2 - ) # Skip header rows + BytesIO(csv_content), + skiprows=2, + quoting=1, # QUOTE_ALL + 
escapechar='\\', + on_bad_lines='skip', # Skip malformed lines + engine='python' # More robust parsing + ) existing_data = df_existing.to_dict("records") existing_file_found = True audit_log.append( f"Existing CSV file found with {len(existing_data)} records" ) + audit_log.append(f"Preserved headers: Header1='{existing_headers['header1']}', Header2='{existing_headers['header2']}'") except Exception as e: audit_log.append(f"No existing CSV file found or read error: {str(e)}") audit_log.append("") @@ -149,8 +171,9 @@ class TicketSharepointSyncInterface: # 7. Create CSV with 4-row structure and write to SharePoint audit_log.append("Step 7: Writing updated CSV to SharePoint...") - csv_content = self._create_csv_content(merged_data) - await self.connector_sharepoint.overwrite_file_async( + csv_content = self._create_csv_content(merged_data, existing_headers) + await self.connector_sharepoint.upload_file( + site_id=self.site_id, folder_path=self.sync_folder, file_name=self.sync_file, content=csv_content, @@ -196,10 +219,19 @@ class TicketSharepointSyncInterface: # 1. Read CSV file from SharePoint audit_log.append("Step 1: Reading CSV file from SharePoint...") try: - csv_content = await self.connector_sharepoint.read_file_async( - folder_path=self.sync_folder, file_name=self.sync_file + file_path = f"{self.sync_folder}/{self.sync_file}" + csv_content = await self.connector_sharepoint.download_file_by_path( + site_id=self.site_id, file_path=file_path + ) + # Try to read with robust CSV parsing + df = pd.read_csv( + BytesIO(csv_content), + skiprows=2, + quoting=1, # QUOTE_ALL + escapechar='\\', + on_bad_lines='skip', # Skip malformed lines + engine='python' # More robust parsing ) - df = pd.read_csv(BytesIO(csv_content), skiprows=2) # Skip header rows csv_data = df.to_dict("records") audit_log.append( f"CSV file read successfully with {len(csv_data)} records" @@ -495,34 +527,71 @@ class TicketSharepointSyncInterface: # Convert audit log to bytes audit_content = "\n".join(audit_log).encode("utf-8") + # Debug logging + import logging + logger = logging.getLogger(__name__) + logger.debug(f"Writing audit log to folder: {self.audit_folder}, file: {audit_filename}") + # Write to SharePoint - await self.connector_sharepoint.overwrite_file_async( + await self.connector_sharepoint.upload_file( + site_id=self.site_id, folder_path=self.audit_folder, file_name=audit_filename, content=audit_content, ) + logger.debug("Audit log written successfully") except Exception as e: # If audit logging fails, we don't want to break the main sync process # Just log the error (this could be enhanced with fallback logging) import logging logger = logging.getLogger(__name__) logger.warning(f"Failed to write audit log: {str(e)}") + logger.warning(f"Audit folder: {self.audit_folder}") + logger.warning(f"Operation type: {operation_type}") + import traceback + logger.warning(f"Traceback: {traceback.format_exc()}") - def _create_csv_content(self, data: list[dict]) -> bytes: + def _create_csv_content(self, data: list[dict], existing_headers: dict = None) -> bytes: """Create CSV content with 4-row structure matching reference code.""" + # Get current timestamp for header + timestamp = get_utc_now().strftime("%Y-%m-%d %H:%M:%S UTC") + + # Use existing headers if provided, otherwise use defaults + if existing_headers is None: + existing_headers = {"header1": "Header 1", "header2": "Header 2"} + if not data: # Build an empty table with the expected columns from schema cols = list(self.task_sync_definition.keys()) df = 
pd.DataFrame(columns=cols) - # Row 1 & 2: keep your current banner lines - header_row1 = pd.DataFrame( - [["Header 1"] + [""] * (len(cols) - 1)], columns=cols - ) - header_row2 = pd.DataFrame( - [["Header 2"] + [""] * (len(cols) - 1)], columns=cols - ) + # Parse existing headers to extract individual columns + import csv as csv_module + header1_text = existing_headers.get("header1", "Header 1") + header2_text = existing_headers.get("header2", "Header 2") + + # Parse the existing header rows + header1_reader = csv_module.reader([header1_text]) + header2_reader = csv_module.reader([header2_text]) + header1_row = next(header1_reader, []) + header2_row = next(header2_reader, []) + + # Row 1: Use existing header1 or default + if len(header1_row) >= len(cols): + header_row1_data = header1_row[:len(cols)] + else: + header_row1_data = header1_row + [""] * (len(cols) - len(header1_row)) + header_row1 = pd.DataFrame([header_row1_data], columns=cols) + + # Row 2: Use existing header2 and add timestamp to second column + if len(header2_row) >= len(cols): + header_row2_data = header2_row[:len(cols)] + else: + header_row2_data = header2_row + [""] * (len(cols) - len(header2_row)) + if len(header_row2_data) > 1: + header_row2_data[1] = timestamp + header_row2 = pd.DataFrame([header_row2_data], columns=cols) # Row 3: table headers table_headers = pd.DataFrame([cols], columns=cols) @@ -531,7 +600,7 @@ class TicketSharepointSyncInterface: [header_row1, header_row2, table_headers, df], ignore_index=True ) csv_text = StringIO() - final_df.to_csv(csv_text, index=False, header=False) + final_df.to_csv(csv_text, index=False, header=False, quoting=1, escapechar='\\') return csv_text.getvalue().encode("utf-8") # Create DataFrame from data @@ -542,16 +611,38 @@ class TicketSharepointSyncInterface: df[column] = df[column].astype("object") df[column] = df[column].fillna("") - # Create the 4-row structure - # Row 1: Static header row 1 - header_row1 = pd.DataFrame( - [["Header 1"] + [""] * (len(df.columns) - 1)], columns=df.columns - ) + # Clean data: replace actual line breaks with \n and escape quotes + for column in df.columns: + df[column] = df[column].astype(str).str.replace('\n', '\\n', regex=False) + df[column] = df[column].str.replace('"', '""', regex=False) - # Row 2: Static header row 2 with strict compatibility - header_row2 = pd.DataFrame( - [["Header 2"] + [""] * (len(df.columns) - 1)], columns=df.columns - ) + # Create the 4-row structure + # Parse existing headers to extract individual columns + import csv as csv_module + header1_text = existing_headers.get("header1", "Header 1") + header2_text = existing_headers.get("header2", "Header 2") + + # Parse the existing header rows + header1_reader = csv_module.reader([header1_text]) + header2_reader = csv_module.reader([header2_text]) + header1_row = next(header1_reader, []) + header2_row = next(header2_reader, []) + + # Row 1: Use existing header1 or default + if len(header1_row) >= len(df.columns): + header_row1_data = header1_row[:len(df.columns)] + else: + header_row1_data = header1_row + [""] * (len(df.columns) - len(header1_row)) + header_row1 = pd.DataFrame([header_row1_data], columns=df.columns) + + # Row 2: Use existing header2 and add timestamp to second column + if len(header2_row) >= len(df.columns): + header_row2_data = header2_row[:len(df.columns)] + else: + header_row2_data = header2_row + [""] * (len(df.columns) - len(header2_row)) + if len(header_row2_data) > 1: + header_row2_data[1] = timestamp + header_row2 = 
pd.DataFrame([header_row2_data], columns=df.columns) # Row 3: Table headers (column names) table_headers = pd.DataFrame([df.columns.tolist()], columns=df.columns) @@ -561,7 +652,7 @@ class TicketSharepointSyncInterface: [header_row1, header_row2, table_headers, df], ignore_index=True ) - # Convert to CSV bytes (write text, then encode) + # Convert to CSV bytes with proper quoting for fields containing special characters csv_text = StringIO() - final_df.to_csv(csv_text, index=False, header=False) + final_df.to_csv(csv_text, index=False, header=False, quoting=1, escapechar='\\') return csv_text.getvalue().encode("utf-8") diff --git a/modules/routes/routeJira.py b/modules/routes/routeJira.py deleted file mode 100644 index 3e4038aa..00000000 --- a/modules/routes/routeJira.py +++ /dev/null @@ -1,141 +0,0 @@ -# Configure logger -import logging -from fastapi import APIRouter, FastAPI -from contextlib import asynccontextmanager -from zoneinfo import ZoneInfo - - -from modules.connectors.connectorTicketJira import ConnectorTicketJira -from modules.connectors.connectorSharepoint import ConnectorSharepoint -from modules.interfaces.interfaceTicketObjects import TicketSharepointSyncInterface - -from apscheduler.schedulers.asyncio import AsyncIOScheduler -from apscheduler.triggers.cron import CronTrigger - - -logger = logging.getLogger(__name__) - - -scheduler = AsyncIOScheduler(timezone=ZoneInfo("Europe/Zurich")) - - -@asynccontextmanager -async def router_lifespan(app: FastAPI): - # start scheduler when this router is mounted - scheduler.add_job( - perform_sync_jira_delta_group, - CronTrigger(minute="0"), # run at the top of every hour - id="jira_delta_group_sync", - replace_existing=True, - coalesce=True, - max_instances=1, - misfire_grace_time=1800, - ) - scheduler.start() - logger.info("APScheduler started (jira_delta_group_sync hourly)") - try: - yield - finally: - if scheduler.running: - scheduler.shutdown(wait=False) - logger.info("APScheduler stopped") - - -router = APIRouter( - prefix="/api/jira", - tags=["JIRA Sync"], - lifespan=router_lifespan, -) - - -@router.post("/sync/delta-group") -async def sync_jira_delta_group(): - """Endpoint to trigger JIRA-SharePoint sync for Delta Group project.""" - - logger.info("Received request to sync JIRA Delta Group project") - await perform_sync_jira_delta_group() - - # Return a response - return {"status": "Sync completed"} - - -async def perform_sync_jira_delta_group(): - logger.info("Syncing Jira issues for Delta Group...") - - # Sharepoint connection parameters - sharepoint_client_id = None - sharepoint_client_secret = None - sharepoint_site_url = None - - # Jira connection parameters - jira_username = "ONHOLD - TASK - p.motsch@valueon.ch" - jira_api_token = "ATATT3xFfGF0d973nNb3R1wTDI4lesmJfJAmooS-4cYMJTyLfwYv4himrE6yyCxyX3aSMfl34NHcm2fAXeFXrLHUzJx0RQVUBonCFnlgexjLQTgS5BoCbSO7dwAVjlcHZZkArHbooCUaRwJ15n6AHkm-nwdjLQ3Z74TFnKKUZC4uhuh3Aj-MuX8=2D7124FA" - jira_url = "https://deltasecurity.atlassian.net" - project_code = "DCS" - issue_type = "Task" - - # Basic validation (credentials will be added later) - if not all([sharepoint_client_id, sharepoint_client_secret, sharepoint_site_url]): - raise ValueError("SharePoint credentials not configured") - - if not all([jira_username, jira_api_token]): - raise ValueError("JIRA credentials not configured") - - # Define the task sync definition - task_sync_definition = { - # key=excel-header, [get:jira>excel | put: excel>jira, jira-xml-field-list] - "ID": ["get", ["key"]], - "Module Category": ["get", ["fields", 
"customfield_10058", "value"]], - "Summary": ["get", ["fields", "summary"]], - "Description": ["get", ["fields", "description"]], - "References": ["get", ["fields", "customfield_10066"]], - "Priority": ["get", ["fields", "priority", "name"]], - "Issue Status": ["get", ["fields", "customfield_10062"]], - "Assignee": ["get", ["fields", "assignee", "displayName"]], - "Issue Created": ["get", ["fields", "created"]], - "Due Date": ["get", ["fields", "duedate"]], - "DELTA Comments": ["get", ["fields", "customfield_10060"]], - "SELISE Ticket References": ["put", ["fields", "customfield_10067"]], - "SELISE Status Values": ["put", ["fields", "customfield_10065"]], - "SELISE Comments": ["put", ["fields", "customfield_10064"]], - } - - # SharePoint file configuration - sync_folder = "/sites//Shared Documents/TicketSync" - sync_file = "delta_group_selise_ticket_exchange_list.csv" - backup_folder = "/sites//Shared Documents/TicketSync/Backups" - audit_folder = "/sites//Shared Documents/TicketSync/AuditLogs" - - # Create the jira connector instance - jira_connector = await ConnectorTicketJira.create( - jira_username=jira_username, - jira_api_token=jira_api_token, - jira_url=jira_url, - project_code=project_code, - issue_type=issue_type, - ) - - # Create the sharepoint connector instance - ctx = ConnectorSharepoint.get_client_context_from_app( - site_url=sharepoint_site_url, - client_id=sharepoint_client_id, - client_secret=sharepoint_client_secret, - ) - sharepoint_connector = await ConnectorSharepoint.create(ctx=ctx) - - # Create the sync interface instance - sync_interface = await TicketSharepointSyncInterface.create( - connector_ticket=jira_connector, - connector_sharepoint=sharepoint_connector, - task_sync_definition=task_sync_definition, - sync_folder=sync_folder, - sync_file=sync_file, - backup_folder=backup_folder, - audit_folder=audit_folder, - ) - - # Sync from JIRA to CSV in Sharepoint - await sync_interface.sync_from_jira_to_csv() - - # Sync from CSV in Sharepoint to JIRA - await sync_interface.sync_from_csv_to_jira() diff --git a/modules/workflow/managerSyncDelta.py b/modules/workflow/managerSyncDelta.py new file mode 100644 index 00000000..e6a0ce56 --- /dev/null +++ b/modules/workflow/managerSyncDelta.py @@ -0,0 +1,231 @@ +""" +Delta Group JIRA-SharePoint Sync Manager + +This module handles the synchronization of JIRA tickets to SharePoint using the new +Graph API-based connector architecture. 
+""" + +import logging +import csv +import io +from datetime import datetime, UTC +from typing import Dict, Any, List, Optional +from modules.connectors.connectorSharepoint import ConnectorSharepoint +from modules.connectors.connectorTicketJira import ConnectorTicketJira +from modules.interfaces.interfaceAppObjects import getRootInterface +from modules.interfaces.interfaceAppModel import UserInDB +from modules.interfaces.interfaceTicketObjects import TicketSharepointSyncInterface +from modules.shared.timezoneUtils import get_utc_timestamp +from modules.shared.configuration import APP_CONFIG + +logger = logging.getLogger(__name__) + +# Get environment type from configuration +APP_ENV_TYPE = APP_CONFIG.get("APP_ENV_TYPE", "dev") + + +class ManagerSyncDelta: + """Manages JIRA to SharePoint synchronization for Delta Group.""" + #SHAREPOINT_SITE_ID = "02830618-4029-4dc8-8d3d-f5168f282249" + #SHAREPOINT_SITE_NAME = "SteeringBPM" + #SHAREPOINT_MAIN_FOLDER = "/sites/SteeringBPM/Freigegebene Dokumente/General/50 Docs hosted by SELISE" + #SHAREPOINT_BACKUP_FOLDER = "/sites/SteeringBPM/Freigegebene Dokumente/General/50 Docs hosted by SELISE/SyncHistory" + #SHAREPOINT_AUDIT_FOLDER = "/sites/SteeringBPM/Freigegebene Dokumente/General/50 Docs hosted by SELISE/SyncHistory" + + # SharePoint site constants using hostname + site path (resolve real site ID at runtime) + SHAREPOINT_HOSTNAME = "pcuster.sharepoint.com" + SHAREPOINT_SITE_PATH = "KM.DELTAG.20968511411" + SHAREPOINT_SITE_NAME = "KM.DELTAG.20968511411" + # Drive-relative (document library) paths, not server-relative "/sites/..." + # Note: Default library name is "Shared Documents" in Graph + SHAREPOINT_MAIN_FOLDER = "1_Arbeitsbereich" + SHAREPOINT_BACKUP_FOLDER = "1_Arbeitsbereich/SyncHistory" + SHAREPOINT_AUDIT_FOLDER = "1_Arbeitsbereich/SyncHistory" + + # Fixed filename for the main CSV file (like original synchronizer) + SYNC_FILE_NAME = "DELTAgroup x SELISE Ticket Exchange List.csv" + + # JIRA connection parameters (hardcoded for Delta Group) + JIRA_USERNAME = "p.motsch@valueon.ch" + JIRA_API_TOKEN = "ATATT3xFfGF0d973nNb3R1wTDI4lesmJfJAmooS-4cYMJTyLfwYv4himrE6yyCxyX3aSMfl34NHcm2fAXeFXrLHUzJx0RQVUBonCFnlgexjLQTgS5BoCbSO7dwAVjlcHZZkArHbooCUaRwJ15n6AHkm-nwdjLQ3Z74TFnKKUZC4uhuh3Aj-MuX8=2D7124FA" + JIRA_URL = "https://deltasecurity.atlassian.net" + JIRA_PROJECT_CODE = "DCS" + JIRA_ISSUE_TYPE = "Task" + + # Task sync definition for field mapping (like original synchronizer) + TASK_SYNC_DEFINITION = { + "ID": ["get", ["key"]], + "Summary": ["get", ["fields", "summary"]], + "Status": ["get", ["fields", "status", "name"]], + "Assignee": ["get", ["fields", "assignee", "displayName"]], + "Reporter": ["get", ["fields", "reporter", "displayName"]], + "Created": ["get", ["fields", "created"]], + "Updated": ["get", ["fields", "updated"]], + "Priority": ["get", ["fields", "priority", "name"]], + "IssueType": ["get", ["fields", "issuetype", "name"]], + "Project": ["get", ["fields", "project", "name"]], + "Description": ["get", ["fields", "description"]], + } + + def __init__(self): + """Initialize the sync manager with hardcoded Delta Group credentials.""" + self.root_interface = getRootInterface() + self.jira_connector = None + self.sharepoint_connector = None + self.target_site = None + + async def initialize_connectors(self) -> bool: + """Initialize JIRA and SharePoint connectors.""" + try: + logger.info("Initializing JIRA connector with hardcoded credentials") + + # Initialize JIRA connector using class constants + self.jira_connector = await 
ConnectorTicketJira.create( + jira_username=self.JIRA_USERNAME, + jira_api_token=self.JIRA_API_TOKEN, + jira_url=self.JIRA_URL, + project_code=self.JIRA_PROJECT_CODE, + issue_type=self.JIRA_ISSUE_TYPE + ) + + # Use the current logged-in user from root interface + activeUser = self.root_interface.currentUser + if not activeUser: + logger.error("No current user available - SharePoint connection required") + return False + + logger.info(f"Using current user for SharePoint: {activeUser.id}") + + # Get SharePoint connection for this user + user_connections = self.root_interface.getUserConnections(activeUser.id) + sharepoint_connection = None + + for connection in user_connections: + if connection.authority == "msft": + sharepoint_connection = connection + break + + if not sharepoint_connection: + logger.error("No SharePoint connection found for Delta Group user") + return False + + logger.info(f"Found SharePoint connection: {sharepoint_connection.id}") + + # Get SharePoint token for this connection + sharepoint_token = self.root_interface.getConnectionToken(sharepoint_connection.id) + if not sharepoint_token: + logger.error("No SharePoint token found for Delta Group user connection") + return False + + logger.info(f"Found SharePoint token: {sharepoint_token.id}") + + # Initialize SharePoint connector with Graph API + self.sharepoint_connector = ConnectorSharepoint(access_token=sharepoint_token.tokenAccess) + + # Resolve the site by hostname + site path to get the real site ID + logger.info( + f"Resolving site ID via hostname+path: {self.SHAREPOINT_HOSTNAME}:/sites/{self.SHAREPOINT_SITE_PATH}" + ) + resolved = await self.sharepoint_connector.find_site_by_url( + hostname=self.SHAREPOINT_HOSTNAME, + site_path=self.SHAREPOINT_SITE_PATH + ) + + if not resolved: + logger.error( + f"Failed to resolve site. 
Hostname: {self.SHAREPOINT_HOSTNAME}, Path: {self.SHAREPOINT_SITE_PATH}" + ) + return False + + self.target_site = { + "id": resolved.get("id"), + "displayName": resolved.get("displayName", self.SHAREPOINT_SITE_NAME), + "name": resolved.get("name", self.SHAREPOINT_SITE_NAME) + } + + # Test site access by listing root of the drive + logger.info("Testing site access using resolved site ID...") + test_result = await self.sharepoint_connector.list_folder_contents( + site_id=self.target_site["id"], + folder_path="" + ) + + if test_result is not None: + logger.info( + f"Site access confirmed: {self.target_site['displayName']} (ID: {self.target_site['id']})" + ) + else: + logger.error("Could not access site drive - check permissions") + return False + + return True + + except Exception as e: + logger.error(f"Error initializing connectors: {str(e)}") + return False + + async def sync_jira_to_sharepoint(self) -> bool: + """Perform the main JIRA to SharePoint synchronization using sophisticated sync logic.""" + try: + logger.info("Starting JIRA to SharePoint synchronization") + + # Initialize connectors + if not await self.initialize_connectors(): + logger.error("Failed to initialize connectors") + return False + + # Create the sophisticated sync interface + sync_interface = await TicketSharepointSyncInterface.create( + connector_ticket=self.jira_connector, + connector_sharepoint=self.sharepoint_connector, + task_sync_definition=self.TASK_SYNC_DEFINITION, + sync_folder=self.SHAREPOINT_MAIN_FOLDER, + sync_file=self.SYNC_FILE_NAME, + backup_folder=self.SHAREPOINT_BACKUP_FOLDER, + audit_folder=self.SHAREPOINT_AUDIT_FOLDER, + site_id=self.target_site['id'] + ) + + # Perform the sophisticated sync + logger.info("Performing sophisticated JIRA to CSV sync...") + await sync_interface.sync_from_jira_to_csv() + + logger.info("JIRA to SharePoint synchronization completed successfully") + return True + + except Exception as e: + logger.error(f"Error during JIRA to SharePoint synchronization: {str(e)}") + return False + + + +# Global sync function for use in app.py +async def perform_sync_jira_delta_group() -> bool: + """Perform JIRA to SharePoint synchronization for Delta Group. + + This function is called by the scheduler and can be used independently. 
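+    In non-production environments the sync is skipped and the function returns early.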
+ + Returns: + bool: True if synchronization was successful, False otherwise + """ + try: + if APP_ENV_TYPE != "prod": + logger.info("JIRA to SharePoint synchronization: TASK to run only in PROD") + return True + + logger.info("Starting Delta Group JIRA sync...") + + + sync_manager = ManagerSyncDelta() + success = await sync_manager.sync_jira_to_sharepoint() + + if success: + logger.info("Delta Group JIRA sync completed successfully") + else: + logger.error("Delta Group JIRA sync failed") + + return success + + except Exception as e: + logger.error(f"Error in perform_sync_jira_delta_group: {str(e)}") + return False From ed236da4ca3cc366b9a40bfe3f13746a52b37373 Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Tue, 9 Sep 2025 14:32:07 +0200 Subject: [PATCH 26/27] updated publish keys --- .github/workflows/int_gateway-int.yml | 4 + ...gateway-prod.yml => main_gateway-prod.yml} | 6 +- .github/workflows/main_poweron-gateway.yml | 75 ------------------- 3 files changed, 7 insertions(+), 78 deletions(-) rename .github/workflows/{main_poweron-gateway-prod.yml => main_gateway-prod.yml} (92%) delete mode 100644 .github/workflows/main_poweron-gateway.yml diff --git a/.github/workflows/int_gateway-int.yml b/.github/workflows/int_gateway-int.yml index 2a7da645..ba0fe2e2 100644 --- a/.github/workflows/int_gateway-int.yml +++ b/.github/workflows/int_gateway-int.yml @@ -1,3 +1,7 @@ +# Docs for the Azure Web Apps Deploy action: https://github.com/Azure/webapps-deploy +# More GitHub Actions for Azure: https://github.com/Azure/actions +# More info on Python, GitHub Actions, and Azure App Service: https://aka.ms/python-webapps-actions + name: Build and deploy Python app to Azure Web App - gateway-int on: diff --git a/.github/workflows/main_poweron-gateway-prod.yml b/.github/workflows/main_gateway-prod.yml similarity index 92% rename from .github/workflows/main_poweron-gateway-prod.yml rename to .github/workflows/main_gateway-prod.yml index 9dc86510..09e7c1f5 100644 --- a/.github/workflows/main_poweron-gateway-prod.yml +++ b/.github/workflows/main_gateway-prod.yml @@ -2,7 +2,7 @@ # More GitHub Actions for Azure: https://github.com/Azure/actions # More info on Python, GitHub Actions, and Azure App Service: https://aka.ms/python-webapps-actions -name: Build and deploy Python app to Azure Web App - poweron-gateway-prod +name: Build and deploy Python app to Azure Web App - gateway-prod on: push: @@ -70,6 +70,6 @@ jobs: uses: azure/webapps-deploy@v3 id: deploy-to-webapp with: - app-name: 'poweron-gateway-prod' + app-name: 'gateway_prod' slot-name: 'Production' - publish-profile: ${{ secrets.AZUREAPPSERVICE_PUBLISHPROFILE_A0393566625E447EAD8EB1C489BA06A2 }} \ No newline at end of file + publish-profile: ${{ secrets.AZUREAPPSERVICE_PUBLISHPROFILE_GATEWAY_PROD }} \ No newline at end of file diff --git a/.github/workflows/main_poweron-gateway.yml b/.github/workflows/main_poweron-gateway.yml deleted file mode 100644 index a385dc98..00000000 --- a/.github/workflows/main_poweron-gateway.yml +++ /dev/null @@ -1,75 +0,0 @@ -# Docs for the Azure Web Apps Deploy action: https://github.com/Azure/webapps-deploy -# More GitHub Actions for Azure: https://github.com/Azure/actions -# More info on Python, GitHub Actions, and Azure App Service: https://aka.ms/python-webapps-actions - -name: Build and deploy Python app to Azure Web App - poweron-gateway - -on: - push: - branches: - - main - workflow_dispatch: - -jobs: - build: - runs-on: ubuntu-latest - permissions: - contents: read #This is required for actions/checkout - - steps: - - 
uses: actions/checkout@v4 - - - name: Set up Python version - uses: actions/setup-python@v5 - with: - python-version: '3.11' - - - name: Create and start virtual environment - run: | - python -m venv venv - source venv/bin/activate - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install -r requirements.txt --no-cache-dir - - # Optional: Add step to run tests here (PyTest, Django test suites, etc.) - - - name: Zip artifact for deployment - run: zip release.zip ./* -r - - - name: Upload artifact for deployment jobs - uses: actions/upload-artifact@v4 - with: - name: python-app - path: | - release.zip - !venv/ - - deploy: - runs-on: ubuntu-latest - needs: build - environment: - name: 'Production' - url: ${{ steps.deploy-to-webapp.outputs.webapp-url }} - - steps: - - name: Download artifact from build job - uses: actions/download-artifact@v4 - with: - name: python-app - - - name: Unzip artifact for deployment - run: unzip release.zip - - - name: Set productive environment - run: cp env_prod.env .env - - - name: 'Deploy to Azure Web App' - uses: azure/webapps-deploy@v3 - id: deploy-to-webapp - with: - app-name: 'poweron-gateway' - slot-name: 'Production' - publish-profile: ${{ secrets.AZUREAPPSERVICE_PUBLISHPROFILE_A0393566625E447EAD8EB1C489BA06A2 }} \ No newline at end of file From 3be01f4da562be6a77d5707ad142eef2ab9caef1 Mon Sep 17 00:00:00 2001 From: ValueOn AG Date: Tue, 9 Sep 2025 14:38:35 +0200 Subject: [PATCH 27/27] delta jira sync on hold bis account ready --- modules/workflow/managerSyncDelta.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/workflow/managerSyncDelta.py b/modules/workflow/managerSyncDelta.py index e6a0ce56..b66a7488 100644 --- a/modules/workflow/managerSyncDelta.py +++ b/modules/workflow/managerSyncDelta.py @@ -209,7 +209,7 @@ async def perform_sync_jira_delta_group() -> bool: bool: True if synchronization was successful, False otherwise """ try: - if APP_ENV_TYPE != "prod": + if APP_ENV_TYPE != "TASK-ACTIVATE-WHEN-ACCOUNT-READY-prod": logger.info("JIRA to SharePoint synchronization: TASK to run only in PROD") return True