diff --git a/.github/workflows/int_poweron-gateway-int.yml b/.github/workflows/int_gateway-int.yml
similarity index 76%
rename from .github/workflows/int_poweron-gateway-int.yml
rename to .github/workflows/int_gateway-int.yml
index d092bada..ba0fe2e2 100644
--- a/.github/workflows/int_poweron-gateway-int.yml
+++ b/.github/workflows/int_gateway-int.yml
@@ -2,7 +2,7 @@
 # More GitHub Actions for Azure: https://github.com/Azure/actions
 # More info on Python, GitHub Actions, and Azure App Service: https://aka.ms/python-webapps-actions
 
-name: Build and deploy Python app to Azure Web App - poweron-gateway-int
+name: Build and deploy Python app to Azure Web App - gateway-int
 
 on:
   push:
@@ -51,7 +51,7 @@ jobs:
     runs-on: ubuntu-latest
     needs: build
     environment:
-      name: 'Integration'
+      name: 'Production'
       url: ${{ steps.deploy-to-webapp.outputs.webapp-url }}
 
     steps:
@@ -65,16 +65,11 @@ jobs:
 
       - name: Set productive environment
         run: cp env_int.env .env
-
-      - name: Login to Azure
-        uses: azure/login@v1
-        with:
-          creds: '{"clientId":"${{ secrets.AZURE_CLIENT_ID }}","clientSecret":"${{ secrets.AZURE_CLIENT_SECRET }}","subscriptionId":"${{ secrets.AZURE_SUBSCRIPTION_ID }}","tenantId":"${{ secrets.AZURE_TENANT_ID }}"}'
-
-      - name: Deploy to Azure Web App
-        uses: azure/webapps-deploy@v2
+
+      - name: 'Deploy to Azure Web App'
+        uses: azure/webapps-deploy@v3
         id: deploy-to-webapp
         with:
-          app-name: 'poweron-gateway-int'
+          app-name: 'gateway-int'
           slot-name: 'Production'
-          package: .
+          publish-profile: ${{ secrets.AZUREAPPSERVICE_PUBLISHPROFILE_GATEWAY_INT }}
\ No newline at end of file
diff --git a/.github/workflows/main_poweron-gateway.yml b/.github/workflows/main_gateway-prod.yml
similarity index 89%
rename from .github/workflows/main_poweron-gateway.yml
rename to .github/workflows/main_gateway-prod.yml
index a385dc98..09e7c1f5 100644
--- a/.github/workflows/main_poweron-gateway.yml
+++ b/.github/workflows/main_gateway-prod.yml
@@ -1,75 +1,75 @@
-# Docs for the Azure Web Apps Deploy action: https://github.com/Azure/webapps-deploy
-# More GitHub Actions for Azure: https://github.com/Azure/actions
-# More info on Python, GitHub Actions, and Azure App Service: https://aka.ms/python-webapps-actions
-
-name: Build and deploy Python app to Azure Web App - poweron-gateway
-
-on:
-  push:
-    branches:
-      - main
-  workflow_dispatch:
-
-jobs:
-  build:
-    runs-on: ubuntu-latest
-    permissions:
-      contents: read #This is required for actions/checkout
-
-    steps:
-      - uses: actions/checkout@v4
-
-      - name: Set up Python version
-        uses: actions/setup-python@v5
-        with:
-          python-version: '3.11'
-
-      - name: Create and start virtual environment
-        run: |
-          python -m venv venv
-          source venv/bin/activate
-
-      - name: Install dependencies
-        run: |
-          python -m pip install --upgrade pip
-          pip install -r requirements.txt --no-cache-dir
-
-      # Optional: Add step to run tests here (PyTest, Django test suites, etc.)
-
-      - name: Zip artifact for deployment
-        run: zip release.zip ./* -r
-
-      - name: Upload artifact for deployment jobs
-        uses: actions/upload-artifact@v4
-        with:
-          name: python-app
-          path: |
-            release.zip
-            !venv/
-
-  deploy:
-    runs-on: ubuntu-latest
-    needs: build
-    environment:
-      name: 'Production'
-      url: ${{ steps.deploy-to-webapp.outputs.webapp-url }}
-
-    steps:
-      - name: Download artifact from build job
-        uses: actions/download-artifact@v4
-        with:
-          name: python-app
-
-      - name: Unzip artifact for deployment
-        run: unzip release.zip
-
-      - name: Set productive environment
-        run: cp env_prod.env .env
-
-      - name: 'Deploy to Azure Web App'
-        uses: azure/webapps-deploy@v3
-        id: deploy-to-webapp
-        with:
-          app-name: 'poweron-gateway'
-          slot-name: 'Production'
-          publish-profile: ${{ secrets.AZUREAPPSERVICE_PUBLISHPROFILE_A0393566625E447EAD8EB1C489BA06A2 }}
\ No newline at end of file
+# Docs for the Azure Web Apps Deploy action: https://github.com/Azure/webapps-deploy
+# More GitHub Actions for Azure: https://github.com/Azure/actions
+# More info on Python, GitHub Actions, and Azure App Service: https://aka.ms/python-webapps-actions
+
+name: Build and deploy Python app to Azure Web App - gateway-prod
+
+on:
+  push:
+    branches:
+      - main
+  workflow_dispatch:
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read #This is required for actions/checkout
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python version
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Create and start virtual environment
+        run: |
+          python -m venv venv
+          source venv/bin/activate
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements.txt --no-cache-dir
+
+      # Optional: Add step to run tests here (PyTest, Django test suites, etc.)
+
+      - name: Zip artifact for deployment
+        run: zip release.zip ./* -r
+
+      - name: Upload artifact for deployment jobs
+        uses: actions/upload-artifact@v4
+        with:
+          name: python-app
+          path: |
+            release.zip
+            !venv/
+
+  deploy:
+    runs-on: ubuntu-latest
+    needs: build
+    environment:
+      name: 'Production'
+      url: ${{ steps.deploy-to-webapp.outputs.webapp-url }}
+
+    steps:
+      - name: Download artifact from build job
+        uses: actions/download-artifact@v4
+        with:
+          name: python-app
+
+      - name: Unzip artifact for deployment
+        run: unzip release.zip
+
+      - name: Set productive environment
+        run: cp env_prod.env .env
+
+      - name: 'Deploy to Azure Web App'
+        uses: azure/webapps-deploy@v3
+        id: deploy-to-webapp
+        with:
+          app-name: 'gateway_prod'
+          slot-name: 'Production'
+          publish-profile: ${{ secrets.AZUREAPPSERVICE_PUBLISHPROFILE_GATEWAY_PROD }}
\ No newline at end of file
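Both deploy jobs stage configuration by copying `env_int.env` / `env_prod.env` to `.env` before the deploy step. A minimal sketch of how the application side could pick that file up — assuming `modules/shared/configuration.py` uses python-dotenv and the variable names from the env files in this diff; the actual loading code is not shown here:

```python
# Hedged sketch: load the .env the workflow staged and expose it as a config
# dict. python-dotenv and the APP_CONFIG shape are assumptions for illustration.
import os
from dotenv import load_dotenv

load_dotenv(".env")  # the workflow copies env_int.env / env_prod.env to .env

APP_CONFIG = {
    "env_type": os.getenv("APP_ENV_TYPE", "dev"),
    "api_url": os.getenv("APP_API_URL", "http://localhost:8000"),
    "db_app": {
        "host": os.getenv("DB_APP_HOST"),
        "database": os.getenv("DB_APP_DATABASE"),
        "user": os.getenv("DB_APP_USER"),
        "password": os.getenv("DB_APP_PASSWORD_SECRET"),
        "port": int(os.getenv("DB_APP_PORT", "5432")),
    },
}
```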
diff --git a/app.py b/app.py
index 4740357b..282775ad 100644
--- a/app.py
+++ b/app.py
@@ -4,6 +4,7 @@ os.environ["NUMEXPR_MAX_THREADS"] = "12"
 from fastapi import FastAPI, HTTPException, Depends, Body, status, Response
 from fastapi.middleware.cors import CORSMiddleware
 from contextlib import asynccontextmanager
+from zoneinfo import ZoneInfo
 
 import logging
 from logging.handlers import RotatingFileHandler
@@ -11,6 +12,8 @@ from datetime import timedelta
 import pathlib
 
 from modules.shared.configuration import APP_CONFIG
+from apscheduler.schedulers.asyncio import AsyncIOScheduler
+from apscheduler.triggers.cron import CronTrigger
 
 def initLogging():
     """Initialize logging with configuration from APP_CONFIG"""
@@ -63,10 +66,11 @@ def initLogging():
     class EmojiFilter(logging.Filter):
         def filter(self, record):
             if isinstance(record.msg, str):
-                # Remove emojis and other Unicode characters that might cause encoding issues
+                # Remove only emojis, preserve other Unicode characters like quotes
                 import re
-                # Remove emojis and other Unicode symbols
-                record.msg = re.sub(r'[^\x00-\x7F]+', '[EMOJI]', record.msg)
+                import unicodedata
+                # Remove emoji characters specifically
+                record.msg = ''.join(char for char in record.msg if unicodedata.category(char) != 'So' or not (0x1F600 <= ord(char) <= 0x1F64F or 0x1F300 <= ord(char) <= 0x1F5FF or 0x1F680 <= ord(char) <= 0x1F6FF or 0x1F1E0 <= ord(char) <= 0x1F1FF or 0x2600 <= ord(char) <= 0x26FF or 0x2700 <= ord(char) <= 0x27BF))
             return True
 
     # Configure handlers based on config
@@ -146,10 +150,43 @@ async def lifespan(app: FastAPI):
     from modules.interfaces.interfaceAppObjects import getRootInterface
     getRootInterface()
 
+    # Setup APScheduler for JIRA sync
+    scheduler = AsyncIOScheduler(timezone=ZoneInfo("Europe/Zurich"))
+    try:
+        from modules.workflow.managerSyncDelta import perform_sync_jira_delta_group
+        # Schedule hourly sync at minute 0
+        scheduler.add_job(
+            perform_sync_jira_delta_group,
+            CronTrigger(minute="0"),
+            id="jira_delta_group_sync",
+            replace_existing=True,
+            coalesce=True,
+            max_instances=1,
+            misfire_grace_time=1800,
+        )
+        scheduler.start()
+        logger.info("APScheduler started (jira_delta_group_sync hourly)")
+
+        # Run initial sync on startup (non-blocking failure)
+        try:
+            logger.info("Running initial JIRA sync on app startup...")
+            await perform_sync_jira_delta_group()
+            logger.info("Initial JIRA sync completed successfully")
+        except Exception as e:
+            logger.error(f"Initial JIRA sync failed: {str(e)}")
+    except Exception as e:
+        logger.error(f"Failed to initialize scheduler or JIRA sync: {str(e)}")
+
     yield
 
     # Shutdown logic
     logger.info("Application has been shut down")
+    try:
+        if 'scheduler' in locals() and scheduler.running:
+            scheduler.shutdown(wait=False)
+            logger.info("APScheduler stopped")
+    except Exception as e:
+        logger.error(f"Error shutting down scheduler: {str(e)}")
 
 # START APP
 app = FastAPI(
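The lifespan hook above wires an hourly JIRA sync through APScheduler. A standalone sketch of the same scheduling pattern, runnable outside the app — `sync_job` is a stand-in for `perform_sync_jira_delta_group`, and the one-hour sleep only keeps the demo loop alive:

```python
# Minimal sketch of the AsyncIOScheduler + CronTrigger lifecycle used above.
import asyncio
from zoneinfo import ZoneInfo
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.triggers.cron import CronTrigger

async def sync_job():
    print("sync tick")

async def main():
    scheduler = AsyncIOScheduler(timezone=ZoneInfo("Europe/Zurich"))
    scheduler.add_job(
        sync_job,
        CronTrigger(minute="0"),   # top of every hour
        id="jira_delta_group_sync",
        replace_existing=True,
        coalesce=True,             # collapse a backlog of missed runs into one
        max_instances=1,           # never run two syncs concurrently
        misfire_grace_time=1800,   # still fire if up to 30 minutes late
    )
    scheduler.start()              # needs a running asyncio event loop
    await sync_job()               # initial run on startup, as in the lifespan
    await asyncio.sleep(3600)      # keep the loop alive for demonstration
    scheduler.shutdown(wait=False)

asyncio.run(main())
```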
diff --git a/env_dev.env b/env_dev.env
new file mode 100644
index 00000000..29c59f98
--- /dev/null
+++ b/env_dev.env
@@ -0,0 +1,69 @@
+# Development Environment Configuration
+
+# System Configuration
+APP_ENV_TYPE = dev
+APP_ENV_LABEL = Development Instance Patrick
+APP_API_URL = http://localhost:8000
+
+# Database Configuration for Application
+# JSON File Storage (current)
+# DB_APP_HOST=D:/Temp/_powerondb
+# DB_APP_DATABASE=app
+# DB_APP_USER=dev_user
+# DB_APP_PASSWORD_SECRET=dev_password
+
+# PostgreSQL Storage (new)
+DB_APP_HOST=localhost
+DB_APP_DATABASE=poweron_app_dev
+DB_APP_USER=poweron_dev
+DB_APP_PASSWORD_SECRET=dev_password
+DB_APP_PORT=5432
+
+# Database Configuration Chat
+# JSON File Storage (current)
+# DB_CHAT_HOST=D:/Temp/_powerondb
+# DB_CHAT_DATABASE=chat
+# DB_CHAT_USER=dev_user
+# DB_CHAT_PASSWORD_SECRET=dev_password
+
+# PostgreSQL Storage (new)
+DB_CHAT_HOST=localhost
+DB_CHAT_DATABASE=poweron_chat_dev
+DB_CHAT_USER=poweron_dev
+DB_CHAT_PASSWORD_SECRET=dev_password
+DB_CHAT_PORT=5432
+
+# Database Configuration Management
+# JSON File Storage (current)
+# DB_MANAGEMENT_HOST=D:/Temp/_powerondb
+# DB_MANAGEMENT_DATABASE=management
+# DB_MANAGEMENT_USER=dev_user
+# DB_MANAGEMENT_PASSWORD_SECRET=dev_password
+
+# PostgreSQL Storage (new)
+DB_MANAGEMENT_HOST=localhost
+DB_MANAGEMENT_DATABASE=poweron_management_dev
+DB_MANAGEMENT_USER=poweron_dev
+DB_MANAGEMENT_PASSWORD_SECRET=dev_password
+DB_MANAGEMENT_PORT=5432
+
+# Security Configuration
+APP_JWT_SECRET_SECRET=dev_jwt_secret_token
+APP_TOKEN_EXPIRY=300
+
+# CORS Configuration
+APP_ALLOWED_ORIGINS=http://localhost:8080,https://playground.poweron-center.net
+
+# Logging configuration
+APP_LOGGING_LOG_LEVEL = DEBUG
+APP_LOGGING_LOG_FILE = poweron.log
+APP_LOGGING_FORMAT = %(asctime)s - %(levelname)s - %(name)s - %(message)s
+APP_LOGGING_DATE_FORMAT = %Y-%m-%d %H:%M:%S
+APP_LOGGING_CONSOLE_ENABLED = True
+APP_LOGGING_FILE_ENABLED = True
+APP_LOGGING_ROTATION_SIZE = 10485760
+APP_LOGGING_BACKUP_COUNT = 5
+
+# Service Redirects
+Service_MSFT_REDIRECT_URI = http://localhost:8000/api/msft/auth/callback
+Service_GOOGLE_REDIRECT_URI = http://localhost:8000/api/google/auth/callback
\ No newline at end of file
diff --git a/env_int.env b/env_int.env
index 30d1afd1..7b6cca3d 100644
--- a/env_int.env
+++ b/env_int.env
@@ -5,23 +5,26 @@ APP_ENV_TYPE = int
 APP_ENV_LABEL = Integration Instance
 APP_API_URL = https://gateway-int.poweron-center.net
 
-# Database Configuration Application
-DB_APP_HOST=/home/_powerondb
-DB_APP_DATABASE=app
-DB_APP_USER=dev_user
-DB_APP_PASSWORD_SECRET=dev_password
+# PostgreSQL Storage (new)
+DB_APP_HOST=gateway-int-server.postgres.database.azure.com
+DB_APP_DATABASE=poweron_app_int
+DB_APP_USER=heeshkdlby
+DB_APP_PASSWORD_SECRET=VkAjgECESbEVQ$Tu
+DB_APP_PORT=5432
 
-# Database Configuration Chat
-DB_CHAT_HOST=/home/_powerondb
-DB_CHAT_DATABASE=chat
-DB_CHAT_USER=dev_user
-DB_CHAT_PASSWORD_SECRET=dev_password
+# PostgreSQL Storage (new)
+DB_CHAT_HOST=gateway-int-server.postgres.database.azure.com
+DB_CHAT_DATABASE=poweron_chat_int
+DB_CHAT_USER=heeshkdlby
+DB_CHAT_PASSWORD_SECRET=VkAjgECESbEVQ$Tu
+DB_CHAT_PORT=5432
 
-# Database Configuration Management
-DB_MANAGEMENT_HOST=/home/_powerondb
-DB_MANAGEMENT_DATABASE=management
-DB_MANAGEMENT_USER=dev_user
-DB_MANAGEMENT_PASSWORD_SECRET=dev_password
+# PostgreSQL Storage (new)
+DB_MANAGEMENT_HOST=gateway-int-server.postgres.database.azure.com
+DB_MANAGEMENT_DATABASE=poweron_management_int
+DB_MANAGEMENT_USER=heeshkdlby
+DB_MANAGEMENT_PASSWORD_SECRET=VkAjgECESbEVQ$Tu
+DB_MANAGEMENT_PORT=5432
 
 # Security Configuration
 APP_JWT_SECRET_SECRET=dev_jwt_secret_token
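The integration credentials above feed `connectorDbPostgre.py` later in this diff. A hedged connectivity check using the same psycopg2 parameters; note that Azure Database for PostgreSQL typically also requires `sslmode='require'`, which the connector in this diff does not set:

```python
# Hedged sketch: open a connection with the env-file credentials, mirroring
# the connect parameters used by connectorDbPostgre.py further down.
import os
import psycopg2
import psycopg2.extras

conn = psycopg2.connect(
    host=os.getenv("DB_APP_HOST"),          # e.g. gateway-int-server.postgres.database.azure.com
    port=int(os.getenv("DB_APP_PORT", "5432")),
    database=os.getenv("DB_APP_DATABASE"),  # e.g. poweron_app_int
    user=os.getenv("DB_APP_USER"),
    password=os.getenv("DB_APP_PASSWORD_SECRET"),
    client_encoding="utf8",
    cursor_factory=psycopg2.extras.RealDictCursor,
)
with conn.cursor() as cur:
    cur.execute("SELECT 1 AS ok")
    print(cur.fetchone()["ok"])  # prints 1 when the credentials work
conn.close()
```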
diff --git a/env_prod.env b/env_prod.env
index 8415cb8c..f9dff9f9 100644
--- a/env_prod.env
+++ b/env_prod.env
@@ -6,22 +6,46 @@ APP_ENV_LABEL = Production Instance
 APP_API_URL = https://gateway.poweron-center.net
 
 # Database Configuration Application
-DB_APP_HOST=/home/_powerondb
-DB_APP_DATABASE=app
-DB_APP_USER=dev_user
-DB_APP_PASSWORD_SECRET=dev_password
+# JSON File Storage (current)
+# DB_APP_HOST=/home/_powerondb
+# DB_APP_DATABASE=app
+# DB_APP_USER=dev_user
+# DB_APP_PASSWORD_SECRET=dev_password
+
+# PostgreSQL Storage (new)
+DB_APP_HOST=gateway-prod-server.postgres.database.azure.com
+DB_APP_DATABASE=gateway-app
+DB_APP_USER=gzxxmcrdhn
+DB_APP_PASSWORD_SECRET=prod_password_very_secure.2025
+DB_APP_PORT=5432
 
 # Database Configuration Chat
-DB_CHAT_HOST=/home/_powerondb
-DB_CHAT_DATABASE=chat
-DB_CHAT_USER=dev_user
-DB_CHAT_PASSWORD_SECRET=dev_password
+# JSON File Storage (current)
+# DB_CHAT_HOST=/home/_powerondb
+# DB_CHAT_DATABASE=chat
+# DB_CHAT_USER=gzxxmcrdhn
+# DB_CHAT_PASSWORD_SECRET=dev_password
+
+# PostgreSQL Storage (new)
+DB_CHAT_HOST=gateway-prod-server.postgres.database.azure.com
+DB_CHAT_DATABASE=gateway-chat
+DB_CHAT_USER=gzxxmcrdhn
+DB_CHAT_PASSWORD_SECRET=prod_password_very_secure.2025
+DB_CHAT_PORT=5432
 
 # Database Configuration Management
-DB_MANAGEMENT_HOST=/home/_powerondb
-DB_MANAGEMENT_DATABASE=management
-DB_MANAGEMENT_USER=dev_user
-DB_MANAGEMENT_PASSWORD_SECRET=dev_password
+# JSON File Storage (current)
+# DB_MANAGEMENT_HOST=/home/_powerondb
+# DB_MANAGEMENT_DATABASE=gateway-management
+# DB_MANAGEMENT_USER=gzxxmcrdhn
+# DB_MANAGEMENT_PASSWORD_SECRET=dev_password
+
+# PostgreSQL Storage (new)
+DB_MANAGEMENT_HOST=gateway-prod-server.postgres.database.azure.com
+DB_MANAGEMENT_DATABASE=gateway-management
+DB_MANAGEMENT_USER=poweron_prod
+DB_MANAGEMENT_PASSWORD_SECRET=prod_password_very_secure.2025
+DB_MANAGEMENT_PORT=5432
 
 # Security Configuration
 APP_JWT_SECRET_SECRET=dev_jwt_secret_token
diff --git a/modules/chat/documents/documentGeneration.py b/modules/chat/documents/documentGeneration.py
index dfe10918..a5a9ae59 100644
--- a/modules/chat/documents/documentGeneration.py
+++ b/modules/chat/documents/documentGeneration.py
@@ -66,7 +66,7 @@ class DocumentGenerator:
             logger.error(f"Error processing single document: {str(e)}")
             return None
 
-    def createDocumentsFromActionResult(self, action_result, action, workflow) -> List[Any]:
+    def createDocumentsFromActionResult(self, action_result, action, workflow, message_id=None) -> List[Any]:
         """
         Create actual document objects from action result and store them in the system.
         Returns a list of created document objects with proper workflow context.
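The new `message_id` parameter threads the parent chat message's id into document creation; as `handlingTasks.py` later in this diff shows, callers now create the message first, then generate documents linked to it. A hedged, self-contained sketch of that ordering — the dataclasses stand in for the real `ChatMessage`/`ChatDocument` models, only the two-phase flow is taken from this diff:

```python
# Sketch: create the message first so its id exists, then attach documents
# that carry messageId back to it. Model shapes here are illustrative only.
import uuid
from dataclasses import dataclass, field
from typing import List

@dataclass
class Document:
    id: str
    messageId: str  # new: each document knows its parent message

@dataclass
class Message:
    id: str
    documents: List[Document] = field(default_factory=list)

def create_documents(message_id: str, count: int = 2) -> List[Document]:
    return [Document(id=str(uuid.uuid4()), messageId=message_id) for _ in range(count)]

message = Message(id=str(uuid.uuid4()))           # 1) message first
message.documents = create_documents(message.id)  # 2) documents linked via messageId
print(all(d.messageId == message.id for d in message.documents))  # True
```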
@@ -103,7 +103,8 @@ class DocumentGenerator: fileName=document_name, mimeType=mime_type, content=content, - base64encoded=False + base64encoded=False, + messageId=message_id ) if document: # Set workflow context on the document if possible diff --git a/modules/chat/handling/handlingTasks.py b/modules/chat/handling/handlingTasks.py index feb7b335..49d0b97c 100644 --- a/modules/chat/handling/handlingTasks.py +++ b/modules/chat/handling/handlingTasks.py @@ -109,9 +109,6 @@ class HandlingTasks: logger.info("=== TASK PLANNING PROMPT SENT TO AI ===") logger.info(f"User Input: {userInput}") logger.info(f"Available Documents: {available_docs}") - logger.info("=== FULL TASK PLANNING PROMPT ===") - logger.info(task_planning_prompt) - logger.info("=== END TASK PLANNING PROMPT ===") prompt = await self.service.callAiTextAdvanced(task_planning_prompt) @@ -250,7 +247,7 @@ class HandlingTasks: "taskProgress": "pending" } - message = self.chatInterface.createWorkflowMessage(message_data) + message = self.chatInterface.createMessage(message_data) if message: workflow.messages.append(message) @@ -492,7 +489,7 @@ class HandlingTasks: if task_step.userMessage: task_start_message["message"] += f"\n\n💬 {task_step.userMessage}" - message = self.chatInterface.createWorkflowMessage(task_start_message) + message = self.chatInterface.createMessage(task_start_message) if message: workflow.messages.append(message) logger.info(f"Task start message created for task {task_index}") @@ -569,7 +566,7 @@ class HandlingTasks: "actionNumber": action_number }) - message = self.chatInterface.createWorkflowMessage(action_start_message) + message = self.chatInterface.createMessage(action_start_message) if message: workflow.messages.append(message) logger.info(f"Action start message created for action {action_number}") @@ -623,7 +620,7 @@ class HandlingTasks: "taskProgress": "success" } - message = self.chatInterface.createWorkflowMessage(task_completion_message) + message = self.chatInterface.createMessage(task_completion_message) if message: workflow.messages.append(message) logger.info(f"Task completion message created for task {task_index}") @@ -715,7 +712,7 @@ class HandlingTasks: "taskProgress": "retry" } - message = self.chatInterface.createWorkflowMessage(retry_message) + message = self.chatInterface.createMessage(retry_message) if message: workflow.messages.append(message) @@ -768,7 +765,7 @@ class HandlingTasks: } try: - message = self.chatInterface.createWorkflowMessage(message_data) + message = self.chatInterface.createMessage(message_data) if message: workflow.messages.append(message) logger.info(f"Created user-facing retry message for failed task: {task_step.objective}") @@ -822,7 +819,7 @@ class HandlingTasks: } try: - message = self.chatInterface.createWorkflowMessage(message_data) + message = self.chatInterface.createMessage(message_data) if message: workflow.messages.append(message) logger.info(f"Created user-facing error message for failed task: {task_step.objective}") @@ -1030,8 +1027,11 @@ class HandlingTasks: if "execParameters" not in actionData: actionData["execParameters"] = {} + # Use generic field separation based on TaskAction model + simple_fields, object_fields = self.chatInterface._separate_object_fields(TaskAction, actionData) + # Create action in database - createdAction = self.chatInterface.db.recordCreate("taskActions", actionData) + createdAction = self.chatInterface.db.recordCreate(TaskAction, simple_fields) # Convert to TaskAction model return TaskAction( @@ -1098,7 +1098,6 @@ class 
HandlingTasks: # Process documents from the action result created_documents = [] if result.success: - created_documents = self.documentGenerator.createDocumentsFromActionResult(result, action, workflow) action.setSuccess() # Extract result text from documents if available, otherwise use empty string action.result = "" @@ -1115,7 +1114,17 @@ class HandlingTasks: logger.warning(f"Action {action.execMethod}.{action.execAction} has no execResultLabel set") # Always use the action's execResultLabel for message creation to ensure proper document routing message_result_label = action.execResultLabel - await self.createActionMessage(action, result, workflow, message_result_label, created_documents, task_step, task_index) + + # Create message first to get messageId, then create documents with messageId + message = await self.createActionMessage(action, result, workflow, message_result_label, [], task_step, task_index) + if message: + # Now create documents with the messageId + created_documents = self.documentGenerator.createDocumentsFromActionResult(result, action, workflow, message.id) + # Update the message with the created documents + if created_documents: + message.documents = created_documents + # Update the message in the database + self.chatInterface.updateMessage(message.id, {"documents": [doc.dict() for doc in created_documents]}) # Log action results logger.info(f"Action completed successfully") @@ -1138,10 +1147,10 @@ class HandlingTasks: logger.error(f"Action failed: {result.error}") # ⚠️ IMPORTANT: Create error message for failed actions so user can see what went wrong - await self.createActionMessage(action, result, workflow, result_label, [], task_step, task_index) + message = await self.createActionMessage(action, result, workflow, result_label, [], task_step, task_index) # Create database log entry for action failure - self.chatInterface.createWorkflowLog({ + self.chatInterface.createLog({ "workflowId": workflow.id, "message": f"❌ **Task {task_num}**\n\n❌ **Action {action_num}/{total_actions}** failed: {result.error}", "type": "error" @@ -1237,14 +1246,17 @@ class HandlingTasks: logger.info(f"Creating ERROR message: {message_text}") logger.info(f"Message data: {message_data}") - message = self.chatInterface.createWorkflowMessage(message_data) + message = self.chatInterface.createMessage(message_data) if message: workflow.messages.append(message) logger.info(f"Message created: {action.execMethod}.{action.execAction}") + return message else: logger.error(f"Failed to create workflow message for action {action.execMethod}.{action.execAction}") + return None except Exception as e: logger.error(f"Error creating action message: {str(e)}") + return None # --- Helper validation methods --- diff --git a/modules/chat/serviceCenter.py b/modules/chat/serviceCenter.py index cef1555b..9160a3ae 100644 --- a/modules/chat/serviceCenter.py +++ b/modules/chat/serviceCenter.py @@ -920,7 +920,7 @@ Please provide a comprehensive summary of this conversation.""" logger.error(f"Error during document access recovery for {document.id}: {str(e)}") return False - def createDocument(self, fileName: str, mimeType: str, content: str, base64encoded: bool = True) -> ChatDocument: + def createDocument(self, fileName: str, mimeType: str, content: str, base64encoded: bool = True, messageId: str = None) -> ChatDocument: """Create document with file in one step - handles file creation internally""" # Convert content to bytes based on base64 flag if base64encoded: @@ -948,6 +948,7 @@ Please provide a comprehensive 
summary of this conversation.""" # Create document with all file attributes copied document = ChatDocument( id=str(uuid.uuid4()), + messageId=messageId or "", # Use provided messageId or empty string as fallback fileId=file_item.id, fileName=file_info.get("fileName", fileName), fileSize=file_info.get("size", 0), @@ -1060,7 +1061,7 @@ Please provide a comprehensive summary of this conversation.""" logger.error(f"Error executing method {methodName}.{actionName}: {str(e)}") raise - async def processFileIds(self, fileIds: List[str]) -> List[ChatDocument]: + async def processFileIds(self, fileIds: List[str], messageId: str = None) -> List[ChatDocument]: """Process file IDs from existing files and return ChatDocument objects""" documents = [] for fileId in fileIds: @@ -1071,6 +1072,7 @@ Please provide a comprehensive summary of this conversation.""" # Create document directly with all file attributes document = ChatDocument( id=str(uuid.uuid4()), + messageId=messageId or "", # Use provided messageId or empty string as fallback fileId=fileId, fileName=fileInfo.get("fileName", "unknown"), fileSize=fileInfo.get("size", 0), diff --git a/modules/connectors/connectorDbJson.py b/modules/connectors/connectorDbJson.py index 5ecb88dd..b0b6b586 100644 --- a/modules/connectors/connectorDbJson.py +++ b/modules/connectors/connectorDbJson.py @@ -33,9 +33,11 @@ class DatabaseConnector: # Set userId (default to empty string if None) self.userId = userId if userId is not None else "" - # Ensure the database directory exists + # Initialize database system + self.initDbSystem() + + # Set up database folder path self.dbFolder = os.path.join(self.dbHost, self.dbDatabase) - os.makedirs(self.dbFolder, exist_ok=True) # Cache for loaded data self._tablesCache: Dict[str, List[Dict[str, Any]]] = {} @@ -52,6 +54,17 @@ class DatabaseConnector: logger.debug(f"Context: userId={self.userId}") + def initDbSystem(self): + """Initialize the database system - creates necessary directories and structure.""" + try: + # Ensure the database directory exists + self.dbFolder = os.path.join(self.dbHost, self.dbDatabase) + os.makedirs(self.dbFolder, exist_ok=True) + logger.info(f"Database system initialized: {self.dbFolder}") + except Exception as e: + logger.error(f"Error initializing database system: {e}") + raise + def _initializeSystemTable(self): """Initializes the system table if it doesn't exist yet.""" systemTablePath = self._getTablePath(self._systemTableName) @@ -131,6 +144,11 @@ class DatabaseConnector: return lock + def _get_table_lock(self, table: str, timeout_seconds: int = 30): + """Get table-level lock for metadata operations""" + table_lock_key = f"table_{table}" + return self._get_file_lock(table_lock_key, timeout_seconds) + def _ensureTableDirectory(self, table: str) -> bool: """Ensures the table directory exists.""" if table == self._systemTableName: @@ -145,7 +163,9 @@ class DatabaseConnector: return False def _loadTableMetadata(self, table: str) -> Dict[str, Any]: - """Loads table metadata (list of record IDs) without loading actual records.""" + """Loads table metadata (list of record IDs) without loading actual records. + NOTE: This method is safe to call without additional locking. 
+ """ if table in self._tableMetadataCache: return self._tableMetadataCache[table] @@ -159,7 +179,7 @@ class DatabaseConnector: try: if os.path.exists(tablePath): for fileName in os.listdir(tablePath): - if fileName.endswith('.json'): + if fileName.endswith('.json') and fileName != '_metadata.json': recordId = fileName[:-5] # Remove .json extension metadata["recordIds"].append(recordId) @@ -183,17 +203,23 @@ class DatabaseConnector: return None def _saveRecord(self, table: str, recordId: str, record: Dict[str, Any]) -> bool: - """Saves a single record to the table.""" + """Saves a single record to the table with atomic metadata operations.""" recordPath = self._getRecordPath(table, recordId) - lock = self._get_file_lock(recordPath) + record_lock = self._get_file_lock(recordPath) + table_lock = self._get_table_lock(table) try: - # Acquire lock with timeout - if not lock.acquire(timeout=30): # 30 second timeout - raise TimeoutError(f"Could not acquire lock for {recordPath} within 30 seconds") + # Acquire both locks with timeout - record lock first, then table lock + if not record_lock.acquire(timeout=30): + raise TimeoutError(f"Could not acquire record lock for {recordPath} within 30 seconds") + + if not table_lock.acquire(timeout=30): + record_lock.release() + raise TimeoutError(f"Could not acquire table lock for {table} within 30 seconds") # Record lock acquisition time self._lock_timeouts[recordPath] = time.time() + self._lock_timeouts[f"table_{table}"] = time.time() # Ensure table directory exists if not self._ensureTableDirectory(table): @@ -239,14 +265,14 @@ class DatabaseConnector: # Atomic move from temp to final location os.replace(tempPath, recordPath) - # Update metadata + # ATOMIC: Update metadata while holding both locks metadata = self._loadTableMetadata(table) if recordId not in metadata["recordIds"]: metadata["recordIds"].append(recordId) metadata["recordIds"].sort() self._saveTableMetadata(table, metadata) - # Update cache if it exists + # Update cache if it exists (also protected by table lock) if table in self._tablesCache: # Find and update existing record or append new one found = False @@ -272,14 +298,22 @@ class DatabaseConnector: return False finally: - # ALWAYS release lock, even on error + # ALWAYS release both locks, even on error try: - if lock.locked(): - lock.release() + if table_lock.locked(): + table_lock.release() + if f"table_{table}" in self._lock_timeouts: + del self._lock_timeouts[f"table_{table}"] + except Exception as release_error: + logger.error(f"Error releasing table lock for {table}: {release_error}") + + try: + if record_lock.locked(): + record_lock.release() if recordPath in self._lock_timeouts: del self._lock_timeouts[recordPath] except Exception as release_error: - logger.error(f"Error releasing lock for {recordPath}: {release_error}") + logger.error(f"Error releasing record lock for {recordPath}: {release_error}") def _loadTable(self, table: str) -> List[Dict[str, Any]]: """Loads all records from a table folder.""" @@ -403,40 +437,21 @@ class DatabaseConnector: def _saveTableMetadata(self, table: str, metadata: Dict[str, Any]) -> bool: - """Saves table metadata to a metadata file.""" + """Saves table metadata to a metadata file. + NOTE: This method assumes the caller already holds the table lock. 
+ """ try: # Create metadata file path metadataPath = os.path.join(self._getTablePath(table), "_metadata.json") - # Get lock for metadata file - lock = self._get_file_lock(metadataPath) + # Save metadata (caller should already hold table lock) + with open(metadataPath, 'w', encoding='utf-8') as f: + json.dump(metadata, f, indent=2, ensure_ascii=False) - try: - # Acquire lock with timeout - if not lock.acquire(timeout=30): - raise TimeoutError(f"Could not acquire lock for metadata {metadataPath} within 30 seconds") - - # Record lock acquisition time - self._lock_timeouts[metadataPath] = time.time() - - # Save metadata - with open(metadataPath, 'w', encoding='utf-8') as f: - json.dump(metadata, f, indent=2, ensure_ascii=False) - - # Update cache - self._tableMetadataCache[table] = metadata - - return True - - finally: - # ALWAYS release lock - try: - if lock.locked(): - lock.release() - if metadataPath in self._lock_timeouts: - del self._lock_timeouts[metadataPath] - except Exception as release_error: - logger.error(f"Error releasing metadata lock for {metadataPath}: {release_error}") + # Update cache + self._tableMetadataCache[table] = metadata + + return True except Exception as e: logger.error(f"Error saving metadata for table {table}: {e}") @@ -582,42 +597,82 @@ class DatabaseConnector: return existingRecord def recordDelete(self, table: str, recordId: str) -> bool: - """Deletes a record from the table.""" - # Load metadata - metadata = self._loadTableMetadata(table) - - if recordId not in metadata["recordIds"]: - return False - - # Check if it's an initial record - initialId = self.getInitialId(table) - if initialId is not None and initialId == recordId: - self._removeInitialId(table) - logger.info(f"Initial ID {recordId} for table {table} has been removed from the system table") - - # Delete the record file + """Deletes a record from the table with atomic metadata operations.""" recordPath = self._getRecordPath(table, recordId) + record_lock = self._get_file_lock(recordPath) + table_lock = self._get_table_lock(table) + try: + # Acquire both locks with timeout - record lock first, then table lock + if not record_lock.acquire(timeout=30): + raise TimeoutError(f"Could not acquire record lock for {recordPath} within 30 seconds") + + if not table_lock.acquire(timeout=30): + record_lock.release() + raise TimeoutError(f"Could not acquire table lock for {table} within 30 seconds") + + # Record lock acquisition time + self._lock_timeouts[recordPath] = time.time() + self._lock_timeouts[f"table_{table}"] = time.time() + + # Load metadata + metadata = self._loadTableMetadata(table) + + if recordId not in metadata["recordIds"]: + return False + + # Check if it's an initial record + initialId = self.getInitialId(table) + if initialId is not None and initialId == recordId: + self._removeInitialId(table) + logger.info(f"Initial ID {recordId} for table {table} has been removed from the system table") + + # Delete the record file if os.path.exists(recordPath): os.remove(recordPath) - # Update metadata cache + # ATOMIC: Update metadata while holding both locks metadata["recordIds"].remove(recordId) - self._tableMetadataCache[table] = metadata + self._saveTableMetadata(table, metadata) - # Update table cache if it exists + # Update table cache if it exists (also protected by table lock) if table in self._tablesCache: self._tablesCache[table] = [r for r in self._tablesCache[table] if r.get("id") != recordId] return True + else: + return False + except Exception as e: - logger.error(f"Error deleting record 
file {recordPath}: {e}") + logger.error(f"Error deleting record {recordId} from table {table}: {e}") return False - - return False + + finally: + # ALWAYS release both locks, even on error + try: + if table_lock.locked(): + table_lock.release() + if f"table_{table}" in self._lock_timeouts: + del self._lock_timeouts[f"table_{table}"] + except Exception as release_error: + logger.error(f"Error releasing table lock for {table}: {release_error}") + + try: + if record_lock.locked(): + record_lock.release() + if recordPath in self._lock_timeouts: + del self._lock_timeouts[recordPath] + except Exception as release_error: + logger.error(f"Error releasing record lock for {recordPath}: {release_error}") - def getInitialId(self, table: str) -> Optional[str]: + def getInitialId(self, table_or_model) -> Optional[str]: """Returns the initial ID for a table.""" + # Handle both string table names (legacy) and model classes (new) + if isinstance(table_or_model, str): + table = table_or_model + else: + table = table_or_model.__name__ + systemData = self._loadSystemTable() initialId = systemData.get(table) logger.debug(f"Initial ID for table '{table}': {initialId}") diff --git a/modules/connectors/connectorDbPostgre.py b/modules/connectors/connectorDbPostgre.py new file mode 100644 index 00000000..dfee166a --- /dev/null +++ b/modules/connectors/connectorDbPostgre.py @@ -0,0 +1,845 @@ +import psycopg2 +import psycopg2.extras +import json +import os +import logging +from typing import List, Dict, Any, Optional, Union, get_origin, get_args +from datetime import datetime +import uuid +from pydantic import BaseModel +import threading +import time + +from modules.shared.attributeUtils import to_dict +from modules.shared.timezoneUtils import get_utc_timestamp +from modules.shared.configuration import APP_CONFIG +from modules.interfaces.interfaceAppModel import SystemTable + +logger = logging.getLogger(__name__) + +# No mapping needed - table name = Pydantic model name exactly + +def _get_model_fields(model_class) -> Dict[str, str]: + """Get all fields from Pydantic model and map to SQL types.""" + if not hasattr(model_class, '__fields__'): + return {} + + fields = {} + for field_name, field_info in model_class.__fields__.items(): + field_type = field_info.type_ + + # Check for JSONB fields (Dict, List, or complex types) + if (field_type == dict or + field_type == list or + (hasattr(field_type, '__origin__') and field_type.__origin__ in (dict, list)) or + field_name in ['execParameters', 'expectedDocumentFormats', 'resultDocuments', 'logs', 'messages', 'stats', 'tasks']): + fields[field_name] = 'JSONB' + # Simple type mapping + elif field_type in (str, type(None)) or (get_origin(field_type) is Union and type(None) in get_args(field_type)): + fields[field_name] = 'TEXT' + elif field_type == int: + fields[field_name] = 'INTEGER' + elif field_type == float: + fields[field_name] = 'DOUBLE PRECISION' + elif field_type == bool: + fields[field_name] = 'BOOLEAN' + else: + fields[field_name] = 'TEXT' # Default to TEXT + + return fields + +# No caching needed with proper database + +class DatabaseConnector: + """ + A connector for PostgreSQL-based data storage. + Provides generic database operations without user/mandate filtering. + Uses PostgreSQL with JSONB columns for flexible data storage. 
+ """ + def __init__(self, dbHost: str, dbDatabase: str, dbUser: str = None, dbPassword: str = None, dbPort: int = None, userId: str = None): + # Store the input parameters + self.dbHost = dbHost + self.dbDatabase = dbDatabase + self.dbUser = dbUser + self.dbPassword = dbPassword + self.dbPort = dbPort + + # Set userId (default to empty string if None) + self.userId = userId if userId is not None else "" + + # Initialize database system first (creates database if needed) + self.connection = None + self.initDbSystem() + + # No caching needed with proper database - PostgreSQL handles performance + + # Thread safety + self._lock = threading.Lock() + + # Initialize system table + self._systemTableName = "_system" + self._initializeSystemTable() + + + def initDbSystem(self): + """Initialize the database system - creates database and tables.""" + try: + # Create database if it doesn't exist + self._create_database_if_not_exists() + + # Create tables + self._create_tables() + + # Establish connection to the database + self._connect() + + logger.info("PostgreSQL database system initialized successfully") + except Exception as e: + logger.error(f"FATAL ERROR: Database system initialization failed: {e}") + raise + + def _create_database_if_not_exists(self): + """Create the database if it doesn't exist.""" + try: + # Use the configured user for database creation + conn = psycopg2.connect( + host=self.dbHost, + port=self.dbPort, + database="postgres", + user=self.dbUser, + password=self.dbPassword, + client_encoding='utf8' + ) + conn.autocommit = True + + with conn.cursor() as cursor: + # Check if database exists + cursor.execute("SELECT 1 FROM pg_database WHERE datname = %s", (self.dbDatabase,)) + exists = cursor.fetchone() + + if not exists: + # Create database + cursor.execute(f"CREATE DATABASE {self.dbDatabase}") + logger.info(f"Created database: {self.dbDatabase}") + else: + logger.info(f"Database {self.dbDatabase} already exists") + + conn.close() + + except Exception as e: + logger.error(f"FATAL ERROR: Cannot create database: {e}") + logger.error("Database connection failed - application cannot start") + raise RuntimeError(f"FATAL ERROR: Cannot create database '{self.dbDatabase}': {e}") + + + def _create_tables(self): + """Create only the system table - application tables are created by interfaces.""" + try: + # Use the configured user for table creation + conn = psycopg2.connect( + host=self.dbHost, + port=self.dbPort, + database=self.dbDatabase, + user=self.dbUser, + password=self.dbPassword, + client_encoding='utf8' + ) + conn.autocommit = True + + with conn.cursor() as cursor: + # Create only the system table + cursor.execute(""" + CREATE TABLE IF NOT EXISTS _system ( + id SERIAL PRIMARY KEY, + table_name VARCHAR(255) UNIQUE NOT NULL, + initial_id VARCHAR(255) NOT NULL, + _createdAt DOUBLE PRECISION, + _modifiedAt DOUBLE PRECISION + ) + """) + + logger.info("System table created successfully") + + conn.close() + + except Exception as e: + logger.error(f"FATAL ERROR: Cannot create system table: {e}") + logger.error("Database system table creation failed - application cannot start") + raise RuntimeError(f"FATAL ERROR: Cannot create system table: {e}") + + def _connect(self): + """Establish connection to PostgreSQL database.""" + try: + # Use configured user for main connection with proper parameter handling + self.connection = psycopg2.connect( + host=self.dbHost, + port=self.dbPort, + database=self.dbDatabase, + user=self.dbUser, + password=self.dbPassword, + client_encoding='utf8', + 
cursor_factory=psycopg2.extras.RealDictCursor + ) + self.connection.autocommit = False # Use transactions + logger.info(f"Connected to PostgreSQL database: {self.dbDatabase}") + except Exception as e: + logger.error(f"Failed to connect to PostgreSQL: {e}") + raise + + def _ensure_connection(self): + """Ensure database connection is alive, reconnect if necessary.""" + try: + if self.connection is None or self.connection.closed: + self._connect() + else: + # Test connection with a simple query + with self.connection.cursor() as cursor: + cursor.execute("SELECT 1") + except Exception as e: + logger.warning(f"Connection lost, reconnecting: {e}") + self._connect() + + def _initializeSystemTable(self): + """Initializes the system table if it doesn't exist yet.""" + try: + # First ensure the system table exists + self._ensureTableExists(SystemTable) + + with self.connection.cursor() as cursor: + # Check if system table has any data + cursor.execute('SELECT COUNT(*) FROM "_system"') + row = cursor.fetchone() + count = row['count'] if row else 0 + + self.connection.commit() + except Exception as e: + logger.error(f"Error initializing system table: {e}") + self.connection.rollback() + raise + + def _loadSystemTable(self) -> Dict[str, str]: + """Loads the system table with the initial IDs.""" + try: + with self.connection.cursor() as cursor: + cursor.execute('SELECT "table_name", "initial_id" FROM "_system"') + rows = cursor.fetchall() + + system_data = {} + for row in rows: + system_data[row['table_name']] = row['initial_id'] + + return system_data + except Exception as e: + logger.error(f"Error loading system table: {e}") + return {} + + def _saveSystemTable(self, data: Dict[str, str]) -> bool: + """Saves the system table with the initial IDs.""" + try: + with self.connection.cursor() as cursor: + # Clear existing data + cursor.execute('DELETE FROM "_system"') + + # Insert new data + for table_name, initial_id in data.items(): + cursor.execute(""" + INSERT INTO "_system" ("table_name", "initial_id", "_modifiedAt") + VALUES (%s, %s, %s) + """, (table_name, initial_id, get_utc_timestamp())) + + self.connection.commit() + return True + except Exception as e: + logger.error(f"Error saving system table: {e}") + self.connection.rollback() + return False + + def _ensureSystemTableExists(self) -> bool: + """Ensures the system table exists, creates it if it doesn't.""" + try: + self._ensure_connection() + + with self.connection.cursor() as cursor: + # Check if system table exists + cursor.execute("SELECT COUNT(*) FROM pg_stat_user_tables WHERE relname = %s", (self._systemTableName,)) + exists = cursor.fetchone()['count'] > 0 + + if not exists: + # Create system table + cursor.execute(f""" + CREATE TABLE "{self._systemTableName}" ( + "table_name" VARCHAR(255) PRIMARY KEY, + "initial_id" VARCHAR(255), + "_createdAt" DOUBLE PRECISION, + "_modifiedAt" DOUBLE PRECISION + ) + """) + logger.info("System table created successfully") + else: + # Check if we need to add missing columns to existing table + cursor.execute(""" + SELECT column_name FROM information_schema.columns + WHERE table_name = %s AND table_schema = 'public' + """, (self._systemTableName,)) + existing_columns = [row['column_name'] for row in cursor.fetchall()] + + if '_modifiedAt' not in existing_columns: + cursor.execute(f'ALTER TABLE "{self._systemTableName}" ADD COLUMN "_modifiedAt" DOUBLE PRECISION') + logger.info("Added _modifiedAt column to existing system table") + + + return True + except Exception as e: + logger.error(f"Error ensuring 
system table exists: {e}") + return False + + def _ensureTableExists(self, model_class: type) -> bool: + """Ensures a table exists, creates it if it doesn't.""" + table = model_class.__name__ + + if table == "SystemTable": + # Handle system table specially - it uses _system as the actual table name + return self._ensureSystemTableExists() + + try: + self._ensure_connection() + + with self.connection.cursor() as cursor: + # Check if table exists by querying information_schema with case-insensitive search + cursor.execute(''' + SELECT COUNT(*) FROM information_schema.tables + WHERE LOWER(table_name) = LOWER(%s) AND table_schema = 'public' + ''', (table,)) + exists = cursor.fetchone()['count'] > 0 + + if not exists: + # Create table from Pydantic model + self._create_table_from_model(cursor, table, model_class) + logger.info(f"Created table '{table}' with columns from Pydantic model") + + self.connection.commit() + return True + except Exception as e: + logger.error(f"Error ensuring table {table} exists: {e}") + if hasattr(self, 'connection') and self.connection: + self.connection.rollback() + return False + + + def _create_table_from_model(self, cursor, table: str, model_class: type) -> None: + """Create table with columns matching Pydantic model fields.""" + fields = _get_model_fields(model_class) + + # Build column definitions with quoted identifiers to preserve exact case + columns = ['"id" VARCHAR(255) PRIMARY KEY'] + for field_name, sql_type in fields.items(): + if field_name != 'id': # Skip id, already defined + columns.append(f'"{field_name}" {sql_type}') + + # Add metadata columns + columns.extend([ + '"_createdAt" DOUBLE PRECISION', + '"_modifiedAt" DOUBLE PRECISION', + '"_createdBy" VARCHAR(255)', + '"_modifiedBy" VARCHAR(255)' + ]) + + # Create table + sql = f'CREATE TABLE IF NOT EXISTS "{table}" ({", ".join(columns)})' + cursor.execute(sql) + + # Create indexes for foreign keys + for field_name in fields: + if field_name.endswith('Id') and field_name != 'id': + cursor.execute(f'CREATE INDEX IF NOT EXISTS "idx_{table}_{field_name}" ON "{table}" ("{field_name}")') + + + def _save_record(self, cursor, table: str, recordId: str, record: Dict[str, Any], model_class: type) -> None: + """Save record to normalized table with explicit columns.""" + # Get columns from Pydantic model instead of database schema + fields = _get_model_fields(model_class) + columns = ['id'] + [field for field in fields.keys() if field != 'id'] + ['_createdAt', '_createdBy', '_modifiedAt', '_modifiedBy'] + + + if not columns: + logger.error(f"No columns found for table {table}") + return + + # Filter record data to only include columns that exist in the table + filtered_record = {k: v for k, v in record.items() if k in columns} + + # Ensure id is set + filtered_record['id'] = recordId + + # Prepare values in the correct order + values = [] + for col in columns: + value = filtered_record.get(col) + + # Handle timestamp fields - store as Unix timestamps (floats) for consistency + if col in ['_createdAt', '_modifiedAt'] and value is not None: + if isinstance(value, str): + # Try to parse string as timestamp + try: + value = float(value) + except: + pass # Keep as string if parsing fails + + # Convert enum values to their string representation + elif hasattr(value, 'value'): + value = value.value + + # Handle JSONB fields - ensure proper JSON format for PostgreSQL + elif col in fields and fields[col] == 'JSONB' and value is not None: + import json + if isinstance(value, (dict, list)): + # Convert Python objects to 
JSON string for PostgreSQL JSONB + value = json.dumps(value) + elif isinstance(value, str): + # Validate that it's valid JSON, if not, try to parse and re-serialize + try: + # Test if it's already valid JSON + json.loads(value) + # If successful, keep as is + pass + except (json.JSONDecodeError, TypeError): + # If not valid JSON, convert to JSON string + value = json.dumps(value) + else: + # Convert other types to JSON + value = json.dumps(value) + + values.append(value) + + + # Build INSERT/UPDATE with quoted identifiers + col_names = ', '.join([f'"{col}"' for col in columns]) + placeholders = ', '.join(['%s'] * len(columns)) + updates = ', '.join([f'"{col}" = EXCLUDED."{col}"' for col in columns[1:] if col not in ['_createdAt', '_createdBy']]) + + sql = f'INSERT INTO "{table}" ({col_names}) VALUES ({placeholders}) ON CONFLICT ("id") DO UPDATE SET {updates}' + + cursor.execute(sql, values) + + def _loadRecord(self, model_class: type, recordId: str) -> Optional[Dict[str, Any]]: + """Loads a single record from the normalized table.""" + table = model_class.__name__ + + try: + if not self._ensureTableExists(model_class): + return None + + with self.connection.cursor() as cursor: + cursor.execute(f'SELECT * FROM "{table}" WHERE "id" = %s', (recordId,)) + row = cursor.fetchone() + if not row: + return None + + # Convert row to dict and handle JSONB fields + record = dict(row) + fields = _get_model_fields(model_class) + + + # Parse JSONB fields back to Python objects + for field_name, field_type in fields.items(): + if field_type == 'JSONB' and field_name in record and record[field_name] is not None: + import json + try: + if isinstance(record[field_name], str): + # Parse JSON string back to Python object + record[field_name] = json.loads(record[field_name]) + elif isinstance(record[field_name], (dict, list)): + # Already a Python object, keep as is + pass + else: + # Try to parse as JSON + record[field_name] = json.loads(str(record[field_name])) + except (json.JSONDecodeError, TypeError, ValueError): + # If parsing fails, keep as string + logger.warning(f"Could not parse JSONB field {field_name}, keeping as string: {record[field_name]}") + pass + + return record + except Exception as e: + logger.error(f"Error loading record {recordId} from table {table}: {e}") + return None + + def _saveRecord(self, model_class: type, recordId: str, record: Dict[str, Any]) -> bool: + """Saves a single record to the table.""" + table = model_class.__name__ + + try: + if not self._ensureTableExists(model_class): + return False + + recordId = str(recordId) + if "id" in record and str(record["id"]) != recordId: + raise ValueError(f"Record ID mismatch: {recordId} != {record['id']}") + + # Add metadata + currentTime = get_utc_timestamp() + if "_createdAt" not in record: + record["_createdAt"] = currentTime + record["_createdBy"] = self.userId + record["_modifiedAt"] = currentTime + record["_modifiedBy"] = self.userId + + with self.connection.cursor() as cursor: + self._save_record(cursor, table, recordId, record, model_class) + + self.connection.commit() + return True + except Exception as e: + logger.error(f"Error saving record {recordId} to table {table}: {e}") + self.connection.rollback() + return False + + def _loadTable(self, model_class: type) -> List[Dict[str, Any]]: + """Loads all records from a normalized table.""" + table = model_class.__name__ + + if table == self._systemTableName: + return self._loadSystemTable() + + try: + if not self._ensureTableExists(model_class): + return [] + + with 
self.connection.cursor() as cursor: + cursor.execute(f'SELECT * FROM "{table}" ORDER BY "id"') + records = [dict(row) for row in cursor.fetchall()] + + # Handle JSONB fields for all records + fields = _get_model_fields(model_class) + for record in records: + for field_name, field_type in fields.items(): + if field_type == 'JSONB' and field_name in record: + if record[field_name] is None: + # Convert None to appropriate default based on field name + if field_name in ['logs', 'messages', 'tasks', 'expectedDocumentFormats', 'resultDocuments']: + record[field_name] = [] + elif field_name in ['execParameters', 'stats']: + record[field_name] = {} + else: + record[field_name] = None + else: + import json + try: + if isinstance(record[field_name], str): + # Parse JSON string back to Python object + record[field_name] = json.loads(record[field_name]) + elif isinstance(record[field_name], (dict, list)): + # Already a Python object, keep as is + pass + else: + # Try to parse as JSON + record[field_name] = json.loads(str(record[field_name])) + except (json.JSONDecodeError, TypeError, ValueError): + # If parsing fails, keep as string + logger.warning(f"Could not parse JSONB field {field_name}, keeping as string: {record[field_name]}") + pass + + return records + except Exception as e: + logger.error(f"Error loading table {table}: {e}") + return [] + + + + def _registerInitialId(self, table: str, initialId: str) -> bool: + """Registers the initial ID for a table.""" + try: + systemData = self._loadSystemTable() + + if table not in systemData: + systemData[table] = initialId + success = self._saveSystemTable(systemData) + if success: + logger.info(f"Initial ID {initialId} for table {table} registered") + return success + else: + # Check if the existing initial ID still exists in the table + existingInitialId = systemData[table] + records = self.getRecordset(model_class, recordFilter={"id": existingInitialId}) + if not records: + # The initial record no longer exists, update to the new one + systemData[table] = initialId + success = self._saveSystemTable(systemData) + if success: + logger.info(f"Initial ID updated from {existingInitialId} to {initialId} for table {table}") + return success + else: + return True + except Exception as e: + logger.error(f"Error registering the initial ID for table {table}: {e}") + return False + + def _removeInitialId(self, table: str) -> bool: + """Removes the initial ID for a table from the system table.""" + try: + systemData = self._loadSystemTable() + + if table in systemData: + del systemData[table] + success = self._saveSystemTable(systemData) + if success: + logger.info(f"Initial ID for table {table} removed from system table") + return success + return True # If not present, this is not an error + except Exception as e: + logger.error(f"Error removing initial ID for table {table}: {e}") + return False + + def updateContext(self, userId: str) -> None: + """Updates the context of the database connector.""" + if userId is None: + raise ValueError("userId must be provided") + + self.userId = userId + logger.info(f"Updated database context: userId={self.userId}") + + # No cache to clear - database handles data consistency + + def clearTableCache(self, model_class: type) -> None: + """No-op: Database handles data consistency automatically.""" + # No caching with proper database - PostgreSQL handles consistency + pass + + # Public API + + def getTables(self) -> List[str]: + """Returns a list of all available tables.""" + tables = [] + + try: + with self.connection.cursor() 
as cursor: + cursor.execute(""" + SELECT table_name + FROM information_schema.tables + WHERE table_schema = 'public' + AND table_name NOT LIKE '_%' + ORDER BY table_name + """) + rows = cursor.fetchall() + tables = [row['table_name'] for row in rows] + except Exception as e: + logger.error(f"Error reading the database: {e}") + + return tables + + def getFields(self, model_class: type) -> List[str]: + """Returns a list of all fields in a table.""" + data = self._loadTable(model_class) + + if not data: + return [] + + fields = list(data[0].keys()) if data else [] + + return fields + + def getSchema(self, model_class: type, language: str = None) -> Dict[str, Dict[str, Any]]: + """Returns a schema object for a table with data types and labels.""" + data = self._loadTable(model_class) + + schema = {} + + if not data: + return schema + + firstRecord = data[0] + + for field, value in firstRecord.items(): + dataType = type(value).__name__ + label = field + + schema[field] = { + "type": dataType, + "label": label + } + + return schema + + def getRecordset(self, model_class: type, fieldFilter: List[str] = None, recordFilter: Dict[str, Any] = None) -> List[Dict[str, Any]]: + """Returns a list of records from a table, filtered by criteria.""" + table = model_class.__name__ + + try: + if not self._ensureTableExists(model_class): + return [] + + # Build WHERE clause from recordFilter + where_conditions = [] + where_values = [] + + if recordFilter: + for field, value in recordFilter.items(): + where_conditions.append(f'"{field}" = %s') + where_values.append(value) + + # Build the query + if where_conditions: + where_clause = " WHERE " + " AND ".join(where_conditions) + else: + where_clause = "" + + query = f'SELECT * FROM "{table}"{where_clause} ORDER BY "id"' + + with self.connection.cursor() as cursor: + cursor.execute(query, where_values) + records = [dict(row) for row in cursor.fetchall()] + + # Handle JSONB fields for all records + fields = _get_model_fields(model_class) + for record in records: + for field_name, field_type in fields.items(): + if field_type == 'JSONB' and field_name in record: + if record[field_name] is None: + # Convert None to appropriate default based on field name + if field_name in ['logs', 'messages', 'tasks', 'expectedDocumentFormats', 'resultDocuments']: + record[field_name] = [] + elif field_name in ['execParameters', 'stats']: + record[field_name] = {} + else: + record[field_name] = None + else: + import json + try: + if isinstance(record[field_name], str): + # Parse JSON string back to Python object + record[field_name] = json.loads(record[field_name]) + elif isinstance(record[field_name], (dict, list)): + # Already a Python object, keep as is + pass + else: + # Try to parse as JSON + record[field_name] = json.loads(str(record[field_name])) + except (json.JSONDecodeError, TypeError, ValueError): + # If parsing fails, keep as string + logger.warning(f"Could not parse JSONB field {field_name}, keeping as string: {record[field_name]}") + pass + + # If fieldFilter is available, reduce the fields + if fieldFilter and isinstance(fieldFilter, list): + result = [] + for record in records: + filteredRecord = {} + for field in fieldFilter: + if field in record: + filteredRecord[field] = record[field] + result.append(filteredRecord) + return result + + return records + except Exception as e: + logger.error(f"Error loading records from table {table}: {e}") + return [] + + def recordCreate(self, model_class: type, record: Union[Dict[str, Any], BaseModel]) -> Dict[str, Any]: + 
"""Creates a new record in a table based on Pydantic model class.""" + # If record is a Pydantic model, convert to dict + if isinstance(record, BaseModel): + record = to_dict(record) + elif isinstance(record, dict): + record = record.copy() + else: + raise ValueError("Record must be a Pydantic model or dictionary") + + # Ensure record has an ID + if "id" not in record: + record["id"] = str(uuid.uuid4()) + + # Save record + self._saveRecord(model_class, record["id"], record) + + # Check if this is the first record in the table and register as initial ID + table = model_class.__name__ + existingInitialId = self.getInitialId(model_class) + if existingInitialId is None: + # This is the first record, register it as the initial ID + self._registerInitialId(table, record["id"]) + logger.info(f"Registered initial ID {record['id']} for table {table}") + + return record + + def recordModify(self, model_class: type, recordId: str, record: Union[Dict[str, Any], BaseModel]) -> Dict[str, Any]: + """Modifies an existing record in a table based on Pydantic model class.""" + # Load existing record + existingRecord = self._loadRecord(model_class, recordId) + if not existingRecord: + table = model_class.__name__ + raise ValueError(f"Record {recordId} not found in table {table}") + + # If record is a Pydantic model, convert to dict + if isinstance(record, BaseModel): + record = to_dict(record) + elif isinstance(record, dict): + record = record.copy() + else: + raise ValueError("Record must be a Pydantic model or dictionary") + + # CRITICAL: Ensure we never modify the ID + if "id" in record and str(record["id"]) != recordId: + logger.error(f"Attempted to modify record ID from {recordId} to {record['id']}") + raise ValueError("Cannot modify record ID - it must match the provided recordId") + + # Update existing record with new data + existingRecord.update(record) + + # Save updated record + self._saveRecord(model_class, recordId, existingRecord) + return existingRecord + + def recordDelete(self, model_class: type, recordId: str) -> bool: + """Deletes a record from the table based on Pydantic model class.""" + table = model_class.__name__ + + try: + if not self._ensureTableExists(model_class): + return False + + with self.connection.cursor() as cursor: + # Check if record exists + cursor.execute(f'SELECT "id" FROM "{table}" WHERE "id" = %s', (recordId,)) + if not cursor.fetchone(): + return False + + # Check if it's an initial record + initialId = self.getInitialId(model_class) + if initialId is not None and initialId == recordId: + self._removeInitialId(table) + logger.info(f"Initial ID {recordId} for table {table} has been removed from the system table") + + # Delete the record + cursor.execute(f'DELETE FROM "{table}" WHERE "id" = %s', (recordId,)) + + # No cache to update - database handles consistency + + self.connection.commit() + return True + + except Exception as e: + logger.error(f"Error deleting record {recordId} from table {table}: {e}") + self.connection.rollback() + return False + + + def getInitialId(self, model_class: type) -> Optional[str]: + """Returns the initial ID for a table.""" + table = model_class.__name__ + systemData = self._loadSystemTable() + initialId = systemData.get(table) + return initialId + + def close(self): + """Close the database connection.""" + if hasattr(self, 'connection') and self.connection and not self.connection.closed: + self.connection.close() + + def __del__(self): + """Cleanup method to close connection.""" + try: + self.close() + except Exception: + # Ignore 
errors during cleanup
+            pass
diff --git a/modules/connectors/connectorSharepoint.py b/modules/connectors/connectorSharepoint.py
new file mode 100644
index 00000000..89bdffbe
--- /dev/null
+++ b/modules/connectors/connectorSharepoint.py
@@ -0,0 +1,443 @@
+"""Connector for SharePoint operations using Microsoft Graph API."""
+
+import logging
+import json
+import aiohttp
+import asyncio
+from typing import Dict, Any, List, Optional
+from datetime import datetime, UTC
+
+logger = logging.getLogger(__name__)
+
+
+class ConnectorSharepoint:
+    """SharePoint connector using Microsoft Graph API for reliable authentication."""
+
+    def __init__(self, access_token: str):
+        """Initialize with access token.
+
+        Args:
+            access_token: Microsoft Graph access token
+        """
+        self.access_token = access_token
+        self.base_url = "https://graph.microsoft.com/v1.0"
+
+    async def _make_graph_api_call(self, endpoint: str, method: str = "GET", data: bytes = None) -> Dict[str, Any]:
+        """Make a Microsoft Graph API call with proper error handling."""
+        try:
+            # Content-Type: raw bytes for PUT uploads, JSON otherwise (same rule as the
+            # original nested conditional expression, written out for readability)
+            if data and method == "PUT":
+                content_type = "application/octet-stream"
+            else:
+                content_type = "application/json"
+
+            headers = {
+                "Authorization": f"Bearer {self.access_token}",
+                "Content-Type": content_type
+            }
+
+            # Remove leading slash from endpoint to avoid double slash
+            clean_endpoint = endpoint.lstrip('/')
+            url = f"{self.base_url}/{clean_endpoint}"
+            logger.debug(f"Making Graph API call: {method} {url}")
+
+            timeout = aiohttp.ClientTimeout(total=30)
+
+            async with aiohttp.ClientSession(timeout=timeout) as session:
+                if method == "GET":
+                    async with session.get(url, headers=headers) as response:
+                        if response.status == 200:
+                            return await response.json()
+                        else:
+                            error_text = await response.text()
+                            logger.error(f"Graph API call failed: {response.status} - {error_text}")
+                            return {"error": f"API call failed: {response.status} - {error_text}"}
+
+                elif method == "PUT":
+                    async with session.put(url, headers=headers, data=data) as response:
+                        if response.status in [200, 201]:
+                            return await response.json()
+                        else:
+                            error_text = await response.text()
+                            logger.error(f"Graph API call failed: {response.status} - {error_text}")
+                            return {"error": f"API call failed: {response.status} - {error_text}"}
+
+                elif method == "POST":
+                    async with session.post(url, headers=headers, data=data) as response:
+                        if response.status in [200, 201]:
+                            return await response.json()
+                        else:
+                            error_text = await response.text()
+                            logger.error(f"Graph API call failed: {response.status} - {error_text}")
+                            return {"error": f"API call failed: {response.status} - {error_text}"}
+
+                else:
+                    # Previously fell through and returned None; make the contract explicit
+                    logger.error(f"Unsupported HTTP method: {method}")
+                    return {"error": f"Unsupported HTTP method: {method}"}
+
+        except asyncio.TimeoutError:
+            logger.error(f"Graph API call timed out after 30 seconds: {endpoint}")
+            return {"error": f"API call timed out after 30 seconds: {endpoint}"}
+        except Exception as e:
+            logger.error(f"Error making Graph API call: {str(e)}")
+            return {"error": f"Error making Graph API call: {str(e)}"}
+
+    async def discover_sites(self) -> List[Dict[str, Any]]:
+        """Discover all SharePoint sites accessible to the user."""
+        try:
+            result = await self._make_graph_api_call("sites?search=*")
+
+            if "error" in result:
+                logger.error(f"Error discovering SharePoint sites: {result['error']}")
+                return []
+
+            sites = result.get("value", [])
+            logger.info(f"Discovered {len(sites)} SharePoint sites")
+
+            processed_sites = []
+            for site in sites:
+                site_info = {
+                    "id": site.get("id"),
+                    "displayName": site.get("displayName"),
+                    "name": site.get("name"),
+                    "webUrl": site.get("webUrl"),
+                    "description": site.get("description"),
+
"createdDateTime": site.get("createdDateTime"), + "lastModifiedDateTime": site.get("lastModifiedDateTime") + } + processed_sites.append(site_info) + logger.debug(f"Site: {site_info['displayName']} - {site_info['webUrl']}") + + return processed_sites + + except Exception as e: + logger.error(f"Error discovering SharePoint sites: {str(e)}") + return [] + + async def find_site_by_name(self, site_name: str) -> Optional[Dict[str, Any]]: + """Find a specific SharePoint site by name using direct Graph API call.""" + try: + # Try to get the site directly by name using Graph API + endpoint = f"sites/{site_name}" + result = await self._make_graph_api_call(endpoint) + + if result and "error" not in result: + site_info = { + "id": result.get("id"), + "displayName": result.get("displayName"), + "name": result.get("name"), + "webUrl": result.get("webUrl"), + "description": result.get("description"), + "createdDateTime": result.get("createdDateTime"), + "lastModifiedDateTime": result.get("lastModifiedDateTime") + } + logger.info(f"Found site directly: {site_info['displayName']} - {site_info['webUrl']}") + return site_info + + except Exception as e: + logger.debug(f"Direct site lookup failed for '{site_name}': {str(e)}") + + # Fallback to discovery if direct lookup fails + logger.info(f"Direct lookup failed, trying discovery for site: {site_name}") + sites = await self.discover_sites() + if not sites: + logger.warning("No sites discovered") + return None + + logger.info(f"Discovered {len(sites)} SharePoint sites:") + for site in sites: + logger.info(f" - {site.get('displayName', 'Unknown')} (ID: {site.get('id', 'Unknown')})") + + # Try exact match first + for site in sites: + if site.get("displayName", "").strip().lower() == site_name.strip().lower(): + logger.info(f"Found exact match: {site.get('displayName')}") + return site + + # Try partial match + for site in sites: + if site_name.lower() in site.get("displayName", "").lower(): + logger.info(f"Found partial match: {site.get('displayName')}") + return site + + logger.warning(f"No site found matching: {site_name}") + return None + + async def find_site_by_web_url(self, web_url: str) -> Optional[Dict[str, Any]]: + """Find a SharePoint site using its web URL (useful for guest sites).""" + try: + # Use the web URL format: sites/{hostname}:/sites/{site-path} + # Extract hostname and site path from the web URL + if not web_url.startswith("https://"): + web_url = f"https://{web_url}" + + # Parse the URL to extract hostname and site path + from urllib.parse import urlparse + parsed = urlparse(web_url) + hostname = parsed.hostname + path_parts = parsed.path.strip('/').split('/') + + if len(path_parts) >= 2 and path_parts[0] == 'sites': + site_path = '/'.join(path_parts[1:]) # Everything after 'sites/' + else: + logger.error(f"Invalid SharePoint URL format: {web_url}") + return None + + endpoint = f"sites/{hostname}:/sites/{site_path}" + logger.debug(f"Trying web URL format: {endpoint}") + + result = await self._make_graph_api_call(endpoint) + + if result and "error" not in result: + site_info = { + "id": result.get("id"), + "displayName": result.get("displayName"), + "name": result.get("name"), + "webUrl": result.get("webUrl"), + "description": result.get("description"), + "createdDateTime": result.get("createdDateTime"), + "lastModifiedDateTime": result.get("lastModifiedDateTime") + } + logger.info(f"Found site by web URL: {site_info['displayName']} - {site_info['webUrl']} (ID: {site_info['id']})") + return site_info + else: + logger.warning(f"Site not found 
using web URL: {web_url}") + return None + + except Exception as e: + logger.error(f"Error finding site by web URL: {str(e)}") + return None + + async def find_site_by_url(self, hostname: str, site_path: str) -> Optional[Dict[str, Any]]: + """Find a SharePoint site using the site URL format.""" + try: + # For guest sites, try different URL formats + url_formats = [ + f"sites/{hostname}:/sites/{site_path}", # Standard format + f"sites/{hostname}:/sites/{site_path}/", # With trailing slash + f"sites/{hostname}:/sites/{site_path.lower()}", # Lowercase + f"sites/{hostname}:/sites/{site_path.lower()}/", # Lowercase with slash + ] + + for endpoint in url_formats: + logger.debug(f"Trying URL format: {endpoint}") + result = await self._make_graph_api_call(endpoint) + + if result and "error" not in result: + site_info = { + "id": result.get("id"), + "displayName": result.get("displayName"), + "name": result.get("name"), + "webUrl": result.get("webUrl"), + "description": result.get("description"), + "createdDateTime": result.get("createdDateTime"), + "lastModifiedDateTime": result.get("lastModifiedDateTime") + } + logger.info(f"Found site by URL: {site_info['displayName']} - {site_info['webUrl']} (ID: {site_info['id']})") + return site_info + else: + logger.debug(f"URL format failed: {endpoint} - {result.get('error', 'Unknown error')}") + + logger.warning(f"Site not found using any URL format for: {hostname}:/sites/{site_path}") + return None + + except Exception as e: + logger.error(f"Error finding site by URL: {str(e)}") + return None + + async def get_folder_by_path(self, site_id: str, folder_path: str) -> Optional[Dict[str, Any]]: + """Get folder information by path within a site.""" + try: + # Clean the path + clean_path = folder_path.lstrip('/') + endpoint = f"sites/{site_id}/drive/root:/{clean_path}" + + result = await self._make_graph_api_call(endpoint) + + if "error" in result: + logger.warning(f"Folder not found at path {folder_path}: {result['error']}") + return None + + return result + + except Exception as e: + logger.error(f"Error getting folder by path: {str(e)}") + return None + + async def upload_file(self, site_id: str, folder_path: str, file_name: str, content: bytes) -> Dict[str, Any]: + """Upload a file to SharePoint.""" + try: + # Clean the path + clean_path = folder_path.lstrip('/') + upload_path = f"{clean_path.rstrip('/')}/{file_name}" + endpoint = f"sites/{site_id}/drive/root:/{upload_path}:/content" + + logger.info(f"Uploading file to: {endpoint}") + + result = await self._make_graph_api_call(endpoint, method="PUT", data=content) + + if "error" in result: + logger.error(f"Upload failed: {result['error']}") + return result + + logger.info(f"File uploaded successfully: {file_name}") + return result + + except Exception as e: + logger.error(f"Error uploading file: {str(e)}") + return {"error": f"Error uploading file: {str(e)}"} + + async def download_file(self, site_id: str, file_id: str) -> Optional[bytes]: + """Download a file from SharePoint.""" + try: + endpoint = f"sites/{site_id}/drive/items/{file_id}/content" + + headers = {"Authorization": f"Bearer {self.access_token}"} + timeout = aiohttp.ClientTimeout(total=30) + + async with aiohttp.ClientSession(timeout=timeout) as session: + async with session.get(f"{self.base_url}/{endpoint}", headers=headers) as response: + if response.status == 200: + return await response.read() + else: + logger.error(f"Download failed: {response.status}") + return None + + except Exception as e: + logger.error(f"Error downloading file: 
{str(e)}")
+            return None
+
+    async def list_folder_contents(self, site_id: str, folder_path: str = "") -> List[Dict[str, Any]]:
+        """List contents of a folder."""
+        try:
+            if not folder_path or folder_path == "/":
+                endpoint = f"sites/{site_id}/drive/root/children"
+            else:
+                clean_path = folder_path.lstrip('/')
+                endpoint = f"sites/{site_id}/drive/root:/{clean_path}:/children"
+
+            result = await self._make_graph_api_call(endpoint)
+
+            if "error" in result:
+                logger.warning(f"Failed to list folder contents: {result['error']}")
+                return []  # keep the declared List return type (was: return None)
+
+            items = result.get("value", [])
+            processed_items = []
+
+            for item in items:
+                # Determine if it's a folder or file
+                is_folder = 'folder' in item
+
+                item_info = {
+                    "id": item.get("id"),
+                    "name": item.get("name"),
+                    "type": "folder" if is_folder else "file",
+                    "size": item.get("size", 0),
+                    "createdDateTime": item.get("createdDateTime"),
+                    "lastModifiedDateTime": item.get("lastModifiedDateTime"),
+                    "webUrl": item.get("webUrl")
+                }
+
+                if "file" in item:
+                    item_info["mimeType"] = item["file"].get("mimeType")
+                    item_info["downloadUrl"] = item.get("@microsoft.graph.downloadUrl")
+
+                if "folder" in item:
+                    item_info["childCount"] = item["folder"].get("childCount", 0)
+
+                processed_items.append(item_info)
+
+            return processed_items
+
+        except Exception as e:
+            logger.error(f"Error listing folder contents: {str(e)}")
+            return []
+
+    async def search_files(self, site_id: str, query: str) -> List[Dict[str, Any]]:
+        """Search for files in a site."""
+        try:
+            search_query = query.replace("'", "''")  # Escape single quotes for OData
+            endpoint = f"sites/{site_id}/drive/root/search(q='{search_query}')"
+
+            result = await self._make_graph_api_call(endpoint)
+
+            if "error" in result:
+                logger.warning(f"Search failed: {result['error']}")
+                return []
+
+            items = result.get("value", [])
+            processed_items = []
+
+            for item in items:
+                is_folder = 'folder' in item
+
+                item_info = {
+                    "id": item.get("id"),
+                    "name": item.get("name"),
+                    "type": "folder" if is_folder else "file",
+                    "size": item.get("size", 0),
+                    "createdDateTime": item.get("createdDateTime"),
+                    "lastModifiedDateTime": item.get("lastModifiedDateTime"),
+                    "webUrl": item.get("webUrl"),
+                    "parentPath": item.get("parentReference", {}).get("path", "")
+                }
+
+                if "file" in item:
+                    item_info["mimeType"] = item["file"].get("mimeType")
+                    item_info["downloadUrl"] = item.get("@microsoft.graph.downloadUrl")
+
+                processed_items.append(item_info)
+
+            return processed_items
+
+        except Exception as e:
+            logger.error(f"Error searching files: {str(e)}")
+            return []
+
+    async def copy_file_async(self, site_id: str, source_folder: str, source_file: str, dest_folder: str, dest_file: str) -> None:
+        """Copy a file from source to destination folder (like the original synchronizer)."""
+        try:
+            # First, download the source file
+            source_path = f"{source_folder}/{source_file}"
+            file_content = await self.download_file_by_path(site_id=site_id, file_path=source_path)
+
+            if not file_content:
+                raise Exception(f"Failed to download source file: {source_path}")
+
+            # Upload to destination
+            await self.upload_file(
+                site_id=site_id,
+                folder_path=dest_folder,
+                file_name=dest_file,
+                content=file_content
+            )
+
+            logger.info(f"File copied: {source_file} -> {dest_file}")
+
+        except Exception as e:
+            logger.error(f"Error copying file: {str(e)}")
+            raise
+
+    async def download_file_by_path(self, site_id: str, file_path: str) -> Optional[bytes]:
+        """Download a file by its path within a site."""
+        try:
+            # Clean the path
+            clean_path =
file_path.strip('/') + endpoint = f"sites/{site_id}/drive/root:/{clean_path}:/content" + + # Use direct HTTP call for file downloads (binary content) + headers = { + "Authorization": f"Bearer {self.access_token}", + } + + # Remove leading slash from endpoint to avoid double slash + clean_endpoint = endpoint.lstrip('/') + url = f"{self.base_url}/{clean_endpoint}" + logger.debug(f"Downloading file: GET {url}") + + timeout = aiohttp.ClientTimeout(total=30) + + async with aiohttp.ClientSession(timeout=timeout) as session: + async with session.get(url, headers=headers) as response: + if response.status == 200: + return await response.read() + else: + error_text = await response.text() + logger.error(f"File download failed: {response.status} - {error_text}") + return None + + except Exception as e: + logger.error(f"Error downloading file by path: {str(e)}") + return None + diff --git a/modules/connectors/connectorTicketJira.py b/modules/connectors/connectorTicketJira.py new file mode 100644 index 00000000..93020f2c --- /dev/null +++ b/modules/connectors/connectorTicketJira.py @@ -0,0 +1,237 @@ +"""Jira connector for CRUD operations.""" + +from dataclasses import dataclass +import logging +import aiohttp +import json + +from modules.interfaces.interfaceTicketModel import ( + TicketBase, + TicketFieldAttribute, + Task, +) + + +logger = logging.getLogger(__name__) + + +@dataclass +class ConnectorTicketJira(TicketBase): + jira_username: str + jira_api_token: str + jira_url: str + project_code: str + issue_type: str + + @classmethod + async def create( + cls, + *, + jira_username: str, + jira_api_token: str, + jira_url: str, + project_code: str, + issue_type: str, + ): + return ConnectorTicketJira( + jira_username=jira_username, + jira_api_token=jira_api_token, + jira_url=jira_url, + project_code=project_code, + issue_type=issue_type, + ) + + async def read_attributes(self) -> list[TicketFieldAttribute]: + """ + Read field attributes from Jira by querying for a single issue + and extracting the field mappings. 
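+
+        Example (illustrative values, not from this repo): with project_code="OPS" and
+        issue_type="Task", this runs the JQL "project=OPS AND issuetype=Task" with
+        maxResults=1 and expand=names, then maps each field ID on the sample issue
+        to its display name.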
+ + Returns: + list[TicketFieldAttribute]: List of field attributes with names and IDs + """ + jql_query = f"project={self.project_code} AND issuetype={self.issue_type}" + + # Prepare the request URL and parameters + url = f"{self.jira_url}/rest/api/2/search" + params = {"jql": jql_query, "maxResults": 1, "expand": "names"} + + # Prepare authentication + auth = aiohttp.BasicAuth(self.jira_username, self.jira_api_token) + + try: + async with aiohttp.ClientSession() as session: + async with session.get(url, params=params, auth=auth) as response: + if response.status != 200: + error_text = await response.text() + logger.error( + f"Jira API request failed with status {response.status}: {error_text}" + ) + raise Exception( + f"Jira API request failed with status {response.status}" + ) + + data = await response.json() + + # Extract issues and field names + issues = data.get("issues", []) + field_names = data.get("names", {}) + + if not issues: + logger.warning(f"No issues found for query: {jql_query}") + return [] + + # Extract field attributes from the first issue + attributes = [] + issue = issues[0] + fields = issue.get("fields", {}) + + for field_id, value in fields.items(): + field_name = field_names.get(field_id, field_id) + attributes.append( + TicketFieldAttribute(field_name=field_name, field=field_id) + ) + + logger.info( + f"Successfully retrieved {len(attributes)} field attributes from Jira" + ) + return attributes + + except aiohttp.ClientError as e: + logger.error(f"HTTP client error while fetching Jira attributes: {str(e)}") + raise Exception(f"Failed to connect to Jira: {str(e)}") + except json.JSONDecodeError as e: + logger.error(f"Failed to parse Jira API response: {str(e)}") + raise Exception(f"Invalid response from Jira API: {str(e)}") + except Exception as e: + logger.error(f"Unexpected error while fetching Jira attributes: {str(e)}") + raise + + async def read_tasks(self, *, limit: int = 0) -> list[Task]: + """ + Read tasks from Jira with pagination support. + + Args: + limit: Maximum number of tasks to retrieve. 0 means no limit. + + Returns: + list[Task]: List of tasks with their data + """ + jql_query = f"project={self.project_code} AND issuetype={self.issue_type}" + + # Initialize variables for pagination + start_at = 0 + max_results = 50 + total = 1 # Initialize with a value greater than 0 to enter the loop + tasks = [] + + # Prepare authentication + auth = aiohttp.BasicAuth(self.jira_username, self.jira_api_token) + url = f"{self.jira_url}/rest/api/2/search" + + try: + async with aiohttp.ClientSession() as session: + while start_at < total and (limit == 0 or len(tasks) < limit): + # Prepare request parameters + params = { + "jql": jql_query, + "startAt": start_at, + "maxResults": max_results, + } + + headers = {"Content-Type": "application/json"} + + async with session.get( + url, params=params, auth=auth, headers=headers + ) as response: + if response.status != 200: + error_text = await response.text() + logger.error( + f"Failed to fetch tasks from Jira. 
Status code: {response.status}, Response: {error_text}" + ) + break + + data = await response.json() + issues = data.get("issues", []) + total = data.get("total", 0) + + for issue in issues: + # Store the raw JIRA issue data directly + # This matches what the reference implementation expects + task = Task(data=issue) + tasks.append(task) + + # Check limit + if limit > 0 and len(tasks) >= limit: + break + + start_at += max_results + logger.debug(f"Issues packages reading: {len(tasks)}") + + logger.info(f"JIRA issues read: {len(tasks)}") + return tasks + + except aiohttp.ClientError as e: + logger.error(f"HTTP client error while fetching Jira tasks: {str(e)}") + raise Exception(f"Failed to connect to Jira: {str(e)}") + except json.JSONDecodeError as e: + logger.error(f"Failed to parse Jira API response: {str(e)}") + raise Exception(f"Invalid response from Jira API: {str(e)}") + except Exception as e: + logger.error(f"Unexpected error while fetching Jira tasks: {str(e)}") + raise + + async def write_tasks(self, tasklist: list[Task]) -> None: + """ + Write/update tasks to Jira. + + Args: + tasklist: List of Task objects containing task data to update + """ + headers = {"Accept": "application/json", "Content-Type": "application/json"} + auth = aiohttp.BasicAuth(self.jira_username, self.jira_api_token) + + try: + async with aiohttp.ClientSession() as session: + for task in tasklist: + task_data = task.data + task_id = ( + task_data.get("ID") + or task_data.get("id") + or task_data.get("key") + ) + + if not task_id: + logger.warning("Task missing ID or key, skipping update") + continue + + # Extract fields to update from task data + # The task data should contain the field updates in a "fields" key + fields = task_data.get("fields", {}) + + if not fields: + logger.debug(f"No fields to update for task {task_id}") + continue + + # Prepare update data + update_data = {"fields": fields} + + # Make the update request + url = f"{self.jira_url}/rest/api/2/issue/{task_id}" + + async with session.put( + url, json=update_data, headers=headers, auth=auth + ) as response: + if response.status == 204: + logger.info(f"JIRA task {task_id} updated successfully.") + else: + error_text = await response.text() + logger.error( + f"JIRA failed to update task {task_id}: {response.status} - {error_text}" + ) + + except aiohttp.ClientError as e: + logger.error(f"HTTP client error while updating Jira tasks: {str(e)}") + raise Exception(f"Failed to connect to Jira: {str(e)}") + except Exception as e: + logger.error(f"Unexpected error while updating Jira tasks: {str(e)}") + raise diff --git a/modules/interfaces/interfaceAppAccess.py b/modules/interfaces/interfaceAppAccess.py index 7277d853..25b318ad 100644 --- a/modules/interfaces/interfaceAppAccess.py +++ b/modules/interfaces/interfaceAppAccess.py @@ -5,7 +5,7 @@ Access control for the Application. import logging from typing import Dict, Any, List, Optional from datetime import datetime -from modules.interfaces.interfaceAppModel import UserPrivilege, User +from modules.interfaces.interfaceAppModel import UserPrivilege, User, UserInDB, AuthEvent, Mandate from modules.shared.timezoneUtils import get_utc_now # Configure logger @@ -29,28 +29,29 @@ class AppAccess: self.db = db - def uam(self, table: str, recordset: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + def uam(self, model_class: type, recordset: List[Dict[str, Any]]) -> List[Dict[str, Any]]: """ Unified user access management function that filters data based on user privileges and adds access control attributes. 
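+
+        (Records are first filtered by privilege, then each surviving record is
+        annotated with _hideView, _hideEdit and _hideDelete flags for the frontend.)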
Args: - table: Name of the table + model_class: Pydantic model class for the table recordset: Recordset to filter based on access rules Returns: Filtered recordset with access control attributes """ filtered_records = [] + table_name = model_class.__name__ # Only SYSADMIN can see mandates - if table == "mandates": + if table_name == "Mandate": if self.privilege == UserPrivilege.SYSADMIN: filtered_records = recordset else: filtered_records = [] # Special handling for users table - elif table == "users": + elif table_name == "UserInDB": if self.privilege == UserPrivilege.SYSADMIN: # SysAdmin sees all users filtered_records = recordset @@ -61,13 +62,13 @@ class AppAccess: # Regular users only see themselves filtered_records = [r for r in recordset if r.get("id") == self.userId] # Special handling for connections table - elif table == "connections": + elif table_name == "UserConnection": if self.privilege == UserPrivilege.SYSADMIN: # SysAdmin sees all connections filtered_records = recordset elif self.privilege == UserPrivilege.ADMIN: # Admin sees connections for users in their mandate - users: List[Dict[str, Any]] = self.db.getRecordset("users", recordFilter={"mandateId": self.mandateId}) + users: List[Dict[str, Any]] = self.db.getRecordset(UserInDB, recordFilter={"mandateId": self.mandateId}) user_ids: List[str] = [str(u["id"]) for u in users] filtered_records = [r for r in recordset if r.get("userId") in user_ids] else: @@ -89,11 +90,11 @@ class AppAccess: record_id = record.get("id") # Set access control flags based on user permissions - if table == "mandates": + if table_name == "Mandate": record["_hideView"] = False # SYSADMIN can view - record["_hideEdit"] = not self.canModify("mandates", record_id) - record["_hideDelete"] = not self.canModify("mandates", record_id) - elif table == "users": + record["_hideEdit"] = not self.canModify(Mandate, record_id) + record["_hideDelete"] = not self.canModify(Mandate, record_id) + elif table_name == "UserInDB": record["_hideView"] = False # Everyone can view users they have access to # SysAdmin can edit/delete any user if self.privilege == UserPrivilege.SYSADMIN: @@ -107,7 +108,7 @@ class AppAccess: else: record["_hideEdit"] = record.get("id") != self.userId record["_hideDelete"] = True # Regular users cannot delete users - elif table == "connections": + elif table_name == "UserConnection": # Everyone can view connections they have access to record["_hideView"] = False # SysAdmin can edit/delete any connection @@ -116,7 +117,7 @@ class AppAccess: record["_hideDelete"] = False # Admin can edit/delete connections for users in their mandate elif self.privilege == UserPrivilege.ADMIN: - users: List[Dict[str, Any]] = self.db.getRecordset("users", recordFilter={"mandateId": self.mandateId}) + users: List[Dict[str, Any]] = self.db.getRecordset(UserInDB, recordFilter={"mandateId": self.mandateId}) user_ids: List[str] = [str(u["id"]) for u in users] record["_hideEdit"] = record.get("userId") not in user_ids record["_hideDelete"] = record.get("userId") not in user_ids @@ -125,35 +126,37 @@ class AppAccess: record["_hideEdit"] = record.get("userId") != self.userId record["_hideDelete"] = record.get("userId") != self.userId - elif table == "auth_events": + elif table_name == "AuthEvent": # Only show auth events for the current user or if admin if self.privilege in [UserPrivilege.SYSADMIN, UserPrivilege.ADMIN]: record["_hideView"] = False else: record["_hideView"] = record.get("userId") != self.userId record["_hideEdit"] = True # Auth events can't be edited - 
record["_hideDelete"] = not self.canModify("auth_events", record_id) + record["_hideDelete"] = not self.canModify(AuthEvent, record_id) else: # Default access control for other tables record["_hideView"] = False - record["_hideEdit"] = not self.canModify(table, record_id) - record["_hideDelete"] = not self.canModify(table, record_id) + record["_hideEdit"] = not self.canModify(model_class, record_id) + record["_hideDelete"] = not self.canModify(model_class, record_id) return filtered_records - def canModify(self, table: str, recordId: Optional[str] = None) -> bool: + def canModify(self, model_class: type, recordId: Optional[str] = None) -> bool: """ Checks if the current user can modify (create/update/delete) records in a table. Args: - table: Name of the table + model_class: Pydantic model class for the table recordId: Optional record ID for specific record check Returns: Boolean indicating permission """ + table_name = model_class.__name__ + # For mandates, only SYSADMIN can modify - if table == "mandates": + if table_name == "Mandate": return self.privilege == UserPrivilege.SYSADMIN # System admins can modify anything else @@ -163,17 +166,17 @@ class AppAccess: # Check specific record permissions if recordId is not None: # Get the record to check ownership - records: List[Dict[str, Any]] = self.db.getRecordset(table, recordFilter={"id": str(recordId)}) + records: List[Dict[str, Any]] = self.db.getRecordset(model_class, recordFilter={"id": str(recordId)}) if not records: return False record = records[0] # Special handling for connections - if table == "connections": + if table_name == "UserConnection": # Admin can modify connections for users in their mandate if self.privilege == UserPrivilege.ADMIN: - users: List[Dict[str, Any]] = self.db.getRecordset("users", recordFilter={"mandateId": self.mandateId}) + users: List[Dict[str, Any]] = self.db.getRecordset(UserInDB, recordFilter={"mandateId": self.mandateId}) user_ids: List[str] = [str(u["id"]) for u in users] return record.get("userId") in user_ids # Users can only modify their own connections diff --git a/modules/interfaces/interfaceAppModel.py b/modules/interfaces/interfaceAppModel.py index ec95aaf5..73d8d146 100644 --- a/modules/interfaces/interfaceAppModel.py +++ b/modules/interfaces/interfaceAppModel.py @@ -353,4 +353,93 @@ class GoogleToken(Token): class MsftToken(Token): """Microsoft OAuth token model""" pass + +class AuthEvent(BaseModel, ModelMixin): + """Data model for authentication events""" + id: str = Field( + default_factory=lambda: str(uuid.uuid4()), + description="Unique ID of the auth event", + frontend_type="text", + frontend_readonly=True, + frontend_required=False + ) + userId: str = Field( + description="ID of the user this event belongs to", + frontend_type="text", + frontend_readonly=True, + frontend_required=True + ) + eventType: str = Field( + description="Type of authentication event (e.g., 'login', 'logout', 'token_refresh')", + frontend_type="text", + frontend_readonly=True, + frontend_required=True + ) + timestamp: float = Field( + default_factory=get_utc_timestamp, + description="Unix timestamp when the event occurred", + frontend_type="datetime", + frontend_readonly=True, + frontend_required=True + ) + ipAddress: Optional[str] = Field( + default=None, + description="IP address from which the event originated", + frontend_type="text", + frontend_readonly=True, + frontend_required=False + ) + userAgent: Optional[str] = Field( + default=None, + description="User agent string from the request", + 
frontend_type="text", + frontend_readonly=True, + frontend_required=False + ) + success: bool = Field( + default=True, + description="Whether the authentication event was successful", + frontend_type="boolean", + frontend_readonly=True, + frontend_required=True + ) + details: Optional[str] = Field( + default=None, + description="Additional details about the event", + frontend_type="text", + frontend_readonly=True, + frontend_required=False + ) + +# Register labels for AuthEvent +register_model_labels( + "AuthEvent", + {"en": "Authentication Event", "fr": "Événement d'authentification"}, + { + "id": {"en": "ID", "fr": "ID"}, + "userId": {"en": "User ID", "fr": "ID utilisateur"}, + "eventType": {"en": "Event Type", "fr": "Type d'événement"}, + "timestamp": {"en": "Timestamp", "fr": "Horodatage"}, + "ipAddress": {"en": "IP Address", "fr": "Adresse IP"}, + "userAgent": {"en": "User Agent", "fr": "Agent utilisateur"}, + "success": {"en": "Success", "fr": "Succès"}, + "details": {"en": "Details", "fr": "Détails"} + } +) + +class SystemTable(BaseModel, ModelMixin): + """Data model for system table entries""" + table_name: str = Field( + description="Name of the table", + frontend_type="text", + frontend_readonly=True, + frontend_required=True + ) + initial_id: Optional[str] = Field( + default=None, + description="Initial ID for the table", + frontend_type="text", + frontend_readonly=True, + frontend_required=False + ) \ No newline at end of file diff --git a/modules/interfaces/interfaceAppObjects.py b/modules/interfaces/interfaceAppObjects.py index e9683158..c71e0c03 100644 --- a/modules/interfaces/interfaceAppObjects.py +++ b/modules/interfaces/interfaceAppObjects.py @@ -12,14 +12,14 @@ import json from passlib.context import CryptContext import uuid -from modules.connectors.connectorDbJson import DatabaseConnector +from modules.connectors.connectorDbPostgre import DatabaseConnector from modules.shared.configuration import APP_CONFIG from modules.shared.timezoneUtils import get_utc_now, get_utc_timestamp from modules.interfaces.interfaceAppAccess import AppAccess from modules.interfaces.interfaceAppModel import ( User, Mandate, UserInDB, UserConnection, AuthAuthority, UserPrivilege, - ConnectionStatus, Token + ConnectionStatus, Token, AuthEvent ) logger = logging.getLogger(__name__) @@ -79,26 +79,38 @@ class AppObjects: # Update database context self.db.updateContext(self.userId) + def __del__(self): + """Cleanup method to close database connection.""" + if hasattr(self, 'db') and self.db is not None: + try: + self.db.close() + except Exception as e: + logger.error(f"Error closing database connection: {e}") + def _initializeDatabase(self): - """Initializes the database connection.""" + """Initializes the database connection directly.""" try: # Get configuration values with defaults dbHost = APP_CONFIG.get("DB_APP_HOST", "_no_config_default_data") dbDatabase = APP_CONFIG.get("DB_APP_DATABASE", "app") dbUser = APP_CONFIG.get("DB_APP_USER") dbPassword = APP_CONFIG.get("DB_APP_PASSWORD_SECRET") + dbPort = int(APP_CONFIG.get("DB_APP_PORT", 5432)) - # Ensure the database directory exists - os.makedirs(dbHost, exist_ok=True) - + # Create database connector directly self.db = DatabaseConnector( dbHost=dbHost, dbDatabase=dbDatabase, dbUser=dbUser, - dbPassword=dbPassword + dbPassword=dbPassword, + dbPort=dbPort, + userId=self.userId ) - logger.info("Database initialized successfully") + # Initialize database system + self.db.initDbSystem() + + logger.info(f"Database initialized successfully for user 
{self.userId}") except Exception as e: logger.error(f"Failed to initialize database: {str(e)}") raise @@ -110,8 +122,8 @@ class AppObjects: def _initRootMandate(self): """Creates the Root mandate if it doesn't exist.""" - existingMandateId = self.getInitialId("mandates") - mandates = self.db.getRecordset("mandates") + existingMandateId = self.getInitialId(Mandate) + mandates = self.db.getRecordset(Mandate) if existingMandateId is None or not mandates: logger.info("Creating Root mandate") rootMandate = Mandate( @@ -119,23 +131,20 @@ class AppObjects: language="en", enabled=True ) - createdMandate = self.db.recordCreate("mandates", rootMandate.to_dict()) + createdMandate = self.db.recordCreate(Mandate, rootMandate) logger.info(f"Root mandate created with ID {createdMandate['id']}") - # Register the initial ID - self.db._registerInitialId("mandates", createdMandate['id']) - # Update mandate context self.mandateId = createdMandate['id'] def _initAdminUser(self): """Creates the Admin user if it doesn't exist.""" - existingUserId = self.getInitialId("users") - users = self.db.getRecordset("users") + existingUserId = self.getInitialId(UserInDB) + users = self.db.getRecordset(UserInDB) if existingUserId is None or not users: logger.info("Creating Admin user") adminUser = UserInDB( - mandateId=self.getInitialId("mandates"), + mandateId=self.getInitialId(Mandate), username="admin", email="admin@example.com", fullName="Administrator", @@ -146,30 +155,27 @@ class AppObjects: hashedPassword=self._getPasswordHash("The 1st Poweron Admin"), # Use a secure password in production! connections=[] ) - createdUser = self.db.recordCreate("users", adminUser.to_dict()) + createdUser = self.db.recordCreate(UserInDB, adminUser) logger.info(f"Admin user created with ID {createdUser['id']}") - # Register the initial ID - self.db._registerInitialId("users", createdUser['id']) - # Update user context self.currentUser = createdUser self.userId = createdUser.get("id") - def _uam(self, table: str, recordset: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + def _uam(self, model_class: type, recordset: List[Dict[str, Any]]) -> List[Dict[str, Any]]: """ Unified user access management function that filters data based on user privileges and adds access control attributes. Args: - table: Name of the table + model_class: Pydantic model class for the table recordset: Recordset to filter based on access rules Returns: Filtered recordset with access control attributes """ # First apply access control - filteredRecords = self.access.uam(table, recordset) + filteredRecords = self.access.uam(model_class, recordset) # Then filter out database-specific fields cleanedRecords = [] @@ -180,26 +186,23 @@ class AppObjects: return cleanedRecords - def _canModify(self, table: str, recordId: Optional[str] = None) -> bool: + def _canModify(self, model_class: type, recordId: Optional[str] = None) -> bool: """ Checks if the current user can modify (create/update/delete) records in a table. 
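+
+        (Thin wrapper: delegates to AppAccess.canModify with the model class.)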
Args: - table: Name of the table + model_class: Pydantic model class for the table recordId: Optional record ID for specific record check Returns: Boolean indicating permission """ - return self.access.canModify(table, recordId) + return self.access.canModify(model_class, recordId) - def _clearTableCache(self, table: str) -> None: - """Clears the cache for a specific table to ensure fresh data.""" - self.db.clearTableCache(table) - def getInitialId(self, table: str) -> Optional[str]: + def getInitialId(self, model_class: type) -> Optional[str]: """Returns the initial ID for a table.""" - return self.db.getInitialId(table) + return self.db.getInitialId(model_class) def _getPasswordHash(self, password: str) -> str: """Creates a hash for a password.""" @@ -214,8 +217,8 @@ class AppObjects: def getUsersByMandate(self, mandateId: str) -> List[User]: """Returns users for a specific mandate if user has access.""" # Get users for this mandate - users = self.db.getRecordset("users", recordFilter={"mandateId": mandateId}) - filteredUsers = self._uam("users", users) + users = self.db.getRecordset(UserInDB, recordFilter={"mandateId": mandateId}) + filteredUsers = self._uam(UserInDB, users) # Convert to User models return [User.from_dict(user) for user in filteredUsers] @@ -224,7 +227,7 @@ class AppObjects: """Returns a user by username.""" try: # Get users table - users = self.db.getRecordset("users") + users = self.db.getRecordset(UserInDB) if not users: return None @@ -244,7 +247,7 @@ class AppObjects: """Returns a user by ID if user has access.""" try: # Get all users - users = self.db.getRecordset("users") + users = self.db.getRecordset(UserInDB) if not users: return None @@ -252,7 +255,7 @@ class AppObjects: for user_dict in users: if user_dict.get("id") == userId: # Apply access control - filteredUsers = self._uam("users", [user_dict]) + filteredUsers = self._uam(UserInDB, [user_dict]) if filteredUsers: return User.from_dict(filteredUsers[0]) return None @@ -267,7 +270,7 @@ class AppObjects: """Returns all connections for a user.""" try: # Get connections for this user - connections = self.db.getRecordset("connections", recordFilter={"userId": userId}) + connections = self.db.getRecordset(UserConnection, recordFilter={"userId": userId}) # Convert to UserConnection objects result = [] @@ -334,10 +337,8 @@ class AppObjects: ) # Save to connections table - self.db.recordCreate("connections", connection.to_dict()) + self.db.recordCreate(UserConnection, connection) - # Clear cache to ensure fresh data - self._clearTableCache("connections") return connection @@ -349,7 +350,7 @@ class AppObjects: """Remove a connection to an external service""" try: # Get connection - connections = self.db.getRecordset("connections", recordFilter={ + connections = self.db.getRecordset(UserConnection, recordFilter={ "id": connectionId }) @@ -357,10 +358,8 @@ class AppObjects: raise ValueError(f"Connection {connectionId} not found") # Delete connection - self.db.recordDelete("connections", connectionId) + self.db.recordDelete(UserConnection, connectionId) - # Clear cache to ensure fresh data - self._clearTableCache("connections") except Exception as e: logger.error(f"Error removing user connection: {str(e)}") @@ -369,7 +368,6 @@ class AppObjects: def authenticateLocalUser(self, username: str, password: str) -> Optional[User]: """Authenticates a user by username and password using local authentication.""" # Clear the users table from cache and reload it - self._clearTableCache("users") # Get user by username user = 
self.getUserByUsername(username) @@ -386,7 +384,7 @@ class AppObjects: raise ValueError("User does not have local authentication enabled") # Get the full user record with password hash for verification - userRecord = self.db.getRecordset("users", recordFilter={"id": user.id})[0] + userRecord = self.db.getRecordset(UserInDB, recordFilter={"id": user.id})[0] if not userRecord.get("hashedPassword"): raise ValueError("User has no password set") @@ -430,12 +428,10 @@ class AppObjects: ) # Create user record - createdRecord = self.db.recordCreate("users", userData.to_dict()) + createdRecord = self.db.recordCreate(UserInDB, userData) if not createdRecord or not createdRecord.get("id"): raise ValueError("Failed to create user record") - # Clear cache to ensure fresh data - self._clearTableCache("users") # Add external connection if provided if externalId and externalUsername: @@ -448,12 +444,11 @@ class AppObjects: ) # Get created user using the returned ID - createdUser = self.db.getRecordset("users", recordFilter={"id": createdRecord["id"]}) + createdUser = self.db.getRecordset(UserInDB, recordFilter={"id": createdRecord["id"]}) if not createdUser or len(createdUser) == 0: raise ValueError("Failed to retrieve created user") # Clear cache to ensure fresh data (already done above) - # No need for additional cache clearing since _clearTableCache("users") was called return User.from_dict(createdUser[0]) @@ -478,10 +473,8 @@ class AppObjects: updatedUser = User.from_dict(updatedData) # Update user record - self.db.recordModify("users", userId, updatedUser.to_dict()) + self.db.recordModify(UserInDB, userId, updatedUser) - # Clear cache to ensure fresh data - self._clearTableCache("users") # Get updated user updatedUser = self.getUser(userId) @@ -508,20 +501,20 @@ class AppObjects: # Delete user auth events - events = self.db.getRecordset("auth_events", recordFilter={"userId": userId}) + events = self.db.getRecordset(AuthEvent, recordFilter={"userId": userId}) for event in events: - self.db.recordDelete("auth_events", event["id"]) + self.db.recordDelete(AuthEvent, event["id"]) # Delete user tokens - tokens = self.db.getRecordset("tokens", recordFilter={"userId": userId}) + tokens = self.db.getRecordset(Token, recordFilter={"userId": userId}) for token in tokens: - self.db.recordDelete("tokens", token["id"]) + self.db.recordDelete(Token, token["id"]) # Delete user connections - connections = self.db.getRecordset("connections", recordFilter={"userId": userId}) + connections = self.db.getRecordset(UserConnection, recordFilter={"userId": userId}) for conn in connections: - self.db.recordDelete("connections", conn["id"]) + self.db.recordDelete(UserConnection, conn["id"]) logger.info(f"All referenced data for user {userId} has been deleted") @@ -537,19 +530,17 @@ class AppObjects: if not user: raise ValueError(f"User {userId} not found") - if not self._canModify("users", userId): + if not self._canModify(UserInDB, userId): raise PermissionError(f"No permission to delete user {userId}") # Delete all referenced data first self._deleteUserReferencedData(userId) # Delete user record - success = self.db.recordDelete("users", userId) + success = self.db.recordDelete(UserInDB, userId) if not success: raise ValueError(f"Failed to delete user {userId}") - # Clear cache to ensure fresh data - self._clearTableCache("users") logger.info(f"User {userId} successfully deleted") return True @@ -562,17 +553,17 @@ class AppObjects: def getAllMandates(self) -> List[Mandate]: """Returns all mandates based on user access 
level.""" - allMandates = self.db.getRecordset("mandates") - filteredMandates = self._uam("mandates", allMandates) + allMandates = self.db.getRecordset(Mandate) + filteredMandates = self._uam(Mandate, allMandates) return [Mandate.from_dict(mandate) for mandate in filteredMandates] def getMandate(self, mandateId: str) -> Optional[Mandate]: """Returns a mandate by ID if user has access.""" - mandates = self.db.getRecordset("mandates", recordFilter={"id": mandateId}) + mandates = self.db.getRecordset(Mandate, recordFilter={"id": mandateId}) if not mandates: return None - filteredMandates = self._uam("mandates", mandates) + filteredMandates = self._uam(Mandate, mandates) if not filteredMandates: return None @@ -580,7 +571,7 @@ class AppObjects: def createMandate(self, name: str, language: str = "en") -> Mandate: """Creates a new mandate if user has permission.""" - if not self._canModify("mandates"): + if not self._canModify(Mandate): raise PermissionError("No permission to create mandates") # Create mandate data using model @@ -590,12 +581,10 @@ class AppObjects: ) # Create mandate record - createdRecord = self.db.recordCreate("mandates", mandateData.to_dict()) + createdRecord = self.db.recordCreate(Mandate, mandateData) if not createdRecord or not createdRecord.get("id"): raise ValueError("Failed to create mandate record") - # Clear cache to ensure fresh data - self._clearTableCache("mandates") return Mandate.from_dict(createdRecord) @@ -603,7 +592,7 @@ class AppObjects: """Updates a mandate if user has access.""" try: # First check if user has permission to modify mandates - if not self._canModify("mandates", mandateId): + if not self._canModify(Mandate, mandateId): raise PermissionError(f"No permission to update mandate {mandateId}") # Get mandate with access control @@ -617,10 +606,9 @@ class AppObjects: updatedMandate = Mandate.from_dict(updatedData) # Update mandate record - self.db.recordModify("mandates", mandateId, updatedMandate.to_dict()) + self.db.recordModify(Mandate, mandateId, updatedMandate) # Clear cache to ensure fresh data - self._clearTableCache("mandates") # Get updated mandate updatedMandate = self.getMandate(mandateId) @@ -641,7 +629,7 @@ class AppObjects: if not mandate: return False - if not self._canModify("mandates", mandateId): + if not self._canModify(Mandate, mandateId): raise PermissionError(f"No permission to delete mandate {mandateId}") # Check if mandate has users @@ -650,10 +638,9 @@ class AppObjects: raise ValueError(f"Cannot delete mandate {mandateId} with existing users") # Delete mandate - success = self.db.recordDelete("mandates", mandateId) + success = self.db.recordDelete(Mandate, mandateId) # Clear cache to ensure fresh data - self._clearTableCache("mandates") return success @@ -664,11 +651,11 @@ class AppObjects: def _getInitialUser(self) -> Optional[Dict[str, Any]]: """Get the initial user record directly from database without access control.""" try: - initialUserId = self.db.getInitialId("users") + initialUserId = self.getInitialId(UserInDB) if not initialUserId: return None - users = self.db.getRecordset("users", recordFilter={"id": initialUserId}) + users = self.db.getRecordset(UserInDB, recordFilter={"id": initialUserId}) return users[0] if users else None except Exception as e: logger.error(f"Error getting initial user: {str(e)}") @@ -731,7 +718,7 @@ class AppObjects: # If replace_existing is True, delete old access tokens for this user and authority first if replace_existing: try: - old_tokens = self.db.getRecordset("tokens", recordFilter={ + 
old_tokens = self.db.getRecordset(Token, recordFilter={ "userId": self.currentUser.id, "authority": token.authority, "connectionId": None # Ensure we only delete access tokens @@ -739,9 +726,8 @@ class AppObjects: deleted_count = 0 for old_token in old_tokens: if old_token["id"] != token.id: # Don't delete the new token if it already exists - self.db.recordDelete("tokens", old_token["id"]) + self.db.recordDelete(Token, old_token["id"]) deleted_count += 1 - logger.debug(f"Deleted old access token {old_token['id']} for user {self.currentUser.id} and authority {token.authority}") if deleted_count > 0: logger.info(f"Replaced {deleted_count} old access tokens for user {self.currentUser.id} and authority {token.authority}") @@ -756,10 +742,8 @@ class AppObjects: token_dict["userId"] = self.currentUser.id # Save to database - self.db.recordCreate("tokens", token_dict) + self.db.recordCreate(Token, token_dict) - # Clear cache to ensure fresh data - self._clearTableCache("tokens") except Exception as e: logger.error(f"Error saving access token: {str(e)}") @@ -788,15 +772,14 @@ class AppObjects: # If replace_existing is True, delete old tokens for this connectionId first if replace_existing: try: - old_tokens = self.db.getRecordset("tokens", recordFilter={ + old_tokens = self.db.getRecordset(Token, recordFilter={ "connectionId": token.connectionId }) deleted_count = 0 for old_token in old_tokens: if old_token["id"] != token.id: # Don't delete the new token if it already exists - self.db.recordDelete("tokens", old_token["id"]) + self.db.recordDelete(Token, old_token["id"]) deleted_count += 1 - logger.debug(f"Deleted old token {old_token['id']} for connectionId {token.connectionId}") if deleted_count > 0: logger.info(f"Replaced {deleted_count} old tokens for connectionId {token.connectionId}") @@ -811,10 +794,8 @@ class AppObjects: token_dict["userId"] = self.currentUser.id # Save to database - self.db.recordCreate("tokens", token_dict) + self.db.recordCreate(Token, token_dict) - # Clear cache to ensure fresh data - self._clearTableCache("tokens") except Exception as e: logger.error(f"Error saving connection token: {str(e)}") @@ -828,7 +809,7 @@ class AppObjects: raise ValueError("No valid user context available for token retrieval") # Get access tokens for this user and authority (must NOT have connectionId) - tokens = self.db.getRecordset("tokens", recordFilter={ + tokens = self.db.getRecordset(Token, recordFilter={ "userId": self.currentUser.id, "authority": authority, "connectionId": None # Ensure we only get access tokens @@ -877,21 +858,10 @@ class AppObjects: # Get token for this specific connection # Query for specific connection - tokens = self.db.getRecordset("tokens", recordFilter={ + tokens = self.db.getRecordset(Token, recordFilter={ "connectionId": connectionId }) - # Debug: Log what we found - logger.debug(f"getConnectionToken: Found {len(tokens)} tokens for connectionId {connectionId}") - if tokens: - for i, token in enumerate(tokens): - logger.debug(f"getConnectionToken: Token {i}: id={token.get('id')}, expiresAt={token.get('expiresAt')}, createdAt={token.get('createdAt')}") - else: - # Debug: Check if there are any tokens at all in the database - all_tokens = self.db.getRecordset("tokens", recordFilter={}) - logger.debug(f"getConnectionToken: No tokens found for connectionId {connectionId}. 
Total tokens in database: {len(all_tokens)}") - if all_tokens: - logger.debug(f"getConnectionToken: Sample tokens: {[{'id': t.get('id'), 'connectionId': t.get('connectionId'), 'authority': t.get('authority')} for t in all_tokens[:3]]}") if not tokens: logger.warning(f"No connection token found for connectionId: {connectionId}") @@ -907,25 +877,21 @@ class AppObjects: if latest_token.expiresAt and latest_token.expiresAt < (current_time + thirty_minutes): if auto_refresh: - logger.debug(f"getConnectionToken: Token expires soon, attempting refresh. expiresAt: {latest_token.expiresAt}, current_time: {current_time}") - # Import TokenManager here to avoid circular imports from modules.security.tokenManager import TokenManager token_manager = TokenManager() # Try to refresh the token - logger.debug(f"getConnectionToken: Calling token_manager.refresh_token for token {latest_token.id}") refreshed_token = token_manager.refresh_token(latest_token) if refreshed_token: - logger.debug(f"getConnectionToken: Token refresh successful, saving new token {refreshed_token.id}") # Save the new token (which will automatically replace old ones) self.saveConnectionToken(refreshed_token) logger.info(f"Proactively refreshed connection token for connectionId {connectionId} (expired in {latest_token.expiresAt - current_time} seconds)") return refreshed_token else: - logger.warning(f"getConnectionToken: Token refresh failed for connectionId {connectionId}") + logger.warning(f"Token refresh failed for connectionId {connectionId}") return None else: logger.warning(f"Connection token for connectionId {connectionId} expires soon (expiresAt: {latest_token.expiresAt})") @@ -945,7 +911,7 @@ class AppObjects: raise ValueError("No valid user context available for token deletion") # Get access tokens to delete (must NOT have connectionId) - tokens = self.db.getRecordset("tokens", recordFilter={ + tokens = self.db.getRecordset(Token, recordFilter={ "userId": self.currentUser.id, "authority": authority, "connectionId": None # Ensure we only delete access tokens @@ -953,10 +919,8 @@ class AppObjects: # Delete each token for token in tokens: - self.db.recordDelete("tokens", token["id"]) + self.db.recordDelete(Token, token["id"]) - # Clear cache to ensure fresh data - self._clearTableCache("tokens") except Exception as e: logger.error(f"Error deleting access token: {str(e)}") @@ -970,16 +934,14 @@ class AppObjects: raise ValueError("connectionId is required for deleteConnectionTokenByConnectionId") # Get connection tokens to delete - tokens = self.db.getRecordset("tokens", recordFilter={ + tokens = self.db.getRecordset(Token, recordFilter={ "connectionId": connectionId }) # Delete each token for token in tokens: - self.db.recordDelete("tokens", token["id"]) + self.db.recordDelete(Token, token["id"]) - # Clear cache to ensure fresh data - self._clearTableCache("tokens") except Exception as e: logger.error(f"Error deleting connection token for connectionId {connectionId}: {str(e)}") @@ -994,17 +956,16 @@ class AppObjects: cleaned_count = 0 # Get all tokens - all_tokens = self.db.getRecordset("tokens", recordFilter={}) + all_tokens = self.db.getRecordset(Token, recordFilter={}) for token_data in all_tokens: if token_data.get("expiresAt") and token_data.get("expiresAt") < current_time: # Token is expired, delete it - self.db.recordDelete("tokens", token_data["id"]) + self.db.recordDelete(Token, token_data["id"]) cleaned_count += 1 # Clear cache to ensure fresh data if cleaned_count > 0: - self._clearTableCache("tokens") 
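+            # Review note: the PostgreSQL connector reads straight from the database on
+            # every query, so the JSON connector's _clearTableCache invalidation step is
+            # intentionally dropped here rather than ported.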
logger.info(f"Cleaned up {cleaned_count} expired tokens") return cleaned_count @@ -1061,16 +1022,19 @@ def getRootUser() -> User: tempInterface = AppObjects() # Get the initial user directly - initialUserId = tempInterface.db.getInitialId("users") + initialUserId = tempInterface.getInitialId(UserInDB) if not initialUserId: raise ValueError("No initial user ID found in database") - users = tempInterface.db.getRecordset("users", recordFilter={"id": initialUserId}) + users = tempInterface.db.getRecordset(UserInDB, recordFilter={"id": initialUserId}) if not users: raise ValueError("Initial user not found in database") - + + # Convert to User model and return the model instance - return User.from_dict(users[0]) + user_data = users[0] + + return User.parse_obj(user_data) except Exception as e: logger.error(f"Error getting root user: {str(e)}") diff --git a/modules/interfaces/interfaceChatAccess.py b/modules/interfaces/interfaceChatAccess.py index 22961874..0b4055dc 100644 --- a/modules/interfaces/interfaceChatAccess.py +++ b/modules/interfaces/interfaceChatAccess.py @@ -5,6 +5,7 @@ Handles user access management and permission checks. from typing import Dict, Any, List, Optional from modules.interfaces.interfaceAppModel import User, UserPrivilege +from modules.interfaces.interfaceChatModel import ChatWorkflow, ChatMessage, ChatLog, ChatStat, ChatDocument class ChatAccess: """ @@ -23,19 +24,20 @@ class ChatAccess: self.db = db - def uam(self, table: str, recordset: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + def uam(self, model_class: type, recordset: List[Dict[str, Any]]) -> List[Dict[str, Any]]: """ Unified user access management function that filters data based on user privileges and adds access control attributes. Args: - table: Name of the table + model_class: Pydantic model class for the table recordset: Recordset to filter based on access rules Returns: Filtered recordset with access control attributes """ userPrivilege = self.currentUser.privilege + table_name = model_class.__name__ filtered_records = [] # Apply filtering based on privilege @@ -54,32 +56,32 @@ class ChatAccess: record_id = record.get("id") # Set access control flags based on user permissions - if table == "workflows": + if table_name == "ChatWorkflow": record["_hideView"] = False # Everyone can view - record["_hideEdit"] = not self.canModify("workflows", record_id) - record["_hideDelete"] = not self.canModify("workflows", record_id) - elif table == "workflowMessages": + record["_hideEdit"] = not self.canModify(ChatWorkflow, record_id) + record["_hideDelete"] = not self.canModify(ChatWorkflow, record_id) + elif table_name == "ChatMessage": record["_hideView"] = False # Everyone can view - record["_hideEdit"] = not self.canModify("workflows", record.get("workflowId")) - record["_hideDelete"] = not self.canModify("workflows", record.get("workflowId")) - elif table == "workflowLogs": + record["_hideEdit"] = not self.canModify(ChatWorkflow, record.get("workflowId")) + record["_hideDelete"] = not self.canModify(ChatWorkflow, record.get("workflowId")) + elif table_name == "ChatLog": record["_hideView"] = False # Everyone can view - record["_hideEdit"] = not self.canModify("workflows", record.get("workflowId")) - record["_hideDelete"] = not self.canModify("workflows", record.get("workflowId")) + record["_hideEdit"] = not self.canModify(ChatWorkflow, record.get("workflowId")) + record["_hideDelete"] = not self.canModify(ChatWorkflow, record.get("workflowId")) else: # Default access control for other tables record["_hideView"] 
diff --git a/modules/interfaces/interfaceChatAccess.py b/modules/interfaces/interfaceChatAccess.py
index 22961874..0b4055dc 100644
--- a/modules/interfaces/interfaceChatAccess.py
+++ b/modules/interfaces/interfaceChatAccess.py
@@ -5,6 +5,7 @@ Handles user access management and permission checks.
 from typing import Dict, Any, List, Optional
 
 from modules.interfaces.interfaceAppModel import User, UserPrivilege
+from modules.interfaces.interfaceChatModel import ChatWorkflow, ChatMessage, ChatLog, ChatStat, ChatDocument
 
 class ChatAccess:
     """
@@ -23,19 +24,20 @@ class ChatAccess:
         self.db = db
 
-    def uam(self, table: str, recordset: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    def uam(self, model_class: type, recordset: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
         """
         Unified user access management function that filters data based on
         user privileges and adds access control attributes.
 
         Args:
-            table: Name of the table
+            model_class: Pydantic model class for the table
             recordset: Recordset to filter based on access rules
 
         Returns:
             Filtered recordset with access control attributes
         """
         userPrivilege = self.currentUser.privilege
+        table_name = model_class.__name__
         filtered_records = []
 
         # Apply filtering based on privilege
@@ -54,32 +56,32 @@ class ChatAccess:
             record_id = record.get("id")
 
             # Set access control flags based on user permissions
-            if table == "workflows":
+            if table_name == "ChatWorkflow":
                 record["_hideView"] = False  # Everyone can view
-                record["_hideEdit"] = not self.canModify("workflows", record_id)
-                record["_hideDelete"] = not self.canModify("workflows", record_id)
+                record["_hideEdit"] = not self.canModify(ChatWorkflow, record_id)
+                record["_hideDelete"] = not self.canModify(ChatWorkflow, record_id)
-            elif table == "workflowMessages":
+            elif table_name == "ChatMessage":
                 record["_hideView"] = False  # Everyone can view
-                record["_hideEdit"] = not self.canModify("workflows", record.get("workflowId"))
-                record["_hideDelete"] = not self.canModify("workflows", record.get("workflowId"))
+                record["_hideEdit"] = not self.canModify(ChatWorkflow, record.get("workflowId"))
+                record["_hideDelete"] = not self.canModify(ChatWorkflow, record.get("workflowId"))
-            elif table == "workflowLogs":
+            elif table_name == "ChatLog":
                 record["_hideView"] = False  # Everyone can view
-                record["_hideEdit"] = not self.canModify("workflows", record.get("workflowId"))
-                record["_hideDelete"] = not self.canModify("workflows", record.get("workflowId"))
+                record["_hideEdit"] = not self.canModify(ChatWorkflow, record.get("workflowId"))
+                record["_hideDelete"] = not self.canModify(ChatWorkflow, record.get("workflowId"))
             else:
                 # Default access control for other tables
                 record["_hideView"] = False
-                record["_hideEdit"] = not self.canModify(table, record_id)
-                record["_hideDelete"] = not self.canModify(table, record_id)
+                record["_hideEdit"] = not self.canModify(model_class, record_id)
+                record["_hideDelete"] = not self.canModify(model_class, record_id)
 
         return filtered_records
 
-    def canModify(self, table: str, recordId: Optional[str] = None) -> bool:
+    def canModify(self, model_class: type, recordId: Optional[str] = None) -> bool:
         """
         Checks if the current user can modify (create/update/delete) records in a table.
 
         Args:
-            table: Name of the table
+            model_class: Pydantic model class for the table
             recordId: Optional record ID for specific record check
 
         Returns:
@@ -94,7 +96,7 @@ class ChatAccess:
         # For regular users and admins, check specific cases
         if recordId is not None:
             # Get the record to check ownership
-            records: List[Dict[str, Any]] = self.db.getRecordset(table, recordFilter={"id": recordId})
+            records: List[Dict[str, Any]] = self.db.getRecordset(model_class, recordFilter={"id": recordId})
 
             if not records:
                 return False
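After this change, access dispatch keys on the model class's __name__ instead of a table-name string. A hypothetical call illustrating the resulting flags (record values and the constructor call are invented for illustration):

    access = ChatAccess(db, currentUser)  # currentUser: a regular, non-root user
    rows = access.uam(ChatWorkflow, [{"id": "wf-1", "mandateId": "m-1"}])
    # rows[0]["_hideView"]   -> False: everyone can view
    # rows[0]["_hideEdit"]   -> True unless canModify(ChatWorkflow, "wf-1")
    # rows[0]["_hideDelete"] -> mirrors the _hideEdit check
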
diff --git a/modules/interfaces/interfaceChatModel.py b/modules/interfaces/interfaceChatModel.py
index 629b59ee..ed71963a 100644
--- a/modules/interfaces/interfaceChatModel.py
+++ b/modules/interfaces/interfaceChatModel.py
@@ -174,6 +174,7 @@
 class ChatDocument(BaseModel, ModelMixin):
     """Data model for a chat document"""
     id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Primary key")
+    messageId: str = Field(description="Foreign key to message")
     fileId: str = Field(description="Foreign key to file")
 
     # Direct file attributes (copied from file object)
@@ -197,7 +198,11 @@
 register_model_labels(
     {"en": "Chat Document", "fr": "Document de chat"},
     {
         "id": {"en": "ID", "fr": "ID"},
+        "messageId": {"en": "Message ID", "fr": "ID du message"},
         "fileId": {"en": "File ID", "fr": "ID du fichier"},
+        "fileName": {"en": "File Name", "fr": "Nom du fichier"},
+        "fileSize": {"en": "File Size", "fr": "Taille du fichier"},
+        "mimeType": {"en": "MIME Type", "fr": "Type MIME"},
         "roundNumber": {"en": "Round Number", "fr": "Numéro de tour"},
         "taskNumber": {"en": "Task Number", "fr": "Numéro de tâche"},
         "actionNumber": {"en": "Action Number", "fr": "Numéro d'action"},
@@ -400,6 +405,8 @@
 class ChatStat(BaseModel, ModelMixin):
     """Data model for chat statistics - ONLY statistics, not workflow progress"""
     id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Primary key")
+    workflowId: Optional[str] = Field(None, description="Foreign key to workflow (for workflow stats)")
+    messageId: Optional[str] = Field(None, description="Foreign key to message (for message stats)")
     processingTime: Optional[float] = Field(None, description="Processing time in seconds")
     tokenCount: Optional[int] = Field(None, description="Number of tokens processed")
     bytesSent: Optional[int] = Field(None, description="Number of bytes sent")
@@ -413,6 +420,8 @@
 register_model_labels(
     {"en": "Chat Statistics", "fr": "Statistiques de chat"},
     {
         "id": {"en": "ID", "fr": "ID"},
+        "workflowId": {"en": "Workflow ID", "fr": "ID du workflow"},
+        "messageId": {"en": "Message ID", "fr": "ID du message"},
         "processingTime": {"en": "Processing Time", "fr": "Temps de traitement"},
         "tokenCount": {"en": "Token Count", "fr": "Nombre de tokens"},
         "bytesSent": {"en": "Bytes Sent", "fr": "Octets envoyés"},
@@ -650,8 +659,8 @@
 class TaskStep(BaseModel, ModelMixin):
     id: str
     objective: str
-    dependencies: Optional[list[str]] = []
-    success_criteria: Optional[list[str]] = []
+    dependencies: Optional[list[str]] = Field(default_factory=list)
+    success_criteria: Optional[list[str]] = Field(default_factory=list)
     estimated_complexity: Optional[str] = None
     userMessage: Optional[str] = Field(None, description="User-friendly message in user's language")
 
@@ -733,23 +742,23 @@ class TaskContext(BaseModel, ModelMixin):
 
     # Available resources
     available_documents: Optional[str] = "No documents available"
-    available_connections: Optional[list[str]] = []
+    available_connections: Optional[list[str]] = Field(default_factory=list)
 
     # Previous execution state
-    previous_results: Optional[list[str]] = []
+    previous_results: Optional[list[str]] = Field(default_factory=list)
     previous_handover: Optional[TaskHandover] = None
 
     # Current execution state
-    improvements: Optional[list[str]] = []
+    improvements: Optional[list[str]] = Field(default_factory=list)
     retry_count: Optional[int] = 0
-    previous_action_results: Optional[list] = []
+    previous_action_results: Optional[list] = Field(default_factory=list)
     previous_review_result: Optional[dict] = None
     is_regeneration: Optional[bool] = False
 
     # Failure analysis
-    failure_patterns: Optional[list[str]] = []
-    failed_actions: Optional[list] = []
-    successful_actions: Optional[list] = []
+    failure_patterns: Optional[list[str]] = Field(default_factory=list)
+    failed_actions: Optional[list] = Field(default_factory=list)
+    successful_actions: Optional[list] = Field(default_factory=list)
 
     # Criteria progress tracking for retries
     criteria_progress: Optional[dict] = None
@@ -771,20 +780,20 @@ class TaskContext(BaseModel, ModelMixin):
 
 class ReviewContext(BaseModel, ModelMixin):
     task_step: TaskStep
-    task_actions: Optional[list] = []
-    action_results: Optional[list] = []
-    step_result: Optional[dict] = {}
+    task_actions: Optional[list] = Field(default_factory=list)
+    action_results: Optional[list] = Field(default_factory=list)
+    step_result: Optional[dict] = Field(default_factory=dict)
     workflow_id: Optional[str] = None
-    previous_results: Optional[list[str]] = []
+    previous_results: Optional[list[str]] = Field(default_factory=list)
 
 class ReviewResult(BaseModel, ModelMixin):
     status: str
     reason: Optional[str] = None
-    improvements: Optional[list[str]] = []
+    improvements: Optional[list[str]] = Field(default_factory=list)
     quality_score: Optional[int] = 5
-    missing_outputs: Optional[list[str]] = []
-    met_criteria: Optional[list[str]] = []
-    unmet_criteria: Optional[list[str]] = []
+    missing_outputs: Optional[list[str]] = Field(default_factory=list)
+    met_criteria: Optional[list[str]] = Field(default_factory=list)
+    unmet_criteria: Optional[list[str]] = Field(default_factory=list)
     confidence: Optional[float] = 0.5
     userMessage: Optional[str] = Field(None, description="User-friendly message in user's language")
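The `= []` / `= {}` defaults are replaced with Field(default_factory=...) throughout. The factory hands every instance its own fresh container; Pydantic v1 already copies mutable defaults per instance, so the change here is chiefly about making the intent explicit and matching standard Python rules. A minimal sketch of the pitfall the pattern guards against in plain Python (shown with a dataclass, where `= []` is outright rejected):

    from dataclasses import dataclass, field

    @dataclass
    class Step:
        # deps: list[str] = []                         # rejected: mutable default
        deps: list[str] = field(default_factory=list)  # one list per instance

    a, b = Step(), Step()
    a.deps.append("x")
    assert b.deps == []  # b is unaffected
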
diff --git a/modules/interfaces/interfaceChatObjects.py b/modules/interfaces/interfaceChatObjects.py
index 95ebb3a5..43ad3f97 100644
--- a/modules/interfaces/interfaceChatObjects.py
+++ b/modules/interfaces/interfaceChatObjects.py
@@ -7,7 +7,7 @@
 import os
 import logging
 import uuid
 from datetime import datetime, UTC, timezone
-from typing import Dict, Any, List, Optional, Union
+from typing import Dict, Any, List, Optional, Union, get_origin, get_args
 
 import asyncio
@@ -18,7 +18,7 @@ from modules.interfaces.interfaceChatModel import (
 from modules.interfaces.interfaceAppModel import User
 
 # DYNAMIC PART: Connectors to the Interface
-from modules.connectors.connectorDbJson import DatabaseConnector
+from modules.connectors.connectorDbPostgre import DatabaseConnector
 from modules.shared.timezoneUtils import get_utc_timestamp
 
 # Basic Configurations
@@ -52,6 +52,55 @@ class ChatObjects:
         if currentUser:
             self.setUserContext(currentUser)
 
+    # ===== Generic Utility Methods =====
+
+    def _is_object_field(self, field_type) -> bool:
+        """Check if a field type represents a complex object (not a simple type)."""
+        # Simple scalar types
+        if field_type in (str, int, float, bool, type(None)):
+            return False
+
+        # Everything else is an object
+        return True
+
+    def _separate_object_fields(self, model_class, data: Dict[str, Any]) -> tuple[Dict[str, Any], Dict[str, Any]]:
+        """Separate simple fields from object fields based on Pydantic model structure."""
+        simple_fields = {}
+        object_fields = {}
+
+        # Get field information from the Pydantic model
+        model_fields = {}
+        if hasattr(model_class, '__fields__'):
+            model_fields = model_class.__fields__
+
+        for field_name, value in data.items():
+            # Check if this field should be stored as JSONB in the database
+            if field_name in model_fields:
+                field_info = model_fields[field_name]
+                field_type = field_info.type_
+
+                # Check if this is a JSONB field (Dict, List, or complex types)
+                if (field_type == dict or
+                        field_type == list or
+                        (hasattr(field_type, '__origin__') and field_type.__origin__ in (dict, list)) or
+                        field_name in ['execParameters', 'expectedDocumentFormats', 'resultDocuments']):
+                    # Store as JSONB - include in simple_fields for database storage
+                    simple_fields[field_name] = value
+                elif isinstance(value, (str, int, float, bool, type(None))):
+                    # Simple scalar types
+                    simple_fields[field_name] = value
+                else:
+                    # Complex objects that should be filtered out
+                    object_fields[field_name] = value
+            else:
+                # Field not in model - treat as scalar if simple, otherwise filter out
+                if isinstance(value, (str, int, float, bool, type(None))):
+                    simple_fields[field_name] = value
+                else:
+                    object_fields[field_name] = value
+
+        return simple_fields, object_fields
+
     def _initializeServices(self):
         pass
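A sketch of what the new helper is intended to return for a mixed payload (the values are invented; exact JSONB handling depends on the PostgreSQL connector):

    payload = {
        "id": "wf-1",                # scalar        -> simple_fields
        "status": "running",         # scalar        -> simple_fields
        "stats": {"tokenCount": 3},  # complex value -> object_fields
    }
    simple_fields, object_fields = chat._separate_object_fields(ChatWorkflow, payload)
    # simple_fields -> {"id": "wf-1", "status": "running"}  (written to the row)
    # object_fields -> {"stats": {...}}                      (persisted via normalized tables)
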
@@ -72,28 +121,39 @@ class ChatObjects:
 
         # Update database context
         self.db.updateContext(self.userId)
 
+    def __del__(self):
+        """Cleanup method to close database connection."""
+        if hasattr(self, 'db') and self.db is not None:
+            try:
+                self.db.close()
+            except Exception as e:
+                logger.error(f"Error closing database connection: {e}")
 
-        logger.debug(f"User context set: userId={self.userId}, mandateId={self.mandateId}")
 
     def _initializeDatabase(self):
-        """Initializes the database connection."""
+        """Initializes the database connection directly."""
         try:
             # Get configuration values with defaults
             dbHost = APP_CONFIG.get("DB_CHAT_HOST", "_no_config_default_data")
             dbDatabase = APP_CONFIG.get("DB_CHAT_DATABASE", "chat")
             dbUser = APP_CONFIG.get("DB_CHAT_USER")
             dbPassword = APP_CONFIG.get("DB_CHAT_PASSWORD_SECRET")
+            dbPort = int(APP_CONFIG.get("DB_CHAT_PORT", 5432))
 
-            # Ensure the database directory exists
-            os.makedirs(dbHost, exist_ok=True)
-
+            # Create database connector directly
             self.db = DatabaseConnector(
                 dbHost=dbHost,
                 dbDatabase=dbDatabase,
                 dbUser=dbUser,
-                dbPassword=dbPassword
+                dbPassword=dbPassword,
+                dbPort=dbPort,
+                userId=self.userId
             )
 
+            # Initialize database system
+            self.db.initDbSystem()
+
             logger.info("Database initialized successfully")
         except Exception as e:
             logger.error(f"Failed to initialize database: {str(e)}")
@@ -103,10 +163,10 @@ class ChatObjects:
         """Initializes standard records in the database if they don't exist."""
         pass
 
-    def _uam(self, table: str, recordset: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    def _uam(self, model_class: type, recordset: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
         """Delegate to access control module."""
         # First apply access control
-        filteredRecords = self.access.uam(table, recordset)
+        filteredRecords = self.access.uam(model_class, recordset)
 
         # Then filter out database-specific fields
         cleanedRecords = []
@@ -117,56 +177,58 @@ class ChatObjects:
 
         return cleanedRecords
 
-    def _canModify(self, table: str, recordId: Optional[str] = None) -> bool:
+    def _canModify(self, model_class: type, recordId: Optional[str] = None) -> bool:
         """Delegate to access control module."""
-        return self.access.canModify(table, recordId)
+        return self.access.canModify(model_class, recordId)
 
-    def _clearTableCache(self, table: str) -> None:
-        """Clears the cache for a specific table to ensure fresh data."""
-        self.db.clearTableCache(table)
 
     # Utilities
 
-    def getInitialId(self, table: str) -> Optional[str]:
+    def getInitialId(self, model_class: type) -> Optional[str]:
         """Returns the initial ID for a table."""
-        return self.db.getInitialId(table)
+        return self.db.getInitialId(model_class)
 
     # Workflow methods
 
-    def getAllWorkflows(self) -> List[Dict[str, Any]]:
+    def getWorkflows(self) -> List[Dict[str, Any]]:
         """Returns workflows based on user access level."""
-        allWorkflows = self.db.getRecordset("workflows")
-        return self._uam("workflows", allWorkflows)
+        allWorkflows = self.db.getRecordset(ChatWorkflow)
+        return self._uam(ChatWorkflow, allWorkflows)
 
     def getWorkflow(self, workflowId: str) -> Optional[ChatWorkflow]:
         """Returns a workflow by ID if user has access."""
-        workflows = self.db.getRecordset("workflows", recordFilter={"id": workflowId})
+        workflows = self.db.getRecordset(ChatWorkflow, recordFilter={"id": workflowId})
         if not workflows:
             return None
 
-        filteredWorkflows = self._uam("workflows", workflows)
+        filteredWorkflows = self._uam(ChatWorkflow, workflows)
         if not filteredWorkflows:
             return None
 
         workflow = filteredWorkflows[0]
 
         try:
+            # Load related data from normalized tables
+            logs = self.getLogs(workflowId)
+            messages = self.getMessages(workflowId)
+            stats = self.getWorkflowStats(workflowId)
+
             # Validate workflow data against ChatWorkflow model
             return ChatWorkflow(
                 id=workflow["id"],
                 status=workflow.get("status", "running"),
                 name=workflow.get("name"),
-                currentRound=workflow.get("currentRound", 0),  # Default value
+                currentRound=workflow.get("currentRound", 0),
                 currentTask=workflow.get("currentTask", 0),
                 currentAction=workflow.get("currentAction", 0),
                 totalTasks=workflow.get("totalTasks", 0),
                 totalActions=workflow.get("totalActions", 0),
                 lastActivity=workflow.get("lastActivity", get_utc_timestamp()),
                 startedAt=workflow.get("startedAt", get_utc_timestamp()),
-                logs=[ChatLog(**log) for log in workflow.get("logs", [])],
-                messages=[ChatMessage(**msg) for msg in workflow.get("messages", [])],
-                stats=ChatStat(**workflow.get("stats", {})) if workflow.get("stats") else None,
+                logs=logs,
+                messages=messages,
+                stats=stats,
                 mandateId=workflow.get("mandateId", self.currentUser.mandateId)
             )
         except Exception as e:
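The reworked getWorkflow assembles the aggregate from separate reads instead of one embedded JSON document. Conceptually, under the normalized layout this diff introduces (a sketch, not the literal code; `chat` and `db` stand for a ChatObjects instance and its connector):

    row = db.getRecordset(ChatWorkflow, recordFilter={"id": wf_id})[0]
    workflow = ChatWorkflow(
        **row,                               # scalar columns from the workflow row
        logs=chat.getLogs(wf_id),            # ChatLog rows keyed by workflowId
        messages=chat.getMessages(wf_id),    # ChatMessage rows keyed by workflowId
        stats=chat.getWorkflowStats(wf_id),  # newest ChatStat row for the workflow
    )
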
workflowData["lastActivity"] = currentTime + + # Use generic field separation based on ChatWorkflow model + simple_fields, object_fields = self._separate_object_fields(ChatWorkflow, workflowData) # Create workflow in database - created = self.db.recordCreate("workflows", workflowData) + created = self.db.recordCreate(ChatWorkflow, simple_fields) - # Clear cache to ensure fresh data - self._clearTableCache("workflows") - # Convert to ChatWorkflow model + # Convert to ChatWorkflow model (empty related data for new workflow) return ChatWorkflow( id=created["id"], status=created.get("status", "running"), name=created.get("name"), - currentRound=created.get("currentRound", 0), # Default value + currentRound=created.get("currentRound", 0), currentTask=created.get("currentTask", 0), currentAction=created.get("currentAction", 0), totalTasks=created.get("totalTasks", 0), @@ -206,7 +269,7 @@ class ChatObjects: startedAt=created.get("startedAt", currentTime), logs=[], messages=[], - stats=ChatStat(**created.get("stats", {})) if created.get("stats") else None, + stats=None, mandateId=created.get("mandateId", self.currentUser.mandateId) ) @@ -217,17 +280,61 @@ class ChatObjects: if not workflow: return None - if not self._canModify("workflows", workflowId): + if not self._canModify(ChatWorkflow, workflowId): raise PermissionError(f"No permission to update workflow {workflowId}") - # Set update time - workflowData["lastActivity"] = get_utc_timestamp() + # Use generic field separation based on ChatWorkflow model + simple_fields, object_fields = self._separate_object_fields(ChatWorkflow, workflowData) - # Update workflow in database - updated = self.db.recordModify("workflows", workflowId, workflowData) + # Set update time for main workflow + simple_fields["lastActivity"] = get_utc_timestamp() - # Clear cache to ensure fresh data - self._clearTableCache("workflows") + # Update main workflow in database + updated = self.db.recordModify(ChatWorkflow, workflowId, simple_fields) + + + # Handle object field updates (inline to avoid helper dependency) + if 'logs' in object_fields: + logs_data = object_fields['logs'] + try: + for log_data in logs_data: + if hasattr(log_data, 'dict'): + log_dict = log_data.dict() + elif hasattr(log_data, 'to_dict'): + log_dict = log_data.to_dict() + else: + log_dict = log_data + log_dict["workflowId"] = workflowId + self.createLog(log_dict) + except Exception as e: + logger.error(f"Error updating workflow logs: {str(e)}") + if 'messages' in object_fields: + messages_data = object_fields['messages'] + try: + for message_data in messages_data: + if hasattr(message_data, 'dict'): + msg_dict = message_data.dict() + elif hasattr(message_data, 'to_dict'): + msg_dict = message_data.to_dict() + else: + msg_dict = message_data + msg_dict["workflowId"] = workflowId + self.updateMessage(msg_dict.get("id"), msg_dict) + except Exception as e: + logger.error(f"Error updating workflow messages: {str(e)}") + if 'stats' in object_fields: + stats_data = object_fields['stats'] + try: + if stats_data: + stats_data["workflowId"] = workflowId + self.db.recordCreate(ChatStat, stats_data) + except Exception as e: + logger.error(f"Error updating workflow stats: {str(e)}") + + # Load fresh data from normalized tables + logs = self.getLogs(workflowId) + messages = self.getMessages(workflowId) + stats = self.getWorkflowStats(workflowId) # Convert to ChatWorkflow model return ChatWorkflow( @@ -241,70 +348,118 @@ class ChatObjects: totalActions=updated.get("totalActions", workflow.totalActions), 
lastActivity=updated.get("lastActivity", workflow.lastActivity), startedAt=updated.get("startedAt", workflow.startedAt), - logs=[ChatLog(**log) for log in updated.get("logs", workflow.logs)], - messages=[ChatMessage(**msg) for msg in updated.get("messages", workflow.messages)], - stats=ChatStat(**updated.get("stats", workflow.stats.dict() if workflow.stats else {})) if updated.get("stats") or workflow.stats else None, + logs=logs, + messages=messages, + stats=stats, mandateId=updated.get("mandateId", workflow.mandateId) ) def deleteWorkflow(self, workflowId: str) -> bool: - """Deletes a workflow if user has access.""" - # Check if the workflow exists and user has access - workflow = self.getWorkflow(workflowId) - if not workflow: - return False + """Deletes a workflow and all related data if user has access.""" + try: + # Check if the workflow exists and user has access + workflow = self.getWorkflow(workflowId) + if not workflow: + return False + + if not self._canModify(ChatWorkflow, workflowId): + raise PermissionError(f"No permission to delete workflow {workflowId}") - if not self._canModify("workflows", workflowId): - raise PermissionError(f"No permission to delete workflow {workflowId}") - - # Delete workflow - success = self.db.recordDelete("workflows", workflowId) - - # Clear cache to ensure fresh data - self._clearTableCache("workflows") - - return success + # CASCADE DELETE: Delete all related data first + + # 1. Delete all workflow messages and their related data + messages = self.getMessages(workflowId) + for message in messages: + messageId = message.id + if messageId: + # Delete message stats + existing_stats = self.db.getRecordset(ChatStat, recordFilter={"messageId": messageId}) + for stat in existing_stats: + self.db.recordDelete(ChatStat, stat["id"]) + + # Delete message documents (but NOT the files!) + existing_docs = self.db.getRecordset(ChatDocument, recordFilter={"messageId": messageId}) + for doc in existing_docs: + self.db.recordDelete(ChatDocument, doc["id"]) + + # Delete the message itself + self.db.recordDelete(ChatMessage, messageId) + + # 2. Delete workflow stats + existing_stats = self.db.getRecordset(ChatStat, recordFilter={"workflowId": workflowId}) + for stat in existing_stats: + self.db.recordDelete(ChatStat, stat["id"]) + + # 3. Delete workflow logs + existing_logs = self.db.getRecordset(ChatLog, recordFilter={"workflowId": workflowId}) + for log in existing_logs: + self.db.recordDelete(ChatLog, log["id"]) + + # 4. 
Finally delete the workflow itself + success = self.db.recordDelete(ChatWorkflow, workflowId) + + return success + + except Exception as e: + logger.error(f"Error deleting workflow {workflowId}: {str(e)}") + return False - # Workflow Messages - def getWorkflowMessages(self, workflowId: str) -> List[ChatMessage]: + # Message methods + + def getMessages(self, workflowId: str) -> List[ChatMessage]: """Returns messages for a workflow if user has access to the workflow.""" - # Check workflow access first - workflow = self.getWorkflow(workflowId) - if not workflow: + # Check workflow access first (without calling getWorkflow to avoid circular reference) + workflows = self.db.getRecordset(ChatWorkflow, recordFilter={"id": workflowId}) + if not workflows: return [] - # Get messages for this workflow - messages = self.db.getRecordset("workflowMessages", recordFilter={"workflowId": workflowId}) + filteredWorkflows = self._uam(ChatWorkflow, workflows) + if not filteredWorkflows: + return [] + + # Get messages for this workflow from normalized table + messages = self.db.getRecordset(ChatMessage, recordFilter={"workflowId": workflowId}) # Sort messages by publishedAt timestamp to ensure chronological order messages.sort(key=lambda x: x.get("publishedAt", x.get("timestamp", "0"))) - # Convert messages to ChatMessage objects with proper document handling + # Convert messages to ChatMessage objects and load documents chat_messages = [] for msg in messages: - # Ensure documents field is properly converted to ChatDocument objects - if "documents" in msg and msg["documents"]: - try: - # Convert each document back to ChatDocument object - documents = [] - for doc in msg["documents"]: - if isinstance(doc, dict): - documents.append(ChatDocument(**doc)) - else: - documents.append(doc) - msg["documents"] = documents - except Exception as e: - logger.warning(f"Error converting documents for message {msg.get('id', 'unknown')}: {e}") - msg["documents"] = [] - else: - msg["documents"] = [] + # Load documents from normalized documents table + documents = self.getDocuments(msg["id"]) - chat_messages.append(ChatMessage(**msg)) + # Create ChatMessage object with loaded documents + chat_message = ChatMessage( + id=msg["id"], + workflowId=msg["workflowId"], + parentMessageId=msg.get("parentMessageId"), + documents=documents, + documentsLabel=msg.get("documentsLabel"), + message=msg.get("message"), + role=msg.get("role", "assistant"), + status=msg.get("status", "step"), + sequenceNr=msg.get("sequenceNr", 0), + publishedAt=msg.get("publishedAt", get_utc_timestamp()), + stats=self.getMessageStats(msg["id"]), + success=msg.get("success"), + actionId=msg.get("actionId"), + actionMethod=msg.get("actionMethod"), + actionName=msg.get("actionName"), + roundNumber=msg.get("roundNumber"), + taskNumber=msg.get("taskNumber"), + actionNumber=msg.get("actionNumber"), + taskProgress=msg.get("taskProgress"), + actionProgress=msg.get("actionProgress") + ) + + chat_messages.append(chat_message) + return chat_messages - def createWorkflowMessage(self, messageData: Dict[str, Any]) -> ChatMessage: + def createMessage(self, messageData: Dict[str, Any]) -> ChatMessage: """Creates a message for a workflow if user has access.""" try: # Ensure ID is present @@ -323,7 +478,7 @@ class ChatObjects: if not workflow: raise PermissionError(f"No access to workflow {workflowId}") - if not self._canModify("workflows", workflowId): + if not self._canModify(ChatWorkflow, workflowId): raise PermissionError(f"No permission to modify workflow {workflowId}") # 
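Since the normalized schema evidently has no ON DELETE CASCADE foreign keys, deleteWorkflow has to remove children before parents. The same ordering, condensed into a sketch over the diff's models (not the literal code):

    # Children of each message first, then the message, then workflow-level children:
    for msg in chat.getMessages(workflowId):
        for model in (ChatStat, ChatDocument):
            for row in db.getRecordset(model, recordFilter={"messageId": msg.id}):
                db.recordDelete(model, row["id"])
        db.recordDelete(ChatMessage, msg.id)
    for model in (ChatStat, ChatLog):
        for row in db.getRecordset(model, recordFilter={"workflowId": workflowId}):
            db.recordDelete(model, row["id"])
    db.recordDelete(ChatWorkflow, workflowId)  # parent last
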
@@ -323,7 +478,7 @@ class ChatObjects:
             if not workflow:
                 raise PermissionError(f"No access to workflow {workflowId}")
 
-            if not self._canModify("workflows", workflowId):
+            if not self._canModify(ChatWorkflow, workflowId):
                 raise PermissionError(f"No permission to modify workflow {workflowId}")
 
             # Validate that ID is not None
@@ -346,33 +501,38 @@ class ChatObjects:
             # This ensures messages have the correct progress context when workflows are continued
             if "roundNumber" not in messageData:
                 messageData["roundNumber"] = workflow.currentRound
-                logger.debug(f"Auto-setting roundNumber to {workflow.currentRound} for message {messageData['id']}")
             if "taskNumber" not in messageData:
                 messageData["taskNumber"] = workflow.currentTask
-                logger.debug(f"Auto-setting taskNumber to {workflow.currentTask} for message {messageData['id']}")
             if "actionNumber" not in messageData:
                 messageData["actionNumber"] = workflow.currentAction
-                logger.debug(f"Auto-setting actionNumber to {workflow.currentAction} for message {messageData['id']}")
 
-            # Convert ChatDocument objects to dictionaries for database storage
-            if "documents" in messageData and messageData["documents"]:
-                documents_for_db = []
-                for doc in messageData["documents"]:
-                    if isinstance(doc, ChatDocument):
-                        # Convert ChatDocument to dictionary
-                        documents_for_db.append(doc.dict())
-                    else:
-                        # Already a dictionary
-                        documents_for_db.append(doc)
-                messageData["documents"] = documents_for_db
+            # Use generic field separation based on ChatMessage model
+            simple_fields, object_fields = self._separate_object_fields(ChatMessage, messageData)
 
-            # Create message in database
-            createdMessage = self.db.recordCreate("workflowMessages", messageData)
+            # Handle documents separately - they will be stored in normalized documents table
+            documents_to_create = object_fields.get("documents", [])
 
-            # Clear cache to ensure fresh data
-            self._clearTableCache("workflowMessages")
+            # Create message in normalized table using only simple fields
+            createdMessage = self.db.recordCreate(ChatMessage, simple_fields)
+
+            # Create documents in normalized documents table
+            created_documents = []
+            for doc_data in documents_to_create:
+                # Convert to dict if it's a Pydantic object
+                if hasattr(doc_data, 'dict'):
+                    doc_dict = doc_data.dict()
+                elif hasattr(doc_data, 'to_dict'):
+                    doc_dict = doc_data.to_dict()
+                else:
+                    doc_dict = doc_data
+
+                doc_dict["messageId"] = createdMessage["id"]
+                created_doc = self.createDocument(doc_dict)
+                if created_doc:
+                    created_documents.append(created_doc)
 
             # Convert to ChatMessage model
             return ChatMessage(
@@ -380,15 +540,14 @@ class ChatObjects:
                 workflowId=createdMessage["workflowId"],
                 parentMessageId=createdMessage.get("parentMessageId"),
                 agentName=createdMessage.get("agentName"),
-                documents=[ChatDocument(**doc) for doc in createdMessage.get("documents", [])],
-                documentsLabel=createdMessage.get("documentsLabel"),  # <-- FIX: ensure label is set
+                documents=created_documents,
+                documentsLabel=createdMessage.get("documentsLabel"),
                 message=createdMessage.get("message"),
                 role=createdMessage.get("role", "assistant"),
                 status=createdMessage.get("status", "step"),
                 sequenceNr=len(workflow.messages) + 1,  # Use messages list length for sequence number
                 publishedAt=createdMessage.get("publishedAt", get_utc_timestamp()),
-                stats=ChatStat(**createdMessage.get("stats", {})) if createdMessage.get("stats") else None,
-                # CRITICAL FIX: Include the progress fields in the ChatMessage object
+                stats=object_fields.get("stats"),  # Use stats from object_fields
                 roundNumber=createdMessage.get("roundNumber"),
                 taskNumber=createdMessage.get("taskNumber"),
                 actionNumber=createdMessage.get("actionNumber"),
@@ -402,18 +561,17 @@ class ChatObjects:
             logger.error(f"Error creating workflow message: {str(e)}")
             return None
 
-    def updateWorkflowMessage(self, messageId: str, messageData: Dict[str, Any]) -> Dict[str, Any]:
+    def updateMessage(self, messageId: str, messageData: Dict[str, Any]) -> Dict[str, Any]:
         """Updates a workflow message if user has access to the workflow."""
         try:
-            logger.debug(f"Updating message {messageId} in database")
 
             # Ensure messageId is provided
             if not messageId:
-                logger.error("No messageId provided for updateWorkflowMessage")
+                logger.error("No messageId provided for updateMessage")
                 raise ValueError("messageId cannot be empty")
 
             # Check if message exists in database
-            messages = self.db.getRecordset("workflowMessages", recordFilter={"id": messageId})
+            messages = self.db.getRecordset(ChatMessage, recordFilter={"id": messageId})
 
             if not messages:
                 logger.warning(f"Message with ID {messageId} does not exist in database")
@@ -426,11 +584,11 @@ class ChatObjects:
                     if not workflow:
                         raise PermissionError(f"No access to workflow {workflowId}")
 
-                    if not self._canModify("workflows", workflowId):
+                    if not self._canModify(ChatWorkflow, workflowId):
                         raise PermissionError(f"No permission to modify workflow {workflowId}")
 
                     logger.info(f"Creating new message with ID {messageId} for workflow {workflowId}")
-                    return self.db.recordCreate("workflowMessages", messageData)
+                    return self.db.recordCreate(ChatMessage, messageData)
                 else:
                     logger.error(f"Workflow ID missing for new message {messageId}")
                     return None
@@ -444,31 +602,53 @@ class ChatObjects:
             if not workflow:
                 raise PermissionError(f"No access to workflow {workflowId}")
 
-            if not self._canModify("workflows", workflowId):
+            if not self._canModify(ChatWorkflow, workflowId):
                 raise PermissionError(f"No permission to modify workflow {workflowId}")
 
+            # Use generic field separation based on ChatMessage model
+            simple_fields, object_fields = self._separate_object_fields(ChatMessage, messageData)
+
             # Ensure required fields present
             for key in ["role", "agentName"]:
-                if key not in messageData and key not in existingMessage:
-                    messageData[key] = "assistant" if key == "role" else ""
+                if key not in simple_fields and key not in existingMessage:
+                    simple_fields[key] = "assistant" if key == "role" else ""
 
             # Ensure ID is in the dataset
-            if 'id' not in messageData:
-                messageData['id'] = messageId
+            if 'id' not in simple_fields:
+                simple_fields['id'] = messageId
 
             # Convert createdAt to startedAt if needed
-            if "createdAt" in messageData and "startedAt" not in messageData:
-                messageData["startedAt"] = messageData["createdAt"]
-                del messageData["createdAt"]
+            if "createdAt" in simple_fields and "startedAt" not in simple_fields:
+                simple_fields["startedAt"] = simple_fields["createdAt"]
+                del simple_fields["createdAt"]
 
-            # Update the message
-            updatedMessage = self.db.recordModify("workflowMessages", messageId, messageData)
-            if updatedMessage:
-                logger.debug(f"Message {messageId} updated successfully")
-
-                # Clear cache to ensure fresh data
-                self._clearTableCache("workflowMessages")
-            else:
+            # Update the message with simple fields only
+            updatedMessage = self.db.recordModify(ChatMessage, messageId, simple_fields)
+
+            # Handle object field updates (documents, stats) inline
+            if 'documents' in object_fields:
+                documents_data = object_fields['documents']
+                try:
+                    for doc_data in documents_data:
+                        if hasattr(doc_data, 'dict'):
+                            doc_dict = doc_data.dict()
+                        elif hasattr(doc_data, 'to_dict'):
+                            doc_dict = doc_data.to_dict()
+                        else:
+                            doc_dict = doc_data
+                        doc_dict["messageId"] = messageId
+                        self.createDocument(doc_dict)
+                except Exception as e:
+                    logger.error(f"Error updating message documents: {str(e)}")
+            if 'stats' in object_fields:
+                stats_data = object_fields['stats']
+                try:
+                    if stats_data:
+                        stats_data["messageId"] = messageId
+                        self.db.recordCreate(ChatStat, stats_data)
+                except Exception as e:
+                    logger.error(f"Error updating message stats: {str(e)}")
+            if not updatedMessage:
                 logger.warning(f"Failed to update message {messageId}")
 
            return updatedMessage
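Note that on update, nested documents and stats are not modified in place: each call inserts new ChatDocument / ChatStat rows keyed to the message, so stats behave as an append-only series that readers resolve to the newest row. A usage sketch (IDs and values invented):

    chat.updateMessage("m-1", {
        "status": "done",             # scalar -> UPDATE on the ChatMessage row
        "stats": {"tokenCount": 42},  # object -> INSERT of a new ChatStat row
    })
    latest = chat.getMessageStats("m-1")  # newest ChatStat wins on read
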
@@ -476,8 +656,8 @@ class ChatObjects:
             logger.error(f"Error updating message {messageId}: {str(e)}", exc_info=True)
             raise ValueError(f"Error updating message {messageId}: {str(e)}")
 
-    def deleteWorkflowMessage(self, workflowId: str, messageId: str) -> bool:
-        """Deletes a workflow message if user has access to the workflow."""
+    def deleteMessage(self, workflowId: str, messageId: str) -> bool:
+        """Deletes a workflow message and all related data if user has access to the workflow."""
         try:
             # Check workflow access
             workflow = self.getWorkflow(workflowId)
@@ -485,24 +665,34 @@ class ChatObjects:
                 logger.warning(f"No access to workflow {workflowId}")
                 return False
 
-            if not self._canModify("workflows", workflowId):
+            if not self._canModify(ChatWorkflow, workflowId):
                 raise PermissionError(f"No permission to modify workflow {workflowId}")
 
             # Check if the message exists
-            messages = self.getWorkflowMessages(workflowId)
-            message = next((m for m in messages if m.get("id") == messageId), None)
+            messages = self.getMessages(workflowId)
+            message = next((m for m in messages if m.id == messageId), None)
             if not message:
                 logger.warning(f"Message {messageId} for workflow {workflowId} not found")
                 return False
 
-            # Delete the message from the database
-            success = self.db.recordDelete("workflowMessages", messageId)
+            # CASCADE DELETE: Delete all related data first
 
-            # Clear cache to ensure fresh data
-            self._clearTableCache("workflowMessages")
+            # 1. Delete message stats
+            existing_stats = self.db.getRecordset(ChatStat, recordFilter={"messageId": messageId})
+            for stat in existing_stats:
+                self.db.recordDelete(ChatStat, stat["id"])
+
+            # 2. Delete message documents (but NOT the files!)
+            existing_docs = self.db.getRecordset(ChatDocument, recordFilter={"messageId": messageId})
+            for doc in existing_docs:
+                self.db.recordDelete(ChatDocument, doc["id"])
+
+            # 3. Finally delete the message itself
+            success = self.db.recordDelete(ChatMessage, messageId)
 
             return success
+
         except Exception as e:
             logger.error(f"Error deleting message {messageId}: {str(e)}")
             return False
@@ -516,51 +706,19 @@ class ChatObjects:
                 logger.warning(f"No access to workflow {workflowId}")
                 return False
 
-            if not self._canModify("workflows", workflowId):
+            if not self._canModify(ChatWorkflow, workflowId):
                 raise PermissionError(f"No permission to modify workflow {workflowId}")
 
-            logger.debug(f"Removing file {fileId} from message {messageId} in workflow {workflowId}")
-
-            # Get all workflow messages
-            allMessages = self.getWorkflowMessages(workflowId)
-            logger.debug(f"Workflow {workflowId} has {len(allMessages)} messages")
+            # Get documents for this message from normalized table
+            documents = self.db.getRecordset(ChatDocument, recordFilter={"messageId": messageId})
 
-            # Try different approaches to find the message
-            message = None
-
-            # Exact match
-            message = next((m for m in allMessages if m.get("id") == messageId), None)
-
-            # Case-insensitive match
-            if not message and isinstance(messageId, str):
-                message = next((m for m in allMessages
-                                if isinstance(m.get("id"), str) and m.get("id").lower() == messageId.lower()), None)
-
-            # Partial match (starts with)
-            if not message and isinstance(messageId, str):
-                message = next((m for m in allMessages
-                                if isinstance(m.get("id"), str) and m.get("id").startswith(messageId)), None)
-
-            if not message:
-                logger.warning(f"Message {messageId} not found in workflow {workflowId}")
+            if not documents:
+                logger.warning(f"No documents found for message {messageId}")
                 return False
 
-            # Log the found message
-            logger.debug(f"Found message: {message.get('id')}")
-
-            # Check if message has documents
-            if "documents" not in message or not message["documents"]:
-                logger.warning(f"No documents in message {messageId}")
-                return False
-
-            # Log existing documents
-            documents = message.get("documents", [])
-            logger.debug(f"Message has {len(documents)} documents")
-
-            # Create a new list of documents without the one to delete
-            updatedDocuments = []
+            # Find and delete the specific document
             removed = False
-
             for doc in documents:
                 docId = doc.get("id")
                 fileIdValue = doc.get("fileId")
@@ -574,161 +732,78 @@ class ChatObjects:
                 )
 
                 if shouldRemove:
-                    removed = True
-                    logger.debug(f"Found file to remove: docId={docId}, fileId={fileIdValue}")
-                else:
-                    updatedDocuments.append(doc)
+                    # Delete the document from normalized table
+                    success = self.db.recordDelete(ChatDocument, docId)
+                    if success:
+                        removed = True
+                    else:
+                        logger.warning(f"Failed to delete document {docId}")
 
             if not removed:
                 logger.warning(f"No matching file {fileId} found in message {messageId}")
                 return False
 
-            # Update message with modified documents array
-            messageUpdate = {
-                "documents": updatedDocuments
-            }
-
-            # Apply the update directly to the database
-            updated = self.db.recordModify("workflowMessages", message["id"], messageUpdate)
-
-            if updated:
-                logger.debug(f"Successfully removed file {fileId} from message {messageId}")
             return True
-            else:
-                logger.warning(f"Failed to update message {messageId} in database")
-                return False
 
         except Exception as e:
             logger.error(f"Error removing file {fileId} from message {messageId}: {str(e)}")
             return False
 
-    # Workflow Logs
+    # Document methods
 
-    def getWorkflowLogs(self, workflowId: str) -> List[ChatLog]:
+    def getDocuments(self, messageId: str) -> List[ChatDocument]:
+        """Returns documents for a message from normalized table."""
+        try:
+            documents = self.db.getRecordset(ChatDocument, recordFilter={"messageId": messageId})
+            return [ChatDocument(**doc) for doc in documents]
+        except Exception as e:
+            logger.error(f"Error getting message documents: {str(e)}")
+            return []
+
+    def createDocument(self, documentData: Dict[str, Any]) -> ChatDocument:
+        """Creates a document for a message in normalized table."""
+        try:
+            # Validate document data
+            document = ChatDocument(**documentData)
+
+            # Create document in normalized table
+            created = self.db.recordCreate(ChatDocument, document)
+
+            return ChatDocument(**created)
+        except Exception as e:
+            logger.error(f"Error creating message document: {str(e)}")
+            return None
+
+    # Log methods
+
+    def getLogs(self, workflowId: str) -> List[ChatLog]:
         """Returns logs for a workflow if user has access to the workflow."""
-        # Check workflow access first
-        workflow = self.getWorkflow(workflowId)
-        if not workflow:
+        # Check workflow access first (without calling getWorkflow to avoid circular reference)
+        workflows = self.db.getRecordset(ChatWorkflow, recordFilter={"id": workflowId})
+        if not workflows:
             return []
 
-        # Get logs for this workflow
-        logs = self.db.getRecordset("workflowLogs", recordFilter={"workflowId": workflowId})
+        filteredWorkflows = self._uam(ChatWorkflow, workflows)
+        if not filteredWorkflows:
+            return []
+
+        # Get logs for this workflow from normalized table
+        logs = self.db.getRecordset(ChatLog, recordFilter={"workflowId": workflowId})
 
         # Sort logs by timestamp (Unix timestamps)
         logs.sort(key=lambda x: float(x.get("timestamp", 0)))
 
         return [ChatLog(**log) for log in logs]
 
-    def updateWorkflowStats(self, workflowId: str, bytesSent: int = 0, bytesReceived: int = 0) -> bool:
-        """Updates workflow statistics during execution with incremental values."""
-        try:
-            # Get current workflow
-            workflow = self.getWorkflow(workflowId)
-            if not workflow:
-                logger.error(f"Workflow {workflowId} not found for stats update")
-                return False
-
-            if not self._canModify("workflows", workflowId):
-                logger.error(f"No permission to update workflow {workflowId} stats")
-                return False
-
-            # Get current stats - ensure we have proper defaults
-            if workflow.stats:
-                currentStats = workflow.stats.dict()
-                # Ensure all required fields exist
-                currentStats.setdefault("bytesSent", 0)
-                currentStats.setdefault("bytesReceived", 0)
-                currentStats.setdefault("tokenCount", 0)
-                currentStats.setdefault("processingTime", 0)
-            else:
-                currentStats = {
-                    "bytesSent": 0,
-                    "bytesReceived": 0,
-                    "tokenCount": 0,
-                    "processingTime": 0
-                }
-
-            # Calculate processing time as duration since workflow start using Unix timestamps
-            workflow = self.getWorkflow(workflowId)
-            if workflow and workflow.startedAt:
-                try:
-                    # Parse start time as Unix timestamp (handle both old ISO format and new Unix format)
-                    start_time_str = workflow.startedAt
-                    try:
-                        # Try to parse as Unix timestamp first
-                        start_time = int(float(start_time_str))
-                    except ValueError:
-                        # If that fails, try to parse as ISO format and convert to Unix
-                        try:
-                            # Handle ISO format timestamps (for backward compatibility)
-                            if start_time_str.endswith('Z'):
-                                start_time_str = start_time_str.replace('Z', '+00:00')
-                            dt = datetime.fromisoformat(start_time_str)
-                            start_time = int(dt.timestamp())
-                        except:
-                            # If all parsing fails, use current time
-                            logger.warning(f"Could not parse start time: {start_time_str}, using current time")
-                            start_time = int(get_utc_timestamp())
-
-                    current_time = int(get_utc_timestamp())
-                    processing_time = current_time - start_time
-
-                    # Ensure processing time is reasonable (not negative or extremely large)
-                    if processing_time < 0:
-                        logger.warning(f"Negative processing time calculated: {processing_time}, using 0")
-                        processing_time = 0
-                    elif processing_time > 86400 * 365:  # More than 1 year
-                        logger.warning(f"Unreasonably large processing time: {processing_time}, using 0")
-                        processing_time = 0
-
-                except Exception as e:
-                    logger.warning(f"Error calculating processing time: {str(e)}")
-                    processing_time = currentStats.get("processingTime", 0) or 0
-            else:
-                # Fallback to existing processing time or 0
-                processing_time = currentStats.get("processingTime", 0) or 0
-
-            # Update stats with incremental values - ensure no None values
-            current_bytes_sent = currentStats.get("bytesSent", 0) or 0
-            current_bytes_received = currentStats.get("bytesReceived", 0) or 0
-
-            currentStats["bytesSent"] = current_bytes_sent + bytesSent
-            currentStats["bytesReceived"] = current_bytes_received + bytesReceived
-            currentStats["tokenCount"] = currentStats["bytesSent"] + currentStats["bytesReceived"]
-            currentStats["processingTime"] = processing_time
-
-            # Update workflow in database
-            self.db.recordModify("workflows", workflowId, {
-                "stats": currentStats
-            })
-
-            # Log to stats table
-            stats_record = {
-                "timestamp": get_utc_timestamp(),
-                "workflowId": workflowId,
-                "bytesSent": bytesSent,
-                "bytesReceived": bytesReceived,
-                "tokenCount": bytesSent + bytesReceived,
-                "processingTime": processing_time
-            }
-
-            # Create stats record in database
-            self.db.recordCreate("stats", stats_record)
-
-            # logger.debug(f"Updated workflow {workflowId} stats: {currentStats}")
-            # logger.debug(f"Logged stats record: {stats_record}")
-            return True
-
-        except Exception as e:
-            logger.error(f"Error updating workflow stats: {str(e)}")
-            return False
-
-    def createWorkflowLog(self, logData: Dict[str, Any]) -> ChatLog:
+    def createLog(self, logData: Dict[str, Any]) -> ChatLog:
         """Creates a log entry for a workflow if user has access."""
         # Check workflow access
         workflowId = logData.get("workflowId")
         if not workflowId:
-            logger.error("No workflowId provided for createWorkflowLog")
+            logger.error("No workflowId provided for createLog")
             return None
 
         workflow = self.getWorkflow(workflowId)
@@ -736,7 +811,7 @@ class ChatObjects:
             logger.warning(f"No access to workflow {workflowId}")
             return None
 
-        if not self._canModify("workflows", workflowId):
+        if not self._canModify(ChatWorkflow, workflowId):
             logger.warning(f"No permission to modify workflow {workflowId}")
             return None
 
@@ -768,62 +843,211 @@ class ChatObjects:
             logger.error(f"Invalid log data: {str(e)}")
             return None
 
-        # Create log in database
-        createdLog = self.db.recordCreate("workflowLogs", log_model.to_dict())
-
-        # Clear cache to ensure fresh data
-        self._clearTableCache("workflowLogs")
+        # Create log in normalized table
+        createdLog = self.db.recordCreate(ChatLog, log_model)
 
         # Return validated ChatLog instance
         return ChatLog(**createdLog)
 
+    # Stats methods
 
-    def loadWorkflowState(self, workflowId: str) -> Optional[ChatWorkflow]:
-        """Loads workflow state if user has access."""
+    def getMessageStats(self, messageId: str) -> Optional[ChatStat]:
+        """Returns statistics for a message from normalized table."""
         try:
-            # Check workflow access
+            stats = self.db.getRecordset(ChatStat, recordFilter={"messageId": messageId})
+            if not stats:
+                return None
+            # Return the most recent stats record
+            stats.sort(key=lambda x: x.get("created_at", ""), reverse=True)
+            return ChatStat(**stats[0])
+        except Exception as e:
+            logger.error(f"Error getting message stats: {str(e)}")
+            return None
+
+    def getWorkflowStats(self, workflowId: str) -> Optional[ChatStat]:
+        """Returns statistics for a workflow if user has access."""
+        # Check workflow access first (without calling getWorkflow to avoid circular reference)
+        workflows = self.db.getRecordset(ChatWorkflow, recordFilter={"id": workflowId})
+        if not workflows:
+            return None
+
+        filteredWorkflows = self._uam(ChatWorkflow, workflows)
+        if not filteredWorkflows:
+            return None
+
+        # Get stats for this workflow from normalized table
+        stats = self.db.getRecordset(ChatStat, recordFilter={"workflowId": workflowId})
+
+        if not stats:
+            return None
+
+        # Return the most recent stats record
+        stats.sort(key=lambda x: x.get("created_at", ""), reverse=True)
+        return ChatStat(**stats[0])
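Both getters resolve the append-only stats series to "newest row wins". The same selection in one expression, assuming created_at is an ISO-8601 string or another value set whose lexicographic order matches chronological order:

    latest = max(stats, key=lambda row: row.get("created_at", ""))
    return ChatStat(**latest)
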
+    def getUnifiedChatData(self, workflowId: str, afterTimestamp: Optional[float] = None) -> Dict[str, Any]:
+        """
+        Returns unified chat data (messages, logs, stats) for a workflow in chronological order.
+        Uses timestamp-based selective data transfer for efficient polling.
+        """
+        # Check workflow access first
+        workflows = self.db.getRecordset(ChatWorkflow, recordFilter={"id": workflowId})
+        if not workflows:
+            return {"items": []}
+
+        filteredWorkflows = self._uam(ChatWorkflow, workflows)
+        if not filteredWorkflows:
+            return {"items": []}
+
+        # Get all data types and filter in Python (PostgreSQL connector doesn't support $gt operators)
+        items = []
+
+        # Get messages
+        messages = self.db.getRecordset(ChatMessage, recordFilter={"workflowId": workflowId})
+        for msg in messages:
+            # Apply timestamp filtering in Python
+            msg_timestamp = msg.get("publishedAt", get_utc_timestamp())
+            if afterTimestamp is not None and msg_timestamp <= afterTimestamp:
+                continue
+
+            # Load documents for each message
+            documents = self.getDocuments(msg["id"])
+
+            # Create ChatMessage object with loaded documents
+            chat_message = ChatMessage(
+                id=msg["id"],
+                workflowId=msg["workflowId"],
+                parentMessageId=msg.get("parentMessageId"),
+                documents=documents,
+                documentsLabel=msg.get("documentsLabel"),
+                message=msg.get("message"),
+                role=msg.get("role", "assistant"),
+                status=msg.get("status", "step"),
+                sequenceNr=msg.get("sequenceNr", 0),
+                publishedAt=msg.get("publishedAt", get_utc_timestamp()),
+                stats=self.getMessageStats(msg["id"]),
+                success=msg.get("success"),
+                actionId=msg.get("actionId"),
+                actionMethod=msg.get("actionMethod"),
+                actionName=msg.get("actionName"),
+                roundNumber=msg.get("roundNumber"),
+                taskNumber=msg.get("taskNumber"),
+                actionNumber=msg.get("actionNumber"),
+                taskProgress=msg.get("taskProgress"),
+                actionProgress=msg.get("actionProgress")
+            )
+
+            # Use publishedAt as the timestamp for chronological ordering
+            items.append({
+                "type": "message",
+                "createdAt": msg_timestamp,
+                "item": chat_message.dict()
+            })
+
+        # Get logs
+        logs = self.db.getRecordset(ChatLog, recordFilter={"workflowId": workflowId})
+        for log in logs:
+            # Apply timestamp filtering in Python
+            log_timestamp = log.get("timestamp", get_utc_timestamp())
+            if afterTimestamp is not None and log_timestamp <= afterTimestamp:
+                continue
+
+            chat_log = ChatLog(**log)
+            items.append({
+                "type": "log",
+                "createdAt": log_timestamp,
+                "item": chat_log.dict()
+            })
+
+        # Get stats
+        stats = self.db.getRecordset(ChatStat, recordFilter={"workflowId": workflowId})
+        for stat in stats:
+            # Apply timestamp filtering in Python
+            stat_timestamp = stat.get("_createdAt", get_utc_timestamp())
+            if afterTimestamp is not None and stat_timestamp <= afterTimestamp:
+                continue
+
+            chat_stat = ChatStat(**stat)
+            items.append({
+                "type": "stat",
+                "createdAt": stat_timestamp,
+                "item": chat_stat.dict()
+            })
+
+        # Sort all items by createdAt timestamp for chronological order
+        items.sort(key=lambda x: x["createdAt"])
+
+        return {"items": items}
+
+    def updateWorkflowStats(self, workflowId: str, bytesSent: int = 0, bytesReceived: int = 0) -> bool:
+        """Updates workflow statistics during execution with incremental values."""
+        try:
+            # Get current workflow
             workflow = self.getWorkflow(workflowId)
             if not workflow:
-                return None
+                logger.error(f"Workflow {workflowId} not found for stats update")
+                return False
+
+            if not self._canModify(ChatWorkflow, workflowId):
+                logger.error(f"No permission to update workflow {workflowId} stats")
+                return False
 
-            logger.debug(f"Loaded base workflow {workflowId} from database")
+            # Get current stats from normalized table
+            currentStats = self.getWorkflowStats(workflowId)
+            if currentStats:
+                current_bytes_sent = currentStats.bytesSent or 0
+                current_bytes_received = currentStats.bytesReceived or 0
+                current_processing_time = currentStats.processingTime or 0
+            else:
+                current_bytes_sent = 0
+                current_bytes_received = 0
+                current_processing_time = 0
 
-            # Load messages
-            messages = self.getWorkflowMessages(workflowId)
-            # Messages are already sorted by publishedAt in getWorkflowMessages
+            # Calculate processing time as duration since workflow start
+            if workflow and workflow.startedAt:
+                try:
+                    start_time = int(float(workflow.startedAt))
+                    current_time = int(get_utc_timestamp())
+                    processing_time = current_time - start_time
+
+                    # Ensure processing time is reasonable
+                    if processing_time < 0:
+                        processing_time = 0
+                    elif processing_time > 86400 * 365:  # More than 1 year
+                        processing_time = 0
+                except Exception as e:
+                    logger.warning(f"Error calculating processing time: {str(e)}")
+                    processing_time = current_processing_time
+            else:
+                processing_time = current_processing_time
 
-            messageCount = len(messages)
-            logger.debug(f"Loaded {messageCount} messages for workflow {workflowId}")
+            # Update stats with incremental values
+            new_bytes_sent = current_bytes_sent + bytesSent
+            new_bytes_received = current_bytes_received + bytesReceived
+            new_token_count = new_bytes_sent + new_bytes_received
 
-            # Log document counts for each message
-            for msg in messages:
-                docCount = len(msg.documents) if hasattr(msg, 'documents') else 0
-                if docCount > 0:
-                    logger.debug(f"Message {msg.id} has {docCount} documents loaded from database")
+            # Create or update stats record in normalized table
+            stats_record = {
+                "workflowId": workflowId,
+                "processingTime": processing_time,
+                "tokenCount": new_token_count,
+                "bytesSent": new_bytes_sent,
+                "bytesReceived": new_bytes_received,
+                "successRate": None,
+                "errorCount": None
+            }
 
-            # Load logs
-            logs = self.getWorkflowLogs(workflowId)
-            # Logs are already sorted by timestamp in getWorkflowLogs
+            # Create new stats record
+            self.db.recordCreate(ChatStat, stats_record)
+
+            return True
 
-            # Create a new ChatWorkflow object with loaded messages and logs
-            return ChatWorkflow(
-                id=workflow.id,
-                status=workflow.status,
-                name=workflow.name,
-                currentRound=workflow.currentRound,
-                lastActivity=workflow.lastActivity,
-                startedAt=workflow.startedAt,
-                logs=logs,
-                messages=messages,
-                stats=workflow.stats,
-                mandateId=workflow.mandateId
-            )
         except Exception as e:
-            logger.error(f"Error loading workflow state: {str(e)}")
-            return None
+            logger.error(f"Error updating workflow stats: {str(e)}")
+            return False
 
     # Workflow Actions
 
@@ -844,7 +1068,7 @@ class ChatObjects:
 
         if workflowId:
             # Continue existing workflow - load complete state including messages
-            workflow = self.loadWorkflowState(workflowId)
+            workflow = self.getWorkflow(workflowId)
             if not workflow:
                 raise ValueError(f"Workflow {workflowId} not found")
 
@@ -861,7 +1085,7 @@ class ChatObjects:
                 })
 
                 # Add log entry for workflow stop
-                self.createWorkflowLog({
+                self.createLog({
                     "workflowId": workflowId,
                     "message": "Workflow stopped for new prompt",
                     "type": "info",
@@ -881,12 +1105,12 @@ class ChatObjects:
                 })
 
                 # Reload workflow object to get updated currentRound from database
-                workflow = self.loadWorkflowState(workflowId)
+                workflow = self.getWorkflow(workflowId)
                 if not workflow:
                     raise ValueError(f"Failed to reload workflow {workflowId} after update")
 
                 # Add log entry for workflow resumption
-                self.createWorkflowLog({
+                self.createLog({
                     "workflowId": workflowId,
                     "message": f"Workflow resumed (round {workflow.currentRound})",
                     "type": "info",
@@ -971,7 +1195,7 @@ class ChatObjects:
             })
 
             # Add log entry
-            self.createWorkflowLog({
+            self.createLog({
                 "workflowId": workflowId,
                 "message": "Workflow stopped",
                 "type": "warning",
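The timestamp contract of getUnifiedChatData suggests a client polling loop along these lines (a sketch; render(), workflow_is_running, and the two-second cadence are invented, not part of the diff):

    import time

    last_seen = None
    while workflow_is_running:
        batch = chat.getUnifiedChatData(workflowId, afterTimestamp=last_seen)
        for entry in batch["items"]:              # pre-sorted by createdAt
            render(entry["type"], entry["item"])  # hypothetical UI hook
            last_seen = entry["createdAt"]        # advance the high-water mark
        time.sleep(2)
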
diff --git a/modules/interfaces/interfaceComponentAccess.py b/modules/interfaces/interfaceComponentAccess.py
index 6db839af..ca5201bb 100644
--- a/modules/interfaces/interfaceComponentAccess.py
+++ b/modules/interfaces/interfaceComponentAccess.py
@@ -5,7 +5,9 @@ Handles user access management and permission checks.
 import logging
 from typing import Dict, Any, List, Optional
 
-from modules.interfaces.interfaceAppModel import User
+from modules.interfaces.interfaceAppModel import User, UserInDB
+from modules.interfaces.interfaceComponentModel import Prompt, FileItem, FileData
+from modules.interfaces.interfaceChatModel import ChatWorkflow, ChatMessage, ChatLog
 
 # Configure logger
 logger = logging.getLogger(__name__)
@@ -47,19 +49,20 @@ class ComponentAccess:
 
         return True
 
-    def uam(self, table: str, recordset: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    def uam(self, model_class: type, recordset: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
         """
         Unified user access management function that filters data based on
         user privileges and adds access control attributes.
 
         Args:
-            table: Name of the table
+            model_class: Pydantic model class for the table
             recordset: Recordset to filter based on access rules
 
         Returns:
             Filtered recordset with access control attributes
         """
         userPrivilege = self.privilege
+        table_name = model_class.__name__
 
         filtered_records = []
 
@@ -73,9 +76,9 @@ class ComponentAccess:
             filtered_records = [r for r in recordset if r.get("mandateId") == self.mandateId]
         else:  # Regular users
             # For prompts, users can see all prompts from their mandate
-            if table == "prompts":
+            if table_name == "Prompt":
                 filtered_records = [r for r in recordset if r.get("mandateId") == self.mandateId]
-            elif table == "users":
+            elif table_name == "UserInDB":
                 # For users table, users can only see their own record
                 filtered_records = [r for r in recordset if r.get("id") == self.userId]
             else:
@@ -90,32 +93,32 @@ class ComponentAccess:
             record_id = record.get("id")
 
             # Set access control flags based on user permissions
-            if table == "prompts":
+            if table_name == "Prompt":
                 record["_hideView"] = False  # Everyone can view
-                record["_hideEdit"] = not self.canModify("prompts", record_id)
-                record["_hideDelete"] = not self.canModify("prompts", record_id)
+                record["_hideEdit"] = not self.canModify(Prompt, record_id)
+                record["_hideDelete"] = not self.canModify(Prompt, record_id)
                 # Add attribute-level permissions for mandateId
                 if "mandateId" in record:
-                    record["_hideEdit_mandateId"] = not self.canModifyAttribute("prompts", "mandateId")
-            elif table == "files":
+                    record["_hideEdit_mandateId"] = not self.canModifyAttribute(Prompt, "mandateId")
+            elif table_name == "FileItem":
                 record["_hideView"] = False  # Everyone can view
-                record["_hideEdit"] = not self.canModify("files", record_id)
-                record["_hideDelete"] = not self.canModify("files", record_id)
-                record["_hideDownload"] = not self.canModify("files", record_id)
-            elif table == "workflows":
+                record["_hideEdit"] = not self.canModify(FileItem, record_id)
+                record["_hideDelete"] = not self.canModify(FileItem, record_id)
+                record["_hideDownload"] = not self.canModify(FileItem, record_id)
+            elif table_name == "ChatWorkflow":
                 record["_hideView"] = False  # Everyone can view
-                record["_hideEdit"] = not self.canModify("workflows", record_id)
-                record["_hideDelete"] = not self.canModify("workflows", record_id)
+                record["_hideEdit"] = not self.canModify(ChatWorkflow, record_id)
+                record["_hideDelete"] = not self.canModify(ChatWorkflow, record_id)
-            elif table == "workflowMessages":
+            elif table_name == "ChatMessage":
                 record["_hideView"] = False  # Everyone can view
-                record["_hideEdit"] = not self.canModify("workflows", record.get("workflowId"))
-                record["_hideDelete"] = not self.canModify("workflows", record.get("workflowId"))
+                record["_hideEdit"] = not self.canModify(ChatWorkflow, record.get("workflowId"))
+                record["_hideDelete"] = not self.canModify(ChatWorkflow, record.get("workflowId"))
-            elif table == "workflowLogs":
+            elif table_name == "ChatLog":
                 record["_hideView"] = False  # Everyone can view
-                record["_hideEdit"] = not self.canModify("workflows", record.get("workflowId"))
-                record["_hideDelete"] = not self.canModify("workflows", record.get("workflowId"))
+                record["_hideEdit"] = not self.canModify(ChatWorkflow, record.get("workflowId"))
+                record["_hideDelete"] = not self.canModify(ChatWorkflow, record.get("workflowId"))
-            elif table == "users":
+            elif table_name == "UserInDB":
                 # For users table, users can only modify their own connections
                 record["_hideView"] = False
                 record["_hideEdit"] = record_id != self.userId
@@ -128,17 +131,17 @@ class ComponentAccess:
             else:
                 # Default access control for other tables
                 record["_hideView"] = False
-                record["_hideEdit"] = not self.canModify(table, record_id)
-                record["_hideDelete"] = not self.canModify(table, record_id)
+                record["_hideEdit"] = not self.canModify(model_class, record_id)
+                record["_hideDelete"] = not self.canModify(model_class, record_id)
 
         return filtered_records
 
-    def canModify(self, table: str, recordId: Optional[int] = None) -> bool:
+    def canModify(self, model_class: type, recordId: Optional[int] = None) -> bool:
         """
         Checks if the current user can modify (create/update/delete) records in a table.
 
         Args:
-            table: Name of the table
+            model_class: Pydantic model class for the table
             recordId: Optional record ID for specific record check
 
         Returns:
@@ -153,14 +156,14 @@ class ComponentAccess:
         # For regular users and admins, check specific cases
         if recordId is not None:
             # Get the record to check ownership
-            records: List[Dict[str, Any]] = self.db.getRecordset(table, recordFilter={"id": recordId})
+            records: List[Dict[str, Any]] = self.db.getRecordset(model_class, recordFilter={"id": recordId})
 
             if not records:
                 return False
 
             record = records[0]
 
             # Special case for users table - users can modify their own connections
-            if table == "users":
+            if model_class.__name__ == "UserInDB":
                 if record.get("id") == self.userId:
                     return True
                 return False
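The UserInDB special case reduces to self-service ownership: a user record is modifiable only by the user it belongs to. In effect:

    # Effective rule for user records after the change (illustrative, not the diff's literal code):
    def can_modify_user(record: dict, current_user_id: str) -> bool:
        return record.get("id") == current_user_id  # users may edit only their own record
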
database: {str(e)}") @@ -132,7 +144,7 @@ class ComponentObjects: """Initializes standard prompts if they don't exist yet.""" try: # Check if any prompts exist - existingPrompts = self.db.getRecordset("prompts") + existingPrompts = self.db.getRecordset(Prompt) if existingPrompts: logger.info("Prompts already exist, skipping initialization") return @@ -142,7 +154,7 @@ class ComponentObjects: rootInterface = getRootInterface() # Get initial mandate ID through the root interface - mandateId = rootInterface.getInitialId("mandates") + mandateId = rootInterface.getInitialId(Mandate) if not mandateId: logger.error("No initial mandate ID found") return @@ -195,7 +207,7 @@ class ComponentObjects: # Create prompts for prompt in standardPrompts: - self.db.recordCreate("prompts", prompt.to_dict()) + self.db.recordCreate(Prompt, prompt) logger.info(f"Created standard prompt: {prompt.name}") # Restore original user context if it existed @@ -218,10 +230,10 @@ class ComponentObjects: self.access = None self.db.updateContext("") # Reset database context - def _uam(self, table: str, recordset: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + def _uam(self, model_class: type, recordset: List[Dict[str, Any]]) -> List[Dict[str, Any]]: """Delegate to access control module.""" # First apply access control - filteredRecords = self.access.uam(table, recordset) + filteredRecords = self.access.uam(model_class, recordset) # Then filter out database-specific fields cleanedRecords = [] @@ -232,19 +244,16 @@ class ComponentObjects: return cleanedRecords - def _canModify(self, table: str, recordId: Optional[str] = None) -> bool: + def _canModify(self, model_class: type, recordId: Optional[str] = None) -> bool: """Delegate to access control module.""" - return self.access.canModify(table, recordId) + return self.access.canModify(model_class, recordId) - def _clearTableCache(self, table: str) -> None: - """Clears the cache for a specific table to ensure fresh data.""" - self.db.clearTableCache(table) # Utilities - def getInitialId(self, table: str) -> Optional[str]: + def getInitialId(self, model_class: type) -> Optional[str]: """Returns the initial ID for a table.""" - return self.db.getInitialId(table) + return self.db.getInitialId(model_class) @@ -253,8 +262,8 @@ class ComponentObjects: def getAllPrompts(self) -> List[Prompt]: """Returns prompts based on user access level.""" try: - allPrompts = self.db.getRecordset("prompts") - filteredPrompts = self._uam("prompts", allPrompts) + allPrompts = self.db.getRecordset(Prompt) + filteredPrompts = self._uam(Prompt, allPrompts) # Convert to Prompt objects return [Prompt.from_dict(prompt) for prompt in filteredPrompts] @@ -265,25 +274,23 @@ class ComponentObjects: def getPrompt(self, promptId: str) -> Optional[Prompt]: """Returns a prompt by ID if user has access.""" - prompts = self.db.getRecordset("prompts", recordFilter={"id": promptId}) + prompts = self.db.getRecordset(Prompt, recordFilter={"id": promptId}) if not prompts: return None - filteredPrompts = self._uam("prompts", prompts) + filteredPrompts = self._uam(Prompt, prompts) return Prompt.from_dict(filteredPrompts[0]) if filteredPrompts else None def createPrompt(self, promptData: Dict[str, Any]) -> Dict[str, Any]: """Creates a new prompt if user has permission.""" - if not self._canModify("prompts"): + if not self._canModify(Prompt): raise PermissionError("No permission to create prompts") - # Create prompt record - createdRecord = self.db.recordCreate("prompts", promptData) + # Create prompt record + createdRecord = 
self.db.recordCreate(Prompt, promptData) if not createdRecord or not createdRecord.get("id"): raise ValueError("Failed to create prompt record") - # Clear cache to ensure fresh data - self._clearTableCache("prompts") return createdRecord @@ -296,10 +303,9 @@ class ComponentObjects: raise ValueError(f"Prompt {promptId} not found") # Update prompt record directly with the update data - self.db.recordModify("prompts", promptId, updateData) + self.db.recordModify(Prompt, promptId, updateData) # Clear cache to ensure fresh data - self._clearTableCache("prompts") # Get updated prompt updatedPrompt = self.getPrompt(promptId) @@ -319,14 +325,12 @@ class ComponentObjects: if not prompt: return False - if not self._canModify("prompts", promptId): + if not self._canModify(Prompt, promptId): raise PermissionError(f"No permission to delete prompt {promptId}") # Delete prompt - success = self.db.recordDelete("prompts", promptId) + success = self.db.recordDelete(Prompt, promptId) - # Clear cache to ensure fresh data - self._clearTableCache("prompts") return success @@ -337,12 +341,12 @@ class ComponentObjects: If fileName is provided, also checks for exact name+hash match. Only returns files the current user has access to.""" # First get all files with the hash - allFilesWithHash = self.db.getRecordset("files", recordFilter={ + allFilesWithHash = self.db.getRecordset(FileItem, recordFilter={ "fileHash": fileHash }) # Filter by user access using UAM - accessibleFiles = self._uam("files", allFilesWithHash) + accessibleFiles = self._uam(FileItem, allFilesWithHash) if not accessibleFiles: return None @@ -458,8 +462,8 @@ class ComponentObjects: def getAllFiles(self) -> List[FileItem]: """Returns files based on user access level.""" - allFiles = self.db.getRecordset("files") - filteredFiles = self._uam("files", allFiles) + allFiles = self.db.getRecordset(FileItem) + filteredFiles = self._uam(FileItem, allFiles) # Convert database records to FileItem instances fileItems = [] @@ -492,11 +496,11 @@ class ComponentObjects: def getFile(self, fileId: str) -> Optional[FileItem]: """Returns a file by ID if user has access.""" - files = self.db.getRecordset("files", recordFilter={"id": fileId}) + files = self.db.getRecordset(FileItem, recordFilter={"id": fileId}) if not files: return None - filteredFiles = self._uam("files", files) + filteredFiles = self._uam(FileItem, files) if not filteredFiles: return None @@ -524,7 +528,7 @@ class ComponentObjects: def _isfileNameUnique(self, fileName: str, excludeFileId: Optional[str] = None) -> bool: """Checks if a fileName is unique for the current user.""" # Get all files for current user - files = self.db.getRecordset("files", recordFilter={ + files = self.db.getRecordset(FileItem, recordFilter={ "_createdBy": self.currentUser.id }) @@ -556,7 +560,7 @@ class ComponentObjects: def createFile(self, name: str, mimeType: str, content: bytes) -> FileItem: """Creates a new file entry if user has permission. 
Computes fileHash and fileSize from content.""" import hashlib - if not self._canModify("files"): + if not self._canModify(FileItem): raise PermissionError("No permission to create files") # Ensure fileName is unique @@ -579,10 +583,8 @@ class ComponentObjects: ) # Store in database - self.db.recordCreate("files", fileItem.to_dict()) + self.db.recordCreate(FileItem, fileItem) - # Clear cache to ensure fresh data - self._clearTableCache("files") return fileItem @@ -593,7 +595,7 @@ class ComponentObjects: if not file: raise FileNotFoundError(f"File with ID {fileId} not found") - if not self._canModify("files", fileId): + if not self._canModify(FileItem, fileId): raise PermissionError(f"No permission to update file {fileId}") # If fileName is being updated, ensure it's unique @@ -601,10 +603,8 @@ class ComponentObjects: updateData["fileName"] = self._generateUniquefileName(updateData["fileName"], fileId) # Update file - success = self.db.recordModify("files", fileId, updateData) + success = self.db.recordModify(FileItem, fileId, updateData) - # Clear cache to ensure fresh data - self._clearTableCache("files") return success @@ -617,30 +617,29 @@ class ComponentObjects: if not file: raise FileNotFoundError(f"File with ID {fileId} not found") - if not self._canModify("files", fileId): + if not self._canModify(FileItem, fileId): raise PermissionError(f"No permission to delete file {fileId}") # Check for other references to this file (by hash) fileHash = file.fileHash if fileHash: - otherReferences = [f for f in self.db.getRecordset("files", recordFilter={"fileHash": fileHash}) + otherReferences = [f for f in self.db.getRecordset(FileItem, recordFilter={"fileHash": fileHash}) if f["id"] != fileId] # Only delete associated fileData if no other references exist if not otherReferences: try: - fileDataEntries = self.db.getRecordset("fileData", recordFilter={"id": fileId}) + fileDataEntries = self.db.getRecordset(FileData, recordFilter={"id": fileId}) if fileDataEntries: - self.db.recordDelete("fileData", fileId) + self.db.recordDelete(FileData, fileId) logger.debug(f"FileData for file {fileId} deleted") except Exception as e: logger.warning(f"Error deleting FileData for file {fileId}: {str(e)}") # Delete the FileItem entry - success = self.db.recordDelete("files", fileId) + success = self.db.recordDelete(FileItem, fileId) # Clear cache to ensure fresh data - self._clearTableCache("files") return success @@ -699,10 +698,9 @@ class ComponentObjects: "base64Encoded": base64Encoded } - self.db.recordCreate("fileData", fileDataObj) + self.db.recordCreate(FileData, fileDataObj) # Clear cache to ensure fresh data - self._clearTableCache("fileData") logger.debug(f"Successfully stored data for file {fileId} (base64Encoded: {base64Encoded})") return True @@ -720,7 +718,7 @@ class ComponentObjects: import base64 - fileDataEntries = self.db.getRecordset("fileData", recordFilter={"id": fileId}) + fileDataEntries = self.db.getRecordset(FileData, recordFilter={"id": fileId}) if not fileDataEntries: logger.warning(f"No data found for file ID {fileId}") return None @@ -820,7 +818,7 @@ class ComponentObjects: """Saves an uploaded file if user has permission.""" try: # Check file creation permission - if not self._canModify("files"): + if not self._canModify(FileItem): raise PermissionError("No permission to upload files") logger.debug(f"Starting upload process for file: {fileName}") diff --git a/modules/interfaces/interfaceTicketModel.py b/modules/interfaces/interfaceTicketModel.py new file mode 100644 index 
00000000..98329a7b --- /dev/null +++ b/modules/interfaces/interfaceTicketModel.py @@ -0,0 +1,26 @@ +"""Base class for ticket classes.""" + +from typing import Any, Dict +from pydantic import BaseModel, Field +from abc import ABC, abstractmethod + + +class TicketFieldAttribute(BaseModel): + field_name: str = Field(description="Human-readable field name") + field: str = Field(description="JIRA field ID/key") + + +class Task(BaseModel): + # A very flexible approach for now. Might want to be more strict in the future. + data: Dict[str, Any] = Field(default_factory=dict, description="Task data") + + +class TicketBase(ABC): + @abstractmethod + async def read_attributes(self) -> list[TicketFieldAttribute]: ... + + @abstractmethod + async def read_tasks(self, limit: int = 0) -> list[Task]: ... + + @abstractmethod + async def write_tasks(self, tasklist: list[Task]) -> None: ... diff --git a/modules/interfaces/interfaceTicketObjects.py b/modules/interfaces/interfaceTicketObjects.py new file mode 100644 index 00000000..991c9da0 --- /dev/null +++ b/modules/interfaces/interfaceTicketObjects.py @@ -0,0 +1,658 @@ +from dataclasses import dataclass +from io import BytesIO, StringIO +from typing import Any +import pandas as pd +from modules.shared.timezoneUtils import get_utc_now + +from modules.connectors.connectorSharepoint import ConnectorSharepoint + +from modules.interfaces.interfaceTicketModel import TicketBase, Task + + +@dataclass(slots=True) +class TicketSharepointSyncInterface: + connector_ticket: TicketBase + connector_sharepoint: ConnectorSharepoint + task_sync_definition: dict + sync_folder: str + sync_file: str + backup_folder: str + audit_folder: str + site_id: str # Keep for compatibility but not used with REST API + + @classmethod + async def create( + cls, + connector_ticket: TicketBase, + connector_sharepoint: ConnectorSharepoint, + task_sync_definition: dict, + sync_folder: str, + sync_file: str, + backup_folder: str, + audit_folder: str, + site_id: str, + ) -> "TicketSharepointSyncInterface": + return cls( + connector_ticket=connector_ticket, + connector_sharepoint=connector_sharepoint, + task_sync_definition=task_sync_definition, + sync_folder=sync_folder, + sync_file=sync_file, + backup_folder=backup_folder, + audit_folder=audit_folder, + site_id=site_id, + ) + + async def create_backup(self): + """Creates a backup of the current sync file in the backup folder.""" + timestamp = get_utc_now().strftime("%Y%m%d_%H%M%S") + backup_filename = f"backup_{timestamp}_{self.sync_file}" + + await self.connector_sharepoint.copy_file_async( + site_id=self.site_id, + source_folder=self.sync_folder, + source_file=self.sync_file, + dest_folder=self.backup_folder, + dest_file=backup_filename, + ) + + async def sync_from_jira_to_csv(self): + """Syncs tasks from JIRA to a CSV file in SharePoint.""" + start_time = get_utc_now() + audit_log = [] + + audit_log.append("=== JIRA TO CSV SYNC STARTED ===") + audit_log.append(f"Start Time: {start_time.strftime('%Y-%m-%d %H:%M:%S')}") + audit_log.append(f"Sync File: {self.sync_file}") + audit_log.append(f"Sync Folder: {self.sync_folder}") + audit_log.append("") + + try: + # 1. Read JIRA tickets + audit_log.append("Step 1: Reading JIRA tickets...") + tickets = await self.connector_ticket.read_tasks(limit=0) + audit_log.append(f"JIRA issues read: {len(tickets)}") + audit_log.append("") + + # 2. 
Transform tasks according to task_sync_definition + audit_log.append("Step 2: Transforming JIRA data...") + transformed_tasks = self._transform_tasks(tickets, include_put=True) + jira_data = [task.data for task in transformed_tasks] + audit_log.append(f"JIRA issues transformed: {len(jira_data)}") + audit_log.append("") + + # 3. Create JIRA export file in audit folder + audit_log.append("Step 3: Creating JIRA export file...") + try: + timestamp = get_utc_now().strftime("%Y%m%d_%H%M%S") + jira_export_filename = f"jira_export_{timestamp}.csv" + # Use default headers for JIRA export + jira_export_content = self._create_csv_content(jira_data, {"header1": "JIRA Export", "header2": "Raw Data"}) + await self.connector_sharepoint.upload_file( + site_id=self.site_id, + folder_path=self.audit_folder, + file_name=jira_export_filename, + content=jira_export_content, + ) + audit_log.append(f"JIRA export file created: {jira_export_filename}") + except Exception as e: + audit_log.append(f"Failed to create JIRA export file: {str(e)}") + audit_log.append("") + + # 4. Create backup of existing sync file (if it exists) + audit_log.append("Step 4: Creating backup...") + backup_created = False + try: + await self.create_backup() + backup_created = True + audit_log.append("Backup created successfully") + except Exception as e: + audit_log.append( + f"Backup creation failed (file might not exist): {str(e)}" + ) + audit_log.append("") + + # 5. Try to read existing CSV file from SharePoint + audit_log.append("Step 5: Reading existing CSV file...") + existing_data = [] + existing_file_found = False + existing_headers = {"header1": "", "header2": ""} + try: + file_path = f"{self.sync_folder}/{self.sync_file}" + csv_content = await self.connector_sharepoint.download_file_by_path( + site_id=self.site_id, file_path=file_path + ) + + # Read the first two lines to get headers + csv_lines = csv_content.decode('utf-8').split('\n') + if len(csv_lines) >= 2: + # Store the raw first two lines as headers (preserving original formatting) + existing_headers["header1"] = csv_lines[0].rstrip('\r\n') + existing_headers["header2"] = csv_lines[1].rstrip('\r\n') + + # Try to read with robust CSV parsing (skip first 2 rows) + df_existing = pd.read_csv( + BytesIO(csv_content), + skiprows=2, + quoting=1, # QUOTE_ALL + escapechar='\\', + on_bad_lines='skip', # Skip malformed lines + engine='python' # More robust parsing + ) + existing_data = df_existing.to_dict("records") + existing_file_found = True + audit_log.append( + f"Existing CSV file found with {len(existing_data)} records" + ) + audit_log.append(f"Preserved headers: Header1='{existing_headers['header1']}', Header2='{existing_headers['header2']}'") + except Exception as e: + audit_log.append(f"No existing CSV file found or read error: {str(e)}") + audit_log.append("") + + # 6. 
Merge JIRA data with existing data and track changes + audit_log.append("Step 6: Merging JIRA data with existing data...") + merged_data, change_details = self._merge_jira_with_existing_detailed( + jira_data, existing_data + ) + + # Log detailed changes + audit_log.append(f"Total records after merge: {len(merged_data)}") + audit_log.append(f"Records updated: {change_details['updated']}") + audit_log.append(f"Records added: {change_details['added']}") + audit_log.append(f"Records unchanged: {change_details['unchanged']}") + audit_log.append("") + + # Log individual changes + if change_details["changes"]: + audit_log.append("DETAILED CHANGES:") + for change in change_details["changes"]: + audit_log.append(f"- {change}") + audit_log.append("") + + # 7. Create CSV with 4-row structure and write to SharePoint + audit_log.append("Step 7: Writing updated CSV to SharePoint...") + csv_content = self._create_csv_content(merged_data, existing_headers) + await self.connector_sharepoint.upload_file( + site_id=self.site_id, + folder_path=self.sync_folder, + file_name=self.sync_file, + content=csv_content, + ) + audit_log.append("CSV file successfully written to SharePoint") + audit_log.append("") + + # Success summary + end_time = get_utc_now() + duration = (end_time - start_time).total_seconds() + audit_log.append("=== SYNC COMPLETED SUCCESSFULLY ===") + audit_log.append(f"End Time: {end_time.strftime('%Y-%m-%d %H:%M:%S')}") + audit_log.append(f"Duration: {duration:.2f} seconds") + audit_log.append(f"Total JIRA issues processed: {len(jira_data)}") + audit_log.append(f"Total records in final CSV: {len(merged_data)}") + + except Exception as e: + # Error handling + end_time = get_utc_now() + duration = (end_time - start_time).total_seconds() + audit_log.append("") + audit_log.append("=== SYNC FAILED ===") + audit_log.append(f"Error Time: {end_time.strftime('%Y-%m-%d %H:%M:%S')}") + audit_log.append(f"Duration before failure: {duration:.2f} seconds") + audit_log.append(f"Error: {str(e)}") + raise + finally: + # Write audit log to SharePoint + await self._write_audit_log(audit_log, "jira_to_csv") + + async def sync_from_csv_to_jira(self): + """Syncs tasks from a CSV file in SharePoint to JIRA.""" + start_time = get_utc_now() + audit_log = [] + + audit_log.append("=== CSV TO JIRA SYNC STARTED ===") + audit_log.append(f"Start Time: {start_time.strftime('%Y-%m-%d %H:%M:%S')}") + audit_log.append(f"Sync File: {self.sync_file}") + audit_log.append(f"Sync Folder: {self.sync_folder}") + audit_log.append("") + + try: + # 1. Read CSV file from SharePoint + audit_log.append("Step 1: Reading CSV file from SharePoint...") + try: + file_path = f"{self.sync_folder}/{self.sync_file}" + csv_content = await self.connector_sharepoint.download_file_by_path( + site_id=self.site_id, file_path=file_path + ) + # Try to read with robust CSV parsing + df = pd.read_csv( + BytesIO(csv_content), + skiprows=2, + quoting=1, # QUOTE_ALL + escapechar='\\', + on_bad_lines='skip', # Skip malformed lines + engine='python' # More robust parsing + ) + csv_data = df.to_dict("records") + audit_log.append( + f"CSV file read successfully with {len(csv_data)} records" + ) + except Exception as e: + audit_log.append(f"Failed to read CSV file: {str(e)}") + audit_log.append("CSV to JIRA sync aborted - no file to process") + return + audit_log.append("") + + # 2. 
Read current JIRA data for comparison + audit_log.append("Step 2: Reading current JIRA data for comparison...") + try: + current_jira_tasks = await self.connector_ticket.read_tasks(limit=0) + current_jira_data = self._transform_tasks( + current_jira_tasks, include_put=True + ) + jira_lookup = { + task.data.get("ID"): task.data for task in current_jira_data + } + audit_log.append(f"Current JIRA data read: {len(jira_lookup)} tasks") + except Exception as e: + audit_log.append(f"Failed to read current JIRA data: {str(e)}") + raise + audit_log.append("") + + # 3. Detect actual changes in "put" fields + audit_log.append("Step 3: Detecting changes in 'put' fields...") + actual_changes = {} + records_with_changes = 0 + total_changes = 0 + + for row in csv_data: + task_id = row.get("ID") + if not task_id or task_id not in jira_lookup: + continue + + current_jira_task = jira_lookup[task_id] + task_changes = {} + + for field_name, field_config in self.task_sync_definition.items(): + if field_config[0] == "put": # Only process "put" fields + csv_value = row.get(field_name, "") + jira_value = current_jira_task.get(field_name, "") + + # Convert None to empty string for comparison + csv_value = "" if csv_value is None else str(csv_value).strip() + jira_value = ( + "" if jira_value is None else str(jira_value).strip() + ) + + # Include if values are different (allow empty strings to clear fields like the reference does) + if csv_value != jira_value: + task_changes[field_name] = csv_value + + if task_changes: + actual_changes[task_id] = task_changes + records_with_changes += 1 + total_changes += len(task_changes) + + audit_log.append(f"Records with actual changes: {records_with_changes}") + audit_log.append(f"Total field changes detected: {total_changes}") + audit_log.append("") + + # Log detailed changes + if actual_changes: + audit_log.append("DETAILED CHANGES TO APPLY TO JIRA:") + for task_id, changes in actual_changes.items(): + change_list = [ + f"{field}: '{value}'" for field, value in changes.items() + ] + audit_log.append(f"- Task ID {task_id}: {', '.join(change_list)}") + audit_log.append("") + + # 4. 
Update JIRA tasks with actual changes + if actual_changes: + audit_log.append("Step 4: Updating JIRA tasks...") + + # Convert to Task objects for the connector + tasks_to_update = [] + for task_id, changes in actual_changes.items(): + # Create task data structure expected by JIRA connector + # Build the nested fields structure that JIRA expects + fields = {} + for field_name, new_value in changes.items(): + # Map back to JIRA field structure using task_sync_definition + field_config = self.task_sync_definition[field_name] + field_path = field_config[1] + + # Extract the JIRA field ID from the path + # For "put" fields, the path is like ['fields', 'customfield_10067'] + if len(field_path) >= 2 and field_path[0] == "fields": + jira_field_id = field_path[1] + fields[jira_field_id] = new_value + + if fields: + task_data = {"ID": task_id, "fields": fields} + task = Task(data=task_data) + tasks_to_update.append(task) + + # Write tasks back to JIRA + try: + await self.connector_ticket.write_tasks(tasks_to_update) + audit_log.append( + f"Successfully updated {len(tasks_to_update)} JIRA tasks" + ) + except Exception as e: + audit_log.append(f"Failed to update JIRA tasks: {str(e)}") + raise + else: + audit_log.append("Step 4: No changes to apply to JIRA") + audit_log.append("") + + # Success summary + end_time = get_utc_now() + duration = (end_time - start_time).total_seconds() + audit_log.append("=== SYNC COMPLETED SUCCESSFULLY ===") + audit_log.append(f"End Time: {end_time.strftime('%Y-%m-%d %H:%M:%S')}") + audit_log.append(f"Duration: {duration:.2f} seconds") + audit_log.append(f"Total CSV records processed: {len(csv_data)}") + audit_log.append(f"Records with actual changes: {records_with_changes}") + audit_log.append(f"JIRA tasks updated: {len(actual_changes)}") + + except Exception as e: + # Error handling + end_time = get_utc_now() + duration = (end_time - start_time).total_seconds() + audit_log.append("") + audit_log.append("=== SYNC FAILED ===") + audit_log.append(f"Error Time: {end_time.strftime('%Y-%m-%d %H:%M:%S')}") + audit_log.append(f"Duration before failure: {duration:.2f} seconds") + audit_log.append(f"Error: {str(e)}") + raise + finally: + # Write audit log to SharePoint + await self._write_audit_log(audit_log, "csv_to_jira") + + def _transform_tasks( + self, tasks: list[Task], include_put: bool = False + ) -> list[Task]: + """Transforms tasks according to the task_sync_definition.""" + transformed_tasks = [] + + for task in tasks: + transformed_data = {} + + # Process each field in the sync definition + for field_name, field_config in self.task_sync_definition.items(): + direction = field_config[0] # "get" or "put" + field_path = field_config[1] # List of keys to navigate + + # Get the right fields + if direction == "get" or include_put: + # Extract value using the field path + value = self._extract_field_value(task.data, field_path) + transformed_data[field_name] = value + + # Create new Task with transformed data + transformed_task = Task(data=transformed_data) + transformed_tasks.append(transformed_task) + + return transformed_tasks + + def _extract_field_value(self, issue_data: dict, field_path: list[str]) -> Any: + """Extract field value from JIRA issue data using field path.""" + value = issue_data + try: + for key in field_path: + if value is not None: + value = value[key] + + if value is None: + return None + + # Handle complex objects that have a 'value' field (like custom field options) + if isinstance(value, dict) and "value" in value: + value = value["value"] + # Handle 
lists of objects with 'value' fields + elif ( + isinstance(value, list) + and len(value) > 0 + and isinstance(value[0], dict) + and "value" in value[0] + ): + value = value[0]["value"] + + return value + except (KeyError, TypeError): + return None + + def _merge_jira_with_existing( + self, jira_data: list[dict], existing_data: list[dict] + ) -> list[dict]: + """Merge JIRA data with existing CSV data, updating only 'get' fields.""" + # Create a lookup for existing data by ID + existing_lookup = {row.get("ID"): row for row in existing_data if row.get("ID")} + + merged_data = [] + for jira_row in jira_data: + jira_id = jira_row.get("ID") + if jira_id and jira_id in existing_lookup: + # Update existing row with JIRA data (only 'get' fields) + existing_row = existing_lookup[jira_id].copy() + for field_name, field_config in self.task_sync_definition.items(): + if field_config[0] == "get": # Only update 'get' fields + existing_row[field_name] = jira_row.get(field_name) + merged_data.append(existing_row) + # Remove from lookup to track processed items + del existing_lookup[jira_id] + else: + # New row from JIRA + merged_data.append(jira_row) + + # Add any remaining existing rows that weren't in JIRA data + merged_data.extend(existing_lookup.values()) + + return merged_data + + def _merge_jira_with_existing_detailed( + self, jira_data: list[dict], existing_data: list[dict] + ) -> tuple[list[dict], dict]: + """Merge JIRA data with existing CSV data and track detailed changes.""" + # Create a lookup for existing data by ID + existing_lookup = {row.get("ID"): row for row in existing_data if row.get("ID")} + + merged_data = [] + changes = [] + updated_count = 0 + added_count = 0 + unchanged_count = 0 + + for jira_row in jira_data: + jira_id = jira_row.get("ID") + if jira_id and jira_id in existing_lookup: + # Update existing row with JIRA data (only 'get' fields) + existing_row = existing_lookup[jira_id].copy() + row_changes = [] + + for field_name, field_config in self.task_sync_definition.items(): + if field_config[0] == "get": # Only update 'get' fields + old_value = existing_row.get(field_name, "") + new_value = jira_row.get(field_name, "") + + # Convert None to empty string for comparison + old_value = "" if old_value is None else str(old_value) + new_value = "" if new_value is None else str(new_value) + + if old_value != new_value: + row_changes.append( + f"{field_name}: '{old_value}' → '{new_value}'" + ) + + existing_row[field_name] = jira_row.get(field_name) + + merged_data.append(existing_row) + + if row_changes: + updated_count += 1 + changes.append( + f"Row ID {jira_id} updated: {', '.join(row_changes)}" + ) + else: + unchanged_count += 1 + + # Remove from lookup to track processed items + del existing_lookup[jira_id] + else: + # New row from JIRA + merged_data.append(jira_row) + added_count += 1 + changes.append(f"Row ID {jira_id} added as new record") + + # Add any remaining existing rows that weren't in JIRA data + for remaining_row in existing_lookup.values(): + merged_data.append(remaining_row) + unchanged_count += 1 + + change_details = { + "updated": updated_count, + "added": added_count, + "unchanged": unchanged_count, + "changes": changes, + } + + return merged_data, change_details + + async def _write_audit_log(self, audit_log: list[str], operation_type: str): + """Write audit log to SharePoint.""" + try: + timestamp = get_utc_now().strftime("%Y%m%d_%H%M%S") + audit_filename = f"audit_{operation_type}_{timestamp}.log" + + # Convert audit log to bytes + audit_content = 
"\n".join(audit_log).encode("utf-8") + + # Debug logging + import logging + logger = logging.getLogger(__name__) + logger.debug(f"Writing audit log to folder: {self.audit_folder}, file: {audit_filename}") + + # Write to SharePoint + await self.connector_sharepoint.upload_file( + site_id=self.site_id, + folder_path=self.audit_folder, + file_name=audit_filename, + content=audit_content, + ) + logger.debug("Audit log written successfully") + except Exception as e: + # If audit logging fails, we don't want to break the main sync process + # Just log the error (this could be enhanced with fallback logging) + import logging + logger = logging.getLogger(__name__) + logger.warning(f"Failed to write audit log: {str(e)}") + logger.warning(f"Audit folder: {self.audit_folder}") + logger.warning(f"Operation type: {operation_type}") + import traceback + logger.warning(f"Traceback: {traceback.format_exc()}") + + def _create_csv_content(self, data: list[dict], existing_headers: dict = None) -> bytes: + """Create CSV content with 4-row structure matching reference code.""" + # Get current timestamp for header + timestamp = get_utc_now().strftime("%Y-%m-%d %H:%M:%S UTC") + + # Use existing headers if provided, otherwise use defaults + if existing_headers is None: + existing_headers = {"header1": "Header 1", "header2": "Header 2"} + + if not data: + # Build an empty table with the expected columns from schema + cols = list(self.task_sync_definition.keys()) + + df = pd.DataFrame(columns=cols) + + # Parse existing headers to extract individual columns + import csv as csv_module + header1_text = existing_headers.get("header1", "Header 1") + header2_text = existing_headers.get("header2", "Header 2") + + # Parse the existing header rows + header1_reader = csv_module.reader([header1_text]) + header2_reader = csv_module.reader([header2_text]) + header1_row = next(header1_reader, []) + header2_row = next(header2_reader, []) + + # Row 1: Use existing header1 or default + if len(header1_row) >= len(cols): + header_row1_data = header1_row[:len(cols)] + else: + header_row1_data = header1_row + [""] * (len(cols) - len(header1_row)) + header_row1 = pd.DataFrame([header_row1_data], columns=cols) + + # Row 2: Use existing header2 and add timestamp to second column + if len(header2_row) >= len(cols): + header_row2_data = header2_row[:len(cols)] + else: + header_row2_data = header2_row + [""] * (len(cols) - len(header2_row)) + if len(header_row2_data) > 1: + header_row2_data[1] = timestamp + header_row2 = pd.DataFrame([header_row2_data], columns=cols) + + # Row 3: table headers + table_headers = pd.DataFrame([cols], columns=cols) + + final_df = pd.concat( + [header_row1, header_row2, table_headers, df], ignore_index=True + ) + csv_text = StringIO() + final_df.to_csv(csv_text, index=False, header=False, quoting=1, escapechar='\\') + return csv_text.getvalue().encode("utf-8") + + # Create DataFrame from data + df = pd.DataFrame(data) + + # Force all columns to be object (string) type to preserve empty cells + for column in df.columns: + df[column] = df[column].astype("object") + df[column] = df[column].fillna("") + + # Clean data: replace actual line breaks with \n and escape quotes + for column in df.columns: + df[column] = df[column].astype(str).str.replace('\n', '\\n', regex=False) + df[column] = df[column].str.replace('"', '""', regex=False) + + # Create the 4-row structure + # Parse existing headers to extract individual columns + import csv as csv_module + header1_text = existing_headers.get("header1", "Header 1") + 
header2_text = existing_headers.get("header2", "Header 2") + + # Parse the existing header rows + header1_reader = csv_module.reader([header1_text]) + header2_reader = csv_module.reader([header2_text]) + header1_row = next(header1_reader, []) + header2_row = next(header2_reader, []) + + # Row 1: Use existing header1 or default + if len(header1_row) >= len(df.columns): + header_row1_data = header1_row[:len(df.columns)] + else: + header_row1_data = header1_row + [""] * (len(df.columns) - len(header1_row)) + header_row1 = pd.DataFrame([header_row1_data], columns=df.columns) + + # Row 2: Use existing header2 and add timestamp to second column + if len(header2_row) >= len(df.columns): + header_row2_data = header2_row[:len(df.columns)] + else: + header_row2_data = header2_row + [""] * (len(df.columns) - len(header2_row)) + if len(header_row2_data) > 1: + header_row2_data[1] = timestamp + header_row2 = pd.DataFrame([header_row2_data], columns=df.columns) + + # Row 3: Table headers (column names) + table_headers = pd.DataFrame([df.columns.tolist()], columns=df.columns) + + # Concatenate all rows: header1 + header2 + table_headers + data + final_df = pd.concat( + [header_row1, header_row2, table_headers, df], ignore_index=True + ) + + # Convert to CSV bytes with proper quoting for fields containing special characters + csv_text = StringIO() + final_df.to_csv(csv_text, index=False, header=False, quoting=1, escapechar='\\') + return csv_text.getvalue().encode("utf-8") diff --git a/modules/routes/routeDataConnections.py b/modules/routes/routeDataConnections.py index cf861e85..01452206 100644 --- a/modules/routes/routeDataConnections.py +++ b/modules/routes/routeDataConnections.py @@ -39,7 +39,7 @@ def get_token_status_for_connection(interface, connection_id: str) -> tuple[str, try: # Query tokens table for the latest token for this connection tokens = interface.db.getRecordset( - table="tokens", + Token, recordFilter={"connectionId": connection_id} ) @@ -93,9 +93,6 @@ async def get_connections( try: interface = getInterface(currentUser) - # Clear connections cache to ensure fresh data - interface.db.clearTableCache("connections") - # SECURITY FIX: All users (including admins) can only see their own connections # This prevents admin from seeing other users' connections and causing confusion connections = interface.getUserConnections(currentUser.id) @@ -179,10 +176,8 @@ async def create_connection( ) # Save connection record - models now handle timestamp serialization automatically - interface.db.recordModify("connections", connection.id, connection.to_dict()) + interface.db.recordModify(UserConnection, connection.id, connection.to_dict()) - # Clear cache to ensure fresh data - interface.db.clearTableCache("connections") return connection @@ -235,10 +230,8 @@ async def update_connection( connection.lastChecked = get_utc_timestamp() # Update connection - models now handle timestamp serialization automatically - interface.db.recordModify("connections", connectionId, connection.to_dict()) + interface.db.recordModify(UserConnection, connectionId, connection.to_dict()) - # Clear cache to ensure fresh data - interface.db.clearTableCache("connections") # Get token status for the updated connection token_status, token_expires_at = get_token_status_for_connection(interface, connectionId) @@ -372,10 +365,8 @@ async def disconnect_service( connection.lastChecked = get_utc_timestamp() # Update connection record - models now handle timestamp serialization automatically - interface.db.recordModify("connections", 
connectionId, connection.to_dict()) + interface.db.recordModify(UserConnection, connectionId, connection.to_dict()) - # Clear cache to ensure fresh data - interface.db.clearTableCache("connections") return {"message": "Service disconnected successfully"} diff --git a/modules/routes/routeSecurityGoogle.py b/modules/routes/routeSecurityGoogle.py index d3921b62..944f7c0f 100644 --- a/modules/routes/routeSecurityGoogle.py +++ b/modules/routes/routeSecurityGoogle.py @@ -173,7 +173,8 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse rootInterface = getRootInterface() # Prefer connection flow reuse; fallback to user access token if connection_id: - existing_tokens = rootInterface.db.getRecordset("tokens", recordFilter={ + from modules.interfaces.interfaceAppModel import Token + existing_tokens = rootInterface.db.getRecordset(Token, recordFilter={ "connectionId": connection_id, "authority": AuthAuthority.GOOGLE }) @@ -182,7 +183,7 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse existing_tokens.sort(key=lambda x: x.get("createdAt", 0), reverse=True) token_response["refresh_token"] = existing_tokens[0].get("tokenRefresh", "") if not token_response.get("refresh_token") and user_id: - existing_access_tokens = rootInterface.db.getRecordset("tokens", recordFilter={ + existing_access_tokens = rootInterface.db.getRecordset(Token, recordFilter={ "userId": user_id, "connectionId": None, "authority": AuthAuthority.GOOGLE @@ -358,10 +359,9 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse connection.externalEmail = user_info.get("email") # Update connection record directly - rootInterface.db.recordModify("connections", connection_id, connection.to_dict()) + from modules.interfaces.interfaceAppModel import UserConnection + rootInterface.db.recordModify(UserConnection, connection_id, connection.to_dict()) - # Clear cache to ensure fresh data - rootInterface.db.clearTableCache("connections") # Save token token = Token( @@ -543,7 +543,7 @@ async def refresh_token( google_connection.status = ConnectionStatus.ACTIVE # Save updated connection - appInterface.db.recordModify("connections", google_connection.id, google_connection.to_dict()) + appInterface.db.recordModify(UserConnection, google_connection.id, google_connection.to_dict()) # Calculate time until expiration current_time = get_utc_timestamp() diff --git a/modules/routes/routeSecurityLocal.py b/modules/routes/routeSecurityLocal.py index 89450a9d..03bdb566 100644 --- a/modules/routes/routeSecurityLocal.py +++ b/modules/routes/routeSecurityLocal.py @@ -52,7 +52,8 @@ async def login( rootInterface = getRootInterface() # Get default mandate ID - defaultMandateId = rootInterface.getInitialId("mandates") + from modules.interfaces.interfaceAppModel import Mandate + defaultMandateId = rootInterface.getInitialId(Mandate) if not defaultMandateId: raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, @@ -146,7 +147,8 @@ async def register_user( appInterface = getRootInterface() # Get default mandate ID - defaultMandateId = appInterface.getInitialId("mandates") + from modules.interfaces.interfaceAppModel import Mandate + defaultMandateId = appInterface.getInitialId(Mandate) if not defaultMandateId: raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, diff --git a/modules/routes/routeSecurityMsft.py b/modules/routes/routeSecurityMsft.py index 5480586d..1d4d8f10 100644 --- a/modules/routes/routeSecurityMsft.py +++ 
b/modules/routes/routeSecurityMsft.py @@ -309,10 +309,8 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse connection.externalEmail = user_info.get("mail") # Update connection record directly - rootInterface.db.recordModify("connections", connection_id, connection.to_dict()) + rootInterface.db.recordModify(UserConnection, connection_id, connection.to_dict()) - # Clear cache to ensure fresh data - rootInterface.db.clearTableCache("connections") # Save token @@ -524,7 +522,7 @@ async def refresh_token( msft_connection.status = ConnectionStatus.ACTIVE # Save updated connection - appInterface.db.recordModify("connections", msft_connection.id, msft_connection.to_dict()) + appInterface.db.recordModify(UserConnection, msft_connection.id, msft_connection.to_dict()) # Calculate time until expiration current_time = get_utc_timestamp() diff --git a/modules/routes/routeWorkflows.py b/modules/routes/routeWorkflows.py index 565052ce..fe70e347 100644 --- a/modules/routes/routeWorkflows.py +++ b/modules/routes/routeWorkflows.py @@ -57,31 +57,18 @@ async def get_workflows( """Get all workflows for the current user.""" try: appInterface = getInterface(currentUser) - workflows_data = appInterface.getAllWorkflows() + workflows_data = appInterface.getWorkflows() - # Convert raw dictionaries to ChatWorkflow objects + # Convert raw dictionaries to ChatWorkflow objects by loading each workflow properly workflows = [] for workflow_data in workflows_data: try: - workflow = ChatWorkflow( - id=workflow_data["id"], - status=workflow_data.get("status", "running"), - name=workflow_data.get("name"), - currentRound=workflow_data.get("currentRound", 0), # Default value - currentTask=workflow_data.get("currentTask", 0), - currentAction=workflow_data.get("currentAction", 0), - totalTasks=workflow_data.get("totalTasks", 0), - totalActions=workflow_data.get("totalActions", 0), - lastActivity=workflow_data.get("lastActivity", get_utc_timestamp()), - startedAt=workflow_data.get("startedAt", get_utc_timestamp()), - logs=[ChatLog(**log) for log in workflow_data.get("logs", [])], - messages=[ChatMessage(**msg) for msg in workflow_data.get("messages", [])], - stats=ChatStat(**workflow_data.get("stats", {})) if workflow_data.get("stats") else None, - mandateId=workflow_data.get("mandateId", currentUser.mandateId or "") - ) - workflows.append(workflow) + # Load the workflow properly using the same method as individual workflow endpoint + workflow = appInterface.getWorkflow(workflow_data["id"]) + if workflow: + workflows.append(workflow) except Exception as e: - logger.warning(f"Error converting workflow data to ChatWorkflow object: {str(e)}") + logger.warning(f"Error loading workflow {workflow_data.get('id', 'unknown')}: {str(e)}") # Skip invalid workflows instead of failing the entire request continue @@ -136,7 +123,7 @@ async def update_workflow( workflowInterface = getInterface(currentUser) # Get raw workflow data from database to check permissions - workflows = workflowInterface.db.getRecordset("workflows", recordFilter={"id": workflowId}) + workflows = workflowInterface.db.getRecordset(ChatWorkflow, recordFilter={"id": workflowId}) if not workflows: raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, @@ -225,7 +212,7 @@ async def get_workflow_logs( ) # Get all logs - allLogs = interfaceChat.getWorkflowLogs(workflowId) + allLogs = interfaceChat.getLogs(workflowId) # Apply selective data transfer if logId is provided if logId: @@ -268,7 +255,7 @@ async def get_workflow_messages( ) # Get all 
messages - allMessages = interfaceChat.getWorkflowMessages(workflowId) + allMessages = interfaceChat.getMessages(workflowId) # Apply selective data transfer if messageId is provided if messageId: @@ -276,7 +263,8 @@ async def get_workflow_messages( messageIndex = next((i for i, msg in enumerate(allMessages) if msg.id == messageId), -1) if messageIndex >= 0: # Return only messages after the specified message - return allMessages[messageIndex + 1:] + filteredMessages = allMessages[messageIndex + 1:] + return filteredMessages return allMessages except HTTPException: @@ -356,7 +344,7 @@ async def delete_workflow( interfaceChat = getServiceChat(currentUser) # Get raw workflow data from database to check permissions - workflows = interfaceChat.db.getRecordset("workflows", recordFilter={"id": workflowId}) + workflows = interfaceChat.db.getRecordset(ChatWorkflow, recordFilter={"id": workflowId}) if not workflows: raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, @@ -395,6 +383,45 @@ async def delete_workflow( ) +# Unified Chat Data Endpoint for Polling +@router.get("/{workflowId}/chatData") +@limiter.limit("120/minute") +async def get_workflow_chat_data( + request: Request, + workflowId: str = Path(..., description="ID of the workflow"), + afterTimestamp: Optional[float] = Query(None, description="Unix timestamp to get data after"), + currentUser: User = Depends(getCurrentUser) +) -> Dict[str, Any]: + """ + Get unified chat data (messages, logs, stats) for a workflow with timestamp-based selective data transfer. + Returns all data types in chronological order based on _createdAt timestamp. + """ + try: + # Get service center + interfaceChat = getServiceChat(currentUser) + + # Verify workflow exists + workflow = interfaceChat.getWorkflow(workflowId) + if not workflow: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f"Workflow with ID {workflowId} not found" + ) + + # Get unified chat data using the new method + chatData = interfaceChat.getUnifiedChatData(workflowId, afterTimestamp) + + return chatData + + except HTTPException: + raise + except Exception as e: + logger.error(f"Error getting unified chat data: {str(e)}", exc_info=True) + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=f"Error getting unified chat data: {str(e)}" + ) + # Document Management Endpoints @router.delete("/{workflowId}/messages/{messageId}", response_model=Dict[str, Any]) @@ -419,7 +446,7 @@ async def delete_workflow_message( ) # Delete the message - success = interfaceChat.deleteWorkflowMessage(workflowId, messageId) + success = interfaceChat.deleteMessage(workflowId, messageId) if not success: raise HTTPException( diff --git a/modules/shared/timezoneUtils.py b/modules/shared/timezoneUtils.py index a9d2260d..93011060 100644 --- a/modules/shared/timezoneUtils.py +++ b/modules/shared/timezoneUtils.py @@ -5,6 +5,7 @@ Ensures all timestamps are properly handled as UTC. from datetime import datetime, timezone, timedelta from typing import Union, Optional +import time def get_utc_now() -> datetime: """ @@ -17,12 +18,12 @@ def get_utc_now() -> datetime: def get_utc_timestamp() -> float: """ - Get current UTC timestamp (seconds since epoch). + Get current UTC timestamp (seconds since epoch with millisecond precision). 
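+
+    Example (illustrative only; the exact value depends on the current time):
+        >>> ts = get_utc_timestamp()
+        >>> isinstance(ts, float)
+        True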
Returns: - float: Current UTC timestamp in seconds + float: Current UTC timestamp in seconds with millisecond precision """ - return datetime.now(timezone.utc).timestamp() + return time.time() def to_utc_timestamp(dt: datetime) -> float: """ diff --git a/modules/workflow/managerSyncDelta.py b/modules/workflow/managerSyncDelta.py new file mode 100644 index 00000000..b66a7488 --- /dev/null +++ b/modules/workflow/managerSyncDelta.py @@ -0,0 +1,231 @@ +""" +Delta Group JIRA-SharePoint Sync Manager + +This module handles the synchronization of JIRA tickets to SharePoint using the new +Graph API-based connector architecture. +""" + +import logging +import csv +import io +from datetime import datetime, UTC +from typing import Dict, Any, List, Optional +from modules.connectors.connectorSharepoint import ConnectorSharepoint +from modules.connectors.connectorTicketJira import ConnectorTicketJira +from modules.interfaces.interfaceAppObjects import getRootInterface +from modules.interfaces.interfaceAppModel import UserInDB +from modules.interfaces.interfaceTicketObjects import TicketSharepointSyncInterface +from modules.shared.timezoneUtils import get_utc_timestamp +from modules.shared.configuration import APP_CONFIG + +logger = logging.getLogger(__name__) + +# Get environment type from configuration +APP_ENV_TYPE = APP_CONFIG.get("APP_ENV_TYPE", "dev") + + +class ManagerSyncDelta: + """Manages JIRA to SharePoint synchronization for Delta Group.""" + #SHAREPOINT_SITE_ID = "02830618-4029-4dc8-8d3d-f5168f282249" + #SHAREPOINT_SITE_NAME = "SteeringBPM" + #SHAREPOINT_MAIN_FOLDER = "/sites/SteeringBPM/Freigegebene Dokumente/General/50 Docs hosted by SELISE" + #SHAREPOINT_BACKUP_FOLDER = "/sites/SteeringBPM/Freigegebene Dokumente/General/50 Docs hosted by SELISE/SyncHistory" + #SHAREPOINT_AUDIT_FOLDER = "/sites/SteeringBPM/Freigegebene Dokumente/General/50 Docs hosted by SELISE/SyncHistory" + + # SharePoint site constants using hostname + site path (resolve real site ID at runtime) + SHAREPOINT_HOSTNAME = "pcuster.sharepoint.com" + SHAREPOINT_SITE_PATH = "KM.DELTAG.20968511411" + SHAREPOINT_SITE_NAME = "KM.DELTAG.20968511411" + # Drive-relative (document library) paths, not server-relative "/sites/..." 
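+    # For illustration (hypothetical IDs): a drive-relative folder such as
+    # "1_Arbeitsbereich/SyncHistory" is addressed in Microsoft Graph as
+    #   GET /sites/{site-id}/drive/root:/1_Arbeitsbereich/SyncHistory:/children
+    # whereas a server-relative path would start with "/sites/<site-name>/...".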
+    # Note: Default library name is "Shared Documents" in Graph
+    SHAREPOINT_MAIN_FOLDER = "1_Arbeitsbereich"
+    SHAREPOINT_BACKUP_FOLDER = "1_Arbeitsbereich/SyncHistory"
+    SHAREPOINT_AUDIT_FOLDER = "1_Arbeitsbereich/SyncHistory"
+
+    # Fixed filename for the main CSV file (like original synchronizer)
+    SYNC_FILE_NAME = "DELTAgroup x SELISE Ticket Exchange List.csv"
+
+    # JIRA connection parameters (hardcoded for Delta Group; API token redacted)
+    JIRA_USERNAME = "p.motsch@valueon.ch"
+    JIRA_API_TOKEN = "***REDACTED***"
+    JIRA_URL = "https://deltasecurity.atlassian.net"
+    JIRA_PROJECT_CODE = "DCS"
+    JIRA_ISSUE_TYPE = "Task"
+
+    # Task sync definition for field mapping (like original synchronizer)
+    TASK_SYNC_DEFINITION = {
+        "ID": ["get", ["key"]],
+        "Summary": ["get", ["fields", "summary"]],
+        "Status": ["get", ["fields", "status", "name"]],
+        "Assignee": ["get", ["fields", "assignee", "displayName"]],
+        "Reporter": ["get", ["fields", "reporter", "displayName"]],
+        "Created": ["get", ["fields", "created"]],
+        "Updated": ["get", ["fields", "updated"]],
+        "Priority": ["get", ["fields", "priority", "name"]],
+        "IssueType": ["get", ["fields", "issuetype", "name"]],
+        "Project": ["get", ["fields", "project", "name"]],
+        "Description": ["get", ["fields", "description"]],
+    }
+
+    def __init__(self):
+        """Initialize the sync manager with hardcoded Delta Group credentials."""
+        self.root_interface = getRootInterface()
+        self.jira_connector = None
+        self.sharepoint_connector = None
+        self.target_site = None
+
+    async def initialize_connectors(self) -> bool:
+        """Initialize JIRA and SharePoint connectors."""
+        try:
+            logger.info("Initializing JIRA connector with hardcoded credentials")
+
+            # Initialize JIRA connector using class constants
+            self.jira_connector = await ConnectorTicketJira.create(
+                jira_username=self.JIRA_USERNAME,
+                jira_api_token=self.JIRA_API_TOKEN,
+                jira_url=self.JIRA_URL,
+                project_code=self.JIRA_PROJECT_CODE,
+                issue_type=self.JIRA_ISSUE_TYPE
+            )
+
+            # Use the current logged-in user from root interface
+            activeUser = self.root_interface.currentUser
+            if not activeUser:
+                logger.error("No current user available - SharePoint connection required")
+                return False
+
+            logger.info(f"Using current user for SharePoint: {activeUser.id}")
+
+            # Get SharePoint connection for this user
+            user_connections = self.root_interface.getUserConnections(activeUser.id)
+            sharepoint_connection = None
+
+            for connection in user_connections:
+                if connection.authority == "msft":
+                    sharepoint_connection = connection
+                    break
+
+            if not sharepoint_connection:
+                logger.error("No SharePoint connection found for Delta Group user")
+                return False
+
+            logger.info(f"Found SharePoint connection: {sharepoint_connection.id}")
+
+            # Get SharePoint token for this connection
+            sharepoint_token = self.root_interface.getConnectionToken(sharepoint_connection.id)
+            if not sharepoint_token:
+                logger.error("No SharePoint token found for Delta Group user connection")
+                return False
+
+            logger.info(f"Found SharePoint token: {sharepoint_token.id}")
+
+            # Initialize SharePoint connector with Graph API
+            self.sharepoint_connector = ConnectorSharepoint(access_token=sharepoint_token.tokenAccess)
+
+            # Resolve the site by hostname + site path to get the real site ID
+            logger.info(
+                f"Resolving site ID via hostname+path: 
{self.SHAREPOINT_HOSTNAME}:/sites/{self.SHAREPOINT_SITE_PATH}" + ) + resolved = await self.sharepoint_connector.find_site_by_url( + hostname=self.SHAREPOINT_HOSTNAME, + site_path=self.SHAREPOINT_SITE_PATH + ) + + if not resolved: + logger.error( + f"Failed to resolve site. Hostname: {self.SHAREPOINT_HOSTNAME}, Path: {self.SHAREPOINT_SITE_PATH}" + ) + return False + + self.target_site = { + "id": resolved.get("id"), + "displayName": resolved.get("displayName", self.SHAREPOINT_SITE_NAME), + "name": resolved.get("name", self.SHAREPOINT_SITE_NAME) + } + + # Test site access by listing root of the drive + logger.info("Testing site access using resolved site ID...") + test_result = await self.sharepoint_connector.list_folder_contents( + site_id=self.target_site["id"], + folder_path="" + ) + + if test_result is not None: + logger.info( + f"Site access confirmed: {self.target_site['displayName']} (ID: {self.target_site['id']})" + ) + else: + logger.error("Could not access site drive - check permissions") + return False + + return True + + except Exception as e: + logger.error(f"Error initializing connectors: {str(e)}") + return False + + async def sync_jira_to_sharepoint(self) -> bool: + """Perform the main JIRA to SharePoint synchronization using sophisticated sync logic.""" + try: + logger.info("Starting JIRA to SharePoint synchronization") + + # Initialize connectors + if not await self.initialize_connectors(): + logger.error("Failed to initialize connectors") + return False + + # Create the sophisticated sync interface + sync_interface = await TicketSharepointSyncInterface.create( + connector_ticket=self.jira_connector, + connector_sharepoint=self.sharepoint_connector, + task_sync_definition=self.TASK_SYNC_DEFINITION, + sync_folder=self.SHAREPOINT_MAIN_FOLDER, + sync_file=self.SYNC_FILE_NAME, + backup_folder=self.SHAREPOINT_BACKUP_FOLDER, + audit_folder=self.SHAREPOINT_AUDIT_FOLDER, + site_id=self.target_site['id'] + ) + + # Perform the sophisticated sync + logger.info("Performing sophisticated JIRA to CSV sync...") + await sync_interface.sync_from_jira_to_csv() + + logger.info("JIRA to SharePoint synchronization completed successfully") + return True + + except Exception as e: + logger.error(f"Error during JIRA to SharePoint synchronization: {str(e)}") + return False + + + +# Global sync function for use in app.py +async def perform_sync_jira_delta_group() -> bool: + """Perform JIRA to SharePoint synchronization for Delta Group. + + This function is called by the scheduler and can be used independently. 
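+
+    Illustrative standalone invocation (assumes an initialized application context):
+        import asyncio
+        asyncio.run(perform_sync_jira_delta_group())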
+ + Returns: + bool: True if synchronization was successful, False otherwise + """ + try: + if APP_ENV_TYPE != "TASK-ACTIVATE-WHEN-ACCOUNT-READY-prod": + logger.info("JIRA to SharePoint synchronization: TASK to run only in PROD") + return True + + logger.info("Starting Delta Group JIRA sync...") + + + sync_manager = ManagerSyncDelta() + success = await sync_manager.sync_jira_to_sharepoint() + + if success: + logger.info("Delta Group JIRA sync completed successfully") + else: + logger.error("Delta Group JIRA sync failed") + + return success + + except Exception as e: + logger.error(f"Error in perform_sync_jira_delta_group: {str(e)}") + return False diff --git a/modules/workflow/managerWorkflow.py b/modules/workflow/managerWorkflow.py index ce7a2366..1d50b134 100644 --- a/modules/workflow/managerWorkflow.py +++ b/modules/workflow/managerWorkflow.py @@ -76,12 +76,12 @@ class WorkflowManager: "taskProgress": "pending", "actionProgress": "pending" } - message = self.chatInterface.createWorkflowMessage(stopped_message) + message = self.chatInterface.createMessage(stopped_message) if message: workflow.messages.append(message) # Add log entry - self.chatInterface.createWorkflowLog({ + self.chatInterface.createLog({ "workflowId": workflow.id, "message": "Workflow stopped by user", "type": "warning", @@ -120,12 +120,12 @@ class WorkflowManager: "taskProgress": "fail", "actionProgress": "fail" } - message = self.chatInterface.createWorkflowMessage(error_message) + message = self.chatInterface.createMessage(error_message) if message: workflow.messages.append(message) # Add error log entry - self.chatInterface.createWorkflowLog({ + self.chatInterface.createLog({ "workflowId": workflow.id, "message": f"Workflow failed: {str(e)}", "type": "error", @@ -165,16 +165,19 @@ class WorkflowManager: "actionProgress": "pending" } - # Add documents if any - if userInput.listFileId: - # Process file IDs and add to message data - documents = await self.chatManager.service.processFileIds(userInput.listFileId) - messageData["documents"] = documents - - # Create message using interface - message = self.chatInterface.createWorkflowMessage(messageData) + # Create message first to get messageId + message = self.chatInterface.createMessage(messageData) if message: workflow.messages.append(message) + + # Add documents if any, now with messageId + if userInput.listFileId: + # Process file IDs and add to message data + documents = await self.chatManager.service.processFileIds(userInput.listFileId, message.id) + message.documents = documents + # Update the message with documents in database + self.chatInterface.updateMessage(message.id, {"documents": [doc.to_dict() for doc in documents]}) + return message else: raise Exception("Failed to create first message") @@ -241,7 +244,7 @@ class WorkflowManager: } # Create message using interface - message = self.chatInterface.createWorkflowMessage(messageData) + message = self.chatInterface.createMessage(messageData) if message: workflow.messages.append(message) @@ -256,7 +259,7 @@ class WorkflowManager: }) # Add completion log entry - self.chatInterface.createWorkflowLog({ + self.chatInterface.createLog({ "workflowId": workflow.id, "message": "Workflow completed", "type": "success", @@ -294,7 +297,7 @@ class WorkflowManager: "taskProgress": "stopped", "actionProgress": "stopped" } - message = self.chatInterface.createWorkflowMessage(stopped_message) + message = self.chatInterface.createMessage(stopped_message) if message: workflow.messages.append(message) @@ -326,7 +329,7 @@ class 
diff --git a/modules/workflow/managerWorkflow.py b/modules/workflow/managerWorkflow.py
index ce7a2366..1d50b134 100644
--- a/modules/workflow/managerWorkflow.py
+++ b/modules/workflow/managerWorkflow.py
@@ -76,12 +76,12 @@ class WorkflowManager:
                 "taskProgress": "pending",
                 "actionProgress": "pending"
             }
-            message = self.chatInterface.createWorkflowMessage(stopped_message)
+            message = self.chatInterface.createMessage(stopped_message)
             if message:
                 workflow.messages.append(message)
 
             # Add log entry
-            self.chatInterface.createWorkflowLog({
+            self.chatInterface.createLog({
                 "workflowId": workflow.id,
                 "message": "Workflow stopped by user",
                 "type": "warning",
@@ -120,12 +120,12 @@ class WorkflowManager:
                 "taskProgress": "fail",
                 "actionProgress": "fail"
             }
-            message = self.chatInterface.createWorkflowMessage(error_message)
+            message = self.chatInterface.createMessage(error_message)
             if message:
                 workflow.messages.append(message)
 
             # Add error log entry
-            self.chatInterface.createWorkflowLog({
+            self.chatInterface.createLog({
                 "workflowId": workflow.id,
                 "message": f"Workflow failed: {str(e)}",
                 "type": "error",
@@ -165,16 +165,19 @@ class WorkflowManager:
                 "actionProgress": "pending"
             }
 
-        # Add documents if any
-        if userInput.listFileId:
-            # Process file IDs and add to message data
-            documents = await self.chatManager.service.processFileIds(userInput.listFileId)
-            messageData["documents"] = documents
-
-        # Create message using interface
-        message = self.chatInterface.createWorkflowMessage(messageData)
+        # Create message first to get messageId
+        message = self.chatInterface.createMessage(messageData)
         if message:
             workflow.messages.append(message)
+
+            # Add documents if any, now with messageId
+            if userInput.listFileId:
+                # Process file IDs and add to message data
+                documents = await self.chatManager.service.processFileIds(userInput.listFileId, message.id)
+                message.documents = documents
+                # Update the message with documents in database
+                self.chatInterface.updateMessage(message.id, {"documents": [doc.to_dict() for doc in documents]})
+
             return message
         else:
             raise Exception("Failed to create first message")
@@ -241,7 +244,7 @@ class WorkflowManager:
             }
 
             # Create message using interface
-            message = self.chatInterface.createWorkflowMessage(messageData)
+            message = self.chatInterface.createMessage(messageData)
             if message:
                 workflow.messages.append(message)
@@ -256,7 +259,7 @@ class WorkflowManager:
             })
 
             # Add completion log entry
-            self.chatInterface.createWorkflowLog({
+            self.chatInterface.createLog({
                 "workflowId": workflow.id,
                 "message": "Workflow completed",
                 "type": "success",
@@ -294,7 +297,7 @@ class WorkflowManager:
                 "taskProgress": "stopped",
                 "actionProgress": "stopped"
             }
-            message = self.chatInterface.createWorkflowMessage(stopped_message)
+            message = self.chatInterface.createMessage(stopped_message)
             if message:
                 workflow.messages.append(message)
@@ -326,7 +329,7 @@ class WorkflowManager:
                 "taskProgress": "stopped",
                 "actionProgress": "stopped"
             }
-            message = self.chatInterface.createWorkflowMessage(stopped_message)
+            message = self.chatInterface.createMessage(stopped_message)
             if message:
                 workflow.messages.append(message)
@@ -341,7 +344,7 @@ class WorkflowManager:
             })
 
             # Add stopped log entry
-            self.chatInterface.createWorkflowLog({
+            self.chatInterface.createLog({
                 "workflowId": workflow.id,
                 "message": "Workflow stopped by user",
                 "type": "warning",
@@ -368,7 +371,7 @@ class WorkflowManager:
                 "taskProgress": "fail",
                 "actionProgress": "fail"
             }
-            message = self.chatInterface.createWorkflowMessage(error_message)
+            message = self.chatInterface.createMessage(error_message)
             if message:
                 workflow.messages.append(message)
@@ -383,7 +386,7 @@ class WorkflowManager:
             })
 
             # Add failed log entry
-            self.chatInterface.createWorkflowLog({
+            self.chatInterface.createLog({
                 "workflowId": workflow.id,
                 "message": f"Workflow failed: {workflow_result.error or 'Unknown error'}",
                 "type": "error",
@@ -411,7 +414,7 @@ class WorkflowManager:
                 "actionProgress": "success"
             }
 
-            message = self.chatInterface.createWorkflowMessage(summary_message)
+            message = self.chatInterface.createMessage(summary_message)
             if message:
                 workflow.messages.append(message)
@@ -426,7 +429,7 @@ class WorkflowManager:
             })
 
             # Add completion log entry
-            self.chatInterface.createWorkflowLog({
+            self.chatInterface.createLog({
                 "workflowId": workflow.id,
                 "message": "Workflow completed successfully",
                 "type": "success",
@@ -454,7 +457,7 @@ class WorkflowManager:
                 "taskProgress": "fail",
                 "actionProgress": "fail"
             }
-            message = self.chatInterface.createWorkflowMessage(error_message)
+            message = self.chatInterface.createMessage(error_message)
             if message:
                 workflow.messages.append(message)
diff --git a/notes/changelog.txt b/notes/changelog.txt
index affd7a3d..e10e683a 100644
--- a/notes/changelog.txt
+++ b/notes/changelog.txt
@@ -2,7 +2,10 @@
 TODO
 
 # System
-- sharepoint to fix
+- database
+- db initialization as a separate function: create the root mandate, then the sysadmin with hashed passwords --> using the connector according to the env configuration
+- settings: UI page for db new (delete if exists and init), adding the root mandate and sysadmin, and log download --> add connector settings with the corresponding endpoints in the API
+- access model as a matrix, not as code --> view, add, update, delete rights on table and attribute level, each scoped to all, my (created by me), my mandate (mandates I am in), or none (no access)
 - document handling centralized
 - ai handling centralized
 - neutralizer to activate AND put back placeholders to the returned data
@@ -21,6 +24,11 @@ TODO
 - check zusammenfassung von 10 dokumenten >10 MB
 - test case bewerbung
 
+# Ida changes gateway:
+- Polling endpoint + documentation for it
+- files integrated into documents --> document endpoint for files
+- prompts in chat endpoint
+
 # DOCUMENTATION
 Design principles
 - UI: Module classes for data management (CRUD tables & forms --> formGeneric)
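The access-model TODO above describes rights held as data rather than code: per table and attribute, each of view, add, update, and delete resolves to one of the scopes all, my, my mandate, or none. A minimal sketch of such a matrix; the table names, string scope values, and lookup helper are hypothetical illustrations of the note, not existing code:

```python
# Hypothetical access matrix; table names and entries are invented to
# illustrate the changelog idea, nothing here is from the actual codebase.
from enum import Enum

class Scope(Enum):
    ALL = "all"                 # every record
    MY = "my"                   # records created by me
    MY_MANDATE = "my_mandate"   # records of mandates I am in
    NONE = "none"               # no access

# One scope per operation and table, instead of per-role code paths.
ACCESS_MATRIX = {
    "mandate": {"view": Scope.MY_MANDATE, "add": Scope.NONE, "update": Scope.NONE, "delete": Scope.NONE},
    "user":    {"view": Scope.MY_MANDATE, "add": Scope.MY_MANDATE, "update": Scope.MY, "delete": Scope.NONE},
}

def allowed_scope(table: str, operation: str) -> Scope:
    """Look up the scope for an operation on a table; default to no access."""
    return ACCESS_MATRIX.get(table, {}).get(operation, Scope.NONE)

# e.g. allowed_scope("user", "update") -> Scope.MY
```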
diff --git a/notes/releasenotes.txt b/notes/releasenotes.txt
deleted file mode 100644
index 10d5dadc..00000000
--- a/notes/releasenotes.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-New features
-- Limiter and tracking of ip adress access
-- Sessions improved
-- user and connection consequently separated
-- seamless local and external authorities integration
-- audit trail
-- nda disclaimer in login window
-- CSRF Tokens included in forms
\ No newline at end of file
diff --git a/query b/query
new file mode 100644
index 00000000..a02a1cc7
--- /dev/null
+++ b/query
@@ -0,0 +1 @@
+postgresql
diff --git a/requirements.txt b/requirements.txt
index 783db728..b4691ada 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -43,6 +43,7 @@ chardet>=5.0.0  # Für Zeichensatzerkennung bei Webinhalten
 aiohttp>=3.8.0  # Required for SharePoint operations (async HTTP)
 selenium>=4.15.0  # Required for web automation and JavaScript-heavy pages
 tavily-python==0.7.11  # Tavily SDK
+Office365-REST-Python-Client==2.6.2  # SharePoint integration client
 
 ## Image Processing
 Pillow>=10.0.0  # Für Bildverarbeitung (als PIL importiert)
@@ -73,6 +74,9 @@ chardet>=4.0.0  # For encoding detection
 pytest>=8.0.0
 pytest-asyncio>=0.21.0
 
+## For Scheduling / Repeated Tasks
+APScheduler==3.11.0
+
 ## Missing Dependencies for IPython and other tools
 decorator>=5.0.0
 jedi>=0.16
@@ -90,4 +94,7 @@ bokeh>=3.2.0,<3.4.0
 linkify-it-py>=1.0.0
 mdit-py-plugins>=0.3.0
 pyviz-comms>=2.0.0
-xyzservices>=2021.09.1
\ No newline at end of file
+xyzservices>=2021.09.1
+
+# PostgreSQL connector dependencies
+psycopg2-binary==2.9.9
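The new psycopg2-binary pin backs the PostgreSQL items in the changelog (and matches the stray query file containing "postgresql"). A minimal connection sketch, assuming the settings come from the .env file the deploy workflows copy into place; the variable names are invented for illustration:

```python
# Minimal PostgreSQL connection sketch; the environment variable names are
# assumptions, not taken from this diff.
import os
import psycopg2

def get_connection():
    """Open a PostgreSQL connection from environment settings (loaded via .env)."""
    return psycopg2.connect(
        host=os.environ.get("POSTGRES_HOST", "localhost"),
        port=int(os.environ.get("POSTGRES_PORT", "5432")),
        dbname=os.environ["POSTGRES_DB"],
        user=os.environ["POSTGRES_USER"],
        password=os.environ["POSTGRES_PASSWORD"],
    )

# Usage: the connection context manager wraps a transaction.
with get_connection() as conn:
    with conn.cursor() as cur:
        cur.execute("SELECT version();")
        print(cur.fetchone()[0])
```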
diff --git a/tests/connectors/__init__.py b/tests/connectors/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/connectors/test_connector_tavily.py b/tests/connectors/test_connector_tavily.py
deleted file mode 100644
index 23253cba..00000000
--- a/tests/connectors/test_connector_tavily.py
+++ /dev/null
@@ -1,108 +0,0 @@
-"""Tests for Tavliy web search."""
-
-import pytest
-import logging
-
-from modules.interfaces.interfaceChatModel import ActionResult
-from gateway.modules.interfaces.interfaceWebModel import (
-    WebSearchRequest,
-    WebCrawlRequest,
-    WebScrapeRequest,
-)
-from gateway.modules.connectors.connectorWebTavily import ConnectorTavily
-
-logger = logging.getLogger(__name__)
-
-
-@pytest.mark.asyncio
-@pytest.mark.expensive
-async def test_tavily_connector_search_test_live_api():
-    logger.info("Testing Tavliy connector search with live API calls")
-
-    # Test request
-    request = WebSearchRequest(query="How old is the Earth?", max_results=5)
-
-    # Tavily instance
-    connectorWebTavily = await ConnectorTavily.create()
-
-    # Search test
-    action_result = await connectorWebTavily.search_urls(request=request)
-
-    # Check results
-    assert isinstance(action_result, ActionResult)
-
-    logger.info("=" * 20)
-    logger.info(f"Action result success status: {action_result.success}")
-    logger.info(f"Action result error: {action_result.error}")
-    logger.info(f"Action result label: {action_result.resultLabel}")
-
-    logger.info("Documents:")
-    for doc in action_result.documents:
-        logger.info("-" * 10)
-        logger.info(f" - Document Name: {doc.documentName}")
-        logger.info(f" - Document Mime Type: {doc.mimeType}")
-        logger.info(f" - Document Data: {doc.documentData}")
-
-
-@pytest.mark.asyncio
-@pytest.mark.expensive
-async def test_tavily_connector_crawl_test_live_api():
-    logger.info("Testing Tavily connector crawl with live API calls")
-
-    # Test request
-    urls = [
-        "https://en.wikipedia.org/wiki/Earth",
-        "https://valueon.ch",
-    ]
-    request = WebCrawlRequest(urls=urls)
-
-    # Tavily instance
-    connectorWebTavily = await ConnectorTavily.create()
-
-    # Crawl test
-    action_result = await connectorWebTavily.crawl_urls(request=request)
-
-    # Check results
-    assert isinstance(action_result, ActionResult)
-
-    logger.info("=" * 20)
-    logger.info(f"Action result success status: {action_result.success}")
-    logger.info(f"Action result error: {action_result.error}")
-    logger.info(f"Action result label: {action_result.resultLabel}")
-
-    logger.info("Documents:")
-    for doc in action_result.documents:
-        logger.info("-" * 10)
-        logger.info(f" - Document Name: {doc.documentName}")
-        logger.info(f" - Document Mime Type: {doc.mimeType}")
-        logger.info(f" - Document Data: {doc.documentData}")
-
-
-@pytest.mark.asyncio
-@pytest.mark.expensive
-async def test_tavily_connector_scrape_test_live_api():
-    logger.info("Testing Tavily connector scrape with live API calls")
-
-    # Test request with query
-    request = WebScrapeRequest(query="How old is the Earth?", max_results=3)
-
-    # Tavily instance
-    connectorWebTavily = await ConnectorTavily.create()
-
-    # Scrape test
-    action_result = await connectorWebTavily.scrape(request=request)
-
-    # Check results
-    assert isinstance(action_result, ActionResult)
-
-    logger.info("=" * 20)
-    logger.info(f"Action result success status: {action_result.success}")
-    logger.info(f"Action result error: {action_result.error}")
-    logger.info(f"Action result label: {action_result.resultLabel}")
-
-    logger.info("Documents:")
-    for doc in action_result.documents:
-        logger.info("-" * 10)
-        logger.info(f" - Document Name: {doc.documentName}")
-        logger.info(f" - Document Mime Type: {doc.mimeType}")
-        logger.info(f" - Document Data: {doc.documentData}")
diff --git a/test_graph_search.py b/tests/test_graph_search.py
similarity index 100%
rename from test_graph_search.py
rename to tests/test_graph_search.py
diff --git a/test_neutralizer/apprun.py b/tests/test_neutralizer/apprun.py
similarity index 100%
rename from test_neutralizer/apprun.py
rename to tests/test_neutralizer/apprun.py
diff --git a/test_neutralizer/logs/log_mapping.csv b/tests/test_neutralizer/logs/log_mapping.csv
similarity index 100%
rename from test_neutralizer/logs/log_mapping.csv
rename to tests/test_neutralizer/logs/log_mapping.csv
diff --git a/test_neutralizer/logs/log_replacements.csv b/tests/test_neutralizer/logs/log_replacements.csv
similarity index 100%
rename from test_neutralizer/logs/log_replacements.csv
rename to tests/test_neutralizer/logs/log_replacements.csv
diff --git a/test_neutralizer/neutralizer.py b/tests/test_neutralizer/neutralizer.py
similarity index 100%
rename from test_neutralizer/neutralizer.py
rename to tests/test_neutralizer/neutralizer.py
diff --git a/test_neutralizer/output/neutralized_Case.md b/tests/test_neutralizer/output/neutralized_Case.md
similarity index 100%
rename from test_neutralizer/output/neutralized_Case.md
rename to tests/test_neutralizer/output/neutralized_Case.md
diff --git a/test_neutralizer/output/neutralized_customers.csv b/tests/test_neutralizer/output/neutralized_customers.csv
similarity index 100%
rename from test_neutralizer/output/neutralized_customers.csv
rename to tests/test_neutralizer/output/neutralized_customers.csv
diff --git a/test_neutralizer/output/neutralized_cv_lara_meier.txt b/tests/test_neutralizer/output/neutralized_cv_lara_meier.txt
similarity index 100%
rename from test_neutralizer/output/neutralized_cv_lara_meier.txt
rename to tests/test_neutralizer/output/neutralized_cv_lara_meier.txt
diff --git a/test_neutralizer/output/neutralized_employees.csv b/tests/test_neutralizer/output/neutralized_employees.csv
similarity index 100%
rename from test_neutralizer/output/neutralized_employees.csv
rename to tests/test_neutralizer/output/neutralized_employees.csv
diff --git a/test_neutralizer/output/neutralized_english.txt b/tests/test_neutralizer/output/neutralized_english.txt
similarity index 100%
rename from test_neutralizer/output/neutralized_english.txt
rename to tests/test_neutralizer/output/neutralized_english.txt
diff --git a/test_neutralizer/output/neutralized_example.json b/tests/test_neutralizer/output/neutralized_example.json
similarity index 100%
rename from test_neutralizer/output/neutralized_example.json
rename to tests/test_neutralizer/output/neutralized_example.json
diff --git a/test_neutralizer/output/neutralized_example.xml b/tests/test_neutralizer/output/neutralized_example.xml
similarity index 100%
rename from test_neutralizer/output/neutralized_example.xml
rename to tests/test_neutralizer/output/neutralized_example.xml
diff --git a/test_neutralizer/output/neutralized_french.txt b/tests/test_neutralizer/output/neutralized_french.txt
similarity index 100%
rename from test_neutralizer/output/neutralized_french.txt
rename to tests/test_neutralizer/output/neutralized_french.txt
diff --git a/test_neutralizer/output/neutralized_german.txt b/tests/test_neutralizer/output/neutralized_german.txt
similarity index 100%
rename from test_neutralizer/output/neutralized_german.txt
rename to tests/test_neutralizer/output/neutralized_german.txt
diff --git a/test_neutralizer/output/neutralized_geschaeftsstrategie.txt b/tests/test_neutralizer/output/neutralized_geschaeftsstrategie.txt
similarity index 100%
rename from test_neutralizer/output/neutralized_geschaeftsstrategie.txt
rename to tests/test_neutralizer/output/neutralized_geschaeftsstrategie.txt
diff --git a/test_neutralizer/output/neutralized_geschäfte.csv b/tests/test_neutralizer/output/neutralized_geschäfte.csv
similarity index 100%
rename from test_neutralizer/output/neutralized_geschäfte.csv
rename to tests/test_neutralizer/output/neutralized_geschäfte.csv
diff --git a/test_neutralizer/output/neutralized_italian.txt b/tests/test_neutralizer/output/neutralized_italian.txt
similarity index 100%
rename from test_neutralizer/output/neutralized_italian.txt
rename to tests/test_neutralizer/output/neutralized_italian.txt
diff --git a/test_neutralizer/output/neutralized_kunden.csv b/tests/test_neutralizer/output/neutralized_kunden.csv
similarity index 100%
rename from test_neutralizer/output/neutralized_kunden.csv
rename to tests/test_neutralizer/output/neutralized_kunden.csv
diff --git a/test_neutralizer/output/neutralized_mitarbeiter.csv b/tests/test_neutralizer/output/neutralized_mitarbeiter.csv
similarity index 100%
rename from test_neutralizer/output/neutralized_mitarbeiter.csv
rename to tests/test_neutralizer/output/neutralized_mitarbeiter.csv
diff --git a/test_neutralizer/output/neutralized_swiss.txt b/tests/test_neutralizer/output/neutralized_swiss.txt
similarity index 100%
rename from test_neutralizer/output/neutralized_swiss.txt
rename to tests/test_neutralizer/output/neutralized_swiss.txt
diff --git a/test_neutralizer/output/neutralized_transactions.csv b/tests/test_neutralizer/output/neutralized_transactions.csv
similarity index 100%
rename from test_neutralizer/output/neutralized_transactions.csv
rename to tests/test_neutralizer/output/neutralized_transactions.csv
diff --git a/test_neutralizer/patterns.py b/tests/test_neutralizer/patterns.py
similarity index 100%
rename from test_neutralizer/patterns.py
rename to tests/test_neutralizer/patterns.py
diff --git a/test_neutralizer/testdata/Case.md b/tests/test_neutralizer/testdata/Case.md
similarity index 100%
rename from test_neutralizer/testdata/Case.md
rename to tests/test_neutralizer/testdata/Case.md
diff --git a/test_neutralizer/testdata/customers.csv b/tests/test_neutralizer/testdata/customers.csv
similarity index 100%
rename from test_neutralizer/testdata/customers.csv
rename to tests/test_neutralizer/testdata/customers.csv
diff --git a/test_neutralizer/testdata/cv_lara_meier.txt b/tests/test_neutralizer/testdata/cv_lara_meier.txt
similarity index 100%
rename from test_neutralizer/testdata/cv_lara_meier.txt
rename to tests/test_neutralizer/testdata/cv_lara_meier.txt
diff --git a/test_neutralizer/testdata/employees.csv b/tests/test_neutralizer/testdata/employees.csv
similarity index 100%
rename from test_neutralizer/testdata/employees.csv
rename to tests/test_neutralizer/testdata/employees.csv
diff --git a/test_neutralizer/testdata/english.txt b/tests/test_neutralizer/testdata/english.txt
similarity index 100%
rename from test_neutralizer/testdata/english.txt
rename to tests/test_neutralizer/testdata/english.txt
diff --git a/test_neutralizer/testdata/example.json b/tests/test_neutralizer/testdata/example.json
similarity index 100%
rename from test_neutralizer/testdata/example.json
rename to tests/test_neutralizer/testdata/example.json
diff --git a/test_neutralizer/testdata/example.xml b/tests/test_neutralizer/testdata/example.xml
similarity index 100%
rename from test_neutralizer/testdata/example.xml
rename to tests/test_neutralizer/testdata/example.xml
diff --git a/test_neutralizer/testdata/french.txt b/tests/test_neutralizer/testdata/french.txt
similarity index 100%
rename from test_neutralizer/testdata/french.txt
rename to tests/test_neutralizer/testdata/french.txt
diff --git a/test_neutralizer/testdata/german.txt b/tests/test_neutralizer/testdata/german.txt
similarity index 100%
rename from test_neutralizer/testdata/german.txt
rename to tests/test_neutralizer/testdata/german.txt
diff --git a/test_neutralizer/testdata/geschaeftsstrategie.txt b/tests/test_neutralizer/testdata/geschaeftsstrategie.txt
similarity index 100%
rename from test_neutralizer/testdata/geschaeftsstrategie.txt
rename to tests/test_neutralizer/testdata/geschaeftsstrategie.txt
diff --git a/test_neutralizer/testdata/geschäfte.csv b/tests/test_neutralizer/testdata/geschäfte.csv
similarity index 100%
rename from test_neutralizer/testdata/geschäfte.csv
rename to tests/test_neutralizer/testdata/geschäfte.csv
diff --git a/test_neutralizer/testdata/italian.txt b/tests/test_neutralizer/testdata/italian.txt
similarity index 100%
rename from test_neutralizer/testdata/italian.txt
rename to tests/test_neutralizer/testdata/italian.txt
diff --git a/test_neutralizer/testdata/kunden.csv b/tests/test_neutralizer/testdata/kunden.csv
similarity index 100%
rename from test_neutralizer/testdata/kunden.csv
rename to tests/test_neutralizer/testdata/kunden.csv
diff --git a/test_neutralizer/testdata/mitarbeiter.csv b/tests/test_neutralizer/testdata/mitarbeiter.csv
similarity index 100%
rename from test_neutralizer/testdata/mitarbeiter.csv
rename to tests/test_neutralizer/testdata/mitarbeiter.csv
diff --git a/test_neutralizer/testdata/swiss.txt b/tests/test_neutralizer/testdata/swiss.txt
similarity index 100%
rename from test_neutralizer/testdata/swiss.txt
rename to tests/test_neutralizer/testdata/swiss.txt
diff --git a/test_neutralizer/testdata/transactions.csv b/tests/test_neutralizer/testdata/transactions.csv
similarity index 100%
rename from test_neutralizer/testdata/transactions.csv
rename to tests/test_neutralizer/testdata/transactions.csv
diff --git a/test_neutralizer/zdocu.html b/tests/test_neutralizer/zdocu.html
similarity index 100%
rename from test_neutralizer/zdocu.html
rename to tests/test_neutralizer/zdocu.html
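With the neutralizer and graph-search tests now consolidated under tests/, the expensive marker that the removed Tavily tests relied on is still worth registering so any remaining live-API tests stay filterable. A sketch of doing that in a tests/conftest.py; whether the project declares markers there or in a pytest.ini is an assumption:

```python
# Hypothetical tests/conftest.py; registering the marker here is an assumption
# about project conventions -- the marker name itself comes from the removed
# @pytest.mark.expensive decorators above.
def pytest_configure(config):
    config.addinivalue_line(
        "markers",
        "expensive: tests that call paid external APIs (deselect with -m 'not expensive')",
    )
```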