# gateway/modules/features/syncDelta/mainSyncDelta.py
# 2025-09-23 22:47:54 +02:00
#
# 577 lines
# No EOL
# 25 KiB
# Python

"""
Delta Group JIRA-SharePoint Sync Manager
This module handles the synchronization of JIRA tickets to SharePoint using the new
Graph API-based connector architecture.
"""
import logging
import os
import csv
import io
from datetime import datetime, UTC
from typing import Dict, Any, List, Optional
from modules.services.serviceSharepoint.mainSharepoint import SharepointService
from modules.connectors.connectorTicketJira import ConnectorTicketJira
from modules.interfaces.interfaceAppObjects import getRootInterface
from modules.interfaces.interfaceAppModel import UserInDB
from modules.interfaces.interfaceTicketObjects import TicketSharepointSyncInterface
from modules.shared.timezoneUtils import get_utc_timestamp
from modules.shared.configuration import APP_CONFIG
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Environment type read from the application configuration, with "dev" passed
# as the default. perform_sync_jira_delta_group() only runs the sync when this
# value is "prod" or "tst".
APP_ENV_TYPE = APP_CONFIG.get("APP_ENV_TYPE", "dev")
def _adf_children(node) -> list:
    """Return the node's ``content`` list, or an empty list if missing/invalid."""
    children = node.get("content")
    return children if isinstance(children, list) else []


def _adf_attrs(node) -> dict:
    """Return the node's ``attrs`` dict, or an empty dict if missing/invalid."""
    attrs = node.get("attrs")
    return attrs if isinstance(attrs, dict) else {}


def _adf_apply_marks(text: str, marks) -> str:
    """Wrap *text* in Markdown-style markers for each ADF mark, in order."""
    if not isinstance(marks, list):
        return text
    for mark in marks:
        if not isinstance(mark, dict):
            continue
        mark_type = mark.get("type")
        if mark_type == "strong":
            text = f"**{text}**"
        elif mark_type == "em":
            text = f"*{text}*"
        elif mark_type == "code":
            text = f"`{text}`"
        elif mark_type == "link":
            href = _adf_attrs(mark).get("href", "")
            if href:
                text = f"[{text}]({href})"
    return text


def _adf_render_list(list_node, list_level: int, ordered: bool) -> str:
    """Render a bulletList/orderedList node, one entry per non-empty listItem."""
    indent = " " * list_level
    number = 1
    if ordered:
        # ADF allows an explicit start number via attrs.order.
        start = _adf_attrs(list_node).get("order", 1)
        if isinstance(start, int) and not isinstance(start, bool) and start >= 0:
            number = start
    lines = []
    for list_item in _adf_children(list_node):
        if not isinstance(list_item, dict) or list_item.get("type") != "listItem":
            continue
        # Render everything inside the item (paragraphs AND nested lists) one
        # level deeper, so nested lists are indented instead of being dropped.
        body = _adf_render_nodes(_adf_children(list_item), list_level + 1)
        if not body.strip():
            continue
        if ordered:
            lines.append(f"{indent}{number}. {body}")
            number += 1
        else:
            lines.append(f"{indent}{body}")
    return "\n".join(lines)


def _adf_render_block(node, list_level: int) -> Optional[str]:
    """Render one non-inline ADF node; return None when it yields no text."""
    node_type = node.get("type", "")
    if node_type == "bulletList":
        return _adf_render_list(node, list_level, ordered=False) or None
    if node_type == "orderedList":
        return _adf_render_list(node, list_level, ordered=True) or None
    if node_type == "rule":
        return "---"
    if node_type == "embedCard":
        url = _adf_attrs(node).get("url", "")
        return f"[Embedded Content: {url}]" if url else None

    # Every remaining node type is a container: render its children first.
    inner = _adf_render_nodes(_adf_children(node), list_level)
    if not inner.strip():
        return None
    if node_type == "codeBlock":
        return f"```\n{inner}\n```"
    if node_type == "blockquote":
        return f"> {inner}"
    if node_type == "heading":
        level = _adf_attrs(node).get("level", 1)
        if not isinstance(level, int) or isinstance(level, bool) or level < 1:
            level = 1
        return f"{'#' * level} {inner}"
    # paragraph, stray listItem and unknown containers: plain inner text.
    return inner


def _adf_render_nodes(nodes, list_level: int = 0) -> str:
    """Render a list of sibling ADF nodes to text.

    Consecutive inline nodes (text, hardBreak) are concatenated directly so a
    sentence with mixed formatting stays on one line; block-level nodes are
    separated by a single newline.
    """
    blocks = []
    inline_run = []

    def flush_inline():
        if inline_run:
            blocks.append("".join(inline_run))
            inline_run.clear()

    for node in nodes:
        if not isinstance(node, dict):
            continue
        node_type = node.get("type", "")
        if node_type == "text":
            text = node.get("text", "")
            if not isinstance(text, str):
                text = "" if text is None else str(text)
            inline_run.append(_adf_apply_marks(text, node.get("marks")))
            continue
        if node_type == "hardBreak":
            inline_run.append("\n")
            continue
        rendered = _adf_render_block(node, list_level)
        if rendered is None:
            # Nodes that produce no text must not split the inline run.
            continue
        flush_inline()
        blocks.append(rendered)
    flush_inline()
    return "\n".join(blocks)


def convert_adf_to_text(adf_data):
    """Convert Atlassian Document Format (ADF) to plain text.

    Renders paragraphs, headings, bullet/ordered lists (including nested
    lists), code blocks, blockquotes, rules, embed cards and text marks
    (strong, em, code, link) using lightweight Markdown-like markers.
    Unknown node types are rendered through their ``content`` if they have any.

    Args:
        adf_data: ADF object (dict) or None.

    Returns:
        str: Plain text content. Empty string if *adf_data* is empty, not a
        dict, or its ``content`` is not a list. A dict whose ``type`` is not
        ``"doc"`` is returned as ``str(adf_data)``.
    """
    if not adf_data or not isinstance(adf_data, dict):
        return ""
    if adf_data.get("type") != "doc":
        return str(adf_data)
    content = adf_data.get("content", [])
    if not isinstance(content, list):
        return ""
    return _adf_render_nodes(content).strip()
class ManagerSyncDelta:
    """Manage the JIRA <-> SharePoint ticket synchronization for Delta Group.

    Supports two sync modes:
    - Excel mode ("xlsx"): uses the Excel (.xlsx) sync file (class default)
    - CSV mode ("csv"): uses the CSV sync file

    To change the sync mode, call set_sync_mode() on an instance or modify the
    SYNC_MODE class variable.
    """

    SHAREPOINT_SITE_ID = "02830618-4029-4dc8-8d3d-f5168f282249"
    SHAREPOINT_SITE_NAME = "SteeringBPM"
    SHAREPOINT_SITE_PATH = "SteeringBPM"
    SHAREPOINT_HOSTNAME = "deltasecurityag.sharepoint.com"
    SHAREPOINT_MAIN_FOLDER = "/General/50 Docs hosted by SELISE"
    SHAREPOINT_BACKUP_FOLDER = "/General/50 Docs hosted by SELISE/SyncHistory"
    SHAREPOINT_AUDIT_FOLDER = "/General/50 Docs hosted by SELISE/SyncHistory"
    SHAREPOINT_USER_ID = "patrick.motsch@delta.ch"
    # Sync mode: "csv" or "xlsx"
    SYNC_MODE = "xlsx"  # Can be "csv" or "xlsx"
    # File names for different sync modes
    SYNC_FILE_CSV = "DELTAgroup x SELISE Ticket Exchange List.csv"
    SYNC_FILE_XLSX = "DELTAgroup x SELISE Ticket Exchange List.xlsx"
    # JIRA connection parameters (hardcoded for Delta Group)
    # SECURITY(review): a live API token is committed in source below. It is
    # left unchanged so the sync keeps working, but it should be rotated and
    # loaded from the secret/configuration store instead of this literal.
    JIRA_USERNAME = "p.motsch@valueon.ch"
    JIRA_API_TOKEN = "ATATT3xFfGF0d973nNb3R1wTDI4lesmJfJAmooS-4cYMJTyLfwYv4himrE6yyCxyX3aSMfl34NHcm2fAXeFXrLHUzJx0RQVUBonCFnlgexjLQTgS5BoCbSO7dwAVjlcHZZkArHbooCUaRwJ15n6AHkm-nwdjLQ3Z74TFnKKUZC4uhuh3Aj-MuX8=2D7124FA"
    JIRA_URL = "https://deltasecurity.atlassian.net"
    JIRA_PROJECT_CODE = "DCS"
    JIRA_ISSUE_TYPE = "Task"
    # Task sync definition for field mapping (like original synchronizer)
    TASK_SYNC_DEFINITION = {
        # key=excel-header, [get:jira>excel | put: excel>jira, jira-xml-field-list]
        'ID': ['get', ['key']],
        'Module Category': ['get', ['fields', 'customfield_10058', 'value']],
        'Summary': ['get', ['fields', 'summary']],
        'Description': ['get', ['fields', 'description']],  # ADF format - needs conversion to text
        'References': ['get', ['fields', 'customfield_10066']],  # Field exists, may be None
        'Priority': ['get', ['fields', 'priority', 'name']],
        'Issue Status': ['get', ['fields', 'status', 'name']],
        'Assignee': ['get', ['fields', 'assignee', 'displayName']],
        'Issue Created': ['get', ['fields', 'created']],
        'Due Date': ['get', ['fields', 'duedate']],  # Field exists, may be None
        'DELTA Comments': ['get', ['fields', 'customfield_10167']],  # Field exists, may be None
        'SELISE Ticket References': ['put', ['fields', 'customfield_10067']],
        'SELISE Status Values': ['put', ['fields', 'customfield_10065']],
        'SELISE Comments': ['put', ['fields', 'customfield_10168']],
    }

    def __init__(self):
        """Initialize the sync manager with hardcoded Delta Group credentials.

        Connectors are not created here; they are set up by
        initialize_connectors().
        """
        self.root_interface = getRootInterface()
        self.jira_connector = None
        self.sharepoint_connector = None
        self.target_site = None
        # Initialize centralized services with root user
        from modules.services import getInterface as getServices
        root_user = self.root_interface.getUserByUsername("admin")
        self.services = getServices(root_user, None)

    def get_sync_file_name(self) -> str:
        """Return the sync file name for the current sync mode.

        Any mode other than "xlsx" falls back to the CSV file name.
        """
        if self.SYNC_MODE == "xlsx":
            return self.SYNC_FILE_XLSX
        return self.SYNC_FILE_CSV

    def set_sync_mode(self, mode: str) -> bool:
        """Set the sync mode to either 'csv' or 'xlsx' (case-insensitive).

        The mode is stored on the instance, so the class default and other
        instances are unaffected.

        Args:
            mode: Either 'csv' or 'xlsx'.

        Returns:
            bool: True if the mode was set, False if *mode* is invalid
            (including non-string values).
        """
        normalized = mode.lower() if isinstance(mode, str) else None
        if normalized in ("csv", "xlsx"):
            self.SYNC_MODE = normalized
            logger.info(f"Sync mode changed to: {self.SYNC_MODE}")
            return True
        logger.error(f"Invalid sync mode: {mode}. Must be 'csv' or 'xlsx'")
        return False

    async def initialize_connectors(self) -> bool:
        """Initialize the JIRA and SharePoint connectors and resolve the target site.

        On success, self.jira_connector, self.sharepoint_connector and
        self.target_site are populated.

        Returns:
            bool: True when both connectors are ready and the site drive is
            accessible; False on any failure (the reason is logged).
        """
        try:
            logger.info("Initializing JIRA connector with hardcoded credentials")
            # Initialize JIRA connector using class constants
            self.jira_connector = await ConnectorTicketJira.create(
                jira_username=self.JIRA_USERNAME,
                jira_api_token=self.JIRA_API_TOKEN,
                jira_url=self.JIRA_URL,
                project_code=self.JIRA_PROJECT_CODE,
                issue_type=self.JIRA_ISSUE_TYPE
            )

            # Use the admin user for SharePoint connection
            adminUser = self.root_interface.getUserByUsername("admin")
            if not adminUser:
                logger.error("Admin user not found - SharePoint connection required")
                return False
            logger.info(f"Using admin user for SharePoint: {adminUser.id}")

            # Get SharePoint connection for admin user
            user_connections = self.root_interface.getUserConnections(adminUser.id)
            sharepoint_connection = None
            for connection in user_connections:
                if connection.authority == "msft" and connection.externalUsername == self.SHAREPOINT_USER_ID:
                    sharepoint_connection = connection
                    break
            if not sharepoint_connection:
                logger.error(f"No SharePoint connection found for user: {self.SHAREPOINT_USER_ID}")
                return False
            logger.info(f"Found SharePoint connection: {sharepoint_connection.id}")

            # Get fresh SharePoint token for this connection
            from modules.security.tokenManager import TokenManager
            sharepoint_token = TokenManager().getFreshToken(self.root_interface, sharepoint_connection.id)
            if not sharepoint_token:
                logger.error("No SharePoint token found for Delta Group user connection")
                return False
            logger.info(f"Found SharePoint token: {sharepoint_token.id}")

            # Initialize SharePoint connector with Graph API
            self.sharepoint_connector = SharepointService(access_token=sharepoint_token.tokenAccess)

            # Resolve the site by hostname + site path to get the real site ID
            logger.info(
                f"Resolving site ID via hostname+path: {self.SHAREPOINT_HOSTNAME}:/sites/{self.SHAREPOINT_SITE_PATH}"
            )
            resolved = await self.sharepoint_connector.find_site_by_url(
                hostname=self.SHAREPOINT_HOSTNAME,
                site_path=self.SHAREPOINT_SITE_PATH
            )
            if not resolved:
                logger.error(
                    f"Failed to resolve site. Hostname: {self.SHAREPOINT_HOSTNAME}, Path: {self.SHAREPOINT_SITE_PATH}"
                )
                return False

            # Every later call is keyed on the site ID, so refuse to continue
            # with a resolution result that does not carry one.
            site_id = resolved.get("id")
            if not site_id:
                logger.error(
                    f"Resolved site has no ID. Hostname: {self.SHAREPOINT_HOSTNAME}, Path: {self.SHAREPOINT_SITE_PATH}"
                )
                return False
            self.target_site = {
                "id": site_id,
                "displayName": resolved.get("displayName", self.SHAREPOINT_SITE_NAME),
                "name": resolved.get("name", self.SHAREPOINT_SITE_NAME)
            }

            # Test site access by listing root of the drive
            logger.info("Testing site access using resolved site ID...")
            test_result = await self.sharepoint_connector.list_folder_contents(
                site_id=self.target_site["id"],
                folder_path=""
            )
            if test_result is None:
                logger.error("Could not access site drive - check permissions")
                return False
            logger.info(
                f"Site access confirmed: {self.target_site['displayName']} (ID: {self.target_site['id']})"
            )
            return True
        except Exception as e:
            # logger.exception keeps the traceback, which the plain message lost.
            logger.exception(f"Error initializing connectors: {e}")
            return False

    async def sync_jira_to_sharepoint(self) -> bool:
        """Run the full synchronization in both directions for the current mode.

        Initializes the connectors, builds the TicketSharepointSyncInterface and
        then syncs JIRA -> file followed by file -> JIRA (Excel or CSV variant
        depending on SYNC_MODE).

        Returns:
            bool: True if the sync completed, False on any failure (logged).
        """
        try:
            logger.info(f"Starting JIRA to SharePoint synchronization (Mode: {self.SYNC_MODE})")
            # Initialize connectors
            if not await self.initialize_connectors():
                logger.error("Failed to initialize connectors")
                return False

            # NOTE: the debugging helpers dump_jira_fields_to_file() and
            # dump_jira_data_to_file() in this module are not invoked here;
            # run them manually when investigating field mappings.

            # Get the appropriate sync file name based on mode
            sync_file_name = self.get_sync_file_name()
            logger.info(f"Using sync file: {sync_file_name}")

            # Create the sophisticated sync interface
            sync_interface = await TicketSharepointSyncInterface.create(
                connector_ticket=self.jira_connector,
                connector_sharepoint=self.sharepoint_connector,
                task_sync_definition=self.TASK_SYNC_DEFINITION,
                sync_folder=self.SHAREPOINT_MAIN_FOLDER,
                sync_file=sync_file_name,
                backup_folder=self.SHAREPOINT_BACKUP_FOLDER,
                audit_folder=self.SHAREPOINT_AUDIT_FOLDER,
                site_id=self.target_site['id']
            )

            # Perform the sophisticated sync based on mode
            if self.SYNC_MODE == "xlsx":
                logger.info("Performing JIRA to Excel sync...")
                await sync_interface.sync_from_jira_to_excel()
                logger.info("Performing Excel to JIRA sync...")
                await sync_interface.sync_from_excel_to_jira()
            else:  # CSV mode (fallback for any non-xlsx mode)
                logger.info("Performing JIRA to CSV sync...")
                await sync_interface.sync_from_jira_to_csv()
                logger.info("Performing CSV to JIRA sync...")
                await sync_interface.sync_from_csv_to_jira()

            logger.info(f"JIRA to SharePoint synchronization completed successfully (Mode: {self.SYNC_MODE})")
            return True
        except Exception as e:
            logger.exception(f"Error during JIRA to SharePoint synchronization: {e}")
            return False
# Utility: dump all Jira fields (name -> field id) to a text file
async def dump_jira_fields_to_file(filepath: str = "delta_sync_fields.txt") -> bool:
    """Write all available JIRA fields for the configured project/issue type to a text file.

    The output format matches the legacy fields.txt, e.g.:
        'Summary': ['get', ['fields', 'summary']]

    Field names and ids are written as Python string literals, so names that
    contain quotes or backslashes still produce a valid mapping line.

    Args:
        filepath: Target text file path to write.

    Returns:
        True on success, False otherwise (no attributes returned, or any error).
    """
    try:
        # Initialize Jira connector with the hardcoded credentials/constants
        jira = await ConnectorTicketJira.create(
            jira_username=ManagerSyncDelta.JIRA_USERNAME,
            jira_api_token=ManagerSyncDelta.JIRA_API_TOKEN,
            jira_url=ManagerSyncDelta.JIRA_URL,
            project_code=ManagerSyncDelta.JIRA_PROJECT_CODE,
            issue_type=ManagerSyncDelta.JIRA_ISSUE_TYPE,
        )
        attributes = await jira.read_attributes()
        if not attributes:
            logger.warning("No JIRA attributes returned; nothing to write.")
            return False

        # Build every line before touching the file so a malformed attribute
        # cannot leave a truncated/partial file behind.
        # attr.field_name (human name), attr.field (Jira field id)
        lines = [
            f"{str(attr.field_name)!r}: ['get', ['fields', {str(attr.field)!r}]]\n"
            for attr in attributes
        ]

        # Ensure directory exists if a directory part is provided
        dir_name = os.path.dirname(filepath)
        if dir_name:
            os.makedirs(dir_name, exist_ok=True)

        # Write in the expected mapping format
        with open(filepath, "w", encoding="utf-8") as f:
            f.writelines(lines)
        logger.info(f"Wrote {len(attributes)} JIRA fields to {filepath}")
        return True
    except Exception as e:
        logger.exception(f"Failed to dump JIRA fields: {e}")
        return False
# Utility: dump actual JIRA data for debugging
async def dump_jira_data_to_file(filepath: str = "delta_sync_data.txt") -> bool:
    """Dump a handful of real JIRA tickets to a text file to debug the field mapping.

    For each sampled ticket the raw data is written, followed by the value each
    'get' entry of ManagerSyncDelta.TASK_SYNC_DEFINITION resolves to.

    Args:
        filepath: Target text file path to write.

    Returns:
        True on success, False otherwise.
    """
    # Columns whose values may arrive as ADF documents and are rendered as text.
    adf_columns = ('Description', 'References', 'DELTA Comments', 'SELISE Comments')
    try:
        # Jira connector built from the hardcoded class constants
        jira = await ConnectorTicketJira.create(
            jira_username=ManagerSyncDelta.JIRA_USERNAME,
            jira_api_token=ManagerSyncDelta.JIRA_API_TOKEN,
            jira_url=ManagerSyncDelta.JIRA_URL,
            project_code=ManagerSyncDelta.JIRA_PROJECT_CODE,
            issue_type=ManagerSyncDelta.JIRA_ISSUE_TYPE,
        )
        # A few sample tickets are enough to inspect the data structure
        tickets = await jira.read_tasks(limit=5)
        if not tickets:
            logger.warning("No JIRA tickets returned; nothing to write.")
            return False

        # Create the parent directory when the path has one
        target_dir = os.path.dirname(filepath)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)

        with open(filepath, "w", encoding="utf-8") as out:
            out.write("=== JIRA TICKET DATA DEBUG ===\n\n")
            for number, ticket in enumerate(tickets, start=1):
                out.write(f"--- TICKET {number} ---\n")
                out.write("Raw ticket data:\n")
                out.write(f"{ticket.data}\n\n")
                # Resolve each mapped field against this ticket
                out.write("Field mapping analysis:\n")
                for column, spec in ManagerSyncDelta.TASK_SYNC_DEFINITION.items():
                    if spec[0] != 'get':  # only 'get' fields are analysed
                        continue
                    try:
                        # Walk the key path through the nested ticket data
                        current = ticket.data
                        for step in spec[1]:
                            if not (isinstance(current, dict) and step in current):
                                current = f"KEY_NOT_FOUND: {step}"
                                break
                            current = current[step]
                        # ADF documents become text; a missing value becomes ""
                        if column in adf_columns:
                            if isinstance(current, dict) and current.get("type") == "doc":
                                current = convert_adf_to_text(current)
                            elif current is None:
                                current = ""
                        out.write(f" {column}: {current}\n")
                    except Exception as field_error:
                        out.write(f" {column}: ERROR - {field_error}\n")
                out.write("\n" + "=" * 50 + "\n\n")
        logger.info(f"Wrote JIRA data for {len(tickets)} tickets to {filepath}")
        return True
    except Exception as e:
        logger.error(f"Failed to dump JIRA data: {e}")
        return False
# Global sync function for use in app.py
async def perform_sync_jira_delta_group() -> bool:
    """Perform JIRA to SharePoint synchronization for Delta Group.

    This function is called by the scheduler and can be used independently.
    The sync only runs when APP_ENV_TYPE is "prod" or "tst"; in any other
    environment the call is a logged no-op that reports success.

    Returns:
        bool: True if synchronization was successful (or skipped because of the
        environment), False otherwise.
    """
    try:
        if APP_ENV_TYPE not in ("prod", "tst"):
            # Message now names both environments the condition actually allows.
            logger.info("JIRA to SharePoint synchronization: TASK to run only in PROD/TST")
            return True
        logger.info("Starting Delta Group JIRA sync...")
        sync_manager = ManagerSyncDelta()
        success = await sync_manager.sync_jira_to_sharepoint()
        if success:
            logger.info("Delta Group JIRA sync completed successfully")
        else:
            logger.error("Delta Group JIRA sync failed")
        return success
    except Exception as e:
        # Keep the traceback so scheduler failures can be diagnosed.
        logger.exception(f"Error in perform_sync_jira_delta_group: {e}")
        return False
# Import-time side effect: register the sync as a cron job with the shared
# event manager. A failure here is logged and does not stop the module import.
try:
    from modules.shared.eventManagement import eventManager

    eventManager.register_cron(
        func=perform_sync_jira_delta_group,
        job_id="jira_delta_group_sync",
        # Fires at minutes 00, 20 and 40, i.e. every 20 minutes
        cron_kwargs={"minute": "0,20,40"},
        max_instances=1,
        coalesce=True,
        misfire_grace_time=1800,
        replace_existing=True,
    )
    logger.info("Registered jira_delta_group_sync via EventManagement (every 20 minutes)")
except Exception as registration_error:
    logger.error(f"Failed to register jira_delta_group_sync: {registration_error}")