gateway/modules/interfaces/interfaceTicketObjects.py
from dataclasses import dataclass
from io import BytesIO, StringIO
from typing import Any, Optional
from datetime import datetime, timezone
import pandas as pd
import openpyxl
from modules.shared.timezoneUtils import get_utc_now
from modules.services.serviceSharepoint.mainSharepoint import SharepointService
from modules.interfaces.interfaceTicketModel import TicketBase, Task
@dataclass(slots=True)
class TicketSharepointSyncInterface:
connector_ticket: TicketBase
connector_sharepoint: SharepointService
task_sync_definition: dict
sync_folder: str
sync_file: str
backup_folder: str
audit_folder: str
site_id: str # Keep for compatibility but not used with REST API
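# task_sync_definition maps an output column name to a (direction, field_path) pair:
# direction is "get" (JIRA -> file) or "put" (file -> JIRA), and field_path is the
# list of keys used to walk the raw JIRA issue payload. A minimal, purely
# illustrative sketch of the expected shape (the custom field ID is hypothetical):
#
#     task_sync_definition = {
#         "ID": ["get", ["key"]],
#         "Summary": ["get", ["fields", "summary"]],
#         "Due Date": ["get", ["fields", "duedate"]],
#         "Comment": ["put", ["fields", "customfield_10067"]],
#     }
#
# The "ID" column is mandatory; it is used as the merge key in both sync directions.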
@classmethod
async def create(
cls,
connector_ticket: TicketBase,
connector_sharepoint: SharepointService,
task_sync_definition: dict,
sync_folder: str,
sync_file: str,
backup_folder: str,
audit_folder: str,
site_id: str,
) -> "TicketSharepointSyncInterface":
return cls(
connector_ticket=connector_ticket,
connector_sharepoint=connector_sharepoint,
task_sync_definition=task_sync_definition,
sync_folder=sync_folder,
sync_file=sync_file,
backup_folder=backup_folder,
audit_folder=audit_folder,
site_id=site_id,
)
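# Minimal usage sketch (construction of the JIRA connector and SharePoint service
# is assumed to happen elsewhere; folder paths and file names below are illustrative):
#
#     sync = await TicketSharepointSyncInterface.create(
#         connector_ticket=jira_connector,
#         connector_sharepoint=sharepoint_service,
#         task_sync_definition=task_sync_definition,
#         sync_folder="Shared Documents/Sync",
#         sync_file="tasks.xlsx",
#         backup_folder="Shared Documents/Sync/Backups",
#         audit_folder="Shared Documents/Sync/Audit",
#         site_id="<site-id>",
#     )
#     await sync.sync_from_jira_to_excel()
#     await sync.sync_from_excel_to_jira()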
async def create_backup(self):
"""Creates a backup of the current sync file in the backup folder."""
timestamp = get_utc_now().strftime("%Y%m%d_%H%M%S")
backup_filename = f"backup_{timestamp}_{self.sync_file}"
try:
await self.connector_sharepoint.copy_file_async(
site_id=self.site_id,
source_folder=self.sync_folder,
source_file=self.sync_file,
dest_folder=self.backup_folder,
dest_file=backup_filename,
)
except Exception as e:
# If the source file doesn't exist (404 error), that's okay for first-time sync
if "itemNotFound" in str(e) or "404" in str(e) or "could not be found" in str(e):
raise Exception(f"Source file does not exist - no backup needed: {self.sync_file}")
else:
# Re-raise other errors
raise
async def sync_from_jira_to_csv(self):
"""Syncs tasks from JIRA to a CSV file in SharePoint."""
start_time = get_utc_now()
audit_log = []
audit_log.append("=== JIRA TO CSV SYNC STARTED ===")
audit_log.append(f"Start Time: {start_time.strftime('%Y-%m-%d %H:%M:%S')}")
audit_log.append(f"Sync File: {self.sync_file}")
audit_log.append(f"Sync Folder: {self.sync_folder}")
audit_log.append("")
try:
# 1. Read JIRA tickets
audit_log.append("Step 1: Reading JIRA tickets...")
tickets = await self.connector_ticket.read_tasks(limit=0)
audit_log.append(f"JIRA issues read: {len(tickets)}")
audit_log.append("")
# 2. Transform tasks according to task_sync_definition
audit_log.append("Step 2: Transforming JIRA data...")
transformed_tasks = self._transform_tasks(tickets, include_put=True)
jira_data = [task.data for task in transformed_tasks]
before_count = len(jira_data)
# Remove records without an ID to avoid blank rows
jira_data = self._filter_empty_records(jira_data)
after_count = len(jira_data)
audit_log.append(f"JIRA issues transformed: {before_count}")
audit_log.append(f"JIRA issues after ID filter: {after_count}")
# Log a sample of IDs to diagnose empty export issues
try:
sample_ids = [str(row.get("ID")) for row in jira_data[:5]]
audit_log.append(f"Sample IDs: {', '.join(sample_ids)}")
except Exception:
pass
audit_log.append("")
# 3. Create JIRA export file in audit folder
audit_log.append("Step 3: Creating JIRA export file...")
try:
timestamp = get_utc_now().strftime("%Y%m%d_%H%M%S")
jira_export_filename = f"jira_export_{timestamp}.csv"
# Use default headers for JIRA export
jira_export_content = self._create_csv_content(jira_data, {"header1": "JIRA Export", "header2": "Raw Data"})
await self.connector_sharepoint.upload_file(
site_id=self.site_id,
folder_path=self.audit_folder,
file_name=jira_export_filename,
content=jira_export_content,
)
audit_log.append(f"JIRA export file created: {jira_export_filename}")
except Exception as e:
audit_log.append(f"Failed to create JIRA export file: {str(e)}")
audit_log.append("")
# 4. Create backup of existing sync file (if it exists)
audit_log.append("Step 4: Creating backup...")
backup_created = False
try:
await self.create_backup()
backup_created = True
audit_log.append("Backup created successfully")
except Exception as e:
audit_log.append(
f"Backup creation failed (file might not exist): {str(e)}"
)
audit_log.append("")
# 5. Try to read existing CSV file from SharePoint
audit_log.append("Step 5: Reading existing CSV file...")
existing_data = []
existing_file_found = False
existing_headers = {"header1": "", "header2": ""}
try:
file_path = f"{self.sync_folder}/{self.sync_file}"
csv_content = await self.connector_sharepoint.download_file_by_path(
site_id=self.site_id, file_path=file_path
)
# Read the first two lines to get headers
csv_lines = csv_content.decode('utf-8').split('\n')
if len(csv_lines) >= 2:
# Store the raw first two lines as headers (preserving original formatting)
existing_headers["header1"] = csv_lines[0].rstrip('\r\n')
existing_headers["header2"] = csv_lines[1].rstrip('\r\n')
# Try to read with robust CSV parsing (skip first 2 rows)
df_existing = pd.read_csv(
BytesIO(csv_content),
skiprows=2,
quoting=1, # QUOTE_ALL
escapechar='\\',
on_bad_lines='skip', # Skip malformed lines
engine='python' # More robust parsing
)
existing_data = df_existing.to_dict("records")
existing_file_found = True
audit_log.append(
f"Existing CSV file found with {len(existing_data)} records"
)
audit_log.append(f"Preserved headers: Header1='{existing_headers['header1']}', Header2='{existing_headers['header2']}'")
except Exception as e:
audit_log.append(f"No existing CSV file found or read error: {str(e)}")
audit_log.append("")
# 6. Merge JIRA data with existing data and track changes
audit_log.append("Step 6: Merging JIRA data with existing data...")
merged_data, change_details = self._merge_jira_with_existing_detailed(
jira_data, existing_data
)
# Log detailed changes
audit_log.append(f"Total records after merge: {len(merged_data)}")
audit_log.append(f"Records updated: {change_details['updated']}")
audit_log.append(f"Records added: {change_details['added']}")
audit_log.append(f"Records unchanged: {change_details['unchanged']}")
audit_log.append("")
# Log individual changes
if change_details["changes"]:
audit_log.append("DETAILED CHANGES:")
for change in change_details["changes"]:
audit_log.append(f"- {change}")
audit_log.append("")
# 7. Create CSV with 4-row structure and write to SharePoint
audit_log.append("Step 7: Writing updated CSV to SharePoint...")
csv_content = self._create_csv_content(merged_data, existing_headers)
await self.connector_sharepoint.upload_file(
site_id=self.site_id,
folder_path=self.sync_folder,
file_name=self.sync_file,
content=csv_content,
)
audit_log.append("CSV file successfully written to SharePoint")
audit_log.append("")
# Success summary
end_time = get_utc_now()
duration = (end_time - start_time).total_seconds()
audit_log.append("=== SYNC COMPLETED SUCCESSFULLY ===")
audit_log.append(f"End Time: {end_time.strftime('%Y-%m-%d %H:%M:%S')}")
audit_log.append(f"Duration: {duration:.2f} seconds")
audit_log.append(f"Total JIRA issues processed: {len(jira_data)}")
audit_log.append(f"Total records in final CSV: {len(merged_data)}")
except Exception as e:
# Error handling
end_time = get_utc_now()
duration = (end_time - start_time).total_seconds()
audit_log.append("")
audit_log.append("=== SYNC FAILED ===")
audit_log.append(f"Error Time: {end_time.strftime('%Y-%m-%d %H:%M:%S')}")
audit_log.append(f"Duration before failure: {duration:.2f} seconds")
audit_log.append(f"Error: {str(e)}")
raise
finally:
# Write audit log to SharePoint
await self._write_audit_log(audit_log, "jira_to_csv")
async def sync_from_csv_to_jira(self):
"""Syncs tasks from a CSV file in SharePoint to JIRA."""
start_time = get_utc_now()
audit_log = []
audit_log.append("=== CSV TO JIRA SYNC STARTED ===")
audit_log.append(f"Start Time: {start_time.strftime('%Y-%m-%d %H:%M:%S')}")
audit_log.append(f"Sync File: {self.sync_file}")
audit_log.append(f"Sync Folder: {self.sync_folder}")
audit_log.append("")
try:
# 1. Read CSV file from SharePoint
audit_log.append("Step 1: Reading CSV file from SharePoint...")
try:
file_path = f"{self.sync_folder}/{self.sync_file}"
csv_content = await self.connector_sharepoint.download_file_by_path(
site_id=self.site_id, file_path=file_path
)
# Try to read with robust CSV parsing
df = pd.read_csv(
BytesIO(csv_content),
skiprows=2,
quoting=1, # QUOTE_ALL
escapechar='\\',
on_bad_lines='skip', # Skip malformed lines
engine='python' # More robust parsing
)
csv_data = df.to_dict("records")
audit_log.append(
f"CSV file read successfully with {len(csv_data)} records"
)
except Exception as e:
audit_log.append(f"Failed to read CSV file: {str(e)}")
audit_log.append("CSV to JIRA sync aborted - no file to process")
return
audit_log.append("")
# 2. Read current JIRA data for comparison
audit_log.append("Step 2: Reading current JIRA data for comparison...")
try:
current_jira_tasks = await self.connector_ticket.read_tasks(limit=0)
current_jira_data = self._transform_tasks(
current_jira_tasks, include_put=True
)
jira_lookup = {
task.data.get("ID"): task.data for task in current_jira_data
}
audit_log.append(f"Current JIRA data read: {len(jira_lookup)} tasks")
except Exception as e:
audit_log.append(f"Failed to read current JIRA data: {str(e)}")
raise
audit_log.append("")
# 3. Detect actual changes in "put" fields
audit_log.append("Step 3: Detecting changes in 'put' fields...")
actual_changes = {}
records_with_changes = 0
total_changes = 0
for row in csv_data:
task_id = row.get("ID")
if not task_id or task_id not in jira_lookup:
continue
current_jira_task = jira_lookup[task_id]
task_changes = {}
for field_name, field_config in self.task_sync_definition.items():
if field_config[0] == "put": # Only process "put" fields
csv_value = row.get(field_name, "")
jira_value = current_jira_task.get(field_name, "")
# Convert None to empty string for comparison
csv_value = "" if csv_value is None else str(csv_value).strip()
jira_value = (
"" if jira_value is None else str(jira_value).strip()
)
# Include if values are different (allow empty strings to clear fields like the reference does)
if csv_value != jira_value:
task_changes[field_name] = csv_value
if task_changes:
actual_changes[task_id] = task_changes
records_with_changes += 1
total_changes += len(task_changes)
audit_log.append(f"Records with actual changes: {records_with_changes}")
audit_log.append(f"Total field changes detected: {total_changes}")
audit_log.append("")
# Log detailed changes
if actual_changes:
audit_log.append("DETAILED CHANGES TO APPLY TO JIRA:")
for task_id, changes in actual_changes.items():
change_list = [
f"{field}: '{value}'" for field, value in changes.items()
]
audit_log.append(f"- Task ID {task_id}: {', '.join(change_list)}")
audit_log.append("")
# 4. Update JIRA tasks with actual changes
if actual_changes:
audit_log.append("Step 4: Updating JIRA tasks...")
# Convert to Task objects for the connector
tasks_to_update = []
for task_id, changes in actual_changes.items():
# Create task data structure expected by JIRA connector
# Build the nested fields structure that JIRA expects
fields = {}
for field_name, new_value in changes.items():
# Map back to JIRA field structure using task_sync_definition
field_config = self.task_sync_definition[field_name]
field_path = field_config[1]
# Extract the JIRA field ID from the path
# For "put" fields, the path is like ['fields', 'customfield_10067']
if len(field_path) >= 2 and field_path[0] == "fields":
jira_field_id = field_path[1]
# Parse date fields back to JIRA format
if self._is_date_field(field_name) and new_value:
parsed_date = self._parse_date_from_excel(str(new_value))
if parsed_date:
fields[jira_field_id] = parsed_date
else:
fields[jira_field_id] = new_value
else:
fields[jira_field_id] = new_value
if fields:
task_data = {"ID": task_id, "fields": fields}
task = Task(data=task_data)
tasks_to_update.append(task)
# Write tasks back to JIRA
try:
await self.connector_ticket.write_tasks(tasks_to_update)
audit_log.append(
f"Successfully updated {len(tasks_to_update)} JIRA tasks"
)
except Exception as e:
audit_log.append(f"Failed to update JIRA tasks: {str(e)}")
raise
else:
audit_log.append("Step 4: No changes to apply to JIRA")
audit_log.append("")
# Success summary
end_time = get_utc_now()
duration = (end_time - start_time).total_seconds()
audit_log.append("=== SYNC COMPLETED SUCCESSFULLY ===")
audit_log.append(f"End Time: {end_time.strftime('%Y-%m-%d %H:%M:%S')}")
audit_log.append(f"Duration: {duration:.2f} seconds")
audit_log.append(f"Total CSV records processed: {len(csv_data)}")
audit_log.append(f"Records with actual changes: {records_with_changes}")
audit_log.append(f"JIRA tasks updated: {len(actual_changes)}")
except Exception as e:
# Error handling
end_time = get_utc_now()
duration = (end_time - start_time).total_seconds()
audit_log.append("")
audit_log.append("=== SYNC FAILED ===")
audit_log.append(f"Error Time: {end_time.strftime('%Y-%m-%d %H:%M:%S')}")
audit_log.append(f"Duration before failure: {duration:.2f} seconds")
audit_log.append(f"Error: {str(e)}")
raise
finally:
# Write audit log to SharePoint
await self._write_audit_log(audit_log, "csv_to_jira")
async def sync_from_jira_to_excel(self):
"""Syncs tasks from JIRA to an Excel file in SharePoint."""
start_time = get_utc_now()
audit_log = []
audit_log.append("=== JIRA TO EXCEL SYNC STARTED ===")
audit_log.append(f"Start Time: {start_time.strftime('%Y-%m-%d %H:%M:%S')}")
audit_log.append(f"Sync File: {self.sync_file}")
audit_log.append(f"Sync Folder: {self.sync_folder}")
audit_log.append("")
try:
# 1. Read JIRA tickets
audit_log.append("Step 1: Reading JIRA tickets...")
tickets = await self.connector_ticket.read_tasks(limit=0)
audit_log.append(f"JIRA issues read: {len(tickets)}")
audit_log.append("")
# 2. Transform tasks according to task_sync_definition
audit_log.append("Step 2: Transforming JIRA data...")
transformed_tasks = self._transform_tasks(tickets, include_put=True)
jira_data = [task.data for task in transformed_tasks]
audit_log.append(f"JIRA issues transformed: {len(jira_data)}")
audit_log.append("")
# 3. Create JIRA export file in audit folder
audit_log.append("Step 3: Creating JIRA export file...")
try:
timestamp = get_utc_now().strftime("%Y%m%d_%H%M%S")
jira_export_filename = f"jira_export_{timestamp}.xlsx"
# Use default headers for JIRA export
jira_export_content = self._create_excel_content(jira_data, {"header1": "JIRA Export", "header2": "Raw Data"})
await self.connector_sharepoint.upload_file(
site_id=self.site_id,
folder_path=self.audit_folder,
file_name=jira_export_filename,
content=jira_export_content,
)
audit_log.append(f"JIRA export file created: {jira_export_filename}")
except Exception as e:
audit_log.append(f"Failed to create JIRA export file: {str(e)}")
audit_log.append("")
# 4. Create backup of existing Excel file (if it exists)
audit_log.append("Step 4: Creating backup...")
backup_created = False
try:
await self.create_backup()
backup_created = True
audit_log.append("Backup created successfully")
except Exception as e:
audit_log.append(
f"Backup creation failed (file might not exist): {str(e)}"
)
audit_log.append("")
# 5. Try to read existing Excel file from SharePoint
audit_log.append("Step 5: Reading existing Excel file...")
existing_data = []
existing_file_found = False
existing_headers = {"header1": "Header 1", "header2": "Header 2"}
try:
file_path = f"{self.sync_folder}/{self.sync_file}"
excel_content = await self.connector_sharepoint.download_file_by_path(
site_id=self.site_id, file_path=file_path
)
# Parse Excel file with 4-row structure
existing_data, existing_headers = self._parse_excel_content(excel_content)
existing_file_found = True
audit_log.append(
f"Existing Excel file found with {len(existing_data)} records"
)
audit_log.append(f"Preserved headers: Header1='{existing_headers['header1']}', Header2='{existing_headers['header2']}'")
except Exception as e:
audit_log.append(f"No existing Excel file found or read error: {str(e)}")
audit_log.append("")
# 6. Merge JIRA data with existing data and track changes
audit_log.append("Step 6: Merging JIRA data with existing data...")
merged_data, change_details = self._merge_jira_with_existing_detailed(
jira_data, existing_data
)
# Log detailed changes
audit_log.append(f"Total records after merge: {len(merged_data)}")
audit_log.append(f"Records updated: {change_details['updated']}")
audit_log.append(f"Records added: {change_details['added']}")
audit_log.append(f"Records unchanged: {change_details['unchanged']}")
audit_log.append("")
# Log individual changes
if change_details["changes"]:
audit_log.append("DETAILED CHANGES:")
for change in change_details["changes"]:
audit_log.append(f"- {change}")
audit_log.append("")
# 7. Create Excel with 4-row structure and write to SharePoint
audit_log.append("Step 7: Writing updated Excel to SharePoint...")
# Ensure no records without ID are written
merged_data = self._filter_empty_records(merged_data)
excel_content = self._create_excel_content(merged_data, existing_headers)
await self.connector_sharepoint.upload_file(
site_id=self.site_id,
folder_path=self.sync_folder,
file_name=self.sync_file,
content=excel_content,
)
audit_log.append("Excel file successfully written to SharePoint")
audit_log.append("")
# Success summary
end_time = get_utc_now()
duration = (end_time - start_time).total_seconds()
audit_log.append("=== SYNC COMPLETED SUCCESSFULLY ===")
audit_log.append(f"End Time: {end_time.strftime('%Y-%m-%d %H:%M:%S')}")
audit_log.append(f"Duration: {duration:.2f} seconds")
audit_log.append(f"Total JIRA issues processed: {len(jira_data)}")
audit_log.append(f"Total records in final Excel: {len(merged_data)}")
except Exception as e:
# Error handling
end_time = get_utc_now()
duration = (end_time - start_time).total_seconds()
audit_log.append("")
audit_log.append("=== SYNC FAILED ===")
audit_log.append(f"Error Time: {end_time.strftime('%Y-%m-%d %H:%M:%S')}")
audit_log.append(f"Duration before failure: {duration:.2f} seconds")
audit_log.append(f"Error: {str(e)}")
raise
finally:
# Write audit log to SharePoint
await self._write_audit_log(audit_log, "jira_to_excel")
async def sync_from_excel_to_jira(self):
"""Syncs tasks from an Excel file in SharePoint to JIRA."""
start_time = get_utc_now()
audit_log = []
audit_log.append("=== EXCEL TO JIRA SYNC STARTED ===")
audit_log.append(f"Start Time: {start_time.strftime('%Y-%m-%d %H:%M:%S')}")
audit_log.append(f"Sync File: {self.sync_file}")
audit_log.append(f"Sync Folder: {self.sync_folder}")
audit_log.append("")
try:
# 1. Read Excel file from SharePoint
audit_log.append("Step 1: Reading Excel file from SharePoint...")
try:
file_path = f"{self.sync_folder}/{self.sync_file}"
excel_content = await self.connector_sharepoint.download_file_by_path(
site_id=self.site_id, file_path=file_path
)
# Parse Excel file with 4-row structure
excel_data, _ = self._parse_excel_content(excel_content)
audit_log.append(
f"Excel file read successfully with {len(excel_data)} records"
)
except Exception as e:
audit_log.append(f"Failed to read Excel file: {str(e)}")
audit_log.append("Excel to JIRA sync aborted - no file to process")
return
audit_log.append("")
# 2. Read current JIRA data for comparison
audit_log.append("Step 2: Reading current JIRA data for comparison...")
try:
current_jira_tasks = await self.connector_ticket.read_tasks(limit=0)
current_jira_data = self._transform_tasks(
current_jira_tasks, include_put=True
)
jira_lookup = {
task.data.get("ID"): task.data for task in current_jira_data
}
audit_log.append(f"Current JIRA data read: {len(jira_lookup)} tasks")
except Exception as e:
audit_log.append(f"Failed to read current JIRA data: {str(e)}")
raise
audit_log.append("")
# 3. Detect actual changes in "put" fields
audit_log.append("Step 3: Detecting changes in 'put' fields...")
actual_changes = {}
records_with_changes = 0
total_changes = 0
for row in excel_data:
task_id = row.get("ID")
if not task_id or task_id not in jira_lookup:
continue
current_jira_task = jira_lookup[task_id]
task_changes = {}
for field_name, field_config in self.task_sync_definition.items():
if field_config[0] == "put": # Only process "put" fields
excel_value = row.get(field_name, "")
jira_value = current_jira_task.get(field_name, "")
# Convert None to empty string for comparison
excel_value = "" if excel_value is None else str(excel_value).strip()
jira_value = (
"" if jira_value is None else str(jira_value).strip()
)
# Include if values are different (allow empty strings to clear fields like the reference does)
if excel_value != jira_value:
task_changes[field_name] = excel_value
if task_changes:
actual_changes[task_id] = task_changes
records_with_changes += 1
total_changes += len(task_changes)
audit_log.append(f"Records with actual changes: {records_with_changes}")
audit_log.append(f"Total field changes detected: {total_changes}")
audit_log.append("")
# Log detailed changes
if actual_changes:
audit_log.append("DETAILED CHANGES TO APPLY TO JIRA:")
for task_id, changes in actual_changes.items():
change_list = [
f"{field}: '{value}'" for field, value in changes.items()
]
audit_log.append(f"- Task ID {task_id}: {', '.join(change_list)}")
audit_log.append("")
# 4. Update JIRA tasks with actual changes
if actual_changes:
audit_log.append("Step 4: Updating JIRA tasks...")
# Convert to Task objects for the connector
tasks_to_update = []
for task_id, changes in actual_changes.items():
# Create task data structure expected by JIRA connector
# Build the nested fields structure that JIRA expects
fields = {}
for field_name, new_value in changes.items():
# Map back to JIRA field structure using task_sync_definition
field_config = self.task_sync_definition[field_name]
field_path = field_config[1]
# Extract the JIRA field ID from the path
# For "put" fields, the path is like ['fields', 'customfield_10067']
if len(field_path) >= 2 and field_path[0] == "fields":
jira_field_id = field_path[1]
# Parse date fields back to JIRA format
if self._is_date_field(field_name) and new_value:
parsed_date = self._parse_date_from_excel(str(new_value))
if parsed_date:
fields[jira_field_id] = parsed_date
else:
fields[jira_field_id] = new_value
else:
fields[jira_field_id] = new_value
if fields:
task_data = {"ID": task_id, "fields": fields}
task = Task(data=task_data)
tasks_to_update.append(task)
# Write tasks back to JIRA
try:
await self.connector_ticket.write_tasks(tasks_to_update)
audit_log.append(
f"Successfully updated {len(tasks_to_update)} JIRA tasks"
)
except Exception as e:
audit_log.append(f"Failed to update JIRA tasks: {str(e)}")
raise
else:
audit_log.append("Step 4: No changes to apply to JIRA")
audit_log.append("")
# Success summary
end_time = get_utc_now()
duration = (end_time - start_time).total_seconds()
audit_log.append("=== SYNC COMPLETED SUCCESSFULLY ===")
audit_log.append(f"End Time: {end_time.strftime('%Y-%m-%d %H:%M:%S')}")
audit_log.append(f"Duration: {duration:.2f} seconds")
audit_log.append(f"Total Excel records processed: {len(excel_data)}")
audit_log.append(f"Records with actual changes: {records_with_changes}")
audit_log.append(f"JIRA tasks updated: {len(actual_changes)}")
except Exception as e:
# Error handling
end_time = get_utc_now()
duration = (end_time - start_time).total_seconds()
audit_log.append("")
audit_log.append("=== SYNC FAILED ===")
audit_log.append(f"Error Time: {end_time.strftime('%Y-%m-%d %H:%M:%S')}")
audit_log.append(f"Duration before failure: {duration:.2f} seconds")
audit_log.append(f"Error: {str(e)}")
raise
finally:
# Write audit log to SharePoint
await self._write_audit_log(audit_log, "excel_to_jira")
def _transform_tasks(
self, tasks: list[Task], include_put: bool = False
) -> list[Task]:
"""Transforms tasks according to the task_sync_definition."""
transformed_tasks = []
for task in tasks:
transformed_data = {}
# Process each field in the sync definition
for field_name, field_config in self.task_sync_definition.items():
direction = field_config[0] # "get" or "put"
field_path = field_config[1] # List of keys to navigate
# Get the right fields
if direction == "get" or include_put:
# Extract value using the field path
value = self._extract_field_value(task.data, field_path, field_name)
transformed_data[field_name] = value
# Create new Task with transformed data
transformed_task = Task(data=transformed_data)
transformed_tasks.append(transformed_task)
return transformed_tasks
def _extract_field_value(self, issue_data: dict, field_path: list[str], field_name: Optional[str] = None) -> Any:
"""Extract field value from JIRA issue data using field path."""
value = issue_data
try:
for key in field_path:
if value is not None:
value = value[key]
if value is None:
return None
# Handle complex objects that have a 'value' field (like custom field options)
if isinstance(value, dict) and "value" in value:
value = value["value"]
# Handle lists of objects with 'value' fields
elif (
isinstance(value, list)
and len(value) > 0
and isinstance(value[0], dict)
and "value" in value[0]
):
value = value[0]["value"]
# Apply ADF conversion for specific fields that contain ADF content
if isinstance(value, dict) and value.get("type") == "doc":
value = self._convert_adf_to_text(value)
# Apply date formatting for date fields
if field_name and self._is_date_field(field_name):
value = self._format_date_for_excel(value)
return value
except (KeyError, TypeError):
return None
def _convert_adf_to_text(self, adf_data):
"""Convert Atlassian Document Format (ADF) to plain text.
Based on Atlassian Document Format specification for JIRA fields.
Handles paragraphs, lists, text formatting, and other ADF node types.
Args:
adf_data: ADF object or None
Returns:
str: Plain text content, or empty string if None/invalid
"""
if not adf_data or not isinstance(adf_data, dict):
return ""
if adf_data.get("type") != "doc":
return str(adf_data) if adf_data else ""
content = adf_data.get("content", [])
if not isinstance(content, list):
return ""
def extract_text_from_content(content_list, list_level=0):
"""Recursively extract text from ADF content with proper formatting."""
text_parts = []
list_counter = 1
for item in content_list:
if not isinstance(item, dict):
continue
item_type = item.get("type", "")
if item_type == "text":
# Extract text content, preserving formatting
text = item.get("text", "")
marks = item.get("marks", [])
# Handle text formatting (bold, italic, etc.)
if marks:
for mark in marks:
if mark.get("type") == "strong":
text = f"**{text}**"
elif mark.get("type") == "em":
text = f"*{text}*"
elif mark.get("type") == "code":
text = f"`{text}`"
elif mark.get("type") == "link":
attrs = mark.get("attrs", {})
href = attrs.get("href", "")
if href:
text = f"[{text}]({href})"
text_parts.append(text)
elif item_type == "hardBreak":
text_parts.append("\n")
elif item_type == "paragraph":
paragraph_content = item.get("content", [])
if paragraph_content:
paragraph_text = extract_text_from_content(paragraph_content, list_level)
if paragraph_text.strip():
text_parts.append(paragraph_text)
elif item_type == "bulletList":
list_content = item.get("content", [])
for list_item in list_content:
if list_item.get("type") == "listItem":
list_item_content = list_item.get("content", [])
for list_paragraph in list_item_content:
if list_paragraph.get("type") == "paragraph":
list_paragraph_content = list_paragraph.get("content", [])
if list_paragraph_content:
indent = " " * list_level
bullet_text = extract_text_from_content(list_paragraph_content, list_level + 1)
if bullet_text.strip():
text_parts.append(f"{indent}{bullet_text}")
elif item_type == "orderedList":
list_content = item.get("content", [])
for list_item in list_content:
if list_item.get("type") == "listItem":
list_item_content = list_item.get("content", [])
for list_paragraph in list_item_content:
if list_paragraph.get("type") == "paragraph":
list_paragraph_content = list_paragraph.get("content", [])
if list_paragraph_content:
indent = " " * list_level
ordered_text = extract_text_from_content(list_paragraph_content, list_level + 1)
if ordered_text.strip():
text_parts.append(f"{indent}{list_counter}. {ordered_text}")
list_counter += 1
elif item_type == "listItem":
# Handle nested list items
list_item_content = item.get("content", [])
if list_item_content:
text_parts.append(extract_text_from_content(list_item_content, list_level))
elif item_type == "embedCard":
# Handle embedded content (videos, etc.)
attrs = item.get("attrs", {})
url = attrs.get("url", "")
if url:
text_parts.append(f"[Embedded Content: {url}]")
elif item_type == "codeBlock":
# Handle code blocks
code_content = item.get("content", [])
if code_content:
code_text = extract_text_from_content(code_content, list_level)
if code_text.strip():
text_parts.append(f"```\n{code_text}\n```")
elif item_type == "blockquote":
# Handle blockquotes
quote_content = item.get("content", [])
if quote_content:
quote_text = extract_text_from_content(quote_content, list_level)
if quote_text.strip():
text_parts.append(f"> {quote_text}")
elif item_type == "heading":
# Handle headings
heading_content = item.get("content", [])
if heading_content:
heading_text = extract_text_from_content(heading_content, list_level)
if heading_text.strip():
level = item.get("attrs", {}).get("level", 1)
text_parts.append(f"{'#' * level} {heading_text}")
elif item_type == "rule":
# Handle horizontal rules
text_parts.append("---")
else:
# Handle unknown types by trying to extract content
if "content" in item:
content_text = extract_text_from_content(item.get("content", []), list_level)
if content_text.strip():
text_parts.append(content_text)
return "\n".join(text_parts)
result = extract_text_from_content(content)
return result.strip()
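# Example for _convert_adf_to_text above (illustrative): a minimal ADF document
#     {"type": "doc", "version": 1, "content": [
#         {"type": "paragraph", "content": [
#             {"type": "text", "text": "Hello world", "marks": [{"type": "strong"}]}]},
#         {"type": "paragraph", "content": [
#             {"type": "text", "text": "Second line"}]}]}
# is converted to the plain text "**Hello world**\nSecond line".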
def _format_date_for_excel(self, date_value: Any) -> Optional[str]:
"""Format date value for Excel export.
Handles various date formats from JIRA and converts them to a consistent format
suitable for Excel display.
Args:
date_value: Date value from JIRA (string, datetime, or None)
Returns:
Formatted date string or None if invalid/empty
"""
if not date_value:
return None
try:
# Handle ISO 8601 strings (JIRA format: 2025-09-16T12:33:10.044+0200)
if isinstance(date_value, str):
# Parse ISO format with timezone
if 'T' in date_value and ('+' in date_value or 'Z' in date_value):
    normalized = date_value.replace('Z', '+00:00')
    # fromisoformat() on Python < 3.11 requires a colon in the UTC offset (+02:00, not +0200)
    if len(normalized) >= 5 and normalized[-5] in ('+', '-') and ':' not in normalized[-5:]:
        normalized = f"{normalized[:-2]}:{normalized[-2:]}"
    dt = datetime.fromisoformat(normalized)
    # Convert to UTC for consistency
    if dt.tzinfo:
        dt = dt.astimezone(timezone.utc)
    return dt.strftime('%Y-%m-%d %H:%M:%S UTC')
# Handle simple date strings
elif len(date_value) == 10 and date_value.count('-') == 2:
dt = datetime.strptime(date_value, '%Y-%m-%d')
return dt.strftime('%Y-%m-%d')
else:
# Try to parse as datetime
dt = datetime.fromisoformat(date_value)
return dt.strftime('%Y-%m-%d %H:%M:%S')
# Handle datetime objects
elif isinstance(date_value, datetime):
if date_value.tzinfo:
dt = date_value.astimezone(timezone.utc)
else:
dt = date_value
return dt.strftime('%Y-%m-%d %H:%M:%S UTC')
return str(date_value)
except (ValueError, TypeError) as e:
# Log error but don't fail the sync
return str(date_value) if date_value else None
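# Example for _format_date_for_excel above (illustrative): a JIRA timestamp such as
# "2025-09-16T12:33:10.044+02:00" is exported as "2025-09-16 10:33:10 UTC", while a
# bare date like "2025-09-16" stays "2025-09-16". _parse_date_from_excel below
# performs the reverse mapping when a "put" field is written back to JIRA.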
def _parse_date_from_excel(self, date_string: str) -> Optional[str]:
"""Parse date string from Excel and convert to JIRA format.
Converts Excel date strings back to JIRA-compatible ISO format.
Args:
date_string: Date string from Excel
Returns:
ISO formatted date string for JIRA or None if invalid
"""
if not date_string or not isinstance(date_string, str):
return None
try:
# Handle various Excel date formats
date_string = date_string.strip()
# Try common Excel date formats
formats_to_try = [
'%Y-%m-%d %H:%M:%S UTC', # Our export format
'%Y-%m-%d %H:%M:%S', # Standard format
'%Y-%m-%d', # Date only
'%d.%m.%Y', # German format
'%m/%d/%Y', # US format
'%d/%m/%Y', # European format
]
for fmt in formats_to_try:
try:
dt = datetime.strptime(date_string, fmt)
# Convert to UTC and format as ISO
if dt.tzinfo is None:
dt = dt.replace(tzinfo=timezone.utc)
return dt.isoformat()
except ValueError:
continue
# If no format matches, try pandas parsing
try:
dt = pd.to_datetime(date_string)
if hasattr(dt, 'to_pydatetime'):
dt = dt.to_pydatetime()
if dt.tzinfo is None:
dt = dt.replace(tzinfo=timezone.utc)
return dt.isoformat()
except Exception:
pass
return None
except Exception:
return None
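# Example for _parse_date_from_excel above (illustrative): "16.09.2025" (German format)
# becomes "2025-09-16T00:00:00+00:00", and the exported value "2025-09-16 10:33:10 UTC"
# round-trips to "2025-09-16T10:33:10+00:00".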
def _is_date_field(self, field_name: str) -> bool:
"""Check if a field is a date field based on its name.
Args:
field_name: Name of the field
Returns:
True if field is likely a date field
"""
date_keywords = ['date', 'time', 'created', 'updated', 'due', 'deadline']
return any(keyword in field_name.lower() for keyword in date_keywords)
def _filter_empty_records(self, records: list[dict]) -> list[dict]:
"""Remove records that are missing an ID.
Purposefully only filter by presence of 'ID' to avoid dropping
valid rows with many empty optional fields.
"""
filtered: list[dict] = []
for row in records:
if isinstance(row, dict) and row.get("ID"):
filtered.append(row)
return filtered
def _merge_jira_with_existing(
self, jira_data: list[dict], existing_data: list[dict]
) -> list[dict]:
"""Merge JIRA data with existing CSV data, updating only 'get' fields."""
# Create a lookup for existing data by ID
existing_lookup = {row.get("ID"): row for row in existing_data if row.get("ID")}
merged_data = []
for jira_row in jira_data:
jira_id = jira_row.get("ID")
if jira_id and jira_id in existing_lookup:
# Update existing row with JIRA data (only 'get' fields)
existing_row = existing_lookup[jira_id].copy()
for field_name, field_config in self.task_sync_definition.items():
if field_config[0] == "get": # Only update 'get' fields
existing_row[field_name] = jira_row.get(field_name)
merged_data.append(existing_row)
# Remove from lookup to track processed items
del existing_lookup[jira_id]
else:
# New row from JIRA
merged_data.append(jira_row)
# Add any remaining existing rows that weren't in JIRA data
merged_data.extend(existing_lookup.values())
return merged_data
def _merge_jira_with_existing_detailed(
self, jira_data: list[dict], existing_data: list[dict]
) -> tuple[list[dict], dict]:
"""Merge JIRA data with existing CSV data and track detailed changes."""
# Create a lookup for existing data by ID
existing_lookup = {row.get("ID"): row for row in existing_data if row.get("ID")}
merged_data = []
changes = []
updated_count = 0
added_count = 0
unchanged_count = 0
for jira_row in jira_data:
jira_id = jira_row.get("ID")
if jira_id and jira_id in existing_lookup:
# Update existing row with JIRA data (only 'get' fields)
existing_row = existing_lookup[jira_id].copy()
row_changes = []
for field_name, field_config in self.task_sync_definition.items():
if field_config[0] == "get": # Only update 'get' fields
old_value = existing_row.get(field_name, "")
new_value = jira_row.get(field_name, "")
# Convert None to empty string for comparison
old_value = "" if old_value is None else str(old_value)
new_value = "" if new_value is None else str(new_value)
if old_value != new_value:
row_changes.append(
    f"{field_name}: '{old_value}' -> '{new_value}'"
)
existing_row[field_name] = jira_row.get(field_name)
merged_data.append(existing_row)
if row_changes:
updated_count += 1
changes.append(
f"Row ID {jira_id} updated: {', '.join(row_changes)}"
)
else:
unchanged_count += 1
# Remove from lookup to track processed items
del existing_lookup[jira_id]
else:
# New row from JIRA
merged_data.append(jira_row)
added_count += 1
changes.append(f"Row ID {jira_id} added as new record")
# Add any remaining existing rows that weren't in JIRA data
for remaining_row in existing_lookup.values():
merged_data.append(remaining_row)
unchanged_count += 1
change_details = {
"updated": updated_count,
"added": added_count,
"unchanged": unchanged_count,
"changes": changes,
}
return merged_data, change_details
async def _write_audit_log(self, audit_log: list[str], operation_type: str):
"""Write audit log to SharePoint."""
try:
timestamp = get_utc_now().strftime("%Y%m%d_%H%M%S")
audit_filename = f"audit_{operation_type}_{timestamp}.log"
# Convert audit log to bytes
audit_content = "\n".join(audit_log).encode("utf-8")
# Debug logging
import logging
logger = logging.getLogger(__name__)
logger.debug(f"Writing audit log to folder: {self.audit_folder}, file: {audit_filename}")
# Write to SharePoint
await self.connector_sharepoint.upload_file(
site_id=self.site_id,
folder_path=self.audit_folder,
file_name=audit_filename,
content=audit_content,
)
logger.debug("Audit log written successfully")
except Exception as e:
# If audit logging fails, we don't want to break the main sync process
# Just log the error (this could be enhanced with fallback logging)
import logging
logger = logging.getLogger(__name__)
logger.warning(f"Failed to write audit log: {str(e)}")
logger.warning(f"Audit folder: {self.audit_folder}")
logger.warning(f"Operation type: {operation_type}")
import traceback
logger.warning(f"Traceback: {traceback.format_exc()}")
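# The 4-row layout produced by _create_csv_content and _create_excel_content below
# looks roughly like this (column names and values are illustrative only):
#
#     <header1, col 1>,<header1, col 2>,...          <- preserved header row 1
#     <header2, col 1>,2025-01-01 00:00:00 UTC,...   <- preserved header row 2, col 2 = sync timestamp
#     ID,Summary,Due Date,...                        <- column names (row 3)
#     PROJ-1,Example summary,2025-09-16,...          <- data rows (row 4 onward)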
def _create_csv_content(self, data: list[dict], existing_headers: Optional[dict] = None) -> bytes:
"""Create CSV content with 4-row structure matching reference code."""
# Get current timestamp for header
timestamp = get_utc_now().strftime("%Y-%m-%d %H:%M:%S UTC")
# Use existing headers if provided, otherwise use defaults
if existing_headers is None:
existing_headers = {"header1": "Header 1", "header2": "Header 2"}
if not data:
# Build an empty table with the expected columns from schema
cols = list(self.task_sync_definition.keys())
df = pd.DataFrame(columns=cols)
# Parse existing headers to extract individual columns
import csv as csv_module
header1_text = existing_headers.get("header1", "Header 1")
header2_text = existing_headers.get("header2", "Header 2")
# Parse the existing header rows
header1_reader = csv_module.reader([header1_text])
header2_reader = csv_module.reader([header2_text])
header1_row = next(header1_reader, [])
header2_row = next(header2_reader, [])
# Row 1: Use existing header1 or default
if len(header1_row) >= len(cols):
header_row1_data = header1_row[:len(cols)]
else:
header_row1_data = header1_row + [""] * (len(cols) - len(header1_row))
header_row1 = pd.DataFrame([header_row1_data], columns=cols)
# Row 2: Use existing header2 and add timestamp to second column
if len(header2_row) >= len(cols):
header_row2_data = header2_row[:len(cols)]
else:
header_row2_data = header2_row + [""] * (len(cols) - len(header2_row))
if len(header_row2_data) > 1:
header_row2_data[1] = timestamp
header_row2 = pd.DataFrame([header_row2_data], columns=cols)
# Row 3: table headers
table_headers = pd.DataFrame([cols], columns=cols)
final_df = pd.concat(
[header_row1, header_row2, table_headers, df], ignore_index=True
)
csv_text = StringIO()
final_df.to_csv(csv_text, index=False, header=False, quoting=1, escapechar='\\')
return csv_text.getvalue().encode("utf-8")
# Create DataFrame from data
df = pd.DataFrame(data)
# Force all columns to be object (string) type to preserve empty cells
for column in df.columns:
df[column] = df[column].astype("object")
df[column] = df[column].fillna("")
# Clean data: replace actual line breaks with \n and escape quotes
for column in df.columns:
df[column] = df[column].astype(str).str.replace('\n', '\\n', regex=False)
df[column] = df[column].str.replace('"', '""', regex=False)
# Create the 4-row structure
# Parse existing headers to extract individual columns
import csv as csv_module
header1_text = existing_headers.get("header1", "Header 1")
header2_text = existing_headers.get("header2", "Header 2")
# Parse the existing header rows
header1_reader = csv_module.reader([header1_text])
header2_reader = csv_module.reader([header2_text])
header1_row = next(header1_reader, [])
header2_row = next(header2_reader, [])
# Row 1: Use existing header1 or default
if len(header1_row) >= len(df.columns):
header_row1_data = header1_row[:len(df.columns)]
else:
header_row1_data = header1_row + [""] * (len(df.columns) - len(header1_row))
header_row1 = pd.DataFrame([header_row1_data], columns=df.columns)
# Row 2: Use existing header2 and add timestamp to second column
if len(header2_row) >= len(df.columns):
header_row2_data = header2_row[:len(df.columns)]
else:
header_row2_data = header2_row + [""] * (len(df.columns) - len(header2_row))
if len(header_row2_data) > 1:
header_row2_data[1] = timestamp
header_row2 = pd.DataFrame([header_row2_data], columns=df.columns)
# Row 3: Table headers (column names)
table_headers = pd.DataFrame([df.columns.tolist()], columns=df.columns)
# Concatenate all rows: header1 + header2 + table_headers + data
final_df = pd.concat(
[header_row1, header_row2, table_headers, df], ignore_index=True
)
# Convert to CSV bytes with proper quoting for fields containing special characters
csv_text = StringIO()
final_df.to_csv(csv_text, index=False, header=False, quoting=1, escapechar='\\')
return csv_text.getvalue().encode("utf-8")
def _create_excel_content(self, data: list[dict], existing_headers: Optional[dict] = None) -> bytes:
"""Create Excel content with 4-row structure matching reference code."""
# Get current timestamp for header
timestamp = get_utc_now().strftime("%Y-%m-%d %H:%M:%S UTC")
# Use existing headers if provided, otherwise use defaults
if existing_headers is None:
existing_headers = {"header1": "Header 1", "header2": "Header 2"}
if not data:
# Build an empty table with the expected columns from schema
cols = list(self.task_sync_definition.keys())
df = pd.DataFrame(columns=cols)
# Parse existing headers to extract individual columns
import csv as csv_module
header1_text = existing_headers.get("header1", "Header 1")
header2_text = existing_headers.get("header2", "Header 2")
# Parse the existing header rows
header1_reader = csv_module.reader([header1_text])
header2_reader = csv_module.reader([header2_text])
header1_row = next(header1_reader, [])
header2_row = next(header2_reader, [])
# Row 1: Use existing header1 or default
if len(header1_row) >= len(cols):
header_row1_data = header1_row[:len(cols)]
else:
header_row1_data = header1_row + [""] * (len(cols) - len(header1_row))
header_row1 = pd.DataFrame([header_row1_data], columns=cols)
# Row 2: Use existing header2 and add timestamp to second column
if len(header2_row) >= len(cols):
header_row2_data = header2_row[:len(cols)]
else:
header_row2_data = header2_row + [""] * (len(cols) - len(header2_row))
if len(header_row2_data) > 1:
header_row2_data[1] = timestamp
header_row2 = pd.DataFrame([header_row2_data], columns=cols)
# Row 3: table headers
table_headers = pd.DataFrame([cols], columns=cols)
final_df = pd.concat(
[header_row1, header_row2, table_headers, df], ignore_index=True
)
# Create Excel file in memory
excel_buffer = BytesIO()
final_df.to_excel(excel_buffer, index=False, header=False, engine='openpyxl')
return excel_buffer.getvalue()
# Create DataFrame from data
df = pd.DataFrame(data)
# Force all columns to be object (string) type to preserve empty cells
for column in df.columns:
df[column] = df[column].astype("object")
df[column] = df[column].fillna("")
# Clean data: replace actual line breaks with \n and escape quotes
for column in df.columns:
df[column] = df[column].astype(str).str.replace('\n', '\\n', regex=False)
df[column] = df[column].str.replace('"', '""', regex=False)
# Create the 4-row structure
# Parse existing headers to extract individual columns
import csv as csv_module
header1_text = existing_headers.get("header1", "Header 1")
header2_text = existing_headers.get("header2", "Header 2")
# Parse the existing header rows
header1_reader = csv_module.reader([header1_text])
header2_reader = csv_module.reader([header2_text])
header1_row = next(header1_reader, [])
header2_row = next(header2_reader, [])
# Row 1: Use existing header1 or default
if len(header1_row) >= len(df.columns):
header_row1_data = header1_row[:len(df.columns)]
else:
header_row1_data = header1_row + [""] * (len(df.columns) - len(header1_row))
header_row1 = pd.DataFrame([header_row1_data], columns=df.columns)
# Row 2: Use existing header2 and add timestamp to second column
if len(header2_row) >= len(df.columns):
header_row2_data = header2_row[:len(df.columns)]
else:
header_row2_data = header2_row + [""] * (len(df.columns) - len(header2_row))
if len(header_row2_data) > 1:
header_row2_data[1] = timestamp
header_row2 = pd.DataFrame([header_row2_data], columns=df.columns)
# Row 3: Table headers (column names)
table_headers = pd.DataFrame([df.columns.tolist()], columns=df.columns)
# Concatenate all rows: header1 + header2 + table_headers + data
final_df = pd.concat(
[header_row1, header_row2, table_headers, df], ignore_index=True
)
# Create Excel file in memory
excel_buffer = BytesIO()
final_df.to_excel(excel_buffer, index=False, header=False, engine='openpyxl')
return excel_buffer.getvalue()
def _parse_excel_content(self, excel_content: bytes) -> tuple[list[dict], dict]:
"""Parse Excel content with 4-row structure and return data and headers."""
try:
# Load Excel file from bytes
df = pd.read_excel(
BytesIO(excel_content),
engine='openpyxl',
header=None
)
# Extract the 4 parts:
# Row 1: Static header row 1
header_row1 = df.iloc[0:1].copy()
# Row 2: Static header row 2
header_row2 = df.iloc[1:2].copy()
# Row 3: Table headers
table_headers = df.iloc[2:3].copy()
# Row 4+: Data rows
df_data = df.iloc[3:].copy()
# Set column names from row 3
df_data.columns = table_headers.iloc[0]
# Reset index to start from 0
df_data = df_data.reset_index(drop=True)
# Force all columns to be object (string) type and handle NaN values
for column in df_data.columns:
df_data[column] = df_data[column].astype('object')
# Fill NaN values with empty string to keep cells empty
df_data[column] = df_data[column].fillna('')
# Convert DataFrame to list of dictionaries
data = df_data.to_dict(orient='records')
# Extract headers as strings (like CSV version)
headers = {
"header1": ",".join([str(x) if pd.notna(x) else "" for x in header_row1.iloc[0].tolist()]),
"header2": ",".join([str(x) if pd.notna(x) else "" for x in header_row2.iloc[0].tolist()])
}
return data, headers
except Exception as e:
raise Exception(f"Failed to parse Excel content: {str(e)}") from e