1112 lines
49 KiB
Python
1112 lines
49 KiB
Python
from dataclasses import dataclass
|
|
from io import BytesIO, StringIO
|
|
from typing import Any
|
|
import pandas as pd
|
|
import openpyxl
|
|
from modules.shared.timezoneUtils import get_utc_now
|
|
|
|
from modules.connectors.connectorSharepoint import ConnectorSharepoint
|
|
|
|
from modules.interfaces.interfaceTicketModel import TicketBase, Task
|
|
|
|
|
|
@dataclass(slots=True)
class TicketSharepointSyncInterface:
    """Sync ticket data between a ticket connector and a CSV/Excel file reached through the SharePoint connector.

    The methods visible in this class read tickets with ``connector_ticket``,
    read/write the sync file, backups, raw exports and audit logs with
    ``connector_sharepoint``, and use ``task_sync_definition`` to decide which
    columns flow in which direction.
    """

    # Ticket-system connector; the sync methods call read_tasks() and write_tasks() on it.
    connector_ticket: TicketBase
    # SharePoint connector; used for copy_file_async(), upload_file() and download_file_by_path().
    connector_sharepoint: ConnectorSharepoint
    # Maps a column name to a config sequence: index 0 is the direction ("get" or "put"),
    # index 1 is the list of keys used to walk into the raw ticket data.
    task_sync_definition: dict
    # Folder that contains the sync file.
    sync_folder: str
    # Name of the sync file inside sync_folder.
    sync_file: str
    # Folder that receives the timestamped backup copies made by create_backup().
    backup_folder: str
    # Folder that receives audit logs and the raw JIRA export files.
    audit_folder: str
    # NOTE(review): still forwarded as site_id= on every connector_sharepoint call in this class.
    site_id: str # Keep for compatibility but not used with REST API
|
|
|
|
@classmethod
|
|
async def create(
|
|
cls,
|
|
connector_ticket: TicketBase,
|
|
connector_sharepoint: ConnectorSharepoint,
|
|
task_sync_definition: dict,
|
|
sync_folder: str,
|
|
sync_file: str,
|
|
backup_folder: str,
|
|
audit_folder: str,
|
|
site_id: str,
|
|
) -> "TicketSharepointSyncInterface":
|
|
return cls(
|
|
connector_ticket=connector_ticket,
|
|
connector_sharepoint=connector_sharepoint,
|
|
task_sync_definition=task_sync_definition,
|
|
sync_folder=sync_folder,
|
|
sync_file=sync_file,
|
|
backup_folder=backup_folder,
|
|
audit_folder=audit_folder,
|
|
site_id=site_id,
|
|
)
|
|
|
|
async def create_backup(self):
    """Copy the current sync file into the backup folder under a timestamped name.

    The copy is named ``backup_<YYYYmmdd_HHMMSS>_<sync_file>``, using the time
    returned by ``get_utc_now()``.

    Raises:
        FileNotFoundError: The connector error text contains one of the
            "not found" markers, i.e. there is no source file to back up
            (expected on a first-time sync). The connector error is chained
            as ``__cause__``. Callers in this class catch ``Exception`` and
            log the message, so this stays compatible with them.
        Exception: Any other connector error is re-raised unchanged.
    """
    timestamp = get_utc_now().strftime("%Y%m%d_%H%M%S")
    backup_filename = f"backup_{timestamp}_{self.sync_file}"

    try:
        await self.connector_sharepoint.copy_file_async(
            site_id=self.site_id,
            source_folder=self.sync_folder,
            source_file=self.sync_file,
            dest_folder=self.backup_folder,
            dest_file=backup_filename,
        )
    except Exception as e:
        # The connector reports a missing file only through its error text,
        # so match on the known markers (stringify the error once).
        error_text = str(e)
        not_found_markers = ("itemNotFound", "404", "could not be found")
        if any(marker in error_text for marker in not_found_markers):
            # A missing source is an expected condition: signal it with a
            # specific exception type and keep the original error as cause.
            raise FileNotFoundError(
                f"Source file does not exist - no backup needed: {self.sync_file}"
            ) from e
        # Anything else is a real failure: propagate it untouched.
        raise
|
|
|
|
async def sync_from_jira_to_csv(self):
    """Sync tasks from JIRA into the CSV sync file in SharePoint.

    Steps, each recorded in the audit log:
      1. read all tickets from the ticket connector,
      2. transform them with ``_transform_tasks`` ("get" and "put" fields),
      3. upload a raw export of the transformed data to the audit folder,
      4. back up the current sync file,
      5. download and parse the current sync file (first two lines are kept
         as raw header lines, the table is parsed from line 3 onwards),
      6. merge JIRA data into the existing rows,
      7. upload the merged CSV over the sync file.

    Steps 3, 4 and 5 are best effort: their failures are written to the audit
    log and the sync continues. Any other exception is logged and re-raised.
    The audit log is written in all cases (``finally``).
    """
    start_time = get_utc_now()
    audit_log = [
        "=== JIRA TO CSV SYNC STARTED ===",
        f"Start Time: {start_time.strftime('%Y-%m-%d %H:%M:%S')}",
        f"Sync File: {self.sync_file}",
        f"Sync Folder: {self.sync_folder}",
        "",
    ]

    try:
        # 1. Read JIRA tickets
        audit_log.append("Step 1: Reading JIRA tickets...")
        tickets = await self.connector_ticket.read_tasks(limit=0)
        audit_log.append(f"JIRA issues read: {len(tickets)}")
        audit_log.append("")

        # 2. Transform tasks according to task_sync_definition
        audit_log.append("Step 2: Transforming JIRA data...")
        transformed_tasks = self._transform_tasks(tickets, include_put=True)
        jira_data = [task.data for task in transformed_tasks]
        audit_log.append(f"JIRA issues transformed: {len(jira_data)}")
        audit_log.append("")

        # 3. Create JIRA export file in audit folder (best effort)
        audit_log.append("Step 3: Creating JIRA export file...")
        try:
            timestamp = get_utc_now().strftime("%Y%m%d_%H%M%S")
            jira_export_filename = f"jira_export_{timestamp}.csv"
            # Use default headers for JIRA export
            jira_export_content = self._create_csv_content(
                jira_data, {"header1": "JIRA Export", "header2": "Raw Data"}
            )
            await self.connector_sharepoint.upload_file(
                site_id=self.site_id,
                folder_path=self.audit_folder,
                file_name=jira_export_filename,
                content=jira_export_content,
            )
            audit_log.append(f"JIRA export file created: {jira_export_filename}")
        except Exception as e:
            audit_log.append(f"Failed to create JIRA export file: {str(e)}")
        audit_log.append("")

        # 4. Create backup of existing sync file (best effort; the file may
        #    not exist yet on a first-time sync)
        audit_log.append("Step 4: Creating backup...")
        try:
            await self.create_backup()
            audit_log.append("Backup created successfully")
        except Exception as e:
            audit_log.append(
                f"Backup creation failed (file might not exist): {str(e)}"
            )
        audit_log.append("")

        # 5. Try to read existing CSV file from SharePoint (best effort; on
        #    failure the merge starts from an empty table and empty headers)
        audit_log.append("Step 5: Reading existing CSV file...")
        existing_data = []
        existing_headers = {"header1": "", "header2": ""}
        try:
            file_path = f"{self.sync_folder}/{self.sync_file}"
            csv_content = await self.connector_sharepoint.download_file_by_path(
                site_id=self.site_id, file_path=file_path
            )

            # Keep the raw first two lines so they can be written back as-is
            csv_lines = csv_content.decode('utf-8').split('\n')
            if len(csv_lines) >= 2:
                existing_headers["header1"] = csv_lines[0].rstrip('\r\n')
                existing_headers["header2"] = csv_lines[1].rstrip('\r\n')

            # Parse the table itself, skipping the two header lines
            df_existing = pd.read_csv(
                BytesIO(csv_content),
                skiprows=2,
                quoting=1,  # QUOTE_ALL
                escapechar='\\',
                on_bad_lines='skip',  # Skip malformed lines
                engine='python'  # More robust parsing
            )
            existing_data = df_existing.to_dict("records")
            audit_log.append(
                f"Existing CSV file found with {len(existing_data)} records"
            )
            audit_log.append(f"Preserved headers: Header1='{existing_headers['header1']}', Header2='{existing_headers['header2']}'")
        except Exception as e:
            audit_log.append(f"No existing CSV file found or read error: {str(e)}")
        audit_log.append("")

        # 6. Merge JIRA data with existing data and track changes
        audit_log.append("Step 6: Merging JIRA data with existing data...")
        merged_data, change_details = self._merge_jira_with_existing_detailed(
            jira_data, existing_data
        )

        # Log merge counters
        audit_log.append(f"Total records after merge: {len(merged_data)}")
        audit_log.append(f"Records updated: {change_details['updated']}")
        audit_log.append(f"Records added: {change_details['added']}")
        audit_log.append(f"Records unchanged: {change_details['unchanged']}")
        audit_log.append("")

        # Log individual changes
        if change_details["changes"]:
            audit_log.append("DETAILED CHANGES:")
            for change in change_details["changes"]:
                audit_log.append(f"- {change}")
            audit_log.append("")

        # 7. Create CSV content and write it over the sync file
        audit_log.append("Step 7: Writing updated CSV to SharePoint...")
        csv_content = self._create_csv_content(merged_data, existing_headers)
        await self.connector_sharepoint.upload_file(
            site_id=self.site_id,
            folder_path=self.sync_folder,
            file_name=self.sync_file,
            content=csv_content,
        )
        audit_log.append("CSV file successfully written to SharePoint")
        audit_log.append("")

        # Success summary
        end_time = get_utc_now()
        duration = (end_time - start_time).total_seconds()
        audit_log.append("=== SYNC COMPLETED SUCCESSFULLY ===")
        audit_log.append(f"End Time: {end_time.strftime('%Y-%m-%d %H:%M:%S')}")
        audit_log.append(f"Duration: {duration:.2f} seconds")
        audit_log.append(f"Total JIRA issues processed: {len(jira_data)}")
        audit_log.append(f"Total records in final CSV: {len(merged_data)}")

    except Exception as e:
        # Record the failure, then let the caller see the original exception
        end_time = get_utc_now()
        duration = (end_time - start_time).total_seconds()
        audit_log.append("")
        audit_log.append("=== SYNC FAILED ===")
        audit_log.append(f"Error Time: {end_time.strftime('%Y-%m-%d %H:%M:%S')}")
        audit_log.append(f"Duration before failure: {duration:.2f} seconds")
        audit_log.append(f"Error: {str(e)}")
        raise
    finally:
        # Write audit log to SharePoint on success and on failure
        await self._write_audit_log(audit_log, "jira_to_csv")
|
|
|
|
async def sync_from_csv_to_jira(self):
    """Push edits made in the CSV sync file back to JIRA.

    Steps, each recorded in the audit log:
      1. download and parse the CSV (table starts after two header lines);
         if this fails the sync is aborted quietly (logged, no exception),
      2. read and transform the current JIRA tickets for comparison,
      3. for every CSV row whose ID exists in JIRA, compare each "put" field
         as stripped text and collect the differing values,
      4. send the collected changes through ``connector_ticket.write_tasks``.

    Empty CSV cells arrive from pandas as float NaN; they are compared as an
    empty string so a blank cell is not mistaken for the text "nan" and
    written to JIRA. An empty CSV value that differs from JIRA is still sent,
    which clears the field.

    Failures in steps 2 and 4 are logged and re-raised. The audit log is
    written in all cases (``finally``).
    """

    def _as_text(value):
        """Return the comparison text for a cell: "" for None/NaN, else stripped str."""
        # NaN is the only float that is not equal to itself.
        if value is None or (isinstance(value, float) and value != value):
            return ""
        return str(value).strip()

    start_time = get_utc_now()
    audit_log = [
        "=== CSV TO JIRA SYNC STARTED ===",
        f"Start Time: {start_time.strftime('%Y-%m-%d %H:%M:%S')}",
        f"Sync File: {self.sync_file}",
        f"Sync Folder: {self.sync_folder}",
        "",
    ]

    try:
        # 1. Read CSV file from SharePoint
        audit_log.append("Step 1: Reading CSV file from SharePoint...")
        try:
            file_path = f"{self.sync_folder}/{self.sync_file}"
            csv_content = await self.connector_sharepoint.download_file_by_path(
                site_id=self.site_id, file_path=file_path
            )
            df = pd.read_csv(
                BytesIO(csv_content),
                skiprows=2,
                quoting=1,  # QUOTE_ALL
                escapechar='\\',
                on_bad_lines='skip',  # Skip malformed lines
                engine='python'  # More robust parsing
            )
            csv_data = df.to_dict("records")
            audit_log.append(
                f"CSV file read successfully with {len(csv_data)} records"
            )
        except Exception as e:
            # Nothing to process: stop without raising (finally still runs)
            audit_log.append(f"Failed to read CSV file: {str(e)}")
            audit_log.append("CSV to JIRA sync aborted - no file to process")
            return
        audit_log.append("")

        # 2. Read current JIRA data for comparison
        audit_log.append("Step 2: Reading current JIRA data for comparison...")
        try:
            current_jira_tasks = await self.connector_ticket.read_tasks(limit=0)
            current_jira_data = self._transform_tasks(
                current_jira_tasks, include_put=True
            )
            jira_lookup = {
                task.data.get("ID"): task.data for task in current_jira_data
            }
            audit_log.append(f"Current JIRA data read: {len(jira_lookup)} tasks")
        except Exception as e:
            audit_log.append(f"Failed to read current JIRA data: {str(e)}")
            raise
        audit_log.append("")

        # 3. Detect actual changes in "put" fields
        audit_log.append("Step 3: Detecting changes in 'put' fields...")
        actual_changes = {}
        records_with_changes = 0
        total_changes = 0
        # Only "put" fields may flow from the file to JIRA
        put_fields = [
            field_name
            for field_name, field_config in self.task_sync_definition.items()
            if field_config[0] == "put"
        ]

        for row in csv_data:
            task_id = row.get("ID")
            if not task_id or task_id not in jira_lookup:
                continue

            current_jira_task = jira_lookup[task_id]
            task_changes = {}
            for field_name in put_fields:
                csv_value = _as_text(row.get(field_name, ""))
                jira_value = _as_text(current_jira_task.get(field_name, ""))
                # Differing values are sent, including "" which clears the field
                if csv_value != jira_value:
                    task_changes[field_name] = csv_value

            if task_changes:
                actual_changes[task_id] = task_changes
                records_with_changes += 1
                total_changes += len(task_changes)

        audit_log.append(f"Records with actual changes: {records_with_changes}")
        audit_log.append(f"Total field changes detected: {total_changes}")
        audit_log.append("")

        # Log detailed changes
        if actual_changes:
            audit_log.append("DETAILED CHANGES TO APPLY TO JIRA:")
            for task_id, changes in actual_changes.items():
                change_list = [
                    f"{field}: '{value}'" for field, value in changes.items()
                ]
                audit_log.append(f"- Task ID {task_id}: {', '.join(change_list)}")
            audit_log.append("")

        # 4. Update JIRA tasks with actual changes
        tasks_to_update = []
        if actual_changes:
            audit_log.append("Step 4: Updating JIRA tasks...")

            for task_id, changes in actual_changes.items():
                # Build the nested {"fields": {...}} structure for the connector
                fields = {}
                for field_name, new_value in changes.items():
                    field_path = self.task_sync_definition[field_name][1]
                    # Only paths shaped like ['fields', '<jira field id>', ...]
                    # can be mapped back to a JIRA field id
                    if len(field_path) >= 2 and field_path[0] == "fields":
                        fields[field_path[1]] = new_value

                if fields:
                    tasks_to_update.append(
                        Task(data={"ID": task_id, "fields": fields})
                    )

            # Write tasks back to JIRA
            try:
                await self.connector_ticket.write_tasks(tasks_to_update)
                audit_log.append(
                    f"Successfully updated {len(tasks_to_update)} JIRA tasks"
                )
            except Exception as e:
                audit_log.append(f"Failed to update JIRA tasks: {str(e)}")
                raise
        else:
            audit_log.append("Step 4: No changes to apply to JIRA")
        audit_log.append("")

        # Success summary
        end_time = get_utc_now()
        duration = (end_time - start_time).total_seconds()
        audit_log.append("=== SYNC COMPLETED SUCCESSFULLY ===")
        audit_log.append(f"End Time: {end_time.strftime('%Y-%m-%d %H:%M:%S')}")
        audit_log.append(f"Duration: {duration:.2f} seconds")
        audit_log.append(f"Total CSV records processed: {len(csv_data)}")
        audit_log.append(f"Records with actual changes: {records_with_changes}")
        # Report the tasks actually sent, not merely the rows with differences
        audit_log.append(f"JIRA tasks updated: {len(tasks_to_update)}")

    except Exception as e:
        # Record the failure, then let the caller see the original exception
        end_time = get_utc_now()
        duration = (end_time - start_time).total_seconds()
        audit_log.append("")
        audit_log.append("=== SYNC FAILED ===")
        audit_log.append(f"Error Time: {end_time.strftime('%Y-%m-%d %H:%M:%S')}")
        audit_log.append(f"Duration before failure: {duration:.2f} seconds")
        audit_log.append(f"Error: {str(e)}")
        raise
    finally:
        # Write audit log to SharePoint on success, abort and failure
        await self._write_audit_log(audit_log, "csv_to_jira")
|
|
|
|
async def sync_from_jira_to_excel(self):
    """Sync tasks from JIRA into the Excel sync file in SharePoint.

    Steps, each recorded in the audit log:
      1. read all tickets from the ticket connector,
      2. transform them with ``_transform_tasks`` ("get" and "put" fields),
      3. upload a raw ``.xlsx`` export of the transformed data to the audit folder,
      4. back up the current sync file,
      5. download the current sync file and parse it with ``_parse_excel_content``,
      6. merge JIRA data into the existing rows,
      7. upload the merged workbook over the sync file.

    Steps 3, 4 and 5 are best effort: their failures are written to the audit
    log and the sync continues. Any other exception is logged and re-raised.
    The audit log is written in all cases (``finally``).
    """
    start_time = get_utc_now()
    audit_log = [
        "=== JIRA TO EXCEL SYNC STARTED ===",
        f"Start Time: {start_time.strftime('%Y-%m-%d %H:%M:%S')}",
        f"Sync File: {self.sync_file}",
        f"Sync Folder: {self.sync_folder}",
        "",
    ]

    try:
        # 1. Read JIRA tickets
        audit_log.append("Step 1: Reading JIRA tickets...")
        tickets = await self.connector_ticket.read_tasks(limit=0)
        audit_log.append(f"JIRA issues read: {len(tickets)}")
        audit_log.append("")

        # 2. Transform tasks according to task_sync_definition
        audit_log.append("Step 2: Transforming JIRA data...")
        transformed_tasks = self._transform_tasks(tickets, include_put=True)
        jira_data = [task.data for task in transformed_tasks]
        audit_log.append(f"JIRA issues transformed: {len(jira_data)}")
        audit_log.append("")

        # 3. Create JIRA export file in audit folder (best effort)
        audit_log.append("Step 3: Creating JIRA export file...")
        try:
            timestamp = get_utc_now().strftime("%Y%m%d_%H%M%S")
            jira_export_filename = f"jira_export_{timestamp}.xlsx"
            # Use default headers for JIRA export
            jira_export_content = self._create_excel_content(
                jira_data, {"header1": "JIRA Export", "header2": "Raw Data"}
            )
            await self.connector_sharepoint.upload_file(
                site_id=self.site_id,
                folder_path=self.audit_folder,
                file_name=jira_export_filename,
                content=jira_export_content,
            )
            audit_log.append(f"JIRA export file created: {jira_export_filename}")
        except Exception as e:
            audit_log.append(f"Failed to create JIRA export file: {str(e)}")
        audit_log.append("")

        # 4. Create backup of existing Excel file (best effort; the file may
        #    not exist yet on a first-time sync)
        audit_log.append("Step 4: Creating backup...")
        try:
            await self.create_backup()
            audit_log.append("Backup created successfully")
        except Exception as e:
            audit_log.append(
                f"Backup creation failed (file might not exist): {str(e)}"
            )
        audit_log.append("")

        # 5. Try to read existing Excel file from SharePoint (best effort; on
        #    failure the merge starts from an empty table and default headers)
        audit_log.append("Step 5: Reading existing Excel file...")
        existing_data = []
        existing_headers = {"header1": "Header 1", "header2": "Header 2"}
        try:
            file_path = f"{self.sync_folder}/{self.sync_file}"
            excel_content = await self.connector_sharepoint.download_file_by_path(
                site_id=self.site_id, file_path=file_path
            )

            # Returns (rows, headers); rebinding both keeps the defaults above
            # untouched when parsing raises
            existing_data, existing_headers = self._parse_excel_content(excel_content)
            audit_log.append(
                f"Existing Excel file found with {len(existing_data)} records"
            )
            audit_log.append(f"Preserved headers: Header1='{existing_headers['header1']}', Header2='{existing_headers['header2']}'")
        except Exception as e:
            audit_log.append(f"No existing Excel file found or read error: {str(e)}")
        audit_log.append("")

        # 6. Merge JIRA data with existing data and track changes
        audit_log.append("Step 6: Merging JIRA data with existing data...")
        merged_data, change_details = self._merge_jira_with_existing_detailed(
            jira_data, existing_data
        )

        # Log merge counters
        audit_log.append(f"Total records after merge: {len(merged_data)}")
        audit_log.append(f"Records updated: {change_details['updated']}")
        audit_log.append(f"Records added: {change_details['added']}")
        audit_log.append(f"Records unchanged: {change_details['unchanged']}")
        audit_log.append("")

        # Log individual changes
        if change_details["changes"]:
            audit_log.append("DETAILED CHANGES:")
            for change in change_details["changes"]:
                audit_log.append(f"- {change}")
            audit_log.append("")

        # 7. Create Excel content and write it over the sync file
        audit_log.append("Step 7: Writing updated Excel to SharePoint...")
        excel_content = self._create_excel_content(merged_data, existing_headers)
        await self.connector_sharepoint.upload_file(
            site_id=self.site_id,
            folder_path=self.sync_folder,
            file_name=self.sync_file,
            content=excel_content,
        )
        audit_log.append("Excel file successfully written to SharePoint")
        audit_log.append("")

        # Success summary
        end_time = get_utc_now()
        duration = (end_time - start_time).total_seconds()
        audit_log.append("=== SYNC COMPLETED SUCCESSFULLY ===")
        audit_log.append(f"End Time: {end_time.strftime('%Y-%m-%d %H:%M:%S')}")
        audit_log.append(f"Duration: {duration:.2f} seconds")
        audit_log.append(f"Total JIRA issues processed: {len(jira_data)}")
        audit_log.append(f"Total records in final Excel: {len(merged_data)}")

    except Exception as e:
        # Record the failure, then let the caller see the original exception
        end_time = get_utc_now()
        duration = (end_time - start_time).total_seconds()
        audit_log.append("")
        audit_log.append("=== SYNC FAILED ===")
        audit_log.append(f"Error Time: {end_time.strftime('%Y-%m-%d %H:%M:%S')}")
        audit_log.append(f"Duration before failure: {duration:.2f} seconds")
        audit_log.append(f"Error: {str(e)}")
        raise
    finally:
        # Write audit log to SharePoint on success and on failure
        await self._write_audit_log(audit_log, "jira_to_excel")
|
|
|
|
async def sync_from_excel_to_jira(self):
    """Push edits made in the Excel sync file back to JIRA.

    Steps, each recorded in the audit log:
      1. download the workbook and parse it with ``_parse_excel_content``;
         if this fails the sync is aborted quietly (logged, no exception),
      2. read and transform the current JIRA tickets for comparison,
      3. for every Excel row whose ID exists in JIRA, compare each "put"
         field as stripped text and collect the differing values,
      4. send the collected changes through ``connector_ticket.write_tasks``.

    None and float NaN cells are compared as an empty string, matching the
    CSV sync path. NOTE(review): whether ``_parse_excel_content`` can return
    NaN is not visible from this method; the guard is harmless if it cannot.
    An empty Excel value that differs from JIRA is still sent, which clears
    the field.

    Failures in steps 2 and 4 are logged and re-raised. The audit log is
    written in all cases (``finally``).
    """

    def _as_text(value):
        """Return the comparison text for a cell: "" for None/NaN, else stripped str."""
        # NaN is the only float that is not equal to itself.
        if value is None or (isinstance(value, float) and value != value):
            return ""
        return str(value).strip()

    start_time = get_utc_now()
    audit_log = [
        "=== EXCEL TO JIRA SYNC STARTED ===",
        f"Start Time: {start_time.strftime('%Y-%m-%d %H:%M:%S')}",
        f"Sync File: {self.sync_file}",
        f"Sync Folder: {self.sync_folder}",
        "",
    ]

    try:
        # 1. Read Excel file from SharePoint
        audit_log.append("Step 1: Reading Excel file from SharePoint...")
        try:
            file_path = f"{self.sync_folder}/{self.sync_file}"
            excel_content = await self.connector_sharepoint.download_file_by_path(
                site_id=self.site_id, file_path=file_path
            )
            # Headers are not needed in this direction
            excel_data, _ = self._parse_excel_content(excel_content)
            audit_log.append(
                f"Excel file read successfully with {len(excel_data)} records"
            )
        except Exception as e:
            # Nothing to process: stop without raising (finally still runs)
            audit_log.append(f"Failed to read Excel file: {str(e)}")
            audit_log.append("Excel to JIRA sync aborted - no file to process")
            return
        audit_log.append("")

        # 2. Read current JIRA data for comparison
        audit_log.append("Step 2: Reading current JIRA data for comparison...")
        try:
            current_jira_tasks = await self.connector_ticket.read_tasks(limit=0)
            current_jira_data = self._transform_tasks(
                current_jira_tasks, include_put=True
            )
            jira_lookup = {
                task.data.get("ID"): task.data for task in current_jira_data
            }
            audit_log.append(f"Current JIRA data read: {len(jira_lookup)} tasks")
        except Exception as e:
            audit_log.append(f"Failed to read current JIRA data: {str(e)}")
            raise
        audit_log.append("")

        # 3. Detect actual changes in "put" fields
        audit_log.append("Step 3: Detecting changes in 'put' fields...")
        actual_changes = {}
        records_with_changes = 0
        total_changes = 0
        # Only "put" fields may flow from the file to JIRA
        put_fields = [
            field_name
            for field_name, field_config in self.task_sync_definition.items()
            if field_config[0] == "put"
        ]

        for row in excel_data:
            task_id = row.get("ID")
            if not task_id or task_id not in jira_lookup:
                continue

            current_jira_task = jira_lookup[task_id]
            task_changes = {}
            for field_name in put_fields:
                excel_value = _as_text(row.get(field_name, ""))
                jira_value = _as_text(current_jira_task.get(field_name, ""))
                # Differing values are sent, including "" which clears the field
                if excel_value != jira_value:
                    task_changes[field_name] = excel_value

            if task_changes:
                actual_changes[task_id] = task_changes
                records_with_changes += 1
                total_changes += len(task_changes)

        audit_log.append(f"Records with actual changes: {records_with_changes}")
        audit_log.append(f"Total field changes detected: {total_changes}")
        audit_log.append("")

        # Log detailed changes
        if actual_changes:
            audit_log.append("DETAILED CHANGES TO APPLY TO JIRA:")
            for task_id, changes in actual_changes.items():
                change_list = [
                    f"{field}: '{value}'" for field, value in changes.items()
                ]
                audit_log.append(f"- Task ID {task_id}: {', '.join(change_list)}")
            audit_log.append("")

        # 4. Update JIRA tasks with actual changes
        tasks_to_update = []
        if actual_changes:
            audit_log.append("Step 4: Updating JIRA tasks...")

            for task_id, changes in actual_changes.items():
                # Build the nested {"fields": {...}} structure for the connector
                fields = {}
                for field_name, new_value in changes.items():
                    field_path = self.task_sync_definition[field_name][1]
                    # Only paths shaped like ['fields', '<jira field id>', ...]
                    # can be mapped back to a JIRA field id
                    if len(field_path) >= 2 and field_path[0] == "fields":
                        fields[field_path[1]] = new_value

                if fields:
                    tasks_to_update.append(
                        Task(data={"ID": task_id, "fields": fields})
                    )

            # Write tasks back to JIRA
            try:
                await self.connector_ticket.write_tasks(tasks_to_update)
                audit_log.append(
                    f"Successfully updated {len(tasks_to_update)} JIRA tasks"
                )
            except Exception as e:
                audit_log.append(f"Failed to update JIRA tasks: {str(e)}")
                raise
        else:
            audit_log.append("Step 4: No changes to apply to JIRA")
        audit_log.append("")

        # Success summary
        end_time = get_utc_now()
        duration = (end_time - start_time).total_seconds()
        audit_log.append("=== SYNC COMPLETED SUCCESSFULLY ===")
        audit_log.append(f"End Time: {end_time.strftime('%Y-%m-%d %H:%M:%S')}")
        audit_log.append(f"Duration: {duration:.2f} seconds")
        audit_log.append(f"Total Excel records processed: {len(excel_data)}")
        audit_log.append(f"Records with actual changes: {records_with_changes}")
        # Report the tasks actually sent, not merely the rows with differences
        audit_log.append(f"JIRA tasks updated: {len(tasks_to_update)}")

    except Exception as e:
        # Record the failure, then let the caller see the original exception
        end_time = get_utc_now()
        duration = (end_time - start_time).total_seconds()
        audit_log.append("")
        audit_log.append("=== SYNC FAILED ===")
        audit_log.append(f"Error Time: {end_time.strftime('%Y-%m-%d %H:%M:%S')}")
        audit_log.append(f"Duration before failure: {duration:.2f} seconds")
        audit_log.append(f"Error: {str(e)}")
        raise
    finally:
        # Write audit log to SharePoint on success, abort and failure
        await self._write_audit_log(audit_log, "excel_to_jira")
|
|
|
|
def _transform_tasks(
    self, tasks: list[Task], include_put: bool = False
) -> list[Task]:
    """Project raw tasks onto the columns named in ``task_sync_definition``.

    For each task a new ``Task`` is built whose data maps every selected
    column name to the value found by ``_extract_field_value`` at that
    column's field path. "get" columns are always selected; "put" columns
    only when ``include_put`` is true.
    """
    result = []
    for source_task in tasks:
        row = {}
        for column, spec in self.task_sync_definition.items():
            # spec[0] is the direction, spec[1] the key path into the raw data
            mode, path = spec[0], spec[1]
            if mode != "get" and not include_put:
                continue
            row[column] = self._extract_field_value(source_task.data, path)
        result.append(Task(data=row))
    return result
|
|
|
|
def _extract_field_value(self, issue_data: dict, field_path: list[str]) -> Any:
|
|
"""Extract field value from JIRA issue data using field path."""
|
|
value = issue_data
|
|
try:
|
|
for key in field_path:
|
|
if value is not None:
|
|
value = value[key]
|
|
|
|
if value is None:
|
|
return None
|
|
|
|
# Handle complex objects that have a 'value' field (like custom field options)
|
|
if isinstance(value, dict) and "value" in value:
|
|
value = value["value"]
|
|
# Handle lists of objects with 'value' fields
|
|
elif (
|
|
isinstance(value, list)
|
|
and len(value) > 0
|
|
and isinstance(value[0], dict)
|
|
and "value" in value[0]
|
|
):
|
|
value = value[0]["value"]
|
|
|
|
return value
|
|
except (KeyError, TypeError):
|
|
return None
|
|
|
|
def _merge_jira_with_existing(
|
|
self, jira_data: list[dict], existing_data: list[dict]
|
|
) -> list[dict]:
|
|
"""Merge JIRA data with existing CSV data, updating only 'get' fields."""
|
|
# Create a lookup for existing data by ID
|
|
existing_lookup = {row.get("ID"): row for row in existing_data if row.get("ID")}
|
|
|
|
merged_data = []
|
|
for jira_row in jira_data:
|
|
jira_id = jira_row.get("ID")
|
|
if jira_id and jira_id in existing_lookup:
|
|
# Update existing row with JIRA data (only 'get' fields)
|
|
existing_row = existing_lookup[jira_id].copy()
|
|
for field_name, field_config in self.task_sync_definition.items():
|
|
if field_config[0] == "get": # Only update 'get' fields
|
|
existing_row[field_name] = jira_row.get(field_name)
|
|
merged_data.append(existing_row)
|
|
# Remove from lookup to track processed items
|
|
del existing_lookup[jira_id]
|
|
else:
|
|
# New row from JIRA
|
|
merged_data.append(jira_row)
|
|
|
|
# Add any remaining existing rows that weren't in JIRA data
|
|
merged_data.extend(existing_lookup.values())
|
|
|
|
return merged_data
|
|
|
|
def _merge_jira_with_existing_detailed(
|
|
self, jira_data: list[dict], existing_data: list[dict]
|
|
) -> tuple[list[dict], dict]:
|
|
"""Merge JIRA data with existing CSV data and track detailed changes."""
|
|
# Create a lookup for existing data by ID
|
|
existing_lookup = {row.get("ID"): row for row in existing_data if row.get("ID")}
|
|
|
|
merged_data = []
|
|
changes = []
|
|
updated_count = 0
|
|
added_count = 0
|
|
unchanged_count = 0
|
|
|
|
for jira_row in jira_data:
|
|
jira_id = jira_row.get("ID")
|
|
if jira_id and jira_id in existing_lookup:
|
|
# Update existing row with JIRA data (only 'get' fields)
|
|
existing_row = existing_lookup[jira_id].copy()
|
|
row_changes = []
|
|
|
|
for field_name, field_config in self.task_sync_definition.items():
|
|
if field_config[0] == "get": # Only update 'get' fields
|
|
old_value = existing_row.get(field_name, "")
|
|
new_value = jira_row.get(field_name, "")
|
|
|
|
# Convert None to empty string for comparison
|
|
old_value = "" if old_value is None else str(old_value)
|
|
new_value = "" if new_value is None else str(new_value)
|
|
|
|
if old_value != new_value:
|
|
row_changes.append(
|
|
f"{field_name}: '{old_value}' → '{new_value}'"
|
|
)
|
|
|
|
existing_row[field_name] = jira_row.get(field_name)
|
|
|
|
merged_data.append(existing_row)
|
|
|
|
if row_changes:
|
|
updated_count += 1
|
|
changes.append(
|
|
f"Row ID {jira_id} updated: {', '.join(row_changes)}"
|
|
)
|
|
else:
|
|
unchanged_count += 1
|
|
|
|
# Remove from lookup to track processed items
|
|
del existing_lookup[jira_id]
|
|
else:
|
|
# New row from JIRA
|
|
merged_data.append(jira_row)
|
|
added_count += 1
|
|
changes.append(f"Row ID {jira_id} added as new record")
|
|
|
|
# Add any remaining existing rows that weren't in JIRA data
|
|
for remaining_row in existing_lookup.values():
|
|
merged_data.append(remaining_row)
|
|
unchanged_count += 1
|
|
|
|
change_details = {
|
|
"updated": updated_count,
|
|
"added": added_count,
|
|
"unchanged": unchanged_count,
|
|
"changes": changes,
|
|
}
|
|
|
|
return merged_data, change_details
|
|
|
|
async def _write_audit_log(self, audit_log: list[str], operation_type: str):
    """Upload the audit lines as ``audit_<operation_type>_<timestamp>.log``.

    The lines are joined with newlines, UTF-8 encoded and uploaded to the
    audit folder. This is best effort: any exception is reported through the
    module logger as warnings (including the traceback) and never propagates,
    so a failed audit upload cannot break the sync that called it.
    """
    import logging
    import traceback

    logger = logging.getLogger(__name__)

    try:
        timestamp = get_utc_now().strftime("%Y%m%d_%H%M%S")
        audit_filename = f"audit_{operation_type}_{timestamp}.log"

        # One text blob, one line per audit entry
        audit_content = "\n".join(audit_log).encode("utf-8")

        logger.debug(f"Writing audit log to folder: {self.audit_folder}, file: {audit_filename}")
        await self.connector_sharepoint.upload_file(
            site_id=self.site_id,
            folder_path=self.audit_folder,
            file_name=audit_filename,
            content=audit_content,
        )
        logger.debug("Audit log written successfully")
    except Exception as e:
        # Deliberately swallowed: audit logging must not fail the main sync
        logger.warning(f"Failed to write audit log: {str(e)}")
        logger.warning(f"Audit folder: {self.audit_folder}")
        logger.warning(f"Operation type: {operation_type}")
        logger.warning(f"Traceback: {traceback.format_exc()}")
|
|
|
|
def _create_csv_content(self, data: list[dict], existing_headers: dict = None) -> bytes:
    """Serialize rows to CSV bytes using the 4-row sync-file layout.

    Layout: header row 1, header row 2 (its second cell is overwritten with
    the current UTC timestamp when there are at least two columns), a row of
    column names, then the data rows.

    Args:
        data: Row dicts. When empty, the columns are taken from the keys of
            ``self.task_sync_definition`` and no data rows are written.
        existing_headers: Optional dict whose ``"header1"`` / ``"header2"``
            values are CSV-formatted lines to carry over. Placeholder
            headers are used when it is ``None``.

    Returns:
        UTF-8 encoded CSV text with every field quoted.
    """
    import csv as csv_module

    timestamp = get_utc_now().strftime("%Y-%m-%d %H:%M:%S UTC")

    if existing_headers is None:
        existing_headers = {"header1": "Header 1", "header2": "Header 2"}

    if data:
        table = pd.DataFrame(data)
        for name in table.columns:
            # Object dtype + fillna keeps missing values as empty cells
            # instead of "nan".
            cells = table[name].astype("object").fillna("")
            # Real line breaks become a literal backslash-n and quotes are
            # doubled before the frame is handed to the CSV writer.
            cells = cells.astype(str).str.replace('\n', '\\n', regex=False)
            table[name] = cells.str.replace('"', '""', regex=False)
    else:
        # No rows: emit only the three header rows for the schema columns.
        table = pd.DataFrame(columns=list(self.task_sync_definition.keys()))

    column_names = table.columns.tolist()
    width = len(column_names)

    def fit_to_width(header_text):
        # Parse one CSV line, then truncate or right-pad it with empty
        # strings so it has exactly one cell per column.
        cells = next(csv_module.reader([header_text]), [])
        return (cells + [""] * max(0, width - len(cells)))[:width]

    first_row = fit_to_width(existing_headers.get("header1", "Header 1"))
    second_row = fit_to_width(existing_headers.get("header2", "Header 2"))
    if len(second_row) > 1:
        second_row[1] = timestamp

    frames = [
        pd.DataFrame([row], columns=table.columns)
        for row in (first_row, second_row, column_names)
    ]
    frames.append(table)
    final_df = pd.concat(frames, ignore_index=True)

    csv_text = StringIO()
    final_df.to_csv(
        csv_text,
        index=False,
        header=False,
        quoting=csv_module.QUOTE_ALL,
        escapechar='\\',
    )
    return csv_text.getvalue().encode("utf-8")
|
|
|
|
def _create_excel_content(self, data: list[dict], existing_headers: dict = None) -> bytes:
    """Serialize rows to an in-memory Excel workbook using the 4-row layout.

    Layout: header row 1, header row 2 (its second cell is overwritten with
    the current UTC timestamp when there are at least two columns), a row of
    column names, then the data rows.

    Args:
        data: Row dicts. When empty, the columns are taken from the keys of
            ``self.task_sync_definition`` and no data rows are written.
        existing_headers: Optional dict whose ``"header1"`` / ``"header2"``
            values are CSV-formatted lines to carry over. Placeholder
            headers are used when it is ``None``.

    Returns:
        The workbook bytes produced by pandas with the openpyxl engine.
    """
    import csv as csv_module

    timestamp = get_utc_now().strftime("%Y-%m-%d %H:%M:%S UTC")

    if existing_headers is None:
        existing_headers = {"header1": "Header 1", "header2": "Header 2"}

    if data:
        table = pd.DataFrame(data)
        for name in table.columns:
            # Object dtype + fillna keeps missing values as empty cells
            # instead of "nan".
            cells = table[name].astype("object").fillna("")
            cells = cells.astype(str).str.replace('\n', '\\n', regex=False)
            # NOTE(review): quote doubling mirrors the CSV writer, but Excel
            # cells need no CSV escaping and _parse_excel_content does not
            # reverse it - confirm this is intended.
            table[name] = cells.str.replace('"', '""', regex=False)
    else:
        # No rows: emit only the three header rows for the schema columns.
        table = pd.DataFrame(columns=list(self.task_sync_definition.keys()))

    column_names = table.columns.tolist()
    width = len(column_names)

    def fit_to_width(header_text):
        # Parse one CSV line, then truncate or right-pad it with empty
        # strings so it has exactly one cell per column.
        cells = next(csv_module.reader([header_text]), [])
        return (cells + [""] * max(0, width - len(cells)))[:width]

    first_row = fit_to_width(existing_headers.get("header1", "Header 1"))
    second_row = fit_to_width(existing_headers.get("header2", "Header 2"))
    if len(second_row) > 1:
        second_row[1] = timestamp

    frames = [
        pd.DataFrame([row], columns=table.columns)
        for row in (first_row, second_row, column_names)
    ]
    frames.append(table)
    final_df = pd.concat(frames, ignore_index=True)

    excel_buffer = BytesIO()
    final_df.to_excel(excel_buffer, index=False, header=False, engine='openpyxl')
    return excel_buffer.getvalue()
|
|
|
|
def _parse_excel_content(self, excel_content: bytes) -> tuple[list[dict], dict]:
|
|
"""Parse Excel content with 4-row structure and return data and headers."""
|
|
try:
|
|
# Load Excel file from bytes
|
|
df = pd.read_excel(
|
|
BytesIO(excel_content),
|
|
engine='openpyxl',
|
|
header=None
|
|
)
|
|
|
|
# Extract the 4 parts:
|
|
# Row 1: Static header row 1
|
|
header_row1 = df.iloc[0:1].copy()
|
|
|
|
# Row 2: Static header row 2
|
|
header_row2 = df.iloc[1:2].copy()
|
|
|
|
# Row 3: Table headers
|
|
table_headers = df.iloc[2:3].copy()
|
|
|
|
# Row 4+: Data rows
|
|
df_data = df.iloc[3:].copy()
|
|
# Set column names from row 3
|
|
df_data.columns = table_headers.iloc[0]
|
|
# Reset index to start from 0
|
|
df_data = df_data.reset_index(drop=True)
|
|
|
|
# Force all columns to be object (string) type and handle NaN values
|
|
for column in df_data.columns:
|
|
df_data[column] = df_data[column].astype('object')
|
|
# Fill NaN values with empty string to keep cells empty
|
|
df_data[column] = df_data[column].fillna('')
|
|
|
|
# Convert DataFrame to list of dictionaries
|
|
data = df_data.to_dict(orient='records')
|
|
|
|
# Extract headers as strings (like CSV version)
|
|
headers = {
|
|
"header1": ",".join([str(x) if pd.notna(x) else "" for x in header_row1.iloc[0].tolist()]),
|
|
"header2": ",".join([str(x) if pd.notna(x) else "" for x in header_row2.iloc[0].tolist()])
|
|
}
|
|
|
|
return data, headers
|
|
|
|
except Exception as e:
|
|
raise Exception(f"Failed to parse Excel content: {str(e)}")
|