from dataclasses import dataclass from io import BytesIO, StringIO from typing import Any, Optional from datetime import datetime, timezone import pandas as pd import openpyxl from modules.shared.timezoneUtils import get_utc_now from modules.connectors.connectorSharepoint import ConnectorSharepoint from modules.interfaces.interfaceTicketModel import TicketBase, Task @dataclass(slots=True) class TicketSharepointSyncInterface: connector_ticket: TicketBase connector_sharepoint: ConnectorSharepoint task_sync_definition: dict sync_folder: str sync_file: str backup_folder: str audit_folder: str site_id: str # Keep for compatibility but not used with REST API @classmethod async def create( cls, connector_ticket: TicketBase, connector_sharepoint: ConnectorSharepoint, task_sync_definition: dict, sync_folder: str, sync_file: str, backup_folder: str, audit_folder: str, site_id: str, ) -> "TicketSharepointSyncInterface": return cls( connector_ticket=connector_ticket, connector_sharepoint=connector_sharepoint, task_sync_definition=task_sync_definition, sync_folder=sync_folder, sync_file=sync_file, backup_folder=backup_folder, audit_folder=audit_folder, site_id=site_id, ) async def create_backup(self): """Creates a backup of the current sync file in the backup folder.""" timestamp = get_utc_now().strftime("%Y%m%d_%H%M%S") backup_filename = f"backup_{timestamp}_{self.sync_file}" try: await self.connector_sharepoint.copy_file_async( site_id=self.site_id, source_folder=self.sync_folder, source_file=self.sync_file, dest_folder=self.backup_folder, dest_file=backup_filename, ) except Exception as e: # If the source file doesn't exist (404 error), that's okay for first-time sync if "itemNotFound" in str(e) or "404" in str(e) or "could not be found" in str(e): raise Exception(f"Source file does not exist - no backup needed: {self.sync_file}") else: # Re-raise other errors raise async def sync_from_jira_to_csv(self): """Syncs tasks from JIRA to a CSV file in SharePoint.""" start_time = get_utc_now() audit_log = [] audit_log.append("=== JIRA TO CSV SYNC STARTED ===") audit_log.append(f"Start Time: {start_time.strftime('%Y-%m-%d %H:%M:%S')}") audit_log.append(f"Sync File: {self.sync_file}") audit_log.append(f"Sync Folder: {self.sync_folder}") audit_log.append("") try: # 1. Read JIRA tickets audit_log.append("Step 1: Reading JIRA tickets...") tickets = await self.connector_ticket.read_tasks(limit=0) audit_log.append(f"JIRA issues read: {len(tickets)}") audit_log.append("") # 2. Transform tasks according to task_sync_definition audit_log.append("Step 2: Transforming JIRA data...") transformed_tasks = self._transform_tasks(tickets, include_put=True) jira_data = [task.data for task in transformed_tasks] before_count = len(jira_data) # Remove records without an ID to avoid blank rows jira_data = self._filter_empty_records(jira_data) after_count = len(jira_data) audit_log.append(f"JIRA issues transformed: {before_count}") audit_log.append(f"JIRA issues after ID filter: {after_count}") # Log a sample of IDs to diagnose empty export issues try: sample_ids = [str(row.get("ID")) for row in jira_data[:5]] audit_log.append(f"Sample IDs: {', '.join(sample_ids)}") except Exception: pass audit_log.append("") # 3. Create JIRA export file in audit folder audit_log.append("Step 3: Creating JIRA export file...") try: timestamp = get_utc_now().strftime("%Y%m%d_%H%M%S") jira_export_filename = f"jira_export_{timestamp}.csv" # Use default headers for JIRA export jira_export_content = self._create_csv_content(jira_data, {"header1": "JIRA Export", "header2": "Raw Data"}) await self.connector_sharepoint.upload_file( site_id=self.site_id, folder_path=self.audit_folder, file_name=jira_export_filename, content=jira_export_content, ) audit_log.append(f"JIRA export file created: {jira_export_filename}") except Exception as e: audit_log.append(f"Failed to create JIRA export file: {str(e)}") audit_log.append("") # 4. Create backup of existing sync file (if it exists) audit_log.append("Step 4: Creating backup...") backup_created = False try: await self.create_backup() backup_created = True audit_log.append("Backup created successfully") except Exception as e: audit_log.append( f"Backup creation failed (file might not exist): {str(e)}" ) audit_log.append("") # 5. Try to read existing CSV file from SharePoint audit_log.append("Step 5: Reading existing CSV file...") existing_data = [] existing_file_found = False existing_headers = {"header1": "", "header2": ""} try: file_path = f"{self.sync_folder}/{self.sync_file}" csv_content = await self.connector_sharepoint.download_file_by_path( site_id=self.site_id, file_path=file_path ) # Read the first two lines to get headers csv_lines = csv_content.decode('utf-8').split('\n') if len(csv_lines) >= 2: # Store the raw first two lines as headers (preserving original formatting) existing_headers["header1"] = csv_lines[0].rstrip('\r\n') existing_headers["header2"] = csv_lines[1].rstrip('\r\n') # Try to read with robust CSV parsing (skip first 2 rows) df_existing = pd.read_csv( BytesIO(csv_content), skiprows=2, quoting=1, # QUOTE_ALL escapechar='\\', on_bad_lines='skip', # Skip malformed lines engine='python' # More robust parsing ) existing_data = df_existing.to_dict("records") existing_file_found = True audit_log.append( f"Existing CSV file found with {len(existing_data)} records" ) audit_log.append(f"Preserved headers: Header1='{existing_headers['header1']}', Header2='{existing_headers['header2']}'") except Exception as e: audit_log.append(f"No existing CSV file found or read error: {str(e)}") audit_log.append("") # 6. Merge JIRA data with existing data and track changes audit_log.append("Step 6: Merging JIRA data with existing data...") merged_data, change_details = self._merge_jira_with_existing_detailed( jira_data, existing_data ) # Log detailed changes audit_log.append(f"Total records after merge: {len(merged_data)}") audit_log.append(f"Records updated: {change_details['updated']}") audit_log.append(f"Records added: {change_details['added']}") audit_log.append(f"Records unchanged: {change_details['unchanged']}") audit_log.append("") # Log individual changes if change_details["changes"]: audit_log.append("DETAILED CHANGES:") for change in change_details["changes"]: audit_log.append(f"- {change}") audit_log.append("") # 7. Create CSV with 4-row structure and write to SharePoint audit_log.append("Step 7: Writing updated CSV to SharePoint...") csv_content = self._create_csv_content(merged_data, existing_headers) await self.connector_sharepoint.upload_file( site_id=self.site_id, folder_path=self.sync_folder, file_name=self.sync_file, content=csv_content, ) audit_log.append("CSV file successfully written to SharePoint") audit_log.append("") # Success summary end_time = get_utc_now() duration = (end_time - start_time).total_seconds() audit_log.append("=== SYNC COMPLETED SUCCESSFULLY ===") audit_log.append(f"End Time: {end_time.strftime('%Y-%m-%d %H:%M:%S')}") audit_log.append(f"Duration: {duration:.2f} seconds") audit_log.append(f"Total JIRA issues processed: {len(jira_data)}") audit_log.append(f"Total records in final CSV: {len(merged_data)}") except Exception as e: # Error handling end_time = get_utc_now() duration = (end_time - start_time).total_seconds() audit_log.append("") audit_log.append("=== SYNC FAILED ===") audit_log.append(f"Error Time: {end_time.strftime('%Y-%m-%d %H:%M:%S')}") audit_log.append(f"Duration before failure: {duration:.2f} seconds") audit_log.append(f"Error: {str(e)}") raise finally: # Write audit log to SharePoint await self._write_audit_log(audit_log, "jira_to_csv") async def sync_from_csv_to_jira(self): """Syncs tasks from a CSV file in SharePoint to JIRA.""" start_time = get_utc_now() audit_log = [] audit_log.append("=== CSV TO JIRA SYNC STARTED ===") audit_log.append(f"Start Time: {start_time.strftime('%Y-%m-%d %H:%M:%S')}") audit_log.append(f"Sync File: {self.sync_file}") audit_log.append(f"Sync Folder: {self.sync_folder}") audit_log.append("") try: # 1. Read CSV file from SharePoint audit_log.append("Step 1: Reading CSV file from SharePoint...") try: file_path = f"{self.sync_folder}/{self.sync_file}" csv_content = await self.connector_sharepoint.download_file_by_path( site_id=self.site_id, file_path=file_path ) # Try to read with robust CSV parsing df = pd.read_csv( BytesIO(csv_content), skiprows=2, quoting=1, # QUOTE_ALL escapechar='\\', on_bad_lines='skip', # Skip malformed lines engine='python' # More robust parsing ) csv_data = df.to_dict("records") audit_log.append( f"CSV file read successfully with {len(csv_data)} records" ) except Exception as e: audit_log.append(f"Failed to read CSV file: {str(e)}") audit_log.append("CSV to JIRA sync aborted - no file to process") return audit_log.append("") # 2. Read current JIRA data for comparison audit_log.append("Step 2: Reading current JIRA data for comparison...") try: current_jira_tasks = await self.connector_ticket.read_tasks(limit=0) current_jira_data = self._transform_tasks( current_jira_tasks, include_put=True ) jira_lookup = { task.data.get("ID"): task.data for task in current_jira_data } audit_log.append(f"Current JIRA data read: {len(jira_lookup)} tasks") except Exception as e: audit_log.append(f"Failed to read current JIRA data: {str(e)}") raise audit_log.append("") # 3. Detect actual changes in "put" fields audit_log.append("Step 3: Detecting changes in 'put' fields...") actual_changes = {} records_with_changes = 0 total_changes = 0 for row in csv_data: task_id = row.get("ID") if not task_id or task_id not in jira_lookup: continue current_jira_task = jira_lookup[task_id] task_changes = {} for field_name, field_config in self.task_sync_definition.items(): if field_config[0] == "put": # Only process "put" fields csv_value = row.get(field_name, "") jira_value = current_jira_task.get(field_name, "") # Convert None to empty string for comparison csv_value = "" if csv_value is None else str(csv_value).strip() jira_value = ( "" if jira_value is None else str(jira_value).strip() ) # Include if values are different (allow empty strings to clear fields like the reference does) if csv_value != jira_value: task_changes[field_name] = csv_value if task_changes: actual_changes[task_id] = task_changes records_with_changes += 1 total_changes += len(task_changes) audit_log.append(f"Records with actual changes: {records_with_changes}") audit_log.append(f"Total field changes detected: {total_changes}") audit_log.append("") # Log detailed changes if actual_changes: audit_log.append("DETAILED CHANGES TO APPLY TO JIRA:") for task_id, changes in actual_changes.items(): change_list = [ f"{field}: '{value}'" for field, value in changes.items() ] audit_log.append(f"- Task ID {task_id}: {', '.join(change_list)}") audit_log.append("") # 4. Update JIRA tasks with actual changes if actual_changes: audit_log.append("Step 4: Updating JIRA tasks...") # Convert to Task objects for the connector tasks_to_update = [] for task_id, changes in actual_changes.items(): # Create task data structure expected by JIRA connector # Build the nested fields structure that JIRA expects fields = {} for field_name, new_value in changes.items(): # Map back to JIRA field structure using task_sync_definition field_config = self.task_sync_definition[field_name] field_path = field_config[1] # Extract the JIRA field ID from the path # For "put" fields, the path is like ['fields', 'customfield_10067'] if len(field_path) >= 2 and field_path[0] == "fields": jira_field_id = field_path[1] # Parse date fields back to JIRA format if self._is_date_field(field_name) and new_value: parsed_date = self._parse_date_from_excel(str(new_value)) if parsed_date: fields[jira_field_id] = parsed_date else: fields[jira_field_id] = new_value else: fields[jira_field_id] = new_value if fields: task_data = {"ID": task_id, "fields": fields} task = Task(data=task_data) tasks_to_update.append(task) # Write tasks back to JIRA try: await self.connector_ticket.write_tasks(tasks_to_update) audit_log.append( f"Successfully updated {len(tasks_to_update)} JIRA tasks" ) except Exception as e: audit_log.append(f"Failed to update JIRA tasks: {str(e)}") raise else: audit_log.append("Step 4: No changes to apply to JIRA") audit_log.append("") # Success summary end_time = get_utc_now() duration = (end_time - start_time).total_seconds() audit_log.append("=== SYNC COMPLETED SUCCESSFULLY ===") audit_log.append(f"End Time: {end_time.strftime('%Y-%m-%d %H:%M:%S')}") audit_log.append(f"Duration: {duration:.2f} seconds") audit_log.append(f"Total CSV records processed: {len(csv_data)}") audit_log.append(f"Records with actual changes: {records_with_changes}") audit_log.append(f"JIRA tasks updated: {len(actual_changes)}") except Exception as e: # Error handling end_time = get_utc_now() duration = (end_time - start_time).total_seconds() audit_log.append("") audit_log.append("=== SYNC FAILED ===") audit_log.append(f"Error Time: {end_time.strftime('%Y-%m-%d %H:%M:%S')}") audit_log.append(f"Duration before failure: {duration:.2f} seconds") audit_log.append(f"Error: {str(e)}") raise finally: # Write audit log to SharePoint await self._write_audit_log(audit_log, "csv_to_jira") async def sync_from_jira_to_excel(self): """Syncs tasks from JIRA to an Excel file in SharePoint.""" start_time = get_utc_now() audit_log = [] audit_log.append("=== JIRA TO EXCEL SYNC STARTED ===") audit_log.append(f"Start Time: {start_time.strftime('%Y-%m-%d %H:%M:%S')}") audit_log.append(f"Sync File: {self.sync_file}") audit_log.append(f"Sync Folder: {self.sync_folder}") audit_log.append("") try: # 1. Read JIRA tickets audit_log.append("Step 1: Reading JIRA tickets...") tickets = await self.connector_ticket.read_tasks(limit=0) audit_log.append(f"JIRA issues read: {len(tickets)}") audit_log.append("") # 2. Transform tasks according to task_sync_definition audit_log.append("Step 2: Transforming JIRA data...") transformed_tasks = self._transform_tasks(tickets, include_put=True) jira_data = [task.data for task in transformed_tasks] audit_log.append(f"JIRA issues transformed: {len(jira_data)}") audit_log.append("") # 3. Create JIRA export file in audit folder audit_log.append("Step 3: Creating JIRA export file...") try: timestamp = get_utc_now().strftime("%Y%m%d_%H%M%S") jira_export_filename = f"jira_export_{timestamp}.xlsx" # Use default headers for JIRA export jira_export_content = self._create_excel_content(jira_data, {"header1": "JIRA Export", "header2": "Raw Data"}) await self.connector_sharepoint.upload_file( site_id=self.site_id, folder_path=self.audit_folder, file_name=jira_export_filename, content=jira_export_content, ) audit_log.append(f"JIRA export file created: {jira_export_filename}") except Exception as e: audit_log.append(f"Failed to create JIRA export file: {str(e)}") audit_log.append("") # 4. Create backup of existing Excel file (if it exists) audit_log.append("Step 4: Creating backup...") backup_created = False try: await self.create_backup() backup_created = True audit_log.append("Backup created successfully") except Exception as e: audit_log.append( f"Backup creation failed (file might not exist): {str(e)}" ) audit_log.append("") # 5. Try to read existing Excel file from SharePoint audit_log.append("Step 5: Reading existing Excel file...") existing_data = [] existing_file_found = False existing_headers = {"header1": "Header 1", "header2": "Header 2"} try: file_path = f"{self.sync_folder}/{self.sync_file}" excel_content = await self.connector_sharepoint.download_file_by_path( site_id=self.site_id, file_path=file_path ) # Parse Excel file with 4-row structure existing_data, existing_headers = self._parse_excel_content(excel_content) existing_file_found = True audit_log.append( f"Existing Excel file found with {len(existing_data)} records" ) audit_log.append(f"Preserved headers: Header1='{existing_headers['header1']}', Header2='{existing_headers['header2']}'") except Exception as e: audit_log.append(f"No existing Excel file found or read error: {str(e)}") audit_log.append("") # 6. Merge JIRA data with existing data and track changes audit_log.append("Step 6: Merging JIRA data with existing data...") merged_data, change_details = self._merge_jira_with_existing_detailed( jira_data, existing_data ) # Log detailed changes audit_log.append(f"Total records after merge: {len(merged_data)}") audit_log.append(f"Records updated: {change_details['updated']}") audit_log.append(f"Records added: {change_details['added']}") audit_log.append(f"Records unchanged: {change_details['unchanged']}") audit_log.append("") # Log individual changes if change_details["changes"]: audit_log.append("DETAILED CHANGES:") for change in change_details["changes"]: audit_log.append(f"- {change}") audit_log.append("") # 7. Create Excel with 4-row structure and write to SharePoint audit_log.append("Step 7: Writing updated Excel to SharePoint...") # Ensure no records without ID are written merged_data = self._filter_empty_records(merged_data) excel_content = self._create_excel_content(merged_data, existing_headers) await self.connector_sharepoint.upload_file( site_id=self.site_id, folder_path=self.sync_folder, file_name=self.sync_file, content=excel_content, ) audit_log.append("Excel file successfully written to SharePoint") audit_log.append("") # Success summary end_time = get_utc_now() duration = (end_time - start_time).total_seconds() audit_log.append("=== SYNC COMPLETED SUCCESSFULLY ===") audit_log.append(f"End Time: {end_time.strftime('%Y-%m-%d %H:%M:%S')}") audit_log.append(f"Duration: {duration:.2f} seconds") audit_log.append(f"Total JIRA issues processed: {len(jira_data)}") audit_log.append(f"Total records in final Excel: {len(merged_data)}") except Exception as e: # Error handling end_time = get_utc_now() duration = (end_time - start_time).total_seconds() audit_log.append("") audit_log.append("=== SYNC FAILED ===") audit_log.append(f"Error Time: {end_time.strftime('%Y-%m-%d %H:%M:%S')}") audit_log.append(f"Duration before failure: {duration:.2f} seconds") audit_log.append(f"Error: {str(e)}") raise finally: # Write audit log to SharePoint await self._write_audit_log(audit_log, "jira_to_excel") async def sync_from_excel_to_jira(self): """Syncs tasks from an Excel file in SharePoint to JIRA.""" start_time = get_utc_now() audit_log = [] audit_log.append("=== EXCEL TO JIRA SYNC STARTED ===") audit_log.append(f"Start Time: {start_time.strftime('%Y-%m-%d %H:%M:%S')}") audit_log.append(f"Sync File: {self.sync_file}") audit_log.append(f"Sync Folder: {self.sync_folder}") audit_log.append("") try: # 1. Read Excel file from SharePoint audit_log.append("Step 1: Reading Excel file from SharePoint...") try: file_path = f"{self.sync_folder}/{self.sync_file}" excel_content = await self.connector_sharepoint.download_file_by_path( site_id=self.site_id, file_path=file_path ) # Parse Excel file with 4-row structure excel_data, _ = self._parse_excel_content(excel_content) audit_log.append( f"Excel file read successfully with {len(excel_data)} records" ) except Exception as e: audit_log.append(f"Failed to read Excel file: {str(e)}") audit_log.append("Excel to JIRA sync aborted - no file to process") return audit_log.append("") # 2. Read current JIRA data for comparison audit_log.append("Step 2: Reading current JIRA data for comparison...") try: current_jira_tasks = await self.connector_ticket.read_tasks(limit=0) current_jira_data = self._transform_tasks( current_jira_tasks, include_put=True ) jira_lookup = { task.data.get("ID"): task.data for task in current_jira_data } audit_log.append(f"Current JIRA data read: {len(jira_lookup)} tasks") except Exception as e: audit_log.append(f"Failed to read current JIRA data: {str(e)}") raise audit_log.append("") # 3. Detect actual changes in "put" fields audit_log.append("Step 3: Detecting changes in 'put' fields...") actual_changes = {} records_with_changes = 0 total_changes = 0 for row in excel_data: task_id = row.get("ID") if not task_id or task_id not in jira_lookup: continue current_jira_task = jira_lookup[task_id] task_changes = {} for field_name, field_config in self.task_sync_definition.items(): if field_config[0] == "put": # Only process "put" fields excel_value = row.get(field_name, "") jira_value = current_jira_task.get(field_name, "") # Convert None to empty string for comparison excel_value = "" if excel_value is None else str(excel_value).strip() jira_value = ( "" if jira_value is None else str(jira_value).strip() ) # Include if values are different (allow empty strings to clear fields like the reference does) if excel_value != jira_value: task_changes[field_name] = excel_value if task_changes: actual_changes[task_id] = task_changes records_with_changes += 1 total_changes += len(task_changes) audit_log.append(f"Records with actual changes: {records_with_changes}") audit_log.append(f"Total field changes detected: {total_changes}") audit_log.append("") # Log detailed changes if actual_changes: audit_log.append("DETAILED CHANGES TO APPLY TO JIRA:") for task_id, changes in actual_changes.items(): change_list = [ f"{field}: '{value}'" for field, value in changes.items() ] audit_log.append(f"- Task ID {task_id}: {', '.join(change_list)}") audit_log.append("") # 4. Update JIRA tasks with actual changes if actual_changes: audit_log.append("Step 4: Updating JIRA tasks...") # Convert to Task objects for the connector tasks_to_update = [] for task_id, changes in actual_changes.items(): # Create task data structure expected by JIRA connector # Build the nested fields structure that JIRA expects fields = {} for field_name, new_value in changes.items(): # Map back to JIRA field structure using task_sync_definition field_config = self.task_sync_definition[field_name] field_path = field_config[1] # Extract the JIRA field ID from the path # For "put" fields, the path is like ['fields', 'customfield_10067'] if len(field_path) >= 2 and field_path[0] == "fields": jira_field_id = field_path[1] # Parse date fields back to JIRA format if self._is_date_field(field_name) and new_value: parsed_date = self._parse_date_from_excel(str(new_value)) if parsed_date: fields[jira_field_id] = parsed_date else: fields[jira_field_id] = new_value else: fields[jira_field_id] = new_value if fields: task_data = {"ID": task_id, "fields": fields} task = Task(data=task_data) tasks_to_update.append(task) # Write tasks back to JIRA try: await self.connector_ticket.write_tasks(tasks_to_update) audit_log.append( f"Successfully updated {len(tasks_to_update)} JIRA tasks" ) except Exception as e: audit_log.append(f"Failed to update JIRA tasks: {str(e)}") raise else: audit_log.append("Step 4: No changes to apply to JIRA") audit_log.append("") # Success summary end_time = get_utc_now() duration = (end_time - start_time).total_seconds() audit_log.append("=== SYNC COMPLETED SUCCESSFULLY ===") audit_log.append(f"End Time: {end_time.strftime('%Y-%m-%d %H:%M:%S')}") audit_log.append(f"Duration: {duration:.2f} seconds") audit_log.append(f"Total Excel records processed: {len(excel_data)}") audit_log.append(f"Records with actual changes: {records_with_changes}") audit_log.append(f"JIRA tasks updated: {len(actual_changes)}") except Exception as e: # Error handling end_time = get_utc_now() duration = (end_time - start_time).total_seconds() audit_log.append("") audit_log.append("=== SYNC FAILED ===") audit_log.append(f"Error Time: {end_time.strftime('%Y-%m-%d %H:%M:%S')}") audit_log.append(f"Duration before failure: {duration:.2f} seconds") audit_log.append(f"Error: {str(e)}") raise finally: # Write audit log to SharePoint await self._write_audit_log(audit_log, "excel_to_jira") def _transform_tasks( self, tasks: list[Task], include_put: bool = False ) -> list[Task]: """Transforms tasks according to the task_sync_definition.""" transformed_tasks = [] for task in tasks: transformed_data = {} # Process each field in the sync definition for field_name, field_config in self.task_sync_definition.items(): direction = field_config[0] # "get" or "put" field_path = field_config[1] # List of keys to navigate # Get the right fields if direction == "get" or include_put: # Extract value using the field path value = self._extract_field_value(task.data, field_path, field_name) transformed_data[field_name] = value # Create new Task with transformed data transformed_task = Task(data=transformed_data) transformed_tasks.append(transformed_task) return transformed_tasks def _extract_field_value(self, issue_data: dict, field_path: list[str], field_name: str = None) -> Any: """Extract field value from JIRA issue data using field path.""" value = issue_data try: for key in field_path: if value is not None: value = value[key] if value is None: return None # Handle complex objects that have a 'value' field (like custom field options) if isinstance(value, dict) and "value" in value: value = value["value"] # Handle lists of objects with 'value' fields elif ( isinstance(value, list) and len(value) > 0 and isinstance(value[0], dict) and "value" in value[0] ): value = value[0]["value"] # Apply ADF conversion for specific fields that contain ADF content if isinstance(value, dict) and value.get("type") == "doc": value = self._convert_adf_to_text(value) # Apply date formatting for date fields if field_name and self._is_date_field(field_name): value = self._format_date_for_excel(value) return value except (KeyError, TypeError): return None def _convert_adf_to_text(self, adf_data): """Convert Atlassian Document Format (ADF) to plain text. Based on Atlassian Document Format specification for JIRA fields. Handles paragraphs, lists, text formatting, and other ADF node types. Args: adf_data: ADF object or None Returns: str: Plain text content, or empty string if None/invalid """ if not adf_data or not isinstance(adf_data, dict): return "" if adf_data.get("type") != "doc": return str(adf_data) if adf_data else "" content = adf_data.get("content", []) if not isinstance(content, list): return "" def extract_text_from_content(content_list, list_level=0): """Recursively extract text from ADF content with proper formatting.""" text_parts = [] list_counter = 1 for item in content_list: if not isinstance(item, dict): continue item_type = item.get("type", "") if item_type == "text": # Extract text content, preserving formatting text = item.get("text", "") marks = item.get("marks", []) # Handle text formatting (bold, italic, etc.) if marks: for mark in marks: if mark.get("type") == "strong": text = f"**{text}**" elif mark.get("type") == "em": text = f"*{text}*" elif mark.get("type") == "code": text = f"`{text}`" elif mark.get("type") == "link": attrs = mark.get("attrs", {}) href = attrs.get("href", "") if href: text = f"[{text}]({href})" text_parts.append(text) elif item_type == "hardBreak": text_parts.append("\n") elif item_type == "paragraph": paragraph_content = item.get("content", []) if paragraph_content: paragraph_text = extract_text_from_content(paragraph_content, list_level) if paragraph_text.strip(): text_parts.append(paragraph_text) elif item_type == "bulletList": list_content = item.get("content", []) for list_item in list_content: if list_item.get("type") == "listItem": list_item_content = list_item.get("content", []) for list_paragraph in list_item_content: if list_paragraph.get("type") == "paragraph": list_paragraph_content = list_paragraph.get("content", []) if list_paragraph_content: indent = " " * list_level bullet_text = extract_text_from_content(list_paragraph_content, list_level + 1) if bullet_text.strip(): text_parts.append(f"{indent}• {bullet_text}") elif item_type == "orderedList": list_content = item.get("content", []) for list_item in list_content: if list_item.get("type") == "listItem": list_item_content = list_item.get("content", []) for list_paragraph in list_item_content: if list_paragraph.get("type") == "paragraph": list_paragraph_content = list_paragraph.get("content", []) if list_paragraph_content: indent = " " * list_level ordered_text = extract_text_from_content(list_paragraph_content, list_level + 1) if ordered_text.strip(): text_parts.append(f"{indent}{list_counter}. {ordered_text}") list_counter += 1 elif item_type == "listItem": # Handle nested list items list_item_content = item.get("content", []) if list_item_content: text_parts.append(extract_text_from_content(list_item_content, list_level)) elif item_type == "embedCard": # Handle embedded content (videos, etc.) attrs = item.get("attrs", {}) url = attrs.get("url", "") if url: text_parts.append(f"[Embedded Content: {url}]") elif item_type == "codeBlock": # Handle code blocks code_content = item.get("content", []) if code_content: code_text = extract_text_from_content(code_content, list_level) if code_text.strip(): text_parts.append(f"```\n{code_text}\n```") elif item_type == "blockquote": # Handle blockquotes quote_content = item.get("content", []) if quote_content: quote_text = extract_text_from_content(quote_content, list_level) if quote_text.strip(): text_parts.append(f"> {quote_text}") elif item_type == "heading": # Handle headings heading_content = item.get("content", []) if heading_content: heading_text = extract_text_from_content(heading_content, list_level) if heading_text.strip(): level = item.get("attrs", {}).get("level", 1) text_parts.append(f"{'#' * level} {heading_text}") elif item_type == "rule": # Handle horizontal rules text_parts.append("---") else: # Handle unknown types by trying to extract content if "content" in item: content_text = extract_text_from_content(item.get("content", []), list_level) if content_text.strip(): text_parts.append(content_text) return "\n".join(text_parts) result = extract_text_from_content(content) return result.strip() def _format_date_for_excel(self, date_value: Any) -> Optional[str]: """Format date value for Excel export. Handles various date formats from JIRA and converts them to a consistent format suitable for Excel display. Args: date_value: Date value from JIRA (string, datetime, or None) Returns: Formatted date string or None if invalid/empty """ if not date_value: return None try: # Handle ISO 8601 strings (JIRA format: 2025-09-16T12:33:10.044+0200) if isinstance(date_value, str): # Parse ISO format with timezone if 'T' in date_value and ('+' in date_value or 'Z' in date_value): dt = datetime.fromisoformat(date_value.replace('Z', '+00:00')) # Convert to UTC for consistency if dt.tzinfo: dt = dt.astimezone(timezone.utc) return dt.strftime('%Y-%m-%d %H:%M:%S UTC') # Handle simple date strings elif len(date_value) == 10 and date_value.count('-') == 2: dt = datetime.strptime(date_value, '%Y-%m-%d') return dt.strftime('%Y-%m-%d') else: # Try to parse as datetime dt = datetime.fromisoformat(date_value) return dt.strftime('%Y-%m-%d %H:%M:%S') # Handle datetime objects elif isinstance(date_value, datetime): if date_value.tzinfo: dt = date_value.astimezone(timezone.utc) else: dt = date_value return dt.strftime('%Y-%m-%d %H:%M:%S UTC') return str(date_value) except (ValueError, TypeError) as e: # Log error but don't fail the sync return str(date_value) if date_value else None def _parse_date_from_excel(self, date_string: str) -> Optional[str]: """Parse date string from Excel and convert to JIRA format. Converts Excel date strings back to JIRA-compatible ISO format. Args: date_string: Date string from Excel Returns: ISO formatted date string for JIRA or None if invalid """ if not date_string or not isinstance(date_string, str): return None try: # Handle various Excel date formats date_string = date_string.strip() # Try common Excel date formats formats_to_try = [ '%Y-%m-%d %H:%M:%S UTC', # Our export format '%Y-%m-%d %H:%M:%S', # Standard format '%Y-%m-%d', # Date only '%d.%m.%Y', # German format '%m/%d/%Y', # US format '%d/%m/%Y', # European format ] for fmt in formats_to_try: try: dt = datetime.strptime(date_string, fmt) # Convert to UTC and format as ISO if dt.tzinfo is None: dt = dt.replace(tzinfo=timezone.utc) return dt.isoformat() except ValueError: continue # If no format matches, try pandas parsing try: dt = pd.to_datetime(date_string) if hasattr(dt, 'to_pydatetime'): dt = dt.to_pydatetime() if dt.tzinfo is None: dt = dt.replace(tzinfo=timezone.utc) return dt.isoformat() except: pass return None except Exception: return None def _is_date_field(self, field_name: str) -> bool: """Check if a field is a date field based on its name. Args: field_name: Name of the field Returns: True if field is likely a date field """ date_keywords = ['date', 'time', 'created', 'updated', 'due', 'deadline'] return any(keyword in field_name.lower() for keyword in date_keywords) def _filter_empty_records(self, records: list[dict]) -> list[dict]: """Remove records that are missing an ID. Purposefully only filter by presence of 'ID' to avoid dropping valid rows with many empty optional fields. """ filtered: list[dict] = [] for row in records: if isinstance(row, dict) and row.get("ID"): filtered.append(row) return filtered def _merge_jira_with_existing( self, jira_data: list[dict], existing_data: list[dict] ) -> list[dict]: """Merge JIRA data with existing CSV data, updating only 'get' fields.""" # Create a lookup for existing data by ID existing_lookup = {row.get("ID"): row for row in existing_data if row.get("ID")} merged_data = [] for jira_row in jira_data: jira_id = jira_row.get("ID") if jira_id and jira_id in existing_lookup: # Update existing row with JIRA data (only 'get' fields) existing_row = existing_lookup[jira_id].copy() for field_name, field_config in self.task_sync_definition.items(): if field_config[0] == "get": # Only update 'get' fields existing_row[field_name] = jira_row.get(field_name) merged_data.append(existing_row) # Remove from lookup to track processed items del existing_lookup[jira_id] else: # New row from JIRA merged_data.append(jira_row) # Add any remaining existing rows that weren't in JIRA data merged_data.extend(existing_lookup.values()) return merged_data def _merge_jira_with_existing_detailed( self, jira_data: list[dict], existing_data: list[dict] ) -> tuple[list[dict], dict]: """Merge JIRA data with existing CSV data and track detailed changes.""" # Create a lookup for existing data by ID existing_lookup = {row.get("ID"): row for row in existing_data if row.get("ID")} merged_data = [] changes = [] updated_count = 0 added_count = 0 unchanged_count = 0 for jira_row in jira_data: jira_id = jira_row.get("ID") if jira_id and jira_id in existing_lookup: # Update existing row with JIRA data (only 'get' fields) existing_row = existing_lookup[jira_id].copy() row_changes = [] for field_name, field_config in self.task_sync_definition.items(): if field_config[0] == "get": # Only update 'get' fields old_value = existing_row.get(field_name, "") new_value = jira_row.get(field_name, "") # Convert None to empty string for comparison old_value = "" if old_value is None else str(old_value) new_value = "" if new_value is None else str(new_value) if old_value != new_value: row_changes.append( f"{field_name}: '{old_value}' → '{new_value}'" ) existing_row[field_name] = jira_row.get(field_name) merged_data.append(existing_row) if row_changes: updated_count += 1 changes.append( f"Row ID {jira_id} updated: {', '.join(row_changes)}" ) else: unchanged_count += 1 # Remove from lookup to track processed items del existing_lookup[jira_id] else: # New row from JIRA merged_data.append(jira_row) added_count += 1 changes.append(f"Row ID {jira_id} added as new record") # Add any remaining existing rows that weren't in JIRA data for remaining_row in existing_lookup.values(): merged_data.append(remaining_row) unchanged_count += 1 change_details = { "updated": updated_count, "added": added_count, "unchanged": unchanged_count, "changes": changes, } return merged_data, change_details async def _write_audit_log(self, audit_log: list[str], operation_type: str): """Write audit log to SharePoint.""" try: timestamp = get_utc_now().strftime("%Y%m%d_%H%M%S") audit_filename = f"audit_{operation_type}_{timestamp}.log" # Convert audit log to bytes audit_content = "\n".join(audit_log).encode("utf-8") # Debug logging import logging logger = logging.getLogger(__name__) logger.debug(f"Writing audit log to folder: {self.audit_folder}, file: {audit_filename}") # Write to SharePoint await self.connector_sharepoint.upload_file( site_id=self.site_id, folder_path=self.audit_folder, file_name=audit_filename, content=audit_content, ) logger.debug("Audit log written successfully") except Exception as e: # If audit logging fails, we don't want to break the main sync process # Just log the error (this could be enhanced with fallback logging) import logging logger = logging.getLogger(__name__) logger.warning(f"Failed to write audit log: {str(e)}") logger.warning(f"Audit folder: {self.audit_folder}") logger.warning(f"Operation type: {operation_type}") import traceback logger.warning(f"Traceback: {traceback.format_exc()}") def _create_csv_content(self, data: list[dict], existing_headers: dict = None) -> bytes: """Create CSV content with 4-row structure matching reference code.""" # Get current timestamp for header timestamp = get_utc_now().strftime("%Y-%m-%d %H:%M:%S UTC") # Use existing headers if provided, otherwise use defaults if existing_headers is None: existing_headers = {"header1": "Header 1", "header2": "Header 2"} if not data: # Build an empty table with the expected columns from schema cols = list(self.task_sync_definition.keys()) df = pd.DataFrame(columns=cols) # Parse existing headers to extract individual columns import csv as csv_module header1_text = existing_headers.get("header1", "Header 1") header2_text = existing_headers.get("header2", "Header 2") # Parse the existing header rows header1_reader = csv_module.reader([header1_text]) header2_reader = csv_module.reader([header2_text]) header1_row = next(header1_reader, []) header2_row = next(header2_reader, []) # Row 1: Use existing header1 or default if len(header1_row) >= len(cols): header_row1_data = header1_row[:len(cols)] else: header_row1_data = header1_row + [""] * (len(cols) - len(header1_row)) header_row1 = pd.DataFrame([header_row1_data], columns=cols) # Row 2: Use existing header2 and add timestamp to second column if len(header2_row) >= len(cols): header_row2_data = header2_row[:len(cols)] else: header_row2_data = header2_row + [""] * (len(cols) - len(header2_row)) if len(header_row2_data) > 1: header_row2_data[1] = timestamp header_row2 = pd.DataFrame([header_row2_data], columns=cols) # Row 3: table headers table_headers = pd.DataFrame([cols], columns=cols) final_df = pd.concat( [header_row1, header_row2, table_headers, df], ignore_index=True ) csv_text = StringIO() final_df.to_csv(csv_text, index=False, header=False, quoting=1, escapechar='\\') return csv_text.getvalue().encode("utf-8") # Create DataFrame from data df = pd.DataFrame(data) # Force all columns to be object (string) type to preserve empty cells for column in df.columns: df[column] = df[column].astype("object") df[column] = df[column].fillna("") # Clean data: replace actual line breaks with \n and escape quotes for column in df.columns: df[column] = df[column].astype(str).str.replace('\n', '\\n', regex=False) df[column] = df[column].str.replace('"', '""', regex=False) # Create the 4-row structure # Parse existing headers to extract individual columns import csv as csv_module header1_text = existing_headers.get("header1", "Header 1") header2_text = existing_headers.get("header2", "Header 2") # Parse the existing header rows header1_reader = csv_module.reader([header1_text]) header2_reader = csv_module.reader([header2_text]) header1_row = next(header1_reader, []) header2_row = next(header2_reader, []) # Row 1: Use existing header1 or default if len(header1_row) >= len(df.columns): header_row1_data = header1_row[:len(df.columns)] else: header_row1_data = header1_row + [""] * (len(df.columns) - len(header1_row)) header_row1 = pd.DataFrame([header_row1_data], columns=df.columns) # Row 2: Use existing header2 and add timestamp to second column if len(header2_row) >= len(df.columns): header_row2_data = header2_row[:len(df.columns)] else: header_row2_data = header2_row + [""] * (len(df.columns) - len(header2_row)) if len(header_row2_data) > 1: header_row2_data[1] = timestamp header_row2 = pd.DataFrame([header_row2_data], columns=df.columns) # Row 3: Table headers (column names) table_headers = pd.DataFrame([df.columns.tolist()], columns=df.columns) # Concatenate all rows: header1 + header2 + table_headers + data final_df = pd.concat( [header_row1, header_row2, table_headers, df], ignore_index=True ) # Convert to CSV bytes with proper quoting for fields containing special characters csv_text = StringIO() final_df.to_csv(csv_text, index=False, header=False, quoting=1, escapechar='\\') return csv_text.getvalue().encode("utf-8") def _create_excel_content(self, data: list[dict], existing_headers: dict = None) -> bytes: """Create Excel content with 4-row structure matching reference code.""" # Get current timestamp for header timestamp = get_utc_now().strftime("%Y-%m-%d %H:%M:%S UTC") # Use existing headers if provided, otherwise use defaults if existing_headers is None: existing_headers = {"header1": "Header 1", "header2": "Header 2"} if not data: # Build an empty table with the expected columns from schema cols = list(self.task_sync_definition.keys()) df = pd.DataFrame(columns=cols) # Parse existing headers to extract individual columns import csv as csv_module header1_text = existing_headers.get("header1", "Header 1") header2_text = existing_headers.get("header2", "Header 2") # Parse the existing header rows header1_reader = csv_module.reader([header1_text]) header2_reader = csv_module.reader([header2_text]) header1_row = next(header1_reader, []) header2_row = next(header2_reader, []) # Row 1: Use existing header1 or default if len(header1_row) >= len(cols): header_row1_data = header1_row[:len(cols)] else: header_row1_data = header1_row + [""] * (len(cols) - len(header1_row)) header_row1 = pd.DataFrame([header_row1_data], columns=cols) # Row 2: Use existing header2 and add timestamp to second column if len(header2_row) >= len(cols): header_row2_data = header2_row[:len(cols)] else: header_row2_data = header2_row + [""] * (len(cols) - len(header2_row)) if len(header_row2_data) > 1: header_row2_data[1] = timestamp header_row2 = pd.DataFrame([header_row2_data], columns=cols) # Row 3: table headers table_headers = pd.DataFrame([cols], columns=cols) final_df = pd.concat( [header_row1, header_row2, table_headers, df], ignore_index=True ) # Create Excel file in memory excel_buffer = BytesIO() final_df.to_excel(excel_buffer, index=False, header=False, engine='openpyxl') return excel_buffer.getvalue() # Create DataFrame from data df = pd.DataFrame(data) # Force all columns to be object (string) type to preserve empty cells for column in df.columns: df[column] = df[column].astype("object") df[column] = df[column].fillna("") # Clean data: replace actual line breaks with \n and escape quotes for column in df.columns: df[column] = df[column].astype(str).str.replace('\n', '\\n', regex=False) df[column] = df[column].str.replace('"', '""', regex=False) # Create the 4-row structure # Parse existing headers to extract individual columns import csv as csv_module header1_text = existing_headers.get("header1", "Header 1") header2_text = existing_headers.get("header2", "Header 2") # Parse the existing header rows header1_reader = csv_module.reader([header1_text]) header2_reader = csv_module.reader([header2_text]) header1_row = next(header1_reader, []) header2_row = next(header2_reader, []) # Row 1: Use existing header1 or default if len(header1_row) >= len(df.columns): header_row1_data = header1_row[:len(df.columns)] else: header_row1_data = header1_row + [""] * (len(df.columns) - len(header1_row)) header_row1 = pd.DataFrame([header_row1_data], columns=df.columns) # Row 2: Use existing header2 and add timestamp to second column if len(header2_row) >= len(df.columns): header_row2_data = header2_row[:len(df.columns)] else: header_row2_data = header2_row + [""] * (len(df.columns) - len(header2_row)) if len(header_row2_data) > 1: header_row2_data[1] = timestamp header_row2 = pd.DataFrame([header_row2_data], columns=df.columns) # Row 3: Table headers (column names) table_headers = pd.DataFrame([df.columns.tolist()], columns=df.columns) # Concatenate all rows: header1 + header2 + table_headers + data final_df = pd.concat( [header_row1, header_row2, table_headers, df], ignore_index=True ) # Create Excel file in memory excel_buffer = BytesIO() final_df.to_excel(excel_buffer, index=False, header=False, engine='openpyxl') return excel_buffer.getvalue() def _parse_excel_content(self, excel_content: bytes) -> tuple[list[dict], dict]: """Parse Excel content with 4-row structure and return data and headers.""" try: # Load Excel file from bytes df = pd.read_excel( BytesIO(excel_content), engine='openpyxl', header=None ) # Extract the 4 parts: # Row 1: Static header row 1 header_row1 = df.iloc[0:1].copy() # Row 2: Static header row 2 header_row2 = df.iloc[1:2].copy() # Row 3: Table headers table_headers = df.iloc[2:3].copy() # Row 4+: Data rows df_data = df.iloc[3:].copy() # Set column names from row 3 df_data.columns = table_headers.iloc[0] # Reset index to start from 0 df_data = df_data.reset_index(drop=True) # Force all columns to be object (string) type and handle NaN values for column in df_data.columns: df_data[column] = df_data[column].astype('object') # Fill NaN values with empty string to keep cells empty df_data[column] = df_data[column].fillna('') # Convert DataFrame to list of dictionaries data = df_data.to_dict(orient='records') # Extract headers as strings (like CSV version) headers = { "header1": ",".join([str(x) if pd.notna(x) else "" for x in header_row1.iloc[0].tolist()]), "header2": ",".join([str(x) if pd.notna(x) else "" for x in header_row2.iloc[0].tolist()]) } return data, headers except Exception as e: raise Exception(f"Failed to parse Excel content: {str(e)}")