from dataclasses import dataclass
from datetime import datetime, timezone
from io import BytesIO, StringIO
from typing import Any, Optional

import openpyxl
import pandas as pd

from modules.interfaces.interfaceTicketModel import TicketBase, Task
from modules.services.serviceSharepoint.mainSharepoint import SharepointService
from modules.shared.timezoneUtils import get_utc_now


@dataclass(slots=True)
class TicketSharepointSyncInterface:
    connector_ticket: TicketBase
    connector_sharepoint: SharepointService
    task_sync_definition: dict
    sync_folder: str
    sync_file: str
    backup_folder: str
    audit_folder: str
    site_id: str  # Kept for compatibility; not used with the REST API
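    # Illustrative shape of ``task_sync_definition`` (field names and the JIRA
    # field IDs below are placeholders, not values taken from this project):
    #
    #     {
    #         "ID": ("get", ["key"]),
    #         "Status": ("get", ["fields", "status", "name"]),
    #         "Notes": ("put", ["fields", "customfield_10067"]),
    #     }
    #
    # "get" fields flow JIRA -> file only; "put" fields are written back to JIRA.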
    @classmethod
    async def create(
        cls,
        connector_ticket: TicketBase,
        connector_sharepoint: SharepointService,
        task_sync_definition: dict,
        sync_folder: str,
        sync_file: str,
        backup_folder: str,
        audit_folder: str,
        site_id: str,
    ) -> "TicketSharepointSyncInterface":
        return cls(
            connector_ticket=connector_ticket,
            connector_sharepoint=connector_sharepoint,
            task_sync_definition=task_sync_definition,
            sync_folder=sync_folder,
            sync_file=sync_file,
            backup_folder=backup_folder,
            audit_folder=audit_folder,
            site_id=site_id,
        )
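    # Minimal usage sketch (connector construction is project-specific; the
    # folder and file names below are placeholders):
    #
    #     sync = await TicketSharepointSyncInterface.create(
    #         connector_ticket=jira_connector,
    #         connector_sharepoint=sharepoint_service,
    #         task_sync_definition=task_sync_definition,
    #         sync_folder="Shared Documents/sync",
    #         sync_file="tasks.xlsx",
    #         backup_folder="Shared Documents/sync/backup",
    #         audit_folder="Shared Documents/sync/audit",
    #         site_id="<site-id>",
    #     )
    #     await sync.sync_from_jira_to_excel()
    #     await sync.sync_from_excel_to_jira()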
    async def create_backup(self):
        """Creates a backup of the current sync file in the backup folder."""
        timestamp = get_utc_now().strftime("%Y%m%d_%H%M%S")
        backup_filename = f"backup_{timestamp}_{self.sync_file}"

        try:
            await self.connector_sharepoint.copy_file_async(
                site_id=self.site_id,
                source_folder=self.sync_folder,
                source_file=self.sync_file,
                dest_folder=self.backup_folder,
                dest_file=backup_filename,
            )
        except Exception as e:
            # A missing source file (404) is expected on a first-time sync; signal it
            # to the caller, which treats this case as non-fatal.
            if "itemNotFound" in str(e) or "404" in str(e) or "could not be found" in str(e):
                raise Exception(f"Source file does not exist - no backup needed: {self.sync_file}")
            else:
                # Re-raise other errors
                raise
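    # Example (illustrative): with sync_file "tasks.xlsx", a backup created at
    # 2025-09-16 10:33:10 UTC is copied to backup_folder as
    # "backup_20250916_103310_tasks.xlsx".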
    async def sync_from_jira_to_csv(self):
        """Syncs tasks from JIRA to a CSV file in SharePoint."""
        start_time = get_utc_now()
        audit_log = []

        audit_log.append("=== JIRA TO CSV SYNC STARTED ===")
        audit_log.append(f"Start Time: {start_time.strftime('%Y-%m-%d %H:%M:%S')}")
        audit_log.append(f"Sync File: {self.sync_file}")
        audit_log.append(f"Sync Folder: {self.sync_folder}")
        audit_log.append("")

        try:
            # 1. Read JIRA tickets
            audit_log.append("Step 1: Reading JIRA tickets...")
            tickets = await self.connector_ticket.read_tasks(limit=0)
            audit_log.append(f"JIRA issues read: {len(tickets)}")
            audit_log.append("")

            # 2. Transform tasks according to task_sync_definition
            audit_log.append("Step 2: Transforming JIRA data...")
            transformed_tasks = self._transform_tasks(tickets, include_put=True)
            jira_data = [task.data for task in transformed_tasks]
            before_count = len(jira_data)
            # Remove records without an ID to avoid blank rows
            jira_data = self._filter_empty_records(jira_data)
            after_count = len(jira_data)
            audit_log.append(f"JIRA issues transformed: {before_count}")
            audit_log.append(f"JIRA issues after ID filter: {after_count}")
            # Log a sample of IDs to diagnose empty export issues
            try:
                sample_ids = [str(row.get("ID")) for row in jira_data[:5]]
                audit_log.append(f"Sample IDs: {', '.join(sample_ids)}")
            except Exception:
                pass
            audit_log.append("")

            # 3. Create JIRA export file in the audit folder
            audit_log.append("Step 3: Creating JIRA export file...")
            try:
                timestamp = get_utc_now().strftime("%Y%m%d_%H%M%S")
                jira_export_filename = f"jira_export_{timestamp}.csv"
                # Use default headers for the JIRA export
                jira_export_content = self._create_csv_content(
                    jira_data, {"header1": "JIRA Export", "header2": "Raw Data"}
                )
                await self.connector_sharepoint.upload_file(
                    site_id=self.site_id,
                    folder_path=self.audit_folder,
                    file_name=jira_export_filename,
                    content=jira_export_content,
                )
                audit_log.append(f"JIRA export file created: {jira_export_filename}")
            except Exception as e:
                audit_log.append(f"Failed to create JIRA export file: {str(e)}")
            audit_log.append("")

            # 4. Create a backup of the existing sync file (if it exists)
            audit_log.append("Step 4: Creating backup...")
            backup_created = False
            try:
                await self.create_backup()
                backup_created = True
                audit_log.append("Backup created successfully")
            except Exception as e:
                audit_log.append(
                    f"Backup creation failed (file might not exist): {str(e)}"
                )
            audit_log.append("")

            # 5. Try to read the existing CSV file from SharePoint
            audit_log.append("Step 5: Reading existing CSV file...")
            existing_data = []
            existing_file_found = False
            existing_headers = {"header1": "", "header2": ""}
            try:
                file_path = f"{self.sync_folder}/{self.sync_file}"
                csv_content = await self.connector_sharepoint.download_file_by_path(
                    site_id=self.site_id, file_path=file_path
                )

                # Read the first two lines to get the headers
                csv_lines = csv_content.decode("utf-8").split("\n")
                if len(csv_lines) >= 2:
                    # Store the raw first two lines as headers (preserving original formatting)
                    existing_headers["header1"] = csv_lines[0].rstrip("\r\n")
                    existing_headers["header2"] = csv_lines[1].rstrip("\r\n")

                # Try to read with robust CSV parsing (skip the first 2 rows)
                df_existing = pd.read_csv(
                    BytesIO(csv_content),
                    skiprows=2,
                    quoting=1,  # QUOTE_ALL
                    escapechar="\\",
                    on_bad_lines="skip",  # Skip malformed lines
                    engine="python",  # More robust parsing
                )
                existing_data = df_existing.to_dict("records")
                existing_file_found = True
                audit_log.append(
                    f"Existing CSV file found with {len(existing_data)} records"
                )
                audit_log.append(
                    f"Preserved headers: Header1='{existing_headers['header1']}', "
                    f"Header2='{existing_headers['header2']}'"
                )
            except Exception as e:
                audit_log.append(f"No existing CSV file found or read error: {str(e)}")
            audit_log.append("")

            # 6. Merge JIRA data with existing data and track changes
            audit_log.append("Step 6: Merging JIRA data with existing data...")
            merged_data, change_details = self._merge_jira_with_existing_detailed(
                jira_data, existing_data
            )

            # Log detailed changes
            audit_log.append(f"Total records after merge: {len(merged_data)}")
            audit_log.append(f"Records updated: {change_details['updated']}")
            audit_log.append(f"Records added: {change_details['added']}")
            audit_log.append(f"Records unchanged: {change_details['unchanged']}")
            audit_log.append("")

            # Log individual changes
            if change_details["changes"]:
                audit_log.append("DETAILED CHANGES:")
                for change in change_details["changes"]:
                    audit_log.append(f"- {change}")
                audit_log.append("")

            # 7. Create CSV with the 4-row structure and write it to SharePoint
            audit_log.append("Step 7: Writing updated CSV to SharePoint...")
            csv_content = self._create_csv_content(merged_data, existing_headers)
            await self.connector_sharepoint.upload_file(
                site_id=self.site_id,
                folder_path=self.sync_folder,
                file_name=self.sync_file,
                content=csv_content,
            )
            audit_log.append("CSV file successfully written to SharePoint")
            audit_log.append("")

            # Success summary
            end_time = get_utc_now()
            duration = (end_time - start_time).total_seconds()
            audit_log.append("=== SYNC COMPLETED SUCCESSFULLY ===")
            audit_log.append(f"End Time: {end_time.strftime('%Y-%m-%d %H:%M:%S')}")
            audit_log.append(f"Duration: {duration:.2f} seconds")
            audit_log.append(f"Total JIRA issues processed: {len(jira_data)}")
            audit_log.append(f"Total records in final CSV: {len(merged_data)}")

        except Exception as e:
            # Error handling
            end_time = get_utc_now()
            duration = (end_time - start_time).total_seconds()
            audit_log.append("")
            audit_log.append("=== SYNC FAILED ===")
            audit_log.append(f"Error Time: {end_time.strftime('%Y-%m-%d %H:%M:%S')}")
            audit_log.append(f"Duration before failure: {duration:.2f} seconds")
            audit_log.append(f"Error: {str(e)}")
            raise
        finally:
            # Write the audit log to SharePoint
            await self._write_audit_log(audit_log, "jira_to_csv")
    async def sync_from_csv_to_jira(self):
        """Syncs tasks from a CSV file in SharePoint to JIRA."""
        start_time = get_utc_now()
        audit_log = []

        audit_log.append("=== CSV TO JIRA SYNC STARTED ===")
        audit_log.append(f"Start Time: {start_time.strftime('%Y-%m-%d %H:%M:%S')}")
        audit_log.append(f"Sync File: {self.sync_file}")
        audit_log.append(f"Sync Folder: {self.sync_folder}")
        audit_log.append("")

        try:
            # 1. Read CSV file from SharePoint
            audit_log.append("Step 1: Reading CSV file from SharePoint...")
            try:
                file_path = f"{self.sync_folder}/{self.sync_file}"
                csv_content = await self.connector_sharepoint.download_file_by_path(
                    site_id=self.site_id, file_path=file_path
                )
                # Try to read with robust CSV parsing
                df = pd.read_csv(
                    BytesIO(csv_content),
                    skiprows=2,
                    quoting=1,  # QUOTE_ALL
                    escapechar="\\",
                    on_bad_lines="skip",  # Skip malformed lines
                    engine="python",  # More robust parsing
                )
                csv_data = df.to_dict("records")
                audit_log.append(
                    f"CSV file read successfully with {len(csv_data)} records"
                )
            except Exception as e:
                audit_log.append(f"Failed to read CSV file: {str(e)}")
                audit_log.append("CSV to JIRA sync aborted - no file to process")
                return
            audit_log.append("")

            # 2. Read current JIRA data for comparison
            audit_log.append("Step 2: Reading current JIRA data for comparison...")
            try:
                current_jira_tasks = await self.connector_ticket.read_tasks(limit=0)
                current_jira_data = self._transform_tasks(
                    current_jira_tasks, include_put=True
                )
                jira_lookup = {
                    task.data.get("ID"): task.data for task in current_jira_data
                }
                audit_log.append(f"Current JIRA data read: {len(jira_lookup)} tasks")
            except Exception as e:
                audit_log.append(f"Failed to read current JIRA data: {str(e)}")
                raise
            audit_log.append("")

            # 3. Detect actual changes in "put" fields
            audit_log.append("Step 3: Detecting changes in 'put' fields...")
            actual_changes = {}
            records_with_changes = 0
            total_changes = 0

            for row in csv_data:
                task_id = row.get("ID")
                if not task_id or task_id not in jira_lookup:
                    continue

                current_jira_task = jira_lookup[task_id]
                task_changes = {}

                for field_name, field_config in self.task_sync_definition.items():
                    if field_config[0] == "put":  # Only process "put" fields
                        csv_value = row.get(field_name, "")
                        jira_value = current_jira_task.get(field_name, "")

                        # Convert None to empty string for comparison
                        csv_value = "" if csv_value is None else str(csv_value).strip()
                        jira_value = (
                            "" if jira_value is None else str(jira_value).strip()
                        )

                        # Include if the values differ (empty strings are allowed so fields
                        # can be cleared, as in the reference implementation)
                        if csv_value != jira_value:
                            task_changes[field_name] = csv_value

                if task_changes:
                    actual_changes[task_id] = task_changes
                    records_with_changes += 1
                    total_changes += len(task_changes)

            audit_log.append(f"Records with actual changes: {records_with_changes}")
            audit_log.append(f"Total field changes detected: {total_changes}")
            audit_log.append("")

            # Log detailed changes
            if actual_changes:
                audit_log.append("DETAILED CHANGES TO APPLY TO JIRA:")
                for task_id, changes in actual_changes.items():
                    change_list = [
                        f"{field}: '{value}'" for field, value in changes.items()
                    ]
                    audit_log.append(f"- Task ID {task_id}: {', '.join(change_list)}")
                audit_log.append("")

            # 4. Update JIRA tasks with the actual changes
            if actual_changes:
                audit_log.append("Step 4: Updating JIRA tasks...")

                # Convert to Task objects for the connector
                tasks_to_update = []
                for task_id, changes in actual_changes.items():
                    # Build the nested "fields" structure the JIRA connector expects
                    fields = {}
                    for field_name, new_value in changes.items():
                        # Map back to the JIRA field structure using task_sync_definition
                        field_config = self.task_sync_definition[field_name]
                        field_path = field_config[1]

                        # Extract the JIRA field ID from the path.
                        # For "put" fields, the path is like ['fields', 'customfield_10067'].
                        if len(field_path) >= 2 and field_path[0] == "fields":
                            jira_field_id = field_path[1]
                            # Parse date fields back to JIRA format
                            if self._is_date_field(field_name) and new_value:
                                parsed_date = self._parse_date_from_excel(str(new_value))
                                if parsed_date:
                                    fields[jira_field_id] = parsed_date
                                else:
                                    fields[jira_field_id] = new_value
                            else:
                                fields[jira_field_id] = new_value

                    if fields:
                        task_data = {"ID": task_id, "fields": fields}
                        task = Task(data=task_data)
                        tasks_to_update.append(task)

                # Write tasks back to JIRA
                try:
                    await self.connector_ticket.write_tasks(tasks_to_update)
                    audit_log.append(
                        f"Successfully updated {len(tasks_to_update)} JIRA tasks"
                    )
                except Exception as e:
                    audit_log.append(f"Failed to update JIRA tasks: {str(e)}")
                    raise
            else:
                audit_log.append("Step 4: No changes to apply to JIRA")
            audit_log.append("")

            # Success summary
            end_time = get_utc_now()
            duration = (end_time - start_time).total_seconds()
            audit_log.append("=== SYNC COMPLETED SUCCESSFULLY ===")
            audit_log.append(f"End Time: {end_time.strftime('%Y-%m-%d %H:%M:%S')}")
            audit_log.append(f"Duration: {duration:.2f} seconds")
            audit_log.append(f"Total CSV records processed: {len(csv_data)}")
            audit_log.append(f"Records with actual changes: {records_with_changes}")
            audit_log.append(f"JIRA tasks updated: {len(actual_changes)}")

        except Exception as e:
            # Error handling
            end_time = get_utc_now()
            duration = (end_time - start_time).total_seconds()
            audit_log.append("")
            audit_log.append("=== SYNC FAILED ===")
            audit_log.append(f"Error Time: {end_time.strftime('%Y-%m-%d %H:%M:%S')}")
            audit_log.append(f"Duration before failure: {duration:.2f} seconds")
            audit_log.append(f"Error: {str(e)}")
            raise
        finally:
            # Write the audit log to SharePoint
            await self._write_audit_log(audit_log, "csv_to_jira")
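    # Payload sketch (illustrative, using the placeholder field ID from the
    # comment above): a detected change {"Notes": "done"} for task "PROJ-1"
    # becomes Task(data={"ID": "PROJ-1", "fields": {"customfield_10067": "done"}})
    # before being passed to connector_ticket.write_tasks().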
    async def sync_from_jira_to_excel(self):
        """Syncs tasks from JIRA to an Excel file in SharePoint."""
        start_time = get_utc_now()
        audit_log = []

        audit_log.append("=== JIRA TO EXCEL SYNC STARTED ===")
        audit_log.append(f"Start Time: {start_time.strftime('%Y-%m-%d %H:%M:%S')}")
        audit_log.append(f"Sync File: {self.sync_file}")
        audit_log.append(f"Sync Folder: {self.sync_folder}")
        audit_log.append("")

        try:
            # 1. Read JIRA tickets
            audit_log.append("Step 1: Reading JIRA tickets...")
            tickets = await self.connector_ticket.read_tasks(limit=0)
            audit_log.append(f"JIRA issues read: {len(tickets)}")
            audit_log.append("")

            # 2. Transform tasks according to task_sync_definition
            audit_log.append("Step 2: Transforming JIRA data...")
            transformed_tasks = self._transform_tasks(tickets, include_put=True)
            jira_data = [task.data for task in transformed_tasks]
            audit_log.append(f"JIRA issues transformed: {len(jira_data)}")
            audit_log.append("")

            # 3. Create JIRA export file in the audit folder
            audit_log.append("Step 3: Creating JIRA export file...")
            try:
                timestamp = get_utc_now().strftime("%Y%m%d_%H%M%S")
                jira_export_filename = f"jira_export_{timestamp}.xlsx"
                # Use default headers for the JIRA export
                jira_export_content = self._create_excel_content(
                    jira_data, {"header1": "JIRA Export", "header2": "Raw Data"}
                )
                await self.connector_sharepoint.upload_file(
                    site_id=self.site_id,
                    folder_path=self.audit_folder,
                    file_name=jira_export_filename,
                    content=jira_export_content,
                )
                audit_log.append(f"JIRA export file created: {jira_export_filename}")
            except Exception as e:
                audit_log.append(f"Failed to create JIRA export file: {str(e)}")
            audit_log.append("")

            # 4. Create a backup of the existing Excel file (if it exists)
            audit_log.append("Step 4: Creating backup...")
            backup_created = False
            try:
                await self.create_backup()
                backup_created = True
                audit_log.append("Backup created successfully")
            except Exception as e:
                audit_log.append(
                    f"Backup creation failed (file might not exist): {str(e)}"
                )
            audit_log.append("")

            # 5. Try to read the existing Excel file from SharePoint
            audit_log.append("Step 5: Reading existing Excel file...")
            existing_data = []
            existing_file_found = False
            existing_headers = {"header1": "Header 1", "header2": "Header 2"}
            try:
                file_path = f"{self.sync_folder}/{self.sync_file}"
                excel_content = await self.connector_sharepoint.download_file_by_path(
                    site_id=self.site_id, file_path=file_path
                )

                # Parse the Excel file with the 4-row structure
                existing_data, existing_headers = self._parse_excel_content(excel_content)
                existing_file_found = True
                audit_log.append(
                    f"Existing Excel file found with {len(existing_data)} records"
                )
                audit_log.append(
                    f"Preserved headers: Header1='{existing_headers['header1']}', "
                    f"Header2='{existing_headers['header2']}'"
                )
            except Exception as e:
                audit_log.append(f"No existing Excel file found or read error: {str(e)}")
            audit_log.append("")

            # 6. Merge JIRA data with existing data and track changes
            audit_log.append("Step 6: Merging JIRA data with existing data...")
            merged_data, change_details = self._merge_jira_with_existing_detailed(
                jira_data, existing_data
            )

            # Log detailed changes
            audit_log.append(f"Total records after merge: {len(merged_data)}")
            audit_log.append(f"Records updated: {change_details['updated']}")
            audit_log.append(f"Records added: {change_details['added']}")
            audit_log.append(f"Records unchanged: {change_details['unchanged']}")
            audit_log.append("")

            # Log individual changes
            if change_details["changes"]:
                audit_log.append("DETAILED CHANGES:")
                for change in change_details["changes"]:
                    audit_log.append(f"- {change}")
                audit_log.append("")

            # 7. Create Excel with the 4-row structure and write it to SharePoint
            audit_log.append("Step 7: Writing updated Excel to SharePoint...")
            # Ensure no records without an ID are written
            merged_data = self._filter_empty_records(merged_data)
            excel_content = self._create_excel_content(merged_data, existing_headers)
            await self.connector_sharepoint.upload_file(
                site_id=self.site_id,
                folder_path=self.sync_folder,
                file_name=self.sync_file,
                content=excel_content,
            )
            audit_log.append("Excel file successfully written to SharePoint")
            audit_log.append("")

            # Success summary
            end_time = get_utc_now()
            duration = (end_time - start_time).total_seconds()
            audit_log.append("=== SYNC COMPLETED SUCCESSFULLY ===")
            audit_log.append(f"End Time: {end_time.strftime('%Y-%m-%d %H:%M:%S')}")
            audit_log.append(f"Duration: {duration:.2f} seconds")
            audit_log.append(f"Total JIRA issues processed: {len(jira_data)}")
            audit_log.append(f"Total records in final Excel: {len(merged_data)}")

        except Exception as e:
            # Error handling
            end_time = get_utc_now()
            duration = (end_time - start_time).total_seconds()
            audit_log.append("")
            audit_log.append("=== SYNC FAILED ===")
            audit_log.append(f"Error Time: {end_time.strftime('%Y-%m-%d %H:%M:%S')}")
            audit_log.append(f"Duration before failure: {duration:.2f} seconds")
            audit_log.append(f"Error: {str(e)}")
            raise
        finally:
            # Write the audit log to SharePoint
            await self._write_audit_log(audit_log, "jira_to_excel")
    async def sync_from_excel_to_jira(self):
        """Syncs tasks from an Excel file in SharePoint to JIRA."""
        start_time = get_utc_now()
        audit_log = []

        audit_log.append("=== EXCEL TO JIRA SYNC STARTED ===")
        audit_log.append(f"Start Time: {start_time.strftime('%Y-%m-%d %H:%M:%S')}")
        audit_log.append(f"Sync File: {self.sync_file}")
        audit_log.append(f"Sync Folder: {self.sync_folder}")
        audit_log.append("")

        try:
            # 1. Read Excel file from SharePoint
            audit_log.append("Step 1: Reading Excel file from SharePoint...")
            try:
                file_path = f"{self.sync_folder}/{self.sync_file}"
                excel_content = await self.connector_sharepoint.download_file_by_path(
                    site_id=self.site_id, file_path=file_path
                )
                # Parse the Excel file with the 4-row structure
                excel_data, _ = self._parse_excel_content(excel_content)
                audit_log.append(
                    f"Excel file read successfully with {len(excel_data)} records"
                )
            except Exception as e:
                audit_log.append(f"Failed to read Excel file: {str(e)}")
                audit_log.append("Excel to JIRA sync aborted - no file to process")
                return
            audit_log.append("")

            # 2. Read current JIRA data for comparison
            audit_log.append("Step 2: Reading current JIRA data for comparison...")
            try:
                current_jira_tasks = await self.connector_ticket.read_tasks(limit=0)
                current_jira_data = self._transform_tasks(
                    current_jira_tasks, include_put=True
                )
                jira_lookup = {
                    task.data.get("ID"): task.data for task in current_jira_data
                }
                audit_log.append(f"Current JIRA data read: {len(jira_lookup)} tasks")
            except Exception as e:
                audit_log.append(f"Failed to read current JIRA data: {str(e)}")
                raise
            audit_log.append("")

            # 3. Detect actual changes in "put" fields
            audit_log.append("Step 3: Detecting changes in 'put' fields...")
            actual_changes = {}
            records_with_changes = 0
            total_changes = 0

            for row in excel_data:
                task_id = row.get("ID")
                if not task_id or task_id not in jira_lookup:
                    continue

                current_jira_task = jira_lookup[task_id]
                task_changes = {}

                for field_name, field_config in self.task_sync_definition.items():
                    if field_config[0] == "put":  # Only process "put" fields
                        excel_value = row.get(field_name, "")
                        jira_value = current_jira_task.get(field_name, "")

                        # Convert None to empty string for comparison
                        excel_value = "" if excel_value is None else str(excel_value).strip()
                        jira_value = (
                            "" if jira_value is None else str(jira_value).strip()
                        )

                        # Include if the values differ (empty strings are allowed so fields
                        # can be cleared, as in the reference implementation)
                        if excel_value != jira_value:
                            task_changes[field_name] = excel_value

                if task_changes:
                    actual_changes[task_id] = task_changes
                    records_with_changes += 1
                    total_changes += len(task_changes)

            audit_log.append(f"Records with actual changes: {records_with_changes}")
            audit_log.append(f"Total field changes detected: {total_changes}")
            audit_log.append("")

            # Log detailed changes
            if actual_changes:
                audit_log.append("DETAILED CHANGES TO APPLY TO JIRA:")
                for task_id, changes in actual_changes.items():
                    change_list = [
                        f"{field}: '{value}'" for field, value in changes.items()
                    ]
                    audit_log.append(f"- Task ID {task_id}: {', '.join(change_list)}")
                audit_log.append("")

            # 4. Update JIRA tasks with the actual changes
            if actual_changes:
                audit_log.append("Step 4: Updating JIRA tasks...")

                # Convert to Task objects for the connector
                tasks_to_update = []
                for task_id, changes in actual_changes.items():
                    # Build the nested "fields" structure the JIRA connector expects
                    fields = {}
                    for field_name, new_value in changes.items():
                        # Map back to the JIRA field structure using task_sync_definition
                        field_config = self.task_sync_definition[field_name]
                        field_path = field_config[1]

                        # Extract the JIRA field ID from the path.
                        # For "put" fields, the path is like ['fields', 'customfield_10067'].
                        if len(field_path) >= 2 and field_path[0] == "fields":
                            jira_field_id = field_path[1]
                            # Parse date fields back to JIRA format
                            if self._is_date_field(field_name) and new_value:
                                parsed_date = self._parse_date_from_excel(str(new_value))
                                if parsed_date:
                                    fields[jira_field_id] = parsed_date
                                else:
                                    fields[jira_field_id] = new_value
                            else:
                                fields[jira_field_id] = new_value

                    if fields:
                        task_data = {"ID": task_id, "fields": fields}
                        task = Task(data=task_data)
                        tasks_to_update.append(task)

                # Write tasks back to JIRA
                try:
                    await self.connector_ticket.write_tasks(tasks_to_update)
                    audit_log.append(
                        f"Successfully updated {len(tasks_to_update)} JIRA tasks"
                    )
                except Exception as e:
                    audit_log.append(f"Failed to update JIRA tasks: {str(e)}")
                    raise
            else:
                audit_log.append("Step 4: No changes to apply to JIRA")
            audit_log.append("")

            # Success summary
            end_time = get_utc_now()
            duration = (end_time - start_time).total_seconds()
            audit_log.append("=== SYNC COMPLETED SUCCESSFULLY ===")
            audit_log.append(f"End Time: {end_time.strftime('%Y-%m-%d %H:%M:%S')}")
            audit_log.append(f"Duration: {duration:.2f} seconds")
            audit_log.append(f"Total Excel records processed: {len(excel_data)}")
            audit_log.append(f"Records with actual changes: {records_with_changes}")
            audit_log.append(f"JIRA tasks updated: {len(actual_changes)}")

        except Exception as e:
            # Error handling
            end_time = get_utc_now()
            duration = (end_time - start_time).total_seconds()
            audit_log.append("")
            audit_log.append("=== SYNC FAILED ===")
            audit_log.append(f"Error Time: {end_time.strftime('%Y-%m-%d %H:%M:%S')}")
            audit_log.append(f"Duration before failure: {duration:.2f} seconds")
            audit_log.append(f"Error: {str(e)}")
            raise
        finally:
            # Write the audit log to SharePoint
            await self._write_audit_log(audit_log, "excel_to_jira")
    def _transform_tasks(
        self, tasks: list[Task], include_put: bool = False
    ) -> list[Task]:
        """Transforms tasks according to the task_sync_definition."""
        transformed_tasks = []

        for task in tasks:
            transformed_data = {}

            # Process each field in the sync definition
            for field_name, field_config in self.task_sync_definition.items():
                direction = field_config[0]  # "get" or "put"
                field_path = field_config[1]  # List of keys to navigate

                # Keep "get" fields, and "put" fields only when requested
                if direction == "get" or include_put:
                    # Extract the value using the field path
                    value = self._extract_field_value(task.data, field_path, field_name)
                    transformed_data[field_name] = value

            # Create a new Task with the transformed data
            transformed_task = Task(data=transformed_data)
            transformed_tasks.append(transformed_task)

        return transformed_tasks
    def _extract_field_value(
        self, issue_data: dict, field_path: list[str], field_name: Optional[str] = None
    ) -> Any:
        """Extract a field value from JIRA issue data using the field path."""
        value = issue_data
        try:
            for key in field_path:
                if value is not None:
                    value = value[key]

            if value is None:
                return None

            # Handle complex objects that have a 'value' field (like custom field options)
            if isinstance(value, dict) and "value" in value:
                value = value["value"]
            # Handle lists of objects with 'value' fields
            elif (
                isinstance(value, list)
                and len(value) > 0
                and isinstance(value[0], dict)
                and "value" in value[0]
            ):
                value = value[0]["value"]

            # Apply ADF conversion for fields that contain ADF content
            if isinstance(value, dict) and value.get("type") == "doc":
                value = self._convert_adf_to_text(value)

            # Apply date formatting for date fields
            if field_name and self._is_date_field(field_name):
                value = self._format_date_for_excel(value)

            return value
        except (KeyError, TypeError):
            return None
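    # Example (illustrative): with field_path ["fields", "status", "name"] and
    # issue_data {"fields": {"status": {"name": "In Progress"}}}, this returns
    # "In Progress". Option objects such as {"value": "High"} collapse to "High",
    # and ADF documents are flattened via _convert_adf_to_text().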
    def _convert_adf_to_text(self, adf_data):
        """Convert Atlassian Document Format (ADF) to plain text.

        Based on the Atlassian Document Format specification for JIRA fields.
        Handles paragraphs, lists, text formatting, and other ADF node types.

        Args:
            adf_data: ADF object or None

        Returns:
            str: Plain text content, or empty string if None/invalid
        """
        if not adf_data or not isinstance(adf_data, dict):
            return ""

        if adf_data.get("type") != "doc":
            return str(adf_data) if adf_data else ""

        content = adf_data.get("content", [])
        if not isinstance(content, list):
            return ""

        def extract_text_from_content(content_list, list_level=0):
            """Recursively extract text from ADF content with proper formatting."""
            text_parts = []
            list_counter = 1

            for item in content_list:
                if not isinstance(item, dict):
                    continue

                item_type = item.get("type", "")

                if item_type == "text":
                    # Extract text content, preserving formatting
                    text = item.get("text", "")
                    marks = item.get("marks", [])

                    # Handle text formatting (bold, italic, etc.)
                    if marks:
                        for mark in marks:
                            if mark.get("type") == "strong":
                                text = f"**{text}**"
                            elif mark.get("type") == "em":
                                text = f"*{text}*"
                            elif mark.get("type") == "code":
                                text = f"`{text}`"
                            elif mark.get("type") == "link":
                                attrs = mark.get("attrs", {})
                                href = attrs.get("href", "")
                                if href:
                                    text = f"[{text}]({href})"

                    text_parts.append(text)

                elif item_type == "hardBreak":
                    text_parts.append("\n")

                elif item_type == "paragraph":
                    paragraph_content = item.get("content", [])
                    if paragraph_content:
                        paragraph_text = extract_text_from_content(paragraph_content, list_level)
                        if paragraph_text.strip():
                            text_parts.append(paragraph_text)

                elif item_type == "bulletList":
                    list_content = item.get("content", [])
                    for list_item in list_content:
                        if list_item.get("type") == "listItem":
                            list_item_content = list_item.get("content", [])
                            for list_paragraph in list_item_content:
                                if list_paragraph.get("type") == "paragraph":
                                    list_paragraph_content = list_paragraph.get("content", [])
                                    if list_paragraph_content:
                                        indent = " " * list_level
                                        bullet_text = extract_text_from_content(
                                            list_paragraph_content, list_level + 1
                                        )
                                        if bullet_text.strip():
                                            text_parts.append(f"{indent}• {bullet_text}")

                elif item_type == "orderedList":
                    list_content = item.get("content", [])
                    for list_item in list_content:
                        if list_item.get("type") == "listItem":
                            list_item_content = list_item.get("content", [])
                            for list_paragraph in list_item_content:
                                if list_paragraph.get("type") == "paragraph":
                                    list_paragraph_content = list_paragraph.get("content", [])
                                    if list_paragraph_content:
                                        indent = " " * list_level
                                        ordered_text = extract_text_from_content(
                                            list_paragraph_content, list_level + 1
                                        )
                                        if ordered_text.strip():
                                            text_parts.append(f"{indent}{list_counter}. {ordered_text}")
                                            list_counter += 1

                elif item_type == "listItem":
                    # Handle nested list items
                    list_item_content = item.get("content", [])
                    if list_item_content:
                        text_parts.append(extract_text_from_content(list_item_content, list_level))

                elif item_type == "embedCard":
                    # Handle embedded content (videos, etc.)
                    attrs = item.get("attrs", {})
                    url = attrs.get("url", "")
                    if url:
                        text_parts.append(f"[Embedded Content: {url}]")

                elif item_type == "codeBlock":
                    # Handle code blocks
                    code_content = item.get("content", [])
                    if code_content:
                        code_text = extract_text_from_content(code_content, list_level)
                        if code_text.strip():
                            text_parts.append(f"```\n{code_text}\n```")

                elif item_type == "blockquote":
                    # Handle blockquotes
                    quote_content = item.get("content", [])
                    if quote_content:
                        quote_text = extract_text_from_content(quote_content, list_level)
                        if quote_text.strip():
                            text_parts.append(f"> {quote_text}")

                elif item_type == "heading":
                    # Handle headings
                    heading_content = item.get("content", [])
                    if heading_content:
                        heading_text = extract_text_from_content(heading_content, list_level)
                        if heading_text.strip():
                            level = item.get("attrs", {}).get("level", 1)
                            text_parts.append(f"{'#' * level} {heading_text}")

                elif item_type == "rule":
                    # Handle horizontal rules
                    text_parts.append("---")

                else:
                    # Handle unknown types by trying to extract their content
                    if "content" in item:
                        content_text = extract_text_from_content(item.get("content", []), list_level)
                        if content_text.strip():
                            text_parts.append(content_text)

            return "\n".join(text_parts)

        result = extract_text_from_content(content)
        return result.strip()
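    # Illustrative input/output for _convert_adf_to_text():
    #
    #     {"type": "doc", "content": [
    #         {"type": "paragraph", "content": [
    #             {"type": "text", "text": "Done", "marks": [{"type": "strong"}]}
    #         ]}
    #     ]}
    #
    # is converted to the plain-text string "**Done**".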
    def _format_date_for_excel(self, date_value: Any) -> Optional[str]:
        """Format a date value for Excel export.

        Handles various date formats from JIRA and converts them to a consistent
        format suitable for Excel display.

        Args:
            date_value: Date value from JIRA (string, datetime, or None)

        Returns:
            Formatted date string, or None if invalid/empty
        """
        if not date_value:
            return None

        try:
            # Handle ISO 8601 strings (JIRA format: 2025-09-16T12:33:10.044+0200)
            if isinstance(date_value, str):
                # Parse ISO format with timezone
                if "T" in date_value and ("+" in date_value or "Z" in date_value):
                    dt = datetime.fromisoformat(date_value.replace("Z", "+00:00"))
                    # Convert to UTC for consistency
                    if dt.tzinfo:
                        dt = dt.astimezone(timezone.utc)
                    return dt.strftime("%Y-%m-%d %H:%M:%S UTC")
                # Handle simple date strings
                elif len(date_value) == 10 and date_value.count("-") == 2:
                    dt = datetime.strptime(date_value, "%Y-%m-%d")
                    return dt.strftime("%Y-%m-%d")
                else:
                    # Try to parse as a datetime
                    dt = datetime.fromisoformat(date_value)
                    return dt.strftime("%Y-%m-%d %H:%M:%S")

            # Handle datetime objects
            elif isinstance(date_value, datetime):
                if date_value.tzinfo:
                    dt = date_value.astimezone(timezone.utc)
                else:
                    dt = date_value
                return dt.strftime("%Y-%m-%d %H:%M:%S UTC")

            return str(date_value)

        except (ValueError, TypeError):
            # Fall back to the raw value rather than failing the sync
            return str(date_value) if date_value else None
    def _parse_date_from_excel(self, date_string: str) -> Optional[str]:
        """Parse a date string from Excel and convert it to JIRA format.

        Converts Excel date strings back to a JIRA-compatible ISO format.

        Args:
            date_string: Date string from Excel

        Returns:
            ISO formatted date string for JIRA, or None if invalid
        """
        if not date_string or not isinstance(date_string, str):
            return None

        try:
            # Handle various Excel date formats
            date_string = date_string.strip()

            # Try common Excel date formats
            formats_to_try = [
                "%Y-%m-%d %H:%M:%S UTC",  # Our export format
                "%Y-%m-%d %H:%M:%S",      # Standard format
                "%Y-%m-%d",               # Date only
                "%d.%m.%Y",               # German format
                "%m/%d/%Y",               # US format
                "%d/%m/%Y",               # European format
            ]

            for fmt in formats_to_try:
                try:
                    dt = datetime.strptime(date_string, fmt)
                    # Convert to UTC and format as ISO
                    if dt.tzinfo is None:
                        dt = dt.replace(tzinfo=timezone.utc)
                    return dt.isoformat()
                except ValueError:
                    continue

            # If no format matches, fall back to pandas parsing
            try:
                dt = pd.to_datetime(date_string)
                if hasattr(dt, "to_pydatetime"):
                    dt = dt.to_pydatetime()
                if dt.tzinfo is None:
                    dt = dt.replace(tzinfo=timezone.utc)
                return dt.isoformat()
            except Exception:
                pass

            return None

        except Exception:
            return None
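    # Example round trip (illustrative): a JIRA value such as
    # "2025-09-16T12:33:10.044+02:00" is rendered by _format_date_for_excel() as
    # "2025-09-16 10:33:10 UTC", and _parse_date_from_excel() turns that string
    # back into "2025-09-16T10:33:10+00:00" for the JIRA update payload.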
    def _is_date_field(self, field_name: str) -> bool:
        """Check if a field is a date field based on its name.

        Args:
            field_name: Name of the field

        Returns:
            True if the field is likely a date field
        """
        date_keywords = ["date", "time", "created", "updated", "due", "deadline"]
        return any(keyword in field_name.lower() for keyword in date_keywords)

    def _filter_empty_records(self, records: list[dict]) -> list[dict]:
        """Remove records that are missing an ID.

        Purposefully only filters by presence of 'ID' to avoid dropping
        valid rows with many empty optional fields.
        """
        filtered: list[dict] = []
        for row in records:
            if isinstance(row, dict) and row.get("ID"):
                filtered.append(row)
        return filtered
    def _merge_jira_with_existing(
        self, jira_data: list[dict], existing_data: list[dict]
    ) -> list[dict]:
        """Merge JIRA data with existing CSV data, updating only 'get' fields."""
        # Create a lookup for existing data by ID
        existing_lookup = {row.get("ID"): row for row in existing_data if row.get("ID")}

        merged_data = []
        for jira_row in jira_data:
            jira_id = jira_row.get("ID")
            if jira_id and jira_id in existing_lookup:
                # Update the existing row with JIRA data (only 'get' fields)
                existing_row = existing_lookup[jira_id].copy()
                for field_name, field_config in self.task_sync_definition.items():
                    if field_config[0] == "get":  # Only update 'get' fields
                        existing_row[field_name] = jira_row.get(field_name)
                merged_data.append(existing_row)
                # Remove from lookup to track processed items
                del existing_lookup[jira_id]
            else:
                # New row from JIRA
                merged_data.append(jira_row)

        # Add any remaining existing rows that weren't in the JIRA data
        merged_data.extend(existing_lookup.values())

        return merged_data
    def _merge_jira_with_existing_detailed(
        self, jira_data: list[dict], existing_data: list[dict]
    ) -> tuple[list[dict], dict]:
        """Merge JIRA data with existing CSV data and track detailed changes."""
        # Create a lookup for existing data by ID
        existing_lookup = {row.get("ID"): row for row in existing_data if row.get("ID")}

        merged_data = []
        changes = []
        updated_count = 0
        added_count = 0
        unchanged_count = 0

        for jira_row in jira_data:
            jira_id = jira_row.get("ID")
            if jira_id and jira_id in existing_lookup:
                # Update the existing row with JIRA data (only 'get' fields)
                existing_row = existing_lookup[jira_id].copy()
                row_changes = []

                for field_name, field_config in self.task_sync_definition.items():
                    if field_config[0] == "get":  # Only update 'get' fields
                        old_value = existing_row.get(field_name, "")
                        new_value = jira_row.get(field_name, "")

                        # Convert None to empty string for comparison
                        old_value = "" if old_value is None else str(old_value)
                        new_value = "" if new_value is None else str(new_value)

                        if old_value != new_value:
                            row_changes.append(
                                f"{field_name}: '{old_value}' → '{new_value}'"
                            )

                        existing_row[field_name] = jira_row.get(field_name)

                merged_data.append(existing_row)

                if row_changes:
                    updated_count += 1
                    changes.append(
                        f"Row ID {jira_id} updated: {', '.join(row_changes)}"
                    )
                else:
                    unchanged_count += 1

                # Remove from lookup to track processed items
                del existing_lookup[jira_id]
            else:
                # New row from JIRA
                merged_data.append(jira_row)
                added_count += 1
                changes.append(f"Row ID {jira_id} added as new record")

        # Add any remaining existing rows that weren't in the JIRA data
        for remaining_row in existing_lookup.values():
            merged_data.append(remaining_row)
            unchanged_count += 1

        change_details = {
            "updated": updated_count,
            "added": added_count,
            "unchanged": unchanged_count,
            "changes": changes,
        }

        return merged_data, change_details
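    # Merge semantics in a nutshell: for a row whose ID exists on both sides,
    # "get" columns are overwritten with the JIRA values while "put" columns keep
    # the spreadsheet values; rows only present in JIRA are appended as new
    # records, and spreadsheet-only rows are carried over unchanged.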
    async def _write_audit_log(self, audit_log: list[str], operation_type: str):
        """Write the audit log to SharePoint."""
        try:
            timestamp = get_utc_now().strftime("%Y%m%d_%H%M%S")
            audit_filename = f"audit_{operation_type}_{timestamp}.log"

            # Convert the audit log to bytes
            audit_content = "\n".join(audit_log).encode("utf-8")

            # Debug logging
            import logging

            logger = logging.getLogger(__name__)
            logger.debug(f"Writing audit log to folder: {self.audit_folder}, file: {audit_filename}")

            # Write to SharePoint
            await self.connector_sharepoint.upload_file(
                site_id=self.site_id,
                folder_path=self.audit_folder,
                file_name=audit_filename,
                content=audit_content,
            )
            logger.debug("Audit log written successfully")
        except Exception as e:
            # If audit logging fails, we don't want to break the main sync process;
            # just log the error (this could be enhanced with fallback logging).
            import logging
            import traceback

            logger = logging.getLogger(__name__)
            logger.warning(f"Failed to write audit log: {str(e)}")
            logger.warning(f"Audit folder: {self.audit_folder}")
            logger.warning(f"Operation type: {operation_type}")
            logger.warning(f"Traceback: {traceback.format_exc()}")
    def _create_csv_content(self, data: list[dict], existing_headers: Optional[dict] = None) -> bytes:
        """Create CSV content with the 4-row structure matching the reference code."""
        # Get the current timestamp for the header
        timestamp = get_utc_now().strftime("%Y-%m-%d %H:%M:%S UTC")

        # Use existing headers if provided, otherwise use defaults
        if existing_headers is None:
            existing_headers = {"header1": "Header 1", "header2": "Header 2"}

        if not data:
            # Build an empty table with the expected columns from the schema
            cols = list(self.task_sync_definition.keys())

            df = pd.DataFrame(columns=cols)

            # Parse existing headers to extract individual columns
            import csv as csv_module

            header1_text = existing_headers.get("header1", "Header 1")
            header2_text = existing_headers.get("header2", "Header 2")

            # Parse the existing header rows
            header1_reader = csv_module.reader([header1_text])
            header2_reader = csv_module.reader([header2_text])
            header1_row = next(header1_reader, [])
            header2_row = next(header2_reader, [])

            # Row 1: Use existing header1 or default
            if len(header1_row) >= len(cols):
                header_row1_data = header1_row[:len(cols)]
            else:
                header_row1_data = header1_row + [""] * (len(cols) - len(header1_row))
            header_row1 = pd.DataFrame([header_row1_data], columns=cols)

            # Row 2: Use existing header2 and add the timestamp to the second column
            if len(header2_row) >= len(cols):
                header_row2_data = header2_row[:len(cols)]
            else:
                header_row2_data = header2_row + [""] * (len(cols) - len(header2_row))
            if len(header_row2_data) > 1:
                header_row2_data[1] = timestamp
            header_row2 = pd.DataFrame([header_row2_data], columns=cols)

            # Row 3: table headers
            table_headers = pd.DataFrame([cols], columns=cols)

            final_df = pd.concat(
                [header_row1, header_row2, table_headers, df], ignore_index=True
            )
            csv_text = StringIO()
            final_df.to_csv(csv_text, index=False, header=False, quoting=1, escapechar="\\")
            return csv_text.getvalue().encode("utf-8")

        # Create DataFrame from data
        df = pd.DataFrame(data)

        # Force all columns to be object (string) type to preserve empty cells
        for column in df.columns:
            df[column] = df[column].astype("object")
            df[column] = df[column].fillna("")

        # Clean data: replace actual line breaks with \n and escape quotes
        for column in df.columns:
            df[column] = df[column].astype(str).str.replace("\n", "\\n", regex=False)
            df[column] = df[column].str.replace('"', '""', regex=False)

        # Create the 4-row structure
        # Parse existing headers to extract individual columns
        import csv as csv_module

        header1_text = existing_headers.get("header1", "Header 1")
        header2_text = existing_headers.get("header2", "Header 2")

        # Parse the existing header rows
        header1_reader = csv_module.reader([header1_text])
        header2_reader = csv_module.reader([header2_text])
        header1_row = next(header1_reader, [])
        header2_row = next(header2_reader, [])

        # Row 1: Use existing header1 or default
        if len(header1_row) >= len(df.columns):
            header_row1_data = header1_row[:len(df.columns)]
        else:
            header_row1_data = header1_row + [""] * (len(df.columns) - len(header1_row))
        header_row1 = pd.DataFrame([header_row1_data], columns=df.columns)

        # Row 2: Use existing header2 and add the timestamp to the second column
        if len(header2_row) >= len(df.columns):
            header_row2_data = header2_row[:len(df.columns)]
        else:
            header_row2_data = header2_row + [""] * (len(df.columns) - len(header2_row))
        if len(header_row2_data) > 1:
            header_row2_data[1] = timestamp
        header_row2 = pd.DataFrame([header_row2_data], columns=df.columns)

        # Row 3: Table headers (column names)
        table_headers = pd.DataFrame([df.columns.tolist()], columns=df.columns)

        # Concatenate all rows: header1 + header2 + table_headers + data
        final_df = pd.concat(
            [header_row1, header_row2, table_headers, df], ignore_index=True
        )

        # Convert to CSV bytes with proper quoting for fields containing special characters
        csv_text = StringIO()
        final_df.to_csv(csv_text, index=False, header=False, quoting=1, escapechar="\\")
        return csv_text.getvalue().encode("utf-8")
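    # Resulting file layout (values illustrative):
    #
    #     row 1: free-form header line preserved from the previous file
    #     row 2: second header line, with column B overwritten by the sync timestamp
    #     row 3: column names taken from task_sync_definition
    #     row 4+: one row per task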
    def _create_excel_content(self, data: list[dict], existing_headers: Optional[dict] = None) -> bytes:
        """Create Excel content with the 4-row structure matching the reference code."""
        # Get the current timestamp for the header
        timestamp = get_utc_now().strftime("%Y-%m-%d %H:%M:%S UTC")

        # Use existing headers if provided, otherwise use defaults
        if existing_headers is None:
            existing_headers = {"header1": "Header 1", "header2": "Header 2"}

        if not data:
            # Build an empty table with the expected columns from the schema
            cols = list(self.task_sync_definition.keys())

            df = pd.DataFrame(columns=cols)

            # Parse existing headers to extract individual columns
            import csv as csv_module

            header1_text = existing_headers.get("header1", "Header 1")
            header2_text = existing_headers.get("header2", "Header 2")

            # Parse the existing header rows
            header1_reader = csv_module.reader([header1_text])
            header2_reader = csv_module.reader([header2_text])
            header1_row = next(header1_reader, [])
            header2_row = next(header2_reader, [])

            # Row 1: Use existing header1 or default
            if len(header1_row) >= len(cols):
                header_row1_data = header1_row[:len(cols)]
            else:
                header_row1_data = header1_row + [""] * (len(cols) - len(header1_row))
            header_row1 = pd.DataFrame([header_row1_data], columns=cols)

            # Row 2: Use existing header2 and add the timestamp to the second column
            if len(header2_row) >= len(cols):
                header_row2_data = header2_row[:len(cols)]
            else:
                header_row2_data = header2_row + [""] * (len(cols) - len(header2_row))
            if len(header_row2_data) > 1:
                header_row2_data[1] = timestamp
            header_row2 = pd.DataFrame([header_row2_data], columns=cols)

            # Row 3: table headers
            table_headers = pd.DataFrame([cols], columns=cols)

            final_df = pd.concat(
                [header_row1, header_row2, table_headers, df], ignore_index=True
            )

            # Create the Excel file in memory
            excel_buffer = BytesIO()
            final_df.to_excel(excel_buffer, index=False, header=False, engine="openpyxl")
            return excel_buffer.getvalue()

        # Create DataFrame from data
        df = pd.DataFrame(data)

        # Force all columns to be object (string) type to preserve empty cells
        for column in df.columns:
            df[column] = df[column].astype("object")
            df[column] = df[column].fillna("")

        # Clean data: replace actual line breaks with \n and escape quotes
        for column in df.columns:
            df[column] = df[column].astype(str).str.replace("\n", "\\n", regex=False)
            df[column] = df[column].str.replace('"', '""', regex=False)

        # Create the 4-row structure
        # Parse existing headers to extract individual columns
        import csv as csv_module

        header1_text = existing_headers.get("header1", "Header 1")
        header2_text = existing_headers.get("header2", "Header 2")

        # Parse the existing header rows
        header1_reader = csv_module.reader([header1_text])
        header2_reader = csv_module.reader([header2_text])
        header1_row = next(header1_reader, [])
        header2_row = next(header2_reader, [])

        # Row 1: Use existing header1 or default
        if len(header1_row) >= len(df.columns):
            header_row1_data = header1_row[:len(df.columns)]
        else:
            header_row1_data = header1_row + [""] * (len(df.columns) - len(header1_row))
        header_row1 = pd.DataFrame([header_row1_data], columns=df.columns)

        # Row 2: Use existing header2 and add the timestamp to the second column
        if len(header2_row) >= len(df.columns):
            header_row2_data = header2_row[:len(df.columns)]
        else:
            header_row2_data = header2_row + [""] * (len(df.columns) - len(header2_row))
        if len(header_row2_data) > 1:
            header_row2_data[1] = timestamp
        header_row2 = pd.DataFrame([header_row2_data], columns=df.columns)

        # Row 3: Table headers (column names)
        table_headers = pd.DataFrame([df.columns.tolist()], columns=df.columns)

        # Concatenate all rows: header1 + header2 + table_headers + data
        final_df = pd.concat(
            [header_row1, header_row2, table_headers, df], ignore_index=True
        )

        # Create the Excel file in memory
        excel_buffer = BytesIO()
        final_df.to_excel(excel_buffer, index=False, header=False, engine="openpyxl")
        return excel_buffer.getvalue()
    def _parse_excel_content(self, excel_content: bytes) -> tuple[list[dict], dict]:
        """Parse Excel content with the 4-row structure and return the data and headers."""
        try:
            # Load the Excel file from bytes
            df = pd.read_excel(
                BytesIO(excel_content),
                engine="openpyxl",
                header=None,
            )

            # Extract the 4 parts:
            # Row 1: Static header row 1
            header_row1 = df.iloc[0:1].copy()

            # Row 2: Static header row 2
            header_row2 = df.iloc[1:2].copy()

            # Row 3: Table headers
            table_headers = df.iloc[2:3].copy()

            # Row 4+: Data rows
            df_data = df.iloc[3:].copy()
            # Set the column names from row 3
            df_data.columns = table_headers.iloc[0]
            # Reset the index to start from 0
            df_data = df_data.reset_index(drop=True)

            # Force all columns to be object (string) type and handle NaN values
            for column in df_data.columns:
                df_data[column] = df_data[column].astype("object")
                # Fill NaN values with empty strings to keep cells empty
                df_data[column] = df_data[column].fillna("")

            # Convert the DataFrame to a list of dictionaries
            data = df_data.to_dict(orient="records")

            # Extract headers as strings (like the CSV version)
            headers = {
                "header1": ",".join(
                    [str(x) if pd.notna(x) else "" for x in header_row1.iloc[0].tolist()]
                ),
                "header2": ",".join(
                    [str(x) if pd.notna(x) else "" for x in header_row2.iloc[0].tolist()]
                ),
            }

            return data, headers

        except Exception as e:
            raise Exception(f"Failed to parse Excel content: {str(e)}") from e