full delta attributes parsing

This commit is contained in:
ValueOn AG 2025-09-16 22:20:31 +02:00
parent 72c13ffe12
commit 598e137e8a
6 changed files with 26758 additions and 1519 deletions

171
delta_sync_data.txt Normal file

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -68,8 +68,8 @@ class ConnectorTicketJira(TicketBase):
headers = {"Content-Type": "application/json"}
payload = {
"jql": jql_query,
"maxResults": 1,
"fields": ["summary", "status", "assignee", "created", "updated", "priority", "issuetype", "project", "customfield_10168", "customfield_10067", "customfield_10065"]
"maxResults": 1
# Don't specify fields to get all available fields
}
async with session.post(url, json=payload, auth=auth, headers=headers) as response:
if response.status != 200:
@ -187,7 +187,7 @@ class ConnectorTicketJira(TicketBase):
payload = {
"jql": jql_query,
"maxResults": max_results,
"fields": ["summary", "status", "assignee", "created", "updated", "priority", "issuetype", "project", "customfield_10168", "customfield_10067", "customfield_10065"]
"fields": ["*all"] # Get all fields
}
if next_page_token:
# For subsequent pages, include BOTH jql and nextPageToken

View file

@ -1,6 +1,7 @@
from dataclasses import dataclass
from io import BytesIO, StringIO
from typing import Any
from typing import Any, Optional
from datetime import datetime, timezone
import pandas as pd
import openpyxl
from modules.shared.timezoneUtils import get_utc_now
@ -345,6 +346,14 @@ class TicketSharepointSyncInterface:
# For "put" fields, the path is like ['fields', 'customfield_10067']
if len(field_path) >= 2 and field_path[0] == "fields":
jira_field_id = field_path[1]
# Parse date fields back to JIRA format
if self._is_date_field(field_name) and new_value:
parsed_date = self._parse_date_from_excel(str(new_value))
if parsed_date:
fields[jira_field_id] = parsed_date
else:
fields[jira_field_id] = new_value
else:
fields[jira_field_id] = new_value
if fields:
@ -637,6 +646,14 @@ class TicketSharepointSyncInterface:
# For "put" fields, the path is like ['fields', 'customfield_10067']
if len(field_path) >= 2 and field_path[0] == "fields":
jira_field_id = field_path[1]
# Parse date fields back to JIRA format
if self._is_date_field(field_name) and new_value:
parsed_date = self._parse_date_from_excel(str(new_value))
if parsed_date:
fields[jira_field_id] = parsed_date
else:
fields[jira_field_id] = new_value
else:
fields[jira_field_id] = new_value
if fields:
@ -698,7 +715,7 @@ class TicketSharepointSyncInterface:
# Get the right fields
if direction == "get" or include_put:
# Extract value using the field path
value = self._extract_field_value(task.data, field_path)
value = self._extract_field_value(task.data, field_path, field_name)
transformed_data[field_name] = value
# Create new Task with transformed data
@ -707,7 +724,7 @@ class TicketSharepointSyncInterface:
return transformed_tasks
def _extract_field_value(self, issue_data: dict, field_path: list[str]) -> Any:
def _extract_field_value(self, issue_data: dict, field_path: list[str], field_name: str = None) -> Any:
"""Extract field value from JIRA issue data using field path."""
value = issue_data
try:
@ -730,10 +747,280 @@ class TicketSharepointSyncInterface:
):
value = value[0]["value"]
# Apply ADF conversion for specific fields that contain ADF content
if isinstance(value, dict) and value.get("type") == "doc":
value = self._convert_adf_to_text(value)
# Apply date formatting for date fields
if field_name and self._is_date_field(field_name):
value = self._format_date_for_excel(value)
return value
except (KeyError, TypeError):
return None
def _convert_adf_to_text(self, adf_data):
"""Convert Atlassian Document Format (ADF) to plain text.
Based on Atlassian Document Format specification for JIRA fields.
Handles paragraphs, lists, text formatting, and other ADF node types.
Args:
adf_data: ADF object or None
Returns:
str: Plain text content, or empty string if None/invalid
"""
if not adf_data or not isinstance(adf_data, dict):
return ""
if adf_data.get("type") != "doc":
return str(adf_data) if adf_data else ""
content = adf_data.get("content", [])
if not isinstance(content, list):
return ""
def extract_text_from_content(content_list, list_level=0):
"""Recursively extract text from ADF content with proper formatting."""
text_parts = []
list_counter = 1
for item in content_list:
if not isinstance(item, dict):
continue
item_type = item.get("type", "")
if item_type == "text":
# Extract text content, preserving formatting
text = item.get("text", "")
marks = item.get("marks", [])
# Handle text formatting (bold, italic, etc.)
if marks:
for mark in marks:
if mark.get("type") == "strong":
text = f"**{text}**"
elif mark.get("type") == "em":
text = f"*{text}*"
elif mark.get("type") == "code":
text = f"`{text}`"
elif mark.get("type") == "link":
attrs = mark.get("attrs", {})
href = attrs.get("href", "")
if href:
text = f"[{text}]({href})"
text_parts.append(text)
elif item_type == "hardBreak":
text_parts.append("\n")
elif item_type == "paragraph":
paragraph_content = item.get("content", [])
if paragraph_content:
paragraph_text = extract_text_from_content(paragraph_content, list_level)
if paragraph_text.strip():
text_parts.append(paragraph_text)
elif item_type == "bulletList":
list_content = item.get("content", [])
for list_item in list_content:
if list_item.get("type") == "listItem":
list_item_content = list_item.get("content", [])
for list_paragraph in list_item_content:
if list_paragraph.get("type") == "paragraph":
list_paragraph_content = list_paragraph.get("content", [])
if list_paragraph_content:
indent = " " * list_level
bullet_text = extract_text_from_content(list_paragraph_content, list_level + 1)
if bullet_text.strip():
text_parts.append(f"{indent}{bullet_text}")
elif item_type == "orderedList":
list_content = item.get("content", [])
for list_item in list_content:
if list_item.get("type") == "listItem":
list_item_content = list_item.get("content", [])
for list_paragraph in list_item_content:
if list_paragraph.get("type") == "paragraph":
list_paragraph_content = list_paragraph.get("content", [])
if list_paragraph_content:
indent = " " * list_level
ordered_text = extract_text_from_content(list_paragraph_content, list_level + 1)
if ordered_text.strip():
text_parts.append(f"{indent}{list_counter}. {ordered_text}")
list_counter += 1
elif item_type == "listItem":
# Handle nested list items
list_item_content = item.get("content", [])
if list_item_content:
text_parts.append(extract_text_from_content(list_item_content, list_level))
elif item_type == "embedCard":
# Handle embedded content (videos, etc.)
attrs = item.get("attrs", {})
url = attrs.get("url", "")
if url:
text_parts.append(f"[Embedded Content: {url}]")
elif item_type == "codeBlock":
# Handle code blocks
code_content = item.get("content", [])
if code_content:
code_text = extract_text_from_content(code_content, list_level)
if code_text.strip():
text_parts.append(f"```\n{code_text}\n```")
elif item_type == "blockquote":
# Handle blockquotes
quote_content = item.get("content", [])
if quote_content:
quote_text = extract_text_from_content(quote_content, list_level)
if quote_text.strip():
text_parts.append(f"> {quote_text}")
elif item_type == "heading":
# Handle headings
heading_content = item.get("content", [])
if heading_content:
heading_text = extract_text_from_content(heading_content, list_level)
if heading_text.strip():
level = item.get("attrs", {}).get("level", 1)
text_parts.append(f"{'#' * level} {heading_text}")
elif item_type == "rule":
# Handle horizontal rules
text_parts.append("---")
else:
# Handle unknown types by trying to extract content
if "content" in item:
content_text = extract_text_from_content(item.get("content", []), list_level)
if content_text.strip():
text_parts.append(content_text)
return "\n".join(text_parts)
result = extract_text_from_content(content)
return result.strip()
def _format_date_for_excel(self, date_value: Any) -> Optional[str]:
"""Format date value for Excel export.
Handles various date formats from JIRA and converts them to a consistent format
suitable for Excel display.
Args:
date_value: Date value from JIRA (string, datetime, or None)
Returns:
Formatted date string or None if invalid/empty
"""
if not date_value:
return None
try:
# Handle ISO 8601 strings (JIRA format: 2025-09-16T12:33:10.044+0200)
if isinstance(date_value, str):
# Parse ISO format with timezone
if 'T' in date_value and ('+' in date_value or 'Z' in date_value):
dt = datetime.fromisoformat(date_value.replace('Z', '+00:00'))
# Convert to UTC for consistency
if dt.tzinfo:
dt = dt.astimezone(timezone.utc)
return dt.strftime('%Y-%m-%d %H:%M:%S UTC')
# Handle simple date strings
elif len(date_value) == 10 and date_value.count('-') == 2:
dt = datetime.strptime(date_value, '%Y-%m-%d')
return dt.strftime('%Y-%m-%d')
else:
# Try to parse as datetime
dt = datetime.fromisoformat(date_value)
return dt.strftime('%Y-%m-%d %H:%M:%S')
# Handle datetime objects
elif isinstance(date_value, datetime):
if date_value.tzinfo:
dt = date_value.astimezone(timezone.utc)
else:
dt = date_value
return dt.strftime('%Y-%m-%d %H:%M:%S UTC')
return str(date_value)
except (ValueError, TypeError) as e:
# Log error but don't fail the sync
return str(date_value) if date_value else None
def _parse_date_from_excel(self, date_string: str) -> Optional[str]:
"""Parse date string from Excel and convert to JIRA format.
Converts Excel date strings back to JIRA-compatible ISO format.
Args:
date_string: Date string from Excel
Returns:
ISO formatted date string for JIRA or None if invalid
"""
if not date_string or not isinstance(date_string, str):
return None
try:
# Handle various Excel date formats
date_string = date_string.strip()
# Try common Excel date formats
formats_to_try = [
'%Y-%m-%d %H:%M:%S UTC', # Our export format
'%Y-%m-%d %H:%M:%S', # Standard format
'%Y-%m-%d', # Date only
'%d.%m.%Y', # German format
'%m/%d/%Y', # US format
'%d/%m/%Y', # European format
]
for fmt in formats_to_try:
try:
dt = datetime.strptime(date_string, fmt)
# Convert to UTC and format as ISO
if dt.tzinfo is None:
dt = dt.replace(tzinfo=timezone.utc)
return dt.isoformat()
except ValueError:
continue
# If no format matches, try pandas parsing
try:
dt = pd.to_datetime(date_string)
if hasattr(dt, 'to_pydatetime'):
dt = dt.to_pydatetime()
if dt.tzinfo is None:
dt = dt.replace(tzinfo=timezone.utc)
return dt.isoformat()
except:
pass
return None
except Exception:
return None
def _is_date_field(self, field_name: str) -> bool:
"""Check if a field is a date field based on its name.
Args:
field_name: Name of the field
Returns:
True if field is likely a date field
"""
date_keywords = ['date', 'time', 'created', 'updated', 'due', 'deadline']
return any(keyword in field_name.lower() for keyword in date_keywords)
def _filter_empty_records(self, records: list[dict]) -> list[dict]:
"""Remove records that are missing an ID.

View file

@ -25,6 +25,155 @@ logger = logging.getLogger(__name__)
APP_ENV_TYPE = APP_CONFIG.get("APP_ENV_TYPE", "dev")
def convert_adf_to_text(adf_data):
    """Convert Atlassian Document Format (ADF) to plain text.

    Based on the Atlassian Document Format specification for JIRA fields.
    Handles paragraphs, bullet/ordered lists, text marks (bold, italic,
    code, links), headings, blockquotes, code blocks, embeds and rules.

    Fixes over the previous version:
    - Malformed nested list entries that are not dicts are skipped instead
      of raising AttributeError (``.get`` on a non-dict).
    - Each ordered list numbers its items from 1; sibling ordered lists no
      longer continue the previous list's counter.

    Args:
        adf_data: ADF document dict, or None.

    Returns:
        str: Plain-text content, or empty string if None/invalid.
    """
    if not adf_data or not isinstance(adf_data, dict):
        return ""
    if adf_data.get("type") != "doc":
        # Not an ADF document at all; fall back to the raw representation.
        return str(adf_data) if adf_data else ""
    content = adf_data.get("content", [])
    if not isinstance(content, list):
        return ""

    def apply_marks(text, marks):
        """Apply ADF text marks (bold/italic/code/link) as Markdown."""
        for mark in marks:
            mark_type = mark.get("type")
            if mark_type == "strong":
                text = f"**{text}**"
            elif mark_type == "em":
                text = f"*{text}*"
            elif mark_type == "code":
                text = f"`{text}`"
            elif mark_type == "link":
                href = mark.get("attrs", {}).get("href", "")
                if href:
                    text = f"[{text}]({href})"
        return text

    def render_list(list_node, list_level, prefix_fn):
        """Render bulletList/orderedList items.

        prefix_fn(n) builds the per-item prefix ("" for bullets,
        "n. " for ordered items).
        """
        parts = []
        counter = 1
        for list_item in list_node.get("content", []):
            # Guard: skip malformed (non-dict) entries instead of crashing.
            if not isinstance(list_item, dict) or list_item.get("type") != "listItem":
                continue
            for list_paragraph in list_item.get("content", []):
                if not isinstance(list_paragraph, dict):
                    continue
                if list_paragraph.get("type") != "paragraph":
                    continue
                paragraph_content = list_paragraph.get("content", [])
                if not paragraph_content:
                    continue
                indent = " " * list_level
                item_text = extract_text_from_content(paragraph_content, list_level + 1)
                if item_text.strip():
                    parts.append(f"{indent}{prefix_fn(counter)}{item_text}")
                counter += 1
        return parts

    def extract_text_from_content(content_list, list_level=0):
        """Recursively extract text from ADF content with formatting."""
        text_parts = []
        for item in content_list:
            if not isinstance(item, dict):
                continue
            item_type = item.get("type", "")
            if item_type == "text":
                text_parts.append(apply_marks(item.get("text", ""), item.get("marks", [])))
            elif item_type == "hardBreak":
                text_parts.append("\n")
            elif item_type == "paragraph":
                paragraph_text = extract_text_from_content(item.get("content", []), list_level)
                if paragraph_text.strip():
                    text_parts.append(paragraph_text)
            elif item_type == "bulletList":
                text_parts.extend(render_list(item, list_level, lambda n: ""))
            elif item_type == "orderedList":
                text_parts.extend(render_list(item, list_level, lambda n: f"{n}. "))
            elif item_type == "listItem":
                # Nested list item appearing outside a list wrapper.
                inner = item.get("content", [])
                if inner:
                    text_parts.append(extract_text_from_content(inner, list_level))
            elif item_type == "embedCard":
                # Embedded content (videos, cards, etc.).
                url = item.get("attrs", {}).get("url", "")
                if url:
                    text_parts.append(f"[Embedded Content: {url}]")
            elif item_type == "codeBlock":
                code_text = extract_text_from_content(item.get("content", []), list_level)
                if code_text.strip():
                    text_parts.append(f"```\n{code_text}\n```")
            elif item_type == "blockquote":
                quote_text = extract_text_from_content(item.get("content", []), list_level)
                if quote_text.strip():
                    text_parts.append(f"> {quote_text}")
            elif item_type == "heading":
                heading_text = extract_text_from_content(item.get("content", []), list_level)
                if heading_text.strip():
                    level = item.get("attrs", {}).get("level", 1)
                    text_parts.append(f"{'#' * level} {heading_text}")
            elif item_type == "rule":
                # Horizontal rule.
                text_parts.append("---")
            elif "content" in item:
                # Unknown node type: extract whatever text it contains.
                content_text = extract_text_from_content(item.get("content", []), list_level)
                if content_text.strip():
                    text_parts.append(content_text)
        return "\n".join(text_parts)

    return extract_text_from_content(content).strip()
class ManagerSyncDelta:
"""Manages JIRA to SharePoint synchronization for Delta Group.
@ -64,14 +213,14 @@ class ManagerSyncDelta:
'ID': ['get', ['key']],
'Module Category': ['get', ['fields', 'customfield_10058', 'value']],
'Summary': ['get', ['fields', 'summary']],
'Description': ['get', ['fields', 'description']],
'References': ['get', ['fields', 'customfield_10066']],
'Description': ['get', ['fields', 'description']], # ADF format - needs conversion to text
'References': ['get', ['fields', 'customfield_10066']], # Field exists, may be None
'Priority': ['get', ['fields', 'priority', 'name']],
'Issue Status': ['get', ['fields', 'status']],
'Issue Status': ['get', ['fields', 'status', 'name']],
'Assignee': ['get', ['fields', 'assignee', 'displayName']],
'Issue Created': ['get', ['fields', 'created']],
'Due Date': ['get', ['fields', 'duedate']],
'DELTA Comments': ['get', ['fields', 'customfield_10167']],
'Due Date': ['get', ['fields', 'duedate']], # Field exists, may be None
'DELTA Comments': ['get', ['fields', 'customfield_10167']], # Field exists, may be None
'SELISE Ticket References': ['put', ['fields', 'customfield_10067']],
'SELISE Status Values': ['put', ['fields', 'customfield_10065']],
'SELISE Comments': ['put', ['fields', 'customfield_10168']],
@ -209,10 +358,16 @@ class ManagerSyncDelta:
return False
# Dump current Jira fields to text file for reference
# try:
# await dump_jira_fields_to_file()
# except Exception as e:
# logger.warning(f"Failed to dump JIRA fields (non-blocking): {str(e)}")
try:
pass # await dump_jira_fields_to_file()
except Exception as e:
logger.warning(f"Failed to dump JIRA fields (non-blocking): {str(e)}")
# Dump actual JIRA data for debugging
try:
pass # await dump_jira_data_to_file()
except Exception as e:
logger.warning(f"Failed to dump JIRA data (non-blocking): {str(e)}")
# Get the appropriate sync file name based on mode
sync_file_name = self.get_sync_file_name()
@ -296,6 +451,77 @@ async def dump_jira_fields_to_file(filepath: str = "delta_sync_fields.txt") -> b
logger.error(f"Failed to dump JIRA fields: {str(e)}")
return False
# Utility: dump actual JIRA data for debugging
async def dump_jira_data_to_file(filepath: str = "delta_sync_data.txt") -> bool:
    """Write actual JIRA ticket data to a text file for debugging field mapping.

    Fetches a handful of sample tickets via the Delta-Group JIRA connector and,
    for every 'get' entry of ManagerSyncDelta.TASK_SYNC_DEFINITION, writes both
    the raw ticket payload and the value found at that field path, so mapping
    mistakes are visible at a glance.

    Args:
        filepath: Target text file path to write.

    Returns:
        True on success, False otherwise (no tickets, or any exception).
    """
    try:
        # Initialize Jira connector with the hardcoded credentials/constants
        jira = await ConnectorTicketJira.create(
            jira_username=ManagerSyncDelta.JIRA_USERNAME,
            jira_api_token=ManagerSyncDelta.JIRA_API_TOKEN,
            jira_url=ManagerSyncDelta.JIRA_URL,
            project_code=ManagerSyncDelta.JIRA_PROJECT_CODE,
            issue_type=ManagerSyncDelta.JIRA_ISSUE_TYPE,
        )
        # Get a few sample tickets to see the actual data structure
        tickets = await jira.read_tasks(limit=5)
        if not tickets:
            logger.warning("No JIRA tickets returned; nothing to write.")
            return False
        # Ensure directory exists if a directory part is provided
        dir_name = os.path.dirname(filepath)
        if dir_name:
            os.makedirs(dir_name, exist_ok=True)
        # Write the actual ticket data
        with open(filepath, "w", encoding="utf-8") as f:
            f.write("=== JIRA TICKET DATA DEBUG ===\n\n")
            for i, ticket in enumerate(tickets):
                f.write(f"--- TICKET {i+1} ---\n")
                f.write(f"Raw ticket data:\n")
                f.write(f"{ticket.data}\n\n")
                # Also show the specific fields we're trying to map
                f.write("Field mapping analysis:\n")
                for field_name, field_path in ManagerSyncDelta.TASK_SYNC_DEFINITION.items():
                    if field_path[0] == 'get':  # Only analyze 'get' fields
                        try:
                            # Navigate through the field path; stop at the
                            # first missing key and record which one failed.
                            value = ticket.data
                            for key in field_path[1]:
                                if isinstance(value, dict) and key in value:
                                    value = value[key]
                                else:
                                    value = f"KEY_NOT_FOUND: {key}"
                                    break
                            # Convert ADF fields to text; these four columns
                            # carry rich-text (ADF "doc") payloads in JIRA.
                            if field_name in ['Description', 'References', 'DELTA Comments', 'SELISE Comments']:
                                if isinstance(value, dict) and value.get("type") == "doc":
                                    value = convert_adf_to_text(value)
                                elif value is None:
                                    value = ""
                            f.write(f" {field_name}: {value}\n")
                        except Exception as e:
                            # Per-field failures are recorded inline so the
                            # rest of the dump still completes.
                            f.write(f" {field_name}: ERROR - {str(e)}\n")
                f.write("\n" + "="*50 + "\n\n")
        logger.info(f"Wrote JIRA data for {len(tickets)} tickets to {filepath}")
        return True
    except Exception as e:
        # Top-level boundary: debugging dump is best-effort, never fatal.
        logger.error(f"Failed to dump JIRA data: {str(e)}")
        return False
# Global sync function for use in app.py
async def perform_sync_jira_delta_group() -> bool:
"""Perform JIRA to SharePoint synchronization for Delta Group.