full delta attributes parsing

This commit is contained in:
ValueOn AG 2025-09-16 22:20:31 +02:00
parent 72c13ffe12
commit 598e137e8a
6 changed files with 26758 additions and 1519 deletions

171
delta_sync_data.txt Normal file

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -68,8 +68,8 @@ class ConnectorTicketJira(TicketBase):
headers = {"Content-Type": "application/json"}
payload = {
"jql": jql_query,
"maxResults": 1,
"fields": ["summary", "status", "assignee", "created", "updated", "priority", "issuetype", "project", "customfield_10168", "customfield_10067", "customfield_10065"]
"maxResults": 1
# Don't specify fields to get all available fields
}
async with session.post(url, json=payload, auth=auth, headers=headers) as response:
if response.status != 200:
@ -187,7 +187,7 @@ class ConnectorTicketJira(TicketBase):
payload = {
"jql": jql_query,
"maxResults": max_results,
"fields": ["summary", "status", "assignee", "created", "updated", "priority", "issuetype", "project", "customfield_10168", "customfield_10067", "customfield_10065"]
"fields": ["*all"] # Get all fields
}
if next_page_token:
# For subsequent pages, include BOTH jql and nextPageToken

View file

@ -1,6 +1,7 @@
from dataclasses import dataclass
from io import BytesIO, StringIO
from typing import Any
from typing import Any, Optional
from datetime import datetime, timezone
import pandas as pd
import openpyxl
from modules.shared.timezoneUtils import get_utc_now
@ -345,6 +346,14 @@ class TicketSharepointSyncInterface:
# For "put" fields, the path is like ['fields', 'customfield_10067']
if len(field_path) >= 2 and field_path[0] == "fields":
jira_field_id = field_path[1]
# Parse date fields back to JIRA format
if self._is_date_field(field_name) and new_value:
parsed_date = self._parse_date_from_excel(str(new_value))
if parsed_date:
fields[jira_field_id] = parsed_date
else:
fields[jira_field_id] = new_value
else:
fields[jira_field_id] = new_value
if fields:
@ -637,6 +646,14 @@ class TicketSharepointSyncInterface:
# For "put" fields, the path is like ['fields', 'customfield_10067']
if len(field_path) >= 2 and field_path[0] == "fields":
jira_field_id = field_path[1]
# Parse date fields back to JIRA format
if self._is_date_field(field_name) and new_value:
parsed_date = self._parse_date_from_excel(str(new_value))
if parsed_date:
fields[jira_field_id] = parsed_date
else:
fields[jira_field_id] = new_value
else:
fields[jira_field_id] = new_value
if fields:
@ -698,7 +715,7 @@ class TicketSharepointSyncInterface:
# Get the right fields
if direction == "get" or include_put:
# Extract value using the field path
value = self._extract_field_value(task.data, field_path)
value = self._extract_field_value(task.data, field_path, field_name)
transformed_data[field_name] = value
# Create new Task with transformed data
@ -707,7 +724,7 @@ class TicketSharepointSyncInterface:
return transformed_tasks
def _extract_field_value(self, issue_data: dict, field_path: list[str]) -> Any:
def _extract_field_value(self, issue_data: dict, field_path: list[str], field_name: str = None) -> Any:
"""Extract field value from JIRA issue data using field path."""
value = issue_data
try:
@ -730,10 +747,280 @@ class TicketSharepointSyncInterface:
):
value = value[0]["value"]
# Apply ADF conversion for specific fields that contain ADF content
if isinstance(value, dict) and value.get("type") == "doc":
value = self._convert_adf_to_text(value)
# Apply date formatting for date fields
if field_name and self._is_date_field(field_name):
value = self._format_date_for_excel(value)
return value
except (KeyError, TypeError):
return None
def _convert_adf_to_text(self, adf_data):
"""Convert Atlassian Document Format (ADF) to plain text.
Based on Atlassian Document Format specification for JIRA fields.
Handles paragraphs, lists, text formatting, and other ADF node types.
Args:
adf_data: ADF object or None
Returns:
str: Plain text content, or empty string if None/invalid
"""
if not adf_data or not isinstance(adf_data, dict):
return ""
if adf_data.get("type") != "doc":
return str(adf_data) if adf_data else ""
content = adf_data.get("content", [])
if not isinstance(content, list):
return ""
def extract_text_from_content(content_list, list_level=0):
"""Recursively extract text from ADF content with proper formatting."""
text_parts = []
list_counter = 1
for item in content_list:
if not isinstance(item, dict):
continue
item_type = item.get("type", "")
if item_type == "text":
# Extract text content, preserving formatting
text = item.get("text", "")
marks = item.get("marks", [])
# Handle text formatting (bold, italic, etc.)
if marks:
for mark in marks:
if mark.get("type") == "strong":
text = f"**{text}**"
elif mark.get("type") == "em":
text = f"*{text}*"
elif mark.get("type") == "code":
text = f"`{text}`"
elif mark.get("type") == "link":
attrs = mark.get("attrs", {})
href = attrs.get("href", "")
if href:
text = f"[{text}]({href})"
text_parts.append(text)
elif item_type == "hardBreak":
text_parts.append("\n")
elif item_type == "paragraph":
paragraph_content = item.get("content", [])
if paragraph_content:
paragraph_text = extract_text_from_content(paragraph_content, list_level)
if paragraph_text.strip():
text_parts.append(paragraph_text)
elif item_type == "bulletList":
list_content = item.get("content", [])
for list_item in list_content:
if list_item.get("type") == "listItem":
list_item_content = list_item.get("content", [])
for list_paragraph in list_item_content:
if list_paragraph.get("type") == "paragraph":
list_paragraph_content = list_paragraph.get("content", [])
if list_paragraph_content:
indent = " " * list_level
bullet_text = extract_text_from_content(list_paragraph_content, list_level + 1)
if bullet_text.strip():
text_parts.append(f"{indent}{bullet_text}")
elif item_type == "orderedList":
list_content = item.get("content", [])
for list_item in list_content:
if list_item.get("type") == "listItem":
list_item_content = list_item.get("content", [])
for list_paragraph in list_item_content:
if list_paragraph.get("type") == "paragraph":
list_paragraph_content = list_paragraph.get("content", [])
if list_paragraph_content:
indent = " " * list_level
ordered_text = extract_text_from_content(list_paragraph_content, list_level + 1)
if ordered_text.strip():
text_parts.append(f"{indent}{list_counter}. {ordered_text}")
list_counter += 1
elif item_type == "listItem":
# Handle nested list items
list_item_content = item.get("content", [])
if list_item_content:
text_parts.append(extract_text_from_content(list_item_content, list_level))
elif item_type == "embedCard":
# Handle embedded content (videos, etc.)
attrs = item.get("attrs", {})
url = attrs.get("url", "")
if url:
text_parts.append(f"[Embedded Content: {url}]")
elif item_type == "codeBlock":
# Handle code blocks
code_content = item.get("content", [])
if code_content:
code_text = extract_text_from_content(code_content, list_level)
if code_text.strip():
text_parts.append(f"```\n{code_text}\n```")
elif item_type == "blockquote":
# Handle blockquotes
quote_content = item.get("content", [])
if quote_content:
quote_text = extract_text_from_content(quote_content, list_level)
if quote_text.strip():
text_parts.append(f"> {quote_text}")
elif item_type == "heading":
# Handle headings
heading_content = item.get("content", [])
if heading_content:
heading_text = extract_text_from_content(heading_content, list_level)
if heading_text.strip():
level = item.get("attrs", {}).get("level", 1)
text_parts.append(f"{'#' * level} {heading_text}")
elif item_type == "rule":
# Handle horizontal rules
text_parts.append("---")
else:
# Handle unknown types by trying to extract content
if "content" in item:
content_text = extract_text_from_content(item.get("content", []), list_level)
if content_text.strip():
text_parts.append(content_text)
return "\n".join(text_parts)
result = extract_text_from_content(content)
return result.strip()
def _format_date_for_excel(self, date_value: Any) -> Optional[str]:
"""Format date value for Excel export.
Handles various date formats from JIRA and converts them to a consistent format
suitable for Excel display.
Args:
date_value: Date value from JIRA (string, datetime, or None)
Returns:
Formatted date string or None if invalid/empty
"""
if not date_value:
return None
try:
# Handle ISO 8601 strings (JIRA format: 2025-09-16T12:33:10.044+0200)
if isinstance(date_value, str):
# Parse ISO format with timezone
if 'T' in date_value and ('+' in date_value or 'Z' in date_value):
dt = datetime.fromisoformat(date_value.replace('Z', '+00:00'))
# Convert to UTC for consistency
if dt.tzinfo:
dt = dt.astimezone(timezone.utc)
return dt.strftime('%Y-%m-%d %H:%M:%S UTC')
# Handle simple date strings
elif len(date_value) == 10 and date_value.count('-') == 2:
dt = datetime.strptime(date_value, '%Y-%m-%d')
return dt.strftime('%Y-%m-%d')
else:
# Try to parse as datetime
dt = datetime.fromisoformat(date_value)
return dt.strftime('%Y-%m-%d %H:%M:%S')
# Handle datetime objects
elif isinstance(date_value, datetime):
if date_value.tzinfo:
dt = date_value.astimezone(timezone.utc)
else:
dt = date_value
return dt.strftime('%Y-%m-%d %H:%M:%S UTC')
return str(date_value)
except (ValueError, TypeError) as e:
# Log error but don't fail the sync
return str(date_value) if date_value else None
def _parse_date_from_excel(self, date_string: str) -> Optional[str]:
"""Parse date string from Excel and convert to JIRA format.
Converts Excel date strings back to JIRA-compatible ISO format.
Args:
date_string: Date string from Excel
Returns:
ISO formatted date string for JIRA or None if invalid
"""
if not date_string or not isinstance(date_string, str):
return None
try:
# Handle various Excel date formats
date_string = date_string.strip()
# Try common Excel date formats
formats_to_try = [
'%Y-%m-%d %H:%M:%S UTC', # Our export format
'%Y-%m-%d %H:%M:%S', # Standard format
'%Y-%m-%d', # Date only
'%d.%m.%Y', # German format
'%m/%d/%Y', # US format
'%d/%m/%Y', # European format
]
for fmt in formats_to_try:
try:
dt = datetime.strptime(date_string, fmt)
# Convert to UTC and format as ISO
if dt.tzinfo is None:
dt = dt.replace(tzinfo=timezone.utc)
return dt.isoformat()
except ValueError:
continue
# If no format matches, try pandas parsing
try:
dt = pd.to_datetime(date_string)
if hasattr(dt, 'to_pydatetime'):
dt = dt.to_pydatetime()
if dt.tzinfo is None:
dt = dt.replace(tzinfo=timezone.utc)
return dt.isoformat()
except:
pass
return None
except Exception:
return None
def _is_date_field(self, field_name: str) -> bool:
"""Check if a field is a date field based on its name.
Args:
field_name: Name of the field
Returns:
True if field is likely a date field
"""
date_keywords = ['date', 'time', 'created', 'updated', 'due', 'deadline']
return any(keyword in field_name.lower() for keyword in date_keywords)
def _filter_empty_records(self, records: list[dict]) -> list[dict]:
"""Remove records that are missing an ID.

View file

@ -25,6 +25,155 @@ logger = logging.getLogger(__name__)
APP_ENV_TYPE = APP_CONFIG.get("APP_ENV_TYPE", "dev")
def convert_adf_to_text(adf_data):
    """Convert Atlassian Document Format (ADF) to plain text.

    Based on the Atlassian Document Format specification for JIRA fields.
    Handles paragraphs, bullet/ordered lists, text marks (bold, italic,
    code, links), headings, blockquotes, code blocks, embeds and rules.

    Fixes over the previous version:
    - Malformed nested list entries that are not dicts are skipped instead
      of raising AttributeError (``.get`` on a non-dict).
    - Each ordered list numbers its items from 1; sibling ordered lists no
      longer continue the previous list's counter.

    Args:
        adf_data: ADF document dict, or None.

    Returns:
        str: Plain-text content, or empty string if None/invalid.
    """
    if not adf_data or not isinstance(adf_data, dict):
        return ""
    if adf_data.get("type") != "doc":
        # Not an ADF document at all; fall back to the raw representation.
        return str(adf_data) if adf_data else ""
    content = adf_data.get("content", [])
    if not isinstance(content, list):
        return ""

    def apply_marks(text, marks):
        """Apply ADF text marks (bold/italic/code/link) as Markdown."""
        for mark in marks:
            mark_type = mark.get("type")
            if mark_type == "strong":
                text = f"**{text}**"
            elif mark_type == "em":
                text = f"*{text}*"
            elif mark_type == "code":
                text = f"`{text}`"
            elif mark_type == "link":
                href = mark.get("attrs", {}).get("href", "")
                if href:
                    text = f"[{text}]({href})"
        return text

    def render_list(list_node, list_level, prefix_fn):
        """Render bulletList/orderedList items.

        prefix_fn(n) builds the per-item prefix ("" for bullets,
        "n. " for ordered items).
        """
        parts = []
        counter = 1
        for list_item in list_node.get("content", []):
            # Guard: skip malformed (non-dict) entries instead of crashing.
            if not isinstance(list_item, dict) or list_item.get("type") != "listItem":
                continue
            for list_paragraph in list_item.get("content", []):
                if not isinstance(list_paragraph, dict):
                    continue
                if list_paragraph.get("type") != "paragraph":
                    continue
                paragraph_content = list_paragraph.get("content", [])
                if not paragraph_content:
                    continue
                indent = " " * list_level
                item_text = extract_text_from_content(paragraph_content, list_level + 1)
                if item_text.strip():
                    parts.append(f"{indent}{prefix_fn(counter)}{item_text}")
                counter += 1
        return parts

    def extract_text_from_content(content_list, list_level=0):
        """Recursively extract text from ADF content with formatting."""
        text_parts = []
        for item in content_list:
            if not isinstance(item, dict):
                continue
            item_type = item.get("type", "")
            if item_type == "text":
                text_parts.append(apply_marks(item.get("text", ""), item.get("marks", [])))
            elif item_type == "hardBreak":
                text_parts.append("\n")
            elif item_type == "paragraph":
                paragraph_text = extract_text_from_content(item.get("content", []), list_level)
                if paragraph_text.strip():
                    text_parts.append(paragraph_text)
            elif item_type == "bulletList":
                text_parts.extend(render_list(item, list_level, lambda n: ""))
            elif item_type == "orderedList":
                text_parts.extend(render_list(item, list_level, lambda n: f"{n}. "))
            elif item_type == "listItem":
                # Nested list item appearing outside a list wrapper.
                inner = item.get("content", [])
                if inner:
                    text_parts.append(extract_text_from_content(inner, list_level))
            elif item_type == "embedCard":
                # Embedded content (videos, cards, etc.).
                url = item.get("attrs", {}).get("url", "")
                if url:
                    text_parts.append(f"[Embedded Content: {url}]")
            elif item_type == "codeBlock":
                code_text = extract_text_from_content(item.get("content", []), list_level)
                if code_text.strip():
                    text_parts.append(f"```\n{code_text}\n```")
            elif item_type == "blockquote":
                quote_text = extract_text_from_content(item.get("content", []), list_level)
                if quote_text.strip():
                    text_parts.append(f"> {quote_text}")
            elif item_type == "heading":
                heading_text = extract_text_from_content(item.get("content", []), list_level)
                if heading_text.strip():
                    level = item.get("attrs", {}).get("level", 1)
                    text_parts.append(f"{'#' * level} {heading_text}")
            elif item_type == "rule":
                # Horizontal rule.
                text_parts.append("---")
            elif "content" in item:
                # Unknown node type: extract whatever text it contains.
                content_text = extract_text_from_content(item.get("content", []), list_level)
                if content_text.strip():
                    text_parts.append(content_text)
        return "\n".join(text_parts)

    return extract_text_from_content(content).strip()
class ManagerSyncDelta:
"""Manages JIRA to SharePoint synchronization for Delta Group.
@ -64,14 +213,14 @@ class ManagerSyncDelta:
'ID': ['get', ['key']],
'Module Category': ['get', ['fields', 'customfield_10058', 'value']],
'Summary': ['get', ['fields', 'summary']],
'Description': ['get', ['fields', 'description']],
'References': ['get', ['fields', 'customfield_10066']],
'Description': ['get', ['fields', 'description']], # ADF format - needs conversion to text
'References': ['get', ['fields', 'customfield_10066']], # Field exists, may be None
'Priority': ['get', ['fields', 'priority', 'name']],
'Issue Status': ['get', ['fields', 'status']],
'Issue Status': ['get', ['fields', 'status', 'name']],
'Assignee': ['get', ['fields', 'assignee', 'displayName']],
'Issue Created': ['get', ['fields', 'created']],
'Due Date': ['get', ['fields', 'duedate']],
'DELTA Comments': ['get', ['fields', 'customfield_10167']],
'Due Date': ['get', ['fields', 'duedate']], # Field exists, may be None
'DELTA Comments': ['get', ['fields', 'customfield_10167']], # Field exists, may be None
'SELISE Ticket References': ['put', ['fields', 'customfield_10067']],
'SELISE Status Values': ['put', ['fields', 'customfield_10065']],
'SELISE Comments': ['put', ['fields', 'customfield_10168']],
@ -209,10 +358,16 @@ class ManagerSyncDelta:
return False
# Dump current Jira fields to text file for reference
# try:
# await dump_jira_fields_to_file()
# except Exception as e:
# logger.warning(f"Failed to dump JIRA fields (non-blocking): {str(e)}")
try:
pass # await dump_jira_fields_to_file()
except Exception as e:
logger.warning(f"Failed to dump JIRA fields (non-blocking): {str(e)}")
# Dump actual JIRA data for debugging
try:
pass # await dump_jira_data_to_file()
except Exception as e:
logger.warning(f"Failed to dump JIRA data (non-blocking): {str(e)}")
# Get the appropriate sync file name based on mode
sync_file_name = self.get_sync_file_name()
@ -296,6 +451,77 @@ async def dump_jira_fields_to_file(filepath: str = "delta_sync_fields.txt") -> b
logger.error(f"Failed to dump JIRA fields: {str(e)}")
return False
# Utility: dump actual JIRA data for debugging
async def dump_jira_data_to_file(filepath: str = "delta_sync_data.txt") -> bool:
    """Write actual JIRA ticket data to a text file for debugging field mapping.

    Fetches a handful of sample tickets via the Delta-Group JIRA connector and,
    for every 'get' entry of ManagerSyncDelta.TASK_SYNC_DEFINITION, writes both
    the raw ticket payload and the value found at that field path, so mapping
    mistakes are visible at a glance.

    Args:
        filepath: Target text file path to write.

    Returns:
        True on success, False otherwise (no tickets, or any exception).
    """
    try:
        # Initialize Jira connector with the hardcoded credentials/constants
        jira = await ConnectorTicketJira.create(
            jira_username=ManagerSyncDelta.JIRA_USERNAME,
            jira_api_token=ManagerSyncDelta.JIRA_API_TOKEN,
            jira_url=ManagerSyncDelta.JIRA_URL,
            project_code=ManagerSyncDelta.JIRA_PROJECT_CODE,
            issue_type=ManagerSyncDelta.JIRA_ISSUE_TYPE,
        )
        # Get a few sample tickets to see the actual data structure
        tickets = await jira.read_tasks(limit=5)
        if not tickets:
            logger.warning("No JIRA tickets returned; nothing to write.")
            return False
        # Ensure directory exists if a directory part is provided
        dir_name = os.path.dirname(filepath)
        if dir_name:
            os.makedirs(dir_name, exist_ok=True)
        # Write the actual ticket data
        with open(filepath, "w", encoding="utf-8") as f:
            f.write("=== JIRA TICKET DATA DEBUG ===\n\n")
            for i, ticket in enumerate(tickets):
                f.write(f"--- TICKET {i+1} ---\n")
                f.write(f"Raw ticket data:\n")
                f.write(f"{ticket.data}\n\n")
                # Also show the specific fields we're trying to map
                f.write("Field mapping analysis:\n")
                for field_name, field_path in ManagerSyncDelta.TASK_SYNC_DEFINITION.items():
                    if field_path[0] == 'get':  # Only analyze 'get' fields
                        try:
                            # Navigate through the field path; stop at the
                            # first missing key and record which one failed.
                            value = ticket.data
                            for key in field_path[1]:
                                if isinstance(value, dict) and key in value:
                                    value = value[key]
                                else:
                                    value = f"KEY_NOT_FOUND: {key}"
                                    break
                            # Convert ADF fields to text; these four columns
                            # carry rich-text (ADF "doc") payloads in JIRA.
                            if field_name in ['Description', 'References', 'DELTA Comments', 'SELISE Comments']:
                                if isinstance(value, dict) and value.get("type") == "doc":
                                    value = convert_adf_to_text(value)
                                elif value is None:
                                    value = ""
                            f.write(f" {field_name}: {value}\n")
                        except Exception as e:
                            # Per-field failures are recorded inline so the
                            # rest of the dump still completes.
                            f.write(f" {field_name}: ERROR - {str(e)}\n")
                f.write("\n" + "="*50 + "\n\n")
        logger.info(f"Wrote JIRA data for {len(tickets)} tickets to {filepath}")
        return True
    except Exception as e:
        # Top-level boundary: debugging dump is best-effort, never fatal.
        logger.error(f"Failed to dump JIRA data: {str(e)}")
        return False
# Global sync function for use in app.py
async def perform_sync_jira_delta_group() -> bool:
"""Perform JIRA to SharePoint synchronization for Delta Group.