gateway/modules/workflows/methods/methodJira/helpers/adfConverter.py
2025-12-17 10:45:09 +01:00

180 lines
7.5 KiB
Python

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
ADF Converter helper for JIRA operations.
Handles conversion of Atlassian Document Format (ADF) to plain text.
"""
import logging
from typing import Any
logger = logging.getLogger(__name__)
class AdfConverterHelper:
    """Helper for ADF conversion operations.

    Renders an Atlassian Document Format (ADF) tree as Markdown-flavoured
    plain text. The rendering helpers are static and do not touch instance
    state; only ``__init__`` stores the owning method instance.
    """

    # Marks that simply wrap the text in a symmetric delimiter.
    _MARK_WRAPPERS = {"strong": "**", "em": "*", "code": "`"}

    # Media nodes carry no text and are skipped entirely.
    _SKIPPED_TYPES = frozenset({"media", "mediaGroup"})

    def __init__(self, methodInstance):
        """
        Initialize ADF converter helper.

        Args:
            methodInstance: Instance of MethodJira (for access to services)
        """
        self.method = methodInstance
        self.services = methodInstance.services

    def convertAdfToText(self, adfData: Any) -> str:
        """Convert Atlassian Document Format (ADF) to plain text.

        Block-level nodes (paragraphs, headings, list items, code blocks,
        block quotes, table rows) are placed on separate lines; inline nodes
        (text, hard breaks, mentions/emoji/status carrying ``attrs.text``)
        are concatenated in place. Ordered lists are numbered (honouring
        ``attrs.order`` as the start value), nested lists are indented by two
        spaces per nesting level, and table cells are joined with `` | ``.

        Args:
            adfData: ADF object (dict with ``type == "doc"``) or None

        Returns:
            str: Plain text content. Empty string if ``adfData`` is empty,
            not a dict, or its ``content`` is not a list. A dict whose type
            is not ``"doc"`` is returned as ``str(adfData)``.
        """
        if not adfData or not isinstance(adfData, dict):
            return ""
        if adfData.get("type") != "doc":
            # Kept for backward compatibility: non-document dicts are
            # returned in their string form rather than dropped.
            return str(adfData)
        content = adfData.get("content", [])
        if not isinstance(content, list):
            return ""
        return AdfConverterHelper._renderNodes(content).strip()

    @staticmethod
    def _childrenOf(node):
        """Return the node's ``content`` list, or an empty list if absent/malformed."""
        children = node.get("content")
        return children if isinstance(children, list) else []

    @staticmethod
    def _applyMarks(text, marks):
        """Wrap ``text`` according to its ADF marks (bold, italic, code, link)."""
        if not isinstance(marks, list):
            return text
        for mark in marks:
            if not isinstance(mark, dict):
                continue
            markType = mark.get("type")
            wrapper = AdfConverterHelper._MARK_WRAPPERS.get(markType)
            if wrapper is not None:
                text = f"{wrapper}{text}{wrapper}"
            elif markType == "link":
                attrs = mark.get("attrs")
                href = attrs.get("href", "") if isinstance(attrs, dict) else ""
                if href:
                    text = f"[{text}]({href})"
        return text

    @staticmethod
    def _joinPieces(pieces):
        """Concatenate ``(isBlock, text)`` pieces, putting block pieces on their own lines."""
        out = []
        previousWasBlock = False
        for isBlock, text in pieces:
            # A line break is needed whenever a block piece touches a neighbour,
            # unless the preceding text already ends the line (e.g. hardBreak).
            if out and (isBlock or previousWasBlock) and not out[-1].endswith("\n"):
                out.append("\n")
            out.append(text)
            previousWasBlock = isBlock
        return "".join(out)

    @staticmethod
    def _renderNodes(nodes, listLevel=0, ordered=False, start=1):
        """Recursively render a list of ADF nodes to text.

        Args:
            nodes: List of ADF node dicts (non-dict entries are ignored)
            listLevel: Current list nesting depth (0 outside any list)
            ordered: Whether ``listItem`` nodes at this level are numbered
            start: Number assigned to the first ``listItem`` when ``ordered``

        Returns:
            str: Rendered text for ``nodes`` (not stripped)
        """
        cls = AdfConverterHelper
        pieces = []
        listCounter = start

        for node in nodes:
            if not isinstance(node, dict):
                continue
            nodeType = node.get("type", "")
            if nodeType in cls._SKIPPED_TYPES:
                continue

            # --- inline nodes -------------------------------------------
            if nodeType == "text":
                text = node.get("text", "")
                if not isinstance(text, str):
                    text = "" if text is None else str(text)
                pieces.append((False, cls._applyMarks(text, node.get("marks"))))
                continue
            if nodeType == "hardBreak":
                pieces.append((False, "\n"))
                continue

            children = cls._childrenOf(node)
            attrs = node.get("attrs")
            if not isinstance(attrs, dict):
                attrs = {}

            # --- block nodes --------------------------------------------
            rendered = ""
            if nodeType in ("bulletList", "orderedList"):
                isOrdered = nodeType == "orderedList"
                first = attrs.get("order", 1) if isOrdered else 1
                if not isinstance(first, int) or isinstance(first, bool):
                    first = 1
                rendered = cls._renderNodes(children, listLevel + 1, isOrdered, first)
            elif nodeType == "listItem":
                body = cls._renderNodes(children, listLevel).strip("\n")
                if body.strip():
                    # Top-level items (listLevel 1) are flush left; each
                    # further nesting level adds two spaces.
                    indent = "  " * max(listLevel - 1, 0)
                    marker = f"{listCounter}. " if ordered else "- "
                    listCounter += 1
                    rendered = f"{indent}{marker}{body}"
            elif nodeType == "heading":
                level = attrs.get("level", 1)
                if not isinstance(level, int) or isinstance(level, bool) or level < 1:
                    level = 1
                body = cls._renderNodes(children, listLevel)
                if body.strip():
                    rendered = f"{'#' * min(level, 6)} {body}"
            elif nodeType == "codeBlock":
                body = cls._renderNodes(children, listLevel)
                if body.strip():
                    rendered = f"```\n{body}\n```"
            elif nodeType == "blockquote":
                body = cls._renderNodes(children, listLevel).strip("\n")
                if body.strip():
                    # Prefix every line so multi-paragraph quotes stay quoted.
                    rendered = "\n".join(
                        f"> {line}" if line else ">" for line in body.split("\n")
                    )
            elif nodeType == "tableRow":
                cells = []
                for cell in children:
                    if not isinstance(cell, dict):
                        continue
                    cellText = cls._renderNodes([cell], listLevel).strip()
                    # Keep empty cells so columns stay aligned; flatten
                    # multi-line cells so one row stays on one line.
                    cells.append(" ".join(cellText.split("\n")))
                if any(cells):
                    rendered = " | ".join(cells)
            elif children:
                # paragraph, table, tableCell, tableHeader and any unknown
                # container: render the children as one block.
                rendered = cls._renderNodes(children, listLevel)
            else:
                # Leaf node without content (e.g. mention, emoji, status):
                # use its display text when the node provides one.
                inlineText = attrs.get("text")
                if isinstance(inlineText, str) and inlineText:
                    pieces.append((False, inlineText))
                continue

            if rendered.strip():
                pieces.append((True, rendered))

        return cls._joinPieces(pieces)