# gateway/modules/workflows/methods/methodJira/helpers/adfConverter.py

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.

"""
ADF Converter helper for JIRA operations.
Handles conversion of Atlassian Document Format (ADF) to plain text.
"""

import logging
from typing import Any

logger = logging.getLogger(__name__)

class AdfConverterHelper:
    """Helper for ADF conversion operations.

    Flattens an Atlassian Document Format (ADF) document into
    Markdown-flavoured plain text. Inline nodes (text, hard breaks, mentions,
    ...) are concatenated directly, while every block-level node (paragraph,
    heading, list item, table row, ...) is placed on its own line.
    """

    # Inline nodes that carry their visible text in ``attrs`` instead of in
    # ``content``; the keys are tried in order and the first non-empty wins.
    _ATTR_TEXT_KEYS = {
        "mention": ("text",),
        "emoji": ("text", "shortName"),
        "status": ("text",),
        "inlineCard": ("url",),
    }

    # Node types whose output is glued to its neighbours without a newline.
    _INLINE_TYPES = frozenset(
        ("text", "hardBreak", "mention", "emoji", "status", "inlineCard")
    )

    # Node types that are intentionally not rendered.
    _SKIPPED_TYPES = frozenset(("mediaGroup", "media"))

    # Markdown only defines heading levels 1 through 6.
    _MAX_HEADING_LEVEL = 6

    def __init__(self, methodInstance):
        """
        Initialize ADF converter helper.

        Args:
            methodInstance: Instance of MethodJira (for access to services)
        """
        self.method = methodInstance
        self.services = methodInstance.services

    def convertAdfToText(self, adfData: Any) -> str:
        """Convert Atlassian Document Format (ADF) to plain text.

        Block-level nodes are separated by newlines, ordered lists are
        numbered (honouring ``attrs.order`` as the start value), nested lists
        are indented below their parent item and table cells of one row are
        joined with ``" | "``. Malformed nodes (non-dict entries, missing or
        wrongly typed ``attrs``/``marks``/``content``) are skipped instead of
        raising.

        Args:
            adfData: ADF object or None

        Returns:
            str: Plain text content, or empty string if None/invalid. A
            non-empty dict whose ``type`` is not ``"doc"`` is returned as its
            ``str()`` representation.
        """
        if not adfData or not isinstance(adfData, dict):
            return ""

        if adfData.get("type") != "doc":
            # Not a document root: fall back to the raw representation
            # rather than guessing at the structure.
            return str(adfData)

        return self._renderNodes(self._childNodes(adfData)).strip()

    @staticmethod
    def _childNodes(node):
        """Return the dict entries of ``node["content"]``, or ``[]`` if unusable."""
        content = node.get("content")
        if not isinstance(content, list):
            return []
        return [child for child in content if isinstance(child, dict)]

    @staticmethod
    def _prefixLines(text, firstPrefix, restPrefix):
        """Prefix the first line with ``firstPrefix`` and later non-empty lines with ``restPrefix``."""
        lines = text.split("\n")
        prefixed = [firstPrefix + lines[0]]
        # Empty lines stay empty so no trailing whitespace is produced.
        prefixed.extend(restPrefix + line if line else line for line in lines[1:])
        return "\n".join(prefixed)

    def _renderNodes(self, nodes):
        """Render sibling nodes: inline runs are concatenated, blocks are newline-separated."""
        blocks = []
        inlineRun = []

        def flushInlineRun():
            if inlineRun:
                joined = "".join(inlineRun)
                if joined.strip():
                    blocks.append(joined)
                inlineRun.clear()

        for node in nodes:
            nodeType = node.get("type", "")
            rendered = self._renderNode(node)
            if isinstance(nodeType, str) and nodeType in self._INLINE_TYPES:
                inlineRun.append(rendered)
            elif rendered.strip():
                # Only a node that actually produced output ends the current
                # inline run; silently skipped nodes must not split a sentence.
                flushInlineRun()
                blocks.append(rendered)

        flushInlineRun()
        return "\n".join(blocks)

    def _renderNode(self, node):
        """Render a single ADF node (inline or block) to text."""
        nodeType = node.get("type", "")
        if not isinstance(nodeType, str):
            nodeType = ""

        if nodeType == "text":
            return self._renderText(node)
        if nodeType == "hardBreak":
            return "\n"
        if nodeType in self._ATTR_TEXT_KEYS:
            return self._renderAttrText(node, self._ATTR_TEXT_KEYS[nodeType])
        if nodeType in self._SKIPPED_TYPES:
            return ""
        if nodeType == "bulletList":
            return self._renderList(node, ordered=False)
        if nodeType == "orderedList":
            return self._renderList(node, ordered=True)
        if nodeType == "table":
            return self._renderTable(node)

        # Everything else is a container: render the children first, then
        # decorate the result according to the node type.
        body = self._renderNodes(self._childNodes(node))
        if not body.strip():
            return ""

        if nodeType == "listItem":
            # List item that is not inside a list node: treat it as a bullet.
            return self._prefixLines(body, "- ", "  ")
        if nodeType == "heading":
            return "#" * self._headingLevel(node) + " " + body
        if nodeType == "codeBlock":
            return f"```\n{body}\n```"
        if nodeType == "blockquote":
            return self._prefixLines(body, "> ", "> ")

        # paragraph, tableRow/tableCell outside a table, and unknown
        # containers: pass the child text through unchanged.
        return body

    @staticmethod
    def _renderText(node):
        """Render a text node, applying its marks in order as Markdown."""
        text = node.get("text", "")
        if not isinstance(text, str):
            text = "" if text is None else str(text)

        marks = node.get("marks")
        if not isinstance(marks, list):
            return text

        for mark in marks:
            if not isinstance(mark, dict):
                continue
            markType = mark.get("type")
            if markType == "strong":
                text = f"**{text}**"
            elif markType == "em":
                text = f"*{text}*"
            elif markType == "code":
                text = f"`{text}`"
            elif markType == "link":
                attrs = mark.get("attrs")
                href = attrs.get("href", "") if isinstance(attrs, dict) else ""
                if href:
                    text = f"[{text}]({href})"
        return text

    @staticmethod
    def _renderAttrText(node, keys):
        """Return the first non-empty string found under ``node["attrs"]`` for ``keys``."""
        attrs = node.get("attrs")
        if not isinstance(attrs, dict):
            return ""
        for key in keys:
            value = attrs.get(key)
            if isinstance(value, str) and value:
                return value
        return ""

    def _headingLevel(self, node):
        """Return the heading level clamped to 1..6, defaulting to 1."""
        attrs = node.get("attrs")
        level = attrs.get("level", 1) if isinstance(attrs, dict) else 1
        if not isinstance(level, int) or level < 1:
            return 1
        return min(level, self._MAX_HEADING_LEVEL)

    def _renderList(self, listNode, ordered):
        """Render a bullet or ordered list, one item per line."""
        number = 1
        if ordered:
            attrs = listNode.get("attrs")
            order = attrs.get("order") if isinstance(attrs, dict) else None
            if isinstance(order, int) and not isinstance(order, bool) and order >= 0:
                number = order

        lines = []
        for child in self._childNodes(listNode):
            if child.get("type") != "listItem":
                # Malformed list content: keep its text, but without a marker.
                rendered = self._renderNode(child)
                if rendered.strip():
                    lines.append(rendered)
                continue

            body = self._renderNodes(self._childNodes(child))
            if not body.strip():
                continue

            marker = f"{number}. " if ordered else "- "
            # Continuation lines (further paragraphs, nested lists) are
            # indented by the marker width so they stay attached to this item.
            lines.append(self._prefixLines(body, marker, " " * len(marker)))
            number += 1

        return "\n".join(lines)

    def _renderTable(self, tableNode):
        """Render a table with one line per row and cells joined by `` | ``."""
        lines = []
        for row in self._childNodes(tableNode):
            if row.get("type") != "tableRow":
                rendered = self._renderNode(row)
                if rendered.strip():
                    lines.append(rendered)
                continue

            cells = []
            for cell in self._childNodes(row):
                cellText = self._renderNodes(self._childNodes(cell))
                # A cell may hold several blocks; keep the row on one line.
                cells.append(cellText.replace("\n", " ").strip())

            if any(cells):
                lines.append(" | ".join(cells))

        return "\n".join(lines)