gateway/modules/workflows/methods/methodJira/actions/parseCsvContent.py
2025-12-17 10:45:09 +01:00

112 lines
3.8 KiB
Python

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Parse CSV Content action for JIRA operations.
Parses CSV content with custom headers.
"""
import csv
import io
import json
import logging
from typing import Any, Dict

import pandas as pd

from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
@action
async def parseCsvContent(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    Parse CSV content with custom headers.

    Parameters:
    - csvContent (str, required): Document reference containing CSV file content as bytes
    - skipRows (int, optional): Number of header rows to skip (default: 2)
    - hasCustomHeaders (bool, optional): Whether CSV has custom header rows (default: true)

    Returns:
    - ActionResult with ActionDocument containing parsed data and headers as JSON
    """
    try:
        csvContentParam = parameters.get("csvContent")
        if not csvContentParam:
            return ActionResult.isFailure(error="csvContent parameter is required")
        skipRows = parameters.get("skipRows", 2)
        hasCustomHeaders = parameters.get("hasCustomHeaders", True)

        # Resolve the document reference to the raw CSV payload.
        csvBytes = self.documentParsing.getDocumentData(csvContentParam)
        if csvBytes is None:
            return ActionResult.isFailure(error="Could not get CSV content from document reference")
        # Normalize to bytes so pandas can read from a BytesIO buffer.
        if isinstance(csvBytes, str):
            csvBytes = csvBytes.encode('utf-8')
        elif not isinstance(csvBytes, bytes):
            return ActionResult.isFailure(error="CSV content must be bytes or string")

        # Capture the first two physical lines as the custom header labels.
        # The placeholder values survive when the file has fewer than two lines.
        headers = {"header1": "Header 1", "header2": "Header 2"}
        if hasCustomHeaders:
            csvLines = csvBytes.decode('utf-8').split('\n')
            if len(csvLines) >= 2:
                headers["header1"] = csvLines[0].rstrip('\r\n')
                headers["header2"] = csvLines[1].rstrip('\r\n')

        # Parse the tabular data, skipping the header rows handled above.
        # csv.QUOTE_ALL replaces the former magic number 1 (same value).
        df = pd.read_csv(
            io.BytesIO(csvBytes),
            skiprows=skipRows,
            quoting=csv.QUOTE_ALL,
            escapechar='\\',
            on_bad_lines='skip',
            engine='python'
        )

        # Replace NaN with '' so the JSON output carries no nulls for empty cells.
        for column in df.columns:
            df[column] = df[column].astype('object').fillna('')
        data = df.to_dict(orient='records')
        # Lazy %-style args: formatting only happens when INFO is enabled.
        logger.info("Parsed CSV: %d rows, %d columns", len(data), len(df.columns))

        # Derive a descriptive output filename from the workflow context, if any.
        workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
        filename = self._generateMeaningfulFileName(
            "parsed_csv_data",
            "json",
            workflowContext,
            "parseCsvContent"
        )
        result = {
            "data": data,
            "headers": headers,
            "rowCount": len(data),
            "columnCount": len(df.columns)
        }
        validationMetadata = self._createValidationMetadata(
            "parseCsvContent",
            rowCount=len(data),
            columnCount=len(df.columns),
            skipRows=skipRows
        )
        document = ActionDocument(
            documentName=filename,
            documentData=json.dumps(result, indent=2, ensure_ascii=False),
            mimeType="application/json",
            validationMetadata=validationMetadata
        )
        return ActionResult.isSuccess(documents=[document])
    except Exception as e:
        errorMsg = f"Error parsing CSV content: {str(e)}"
        # logger.exception keeps the traceback; logger.error silently dropped it.
        logger.exception(errorMsg)
        return ActionResult.isFailure(error=errorMsg)