# gateway/modules/workflows/methods/methodJira/actions/parseCsvContent.py
# (file-listing metadata: 2026-01-20 00:55:39 +01:00, 94 lines, 3.2 KiB, Python)

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
import logging
import json
import io
import pandas as pd
from typing import Dict, Any
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
async def parseCsvContent(self, parameters: Dict[str, Any]) -> ActionResult:
    """Parse CSV content referenced by ``parameters['csvContent']`` into a JSON document.

    Parameters (keys of ``parameters``):
        csvContent: document reference resolved via ``self.documentParsing``.
        skipRows (int, default 2): number of leading rows pandas skips before
            the CSV header row.
        hasCustomHeaders (bool, default True): when True, the first two
            physical lines of the raw content are captured verbatim as
            ``header1`` / ``header2``.

    Returns:
        ActionResult: success carrying one JSON ``ActionDocument`` with keys
        ``data``, ``headers``, ``rowCount``, ``columnCount``; failure with an
        error message on bad input or any parsing exception.
    """
    try:
        csvContentParam = parameters.get("csvContent")
        if not csvContentParam:
            return ActionResult.isFailure(error="csvContent parameter is required")
        skipRows = parameters.get("skipRows", 2)
        hasCustomHeaders = parameters.get("hasCustomHeaders", True)

        # Resolve the document reference to raw content, normalized to bytes.
        csvBytes = self.documentParsing.getDocumentData(csvContentParam)
        if csvBytes is None:
            return ActionResult.isFailure(error="Could not get CSV content from document reference")
        if isinstance(csvBytes, str):
            csvBytes = csvBytes.encode('utf-8')
        elif not isinstance(csvBytes, bytes):
            return ActionResult.isFailure(error="CSV content must be bytes or string")

        # Capture the first two physical lines as custom header strings.
        # splitlines() handles \n, \r\n and \r uniformly.
        # NOTE(review): this assumes skipRows >= 2 so the header lines are not
        # also parsed as data rows — confirm with callers that override skipRows.
        headers = {"header1": "Header 1", "header2": "Header 2"}
        if hasCustomHeaders:
            csvLines = csvBytes.decode('utf-8').splitlines()
            if len(csvLines) >= 2:
                headers["header1"] = csvLines[0]
                headers["header2"] = csvLines[1]

        # Parse the data rows; malformed lines are skipped rather than
        # aborting the whole parse (on_bad_lines='skip' requires the python engine).
        df = pd.read_csv(
            io.BytesIO(csvBytes),
            skiprows=skipRows,
            quoting=1,  # csv.QUOTE_ALL
            escapechar='\\',
            on_bad_lines='skip',
            engine='python'
        )

        # Replace NaN with empty strings so the JSON output has no nulls
        # where cells were merely blank.
        for column in df.columns:
            df[column] = df[column].astype('object').fillna('')
        data = df.to_dict(orient='records')
        # Lazy %-formatting: no string built unless INFO is enabled.
        logger.info("Parsed CSV: %d rows, %d columns", len(data), len(df.columns))

        # Derive a meaningful output filename from the workflow context when available.
        workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
        filename = self._generateMeaningfulFileName(
            "parsed_csv_data",
            "json",
            workflowContext,
            "parseCsvContent"
        )
        result = {
            "data": data,
            "headers": headers,
            "rowCount": len(data),
            "columnCount": len(df.columns)
        }
        validationMetadata = self._createValidationMetadata(
            "parseCsvContent",
            rowCount=len(data),
            columnCount=len(df.columns),
            skipRows=skipRows
        )
        document = ActionDocument(
            documentName=filename,
            documentData=json.dumps(result, indent=2, ensure_ascii=False),
            mimeType="application/json",
            validationMetadata=validationMetadata
        )
        return ActionResult.isSuccess(documents=[document])
    except Exception as e:
        errorMsg = f"Error parsing CSV content: {str(e)}"
        # exception() logs the traceback alongside the message, unlike error().
        logger.exception(errorMsg)
        return ActionResult.isFailure(error=errorMsg)