# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Parse CSV Content action for JIRA operations.
Parses CSV content with custom headers.
"""
import logging
import json
import io
import csv
import pandas as pd
from typing import Dict, Any
from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument

logger = logging.getLogger(__name__)


@action
async def parseCsvContent(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    Parse CSV content with custom headers.

    Parameters:
    - csvContent (str, required): Document reference containing CSV file content as bytes
    - skipRows (int, optional): Number of header rows to skip (default: 2)
    - hasCustomHeaders (bool, optional): Whether CSV has custom header rows (default: true)

    Returns:
    - ActionResult with ActionDocument containing parsed data and headers as JSON
      (keys: "data", "headers", "rowCount", "columnCount"); on any error an
      ActionResult failure with a human-readable message.
    """
    try:
        csvContentParam = parameters.get("csvContent")
        if not csvContentParam:
            return ActionResult.isFailure(error="csvContent parameter is required")

        # Coerce to int so a stringified number ("2") does not break pd.read_csv.
        skipRows = int(parameters.get("skipRows", 2))
        hasCustomHeaders = parameters.get("hasCustomHeaders", True)

        # Resolve the document reference to raw CSV content.
        csvBytes = self.documentParsing.getDocumentData(csvContentParam)
        if csvBytes is None:
            return ActionResult.isFailure(error="Could not get CSV content from document reference")

        # Normalize to bytes; pandas is fed a BytesIO below.
        if isinstance(csvBytes, str):
            csvBytes = csvBytes.encode('utf-8')
        elif not isinstance(csvBytes, bytes):
            return ActionResult.isFailure(error="CSV content must be bytes or string")

        # Extract the custom header lines (first two physical lines) when present.
        # Defaults are placeholders used when the file is shorter than expected.
        headers = {"header1": "Header 1", "header2": "Header 2"}
        if hasCustomHeaders:
            csvLines = csvBytes.decode('utf-8').splitlines()
            if len(csvLines) >= 1:
                headers["header1"] = csvLines[0]
            if len(csvLines) >= 2:
                headers["header2"] = csvLines[1]

        # Parse CSV data, skipping the custom header rows.
        # csv.QUOTE_ALL == 1 (the value the original passed as a magic number).
        df = pd.read_csv(
            io.BytesIO(csvBytes),
            skiprows=skipRows,
            quoting=csv.QUOTE_ALL,
            escapechar='\\',
            on_bad_lines='skip',
            engine='python'
        )

        # Replace NaN with empty strings so the JSON output has no nulls.
        for column in df.columns:
            df[column] = df[column].astype('object').fillna('')
        data = df.to_dict(orient='records')

        logger.info(f"Parsed CSV: {len(data)} rows, {len(df.columns)} columns")

        # Generate a meaningful filename for the result document.
        workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
        filename = self._generateMeaningfulFileName(
            "parsed_csv_data",
            "json",
            workflowContext,
            "parseCsvContent"
        )

        result = {
            "data": data,
            "headers": headers,
            "rowCount": len(data),
            "columnCount": len(df.columns)
        }

        validationMetadata = self._createValidationMetadata(
            "parseCsvContent",
            rowCount=len(data),
            columnCount=len(df.columns),
            skipRows=skipRows
        )

        document = ActionDocument(
            documentName=filename,
            documentData=json.dumps(result, indent=2, ensure_ascii=False),
            mimeType="application/json",
            validationMetadata=validationMetadata
        )

        return ActionResult.isSuccess(documents=[document])

    except Exception as e:
        errorMsg = f"Error parsing CSV content: {str(e)}"
        # logger.exception records the traceback, not just the message.
        logger.exception(errorMsg)
        return ActionResult.isFailure(error=errorMsg)