gateway/modules/workflows/methods/methodJira/actions/parseCsvContent.py
2025-12-17 10:45:09 +01:00

112 lines
3.8 KiB
Python

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Parse CSV Content action for JIRA operations.
Parses CSV content with custom headers.
"""
import csv
import io
import json
import logging
from typing import Any, Dict

import pandas as pd

from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
@action
async def parseCsvContent(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    Parse CSV content with custom headers.

    Parameters:
    - csvContent (str, required): Document reference containing CSV file content as bytes
    - skipRows (int, optional): Number of header rows to skip (default: 2)
    - hasCustomHeaders (bool, optional): Whether CSV has custom header rows (default: true)

    Returns:
    - ActionResult with ActionDocument containing parsed data and headers as JSON
    """
    try:
        csvContentParam = parameters.get("csvContent")
        if not csvContentParam:
            return ActionResult.isFailure(error="csvContent parameter is required")
        skipRows = parameters.get("skipRows", 2)
        hasCustomHeaders = parameters.get("hasCustomHeaders", True)

        # Resolve the document reference to the raw CSV payload.
        csvBytes = self.documentParsing.getDocumentData(csvContentParam)
        if csvBytes is None:
            return ActionResult.isFailure(error="Could not get CSV content from document reference")
        # Normalize to bytes so pandas can read from a BytesIO buffer.
        if isinstance(csvBytes, str):
            csvBytes = csvBytes.encode('utf-8')
        elif not isinstance(csvBytes, bytes):
            return ActionResult.isFailure(error="CSV content must be bytes or string")

        # Capture the first two physical lines as the custom header labels.
        # The placeholder values survive when the file has fewer than two lines.
        headers = {"header1": "Header 1", "header2": "Header 2"}
        if hasCustomHeaders:
            csvLines = csvBytes.decode('utf-8').split('\n')
            if len(csvLines) >= 2:
                headers["header1"] = csvLines[0].rstrip('\r\n')
                headers["header2"] = csvLines[1].rstrip('\r\n')

        # Parse the tabular data, skipping the header rows handled above.
        # csv.QUOTE_ALL replaces the former magic number 1 (same value).
        df = pd.read_csv(
            io.BytesIO(csvBytes),
            skiprows=skipRows,
            quoting=csv.QUOTE_ALL,
            escapechar='\\',
            on_bad_lines='skip',
            engine='python'
        )

        # Replace NaN with '' so the JSON output carries no nulls for empty cells.
        for column in df.columns:
            df[column] = df[column].astype('object').fillna('')
        data = df.to_dict(orient='records')
        # Lazy %-style args: formatting only happens when INFO is enabled.
        logger.info("Parsed CSV: %d rows, %d columns", len(data), len(df.columns))

        # Derive a descriptive output filename from the workflow context, if any.
        workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
        filename = self._generateMeaningfulFileName(
            "parsed_csv_data",
            "json",
            workflowContext,
            "parseCsvContent"
        )
        result = {
            "data": data,
            "headers": headers,
            "rowCount": len(data),
            "columnCount": len(df.columns)
        }
        validationMetadata = self._createValidationMetadata(
            "parseCsvContent",
            rowCount=len(data),
            columnCount=len(df.columns),
            skipRows=skipRows
        )
        document = ActionDocument(
            documentName=filename,
            documentData=json.dumps(result, indent=2, ensure_ascii=False),
            mimeType="application/json",
            validationMetadata=validationMetadata
        )
        return ActionResult.isSuccess(documents=[document])
    except Exception as e:
        errorMsg = f"Error parsing CSV content: {str(e)}"
        # logger.exception keeps the traceback; logger.error silently dropped it.
        logger.exception(errorMsg)
        return ActionResult.isFailure(error=errorMsg)