112 lines
3.8 KiB
Python
112 lines
3.8 KiB
Python
# Copyright (c) 2025 Patrick Motsch
|
|
# All rights reserved.
|
|
|
|
"""
|
|
Parse CSV Content action for JIRA operations.
|
|
Parses CSV content with custom headers.
|
|
"""
|
|
|
|
import logging
|
|
import json
|
|
import io
|
|
import pandas as pd
|
|
from typing import Dict, Any
|
|
from modules.workflows.methods.methodBase import action
|
|
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
@action
async def parseCsvContent(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    Parse CSV content with custom headers.

    The first two physical lines of the file can be captured verbatim as
    "custom headers" (free-text lines that precede the real CSV header row),
    while the tabular part is parsed with pandas after skipping ``skipRows``
    leading lines.

    Parameters:
    - csvContent (str, required): Document reference containing CSV file content as bytes
    - skipRows (int, optional): Number of header rows to skip (default: 2). A numeric string such as "2" is accepted and converted to an integer.
    - hasCustomHeaders (bool, optional): Whether CSV has custom header rows (default: true)

    Returns:
    - ActionResult with ActionDocument containing parsed data and headers as JSON
      (keys: "data", "headers", "rowCount", "columnCount"). On any error an
      ActionResult failure carrying the error message is returned instead of
      raising.
    """
    try:
        csvContentParam = parameters.get("csvContent")
        if not csvContentParam:
            return ActionResult.isFailure(error="csvContent parameter is required")

        skipRows = parameters.get("skipRows", 2)
        if isinstance(skipRows, str):
            # pandas does not interpret a string as a row count, so a value
            # like "2" would not skip the two leading rows. Coerce it here.
            try:
                skipRows = int(skipRows.strip())
            except ValueError:
                return ActionResult.isFailure(
                    error=f"skipRows must be an integer, got {skipRows!r}"
                )
        hasCustomHeaders = parameters.get("hasCustomHeaders", True)

        # Get CSV content from document
        csvBytes = self.documentParsing.getDocumentData(csvContentParam)
        if csvBytes is None:
            return ActionResult.isFailure(error="Could not get CSV content from document reference")

        # Convert to bytes if needed
        if isinstance(csvBytes, str):
            csvBytes = csvBytes.encode('utf-8')
        elif not isinstance(csvBytes, bytes):
            return ActionResult.isFailure(error="CSV content must be bytes or string")

        # Parse headers if hasCustomHeaders; the placeholders below are kept
        # when custom headers are disabled or the file has fewer than two lines.
        headers = {"header1": "Header 1", "header2": "Header 2"}
        if hasCustomHeaders:
            # Only the first two lines are needed, so split the raw bytes at
            # most twice instead of decoding and splitting the whole file.
            # Splitting on b'\n' is safe for UTF-8: 0x0A never occurs inside a
            # multi-byte sequence.
            leadingLines = csvBytes.split(b'\n', 2)
            if len(leadingLines) >= 2:
                # 'utf-8-sig' drops a leading byte-order mark so it does not
                # end up inside header1; without a BOM it behaves like 'utf-8'.
                headers["header1"] = leadingLines[0].decode('utf-8-sig').rstrip('\r\n')
                headers["header2"] = leadingLines[1].decode('utf-8').rstrip('\r\n')

        # Parse CSV data
        df = pd.read_csv(
            io.BytesIO(csvBytes),
            skiprows=skipRows,
            quoting=1,  # numeric value of csv.QUOTE_ALL
            escapechar='\\',
            on_bad_lines='skip',  # malformed rows are dropped, not fatal
            engine='python'
        )

        # Convert to dict records; missing values become empty strings so the
        # result serializes to JSON without NaN.
        for column in df.columns:
            df[column] = df[column].astype('object').fillna('')
        data = df.to_dict(orient='records')

        logger.info(f"Parsed CSV: {len(data)} rows, {len(df.columns)} columns")

        # Generate filename
        workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
        filename = self._generateMeaningfulFileName(
            "parsed_csv_data",
            "json",
            workflowContext,
            "parseCsvContent"
        )

        result = {
            "data": data,
            "headers": headers,
            "rowCount": len(data),
            "columnCount": len(df.columns)
        }

        validationMetadata = self._createValidationMetadata(
            "parseCsvContent",
            rowCount=len(data),
            columnCount=len(df.columns),
            skipRows=skipRows
        )

        document = ActionDocument(
            documentName=filename,
            documentData=json.dumps(result, indent=2, ensure_ascii=False),
            mimeType="application/json",
            validationMetadata=validationMetadata
        )

        return ActionResult.isSuccess(documents=[document])

    except Exception as e:
        # Top-level boundary of the action: report the failure to the caller
        # and keep the traceback in the log for diagnosis.
        errorMsg = f"Error parsing CSV content: {str(e)}"
        logger.exception(errorMsg)
        return ActionResult.isFailure(error=errorMsg)
|
|
|