# gateway/modules/workflows/methods/methodJira/actions/parseCsvContent.py
# (file-listing metadata: 2026-01-20 00:55:39 +01:00, 94 lines, 3.2 KiB, Python)

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
import logging
import json
import io
import pandas as pd
from typing import Dict, Any
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
async def parseCsvContent(self, parameters: Dict[str, Any]) -> ActionResult:
    """Parse CSV content referenced by ``parameters['csvContent']`` into a JSON document.

    Parameters (keys of ``parameters``):
        csvContent: document reference resolved via ``self.documentParsing``.
        skipRows (int, default 2): number of leading rows pandas skips before
            the CSV header row.
        hasCustomHeaders (bool, default True): when True, the first two
            physical lines of the raw content are captured verbatim as
            ``header1`` / ``header2``.

    Returns:
        ActionResult: success carrying one JSON ``ActionDocument`` with keys
        ``data``, ``headers``, ``rowCount``, ``columnCount``; failure with an
        error message on bad input or any parsing exception.
    """
    try:
        csvContentParam = parameters.get("csvContent")
        if not csvContentParam:
            return ActionResult.isFailure(error="csvContent parameter is required")
        skipRows = parameters.get("skipRows", 2)
        hasCustomHeaders = parameters.get("hasCustomHeaders", True)

        # Resolve the document reference to raw content, normalized to bytes.
        csvBytes = self.documentParsing.getDocumentData(csvContentParam)
        if csvBytes is None:
            return ActionResult.isFailure(error="Could not get CSV content from document reference")
        if isinstance(csvBytes, str):
            csvBytes = csvBytes.encode('utf-8')
        elif not isinstance(csvBytes, bytes):
            return ActionResult.isFailure(error="CSV content must be bytes or string")

        # Capture the first two physical lines as custom header strings.
        # splitlines() handles \n, \r\n and \r uniformly.
        # NOTE(review): this assumes skipRows >= 2 so the header lines are not
        # also parsed as data rows — confirm with callers that override skipRows.
        headers = {"header1": "Header 1", "header2": "Header 2"}
        if hasCustomHeaders:
            csvLines = csvBytes.decode('utf-8').splitlines()
            if len(csvLines) >= 2:
                headers["header1"] = csvLines[0]
                headers["header2"] = csvLines[1]

        # Parse the data rows; malformed lines are skipped rather than
        # aborting the whole parse (on_bad_lines='skip' requires the python engine).
        df = pd.read_csv(
            io.BytesIO(csvBytes),
            skiprows=skipRows,
            quoting=1,  # csv.QUOTE_ALL
            escapechar='\\',
            on_bad_lines='skip',
            engine='python'
        )

        # Replace NaN with empty strings so the JSON output has no nulls
        # where cells were merely blank.
        for column in df.columns:
            df[column] = df[column].astype('object').fillna('')
        data = df.to_dict(orient='records')
        # Lazy %-formatting: no string built unless INFO is enabled.
        logger.info("Parsed CSV: %d rows, %d columns", len(data), len(df.columns))

        # Derive a meaningful output filename from the workflow context when available.
        workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
        filename = self._generateMeaningfulFileName(
            "parsed_csv_data",
            "json",
            workflowContext,
            "parseCsvContent"
        )
        result = {
            "data": data,
            "headers": headers,
            "rowCount": len(data),
            "columnCount": len(df.columns)
        }
        validationMetadata = self._createValidationMetadata(
            "parseCsvContent",
            rowCount=len(data),
            columnCount=len(df.columns),
            skipRows=skipRows
        )
        document = ActionDocument(
            documentName=filename,
            documentData=json.dumps(result, indent=2, ensure_ascii=False),
            mimeType="application/json",
            validationMetadata=validationMetadata
        )
        return ActionResult.isSuccess(documents=[document])
    except Exception as e:
        errorMsg = f"Error parsing CSV content: {str(e)}"
        # exception() logs the traceback alongside the message, unlike error().
        logger.exception(errorMsg)
        return ActionResult.isFailure(error=errorMsg)