gateway/modules/workflows/methods/methodJira/actions/createCsvContent.py
2025-12-17 10:45:09 +01:00

157 lines
6.2 KiB
Python

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Create CSV Content action for JIRA operations.
Creates CSV content with custom headers.
"""
import logging
import json
import base64
import pandas as pd
import csv as csv_module
from io import StringIO
from datetime import datetime, UTC
from typing import Dict, Any
from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument
logger = logging.getLogger(__name__)
@action
async def createCsvContent(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    Create CSV content with custom headers.

    Parameters:
    - data (str, required): Document reference containing data as JSON (with "data" field from mergeTicketData)
    - headers (str, optional): Document reference containing headers JSON (from parseCsvContent/parseExcelContent)
    - columns (str or list, optional): List of column names (if not provided, extracted from taskSyncDefinition or data)
    - taskSyncDefinition (str or dict, optional): Field mapping definition (used to extract column names if columns not provided)

    Returns:
    - ActionResult with ActionDocument containing CSV content as bytes
    """
    try:
        dataParam = parameters.get("data")
        if not dataParam:
            return ActionResult.isFailure(error="data parameter is required")
        headersParam = parameters.get("headers")
        columnsParam = parameters.get("columns")
        taskSyncDefinitionParam = parameters.get("taskSyncDefinition")
        # Resolve the data payload from the document reference.
        dataJson = self.documentParsing.parseJsonFromDocument(dataParam)
        if dataJson is None:
            return ActionResult.isFailure(error="Could not parse data from document reference")
        # Accept either a bare JSON array or an object wrapping the array in "data".
        if isinstance(dataJson, dict) and "data" in dataJson:
            dataList = dataJson["data"]
        elif isinstance(dataJson, list):
            dataList = dataJson
        else:
            return ActionResult.isFailure(error="Data must be a JSON array or object with 'data' field")
        # Free-form header lines placed above the table; defaults apply when no headers doc is given.
        headers = {"header1": "Header 1", "header2": "Header 2"}
        if headersParam:
            headersJson = self.documentParsing.parseJsonFromDocument(headersParam)
            if headersJson and isinstance(headersJson, dict) and "headers" in headersJson:
                headers = headersJson["headers"]
            elif headersJson and isinstance(headersJson, dict):
                headers = headersJson
        # Resolve column order, by priority:
        # explicit columns param > taskSyncDefinition keys > keys of the first data row.
        columns = None
        if columnsParam:
            if isinstance(columnsParam, str):
                try:
                    # A JSON-looking string is parsed; otherwise treat it as comma-separated.
                    columns = json.loads(columnsParam) if columnsParam.startswith(('[', '{')) else columnsParam.split(',')
                except (json.JSONDecodeError, ValueError):
                    # Malformed JSON: fall back to comma-splitting (was a bare except).
                    columns = columnsParam.split(',')
            elif isinstance(columnsParam, list):
                columns = columnsParam
            # Any other type falls through to the alternative sources below
            # (previously columns stayed None here and df[columns] raised KeyError).
        if columns is None:
            if taskSyncDefinitionParam:
                # Column names are the mapping keys of the sync definition.
                if isinstance(taskSyncDefinitionParam, str):
                    taskSyncDefinition = json.loads(taskSyncDefinitionParam)
                else:
                    taskSyncDefinition = taskSyncDefinitionParam
                columns = list(taskSyncDefinition.keys())
            elif dataList:
                columns = list(dataList[0].keys())
            else:
                columns = []
        # Build the DataFrame and guarantee every requested column exists, in order.
        df = pd.DataFrame(dataList) if dataList else pd.DataFrame(columns=columns)
        for col in columns:
            if col not in df.columns:
                df[col] = ""
        df = df[columns]
        # Normalize cells to strings and flatten embedded newlines so every record
        # stays on one physical CSV line. Quotes are intentionally NOT pre-doubled:
        # QUOTE_ALL below already doubles embedded quotes; pre-doubling produced
        # """" in the output for a single " in the data.
        for column in df.columns:
            df[column] = df[column].astype("object").fillna("")
            df[column] = df[column].astype(str).str.replace('\n', '\\n', regex=False)
        # Two free-form header lines; the timestamp replaces the second cell of the
        # second line when that line has more than one cell.
        timestamp = datetime.fromtimestamp(self.services.utils.timestampGetUtc(), UTC).strftime("%Y-%m-%d %H:%M:%S UTC")
        header1Row = next(csv_module.reader([headers.get("header1", "Header 1")]), [])
        header2Row = next(csv_module.reader([headers.get("header2", "Header 2")]), [])
        if len(header2Row) > 1:
            header2Row[1] = timestamp
        def _fitRow(row):
            # Pad or truncate a free-form header row to exactly the table width
            # (previously an over-long header row caused a pandas shape error).
            return (row + [""] * len(df.columns))[:len(df.columns)]
        headerRow1 = pd.DataFrame([_fitRow(header1Row)], columns=df.columns)
        headerRow2 = pd.DataFrame([_fitRow(header2Row)], columns=df.columns)
        tableHeaders = pd.DataFrame([df.columns.tolist()], columns=df.columns)
        finalDf = pd.concat([headerRow1, headerRow2, tableHeaders, df], ignore_index=True)
        # Serialize. QUOTE_ALL quotes every field and doubles embedded quotes;
        # escapechar is dropped — it is not needed (and can corrupt backslashes)
        # when doublequote-style quoting is in effect.
        out = StringIO()
        finalDf.to_csv(out, index=False, header=False, quoting=csv_module.QUOTE_ALL)
        csvBytes = out.getvalue().encode('utf-8')
        logger.info("Created CSV content: %d rows, %d columns", len(dataList), len(columns))
        # Generate a meaningful filename from the workflow context when available.
        workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
        filename = self._generateMeaningfulFileName(
            "ticket_sync",
            "csv",
            workflowContext,
            "createCsvContent"
        )
        validationMetadata = self._createValidationMetadata(
            "createCsvContent",
            rowCount=len(dataList),
            columnCount=len(columns)
        )
        # Documents carry their payload base64-encoded.
        csvBase64 = base64.b64encode(csvBytes).decode('utf-8')
        document = ActionDocument(
            documentName=filename,
            documentData=csvBase64,
            mimeType="application/octet-stream",
            validationMetadata=validationMetadata
        )
        return ActionResult.isSuccess(documents=[document])
    except Exception as e:
        errorMsg = f"Error creating CSV content: {str(e)}"
        # logger.exception preserves the traceback for diagnosis (error() dropped it).
        logger.exception(errorMsg)
        return ActionResult.isFailure(error=errorMsg)