# Copyright (c) 2025 Patrick Motsch
# All rights reserved.

"""
Parse Excel Content action for JIRA operations.

Parses Excel content with custom headers.
"""

import logging
import json
import pandas as pd
from io import BytesIO
from typing import Dict, Any
from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument

logger = logging.getLogger(__name__)
@action
async def parseExcelContent(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    Parse Excel content with custom headers.

    Parameters:
    - excelContent (str, required): Document reference containing Excel file content as bytes
    - skipRows (int, optional): Number of header rows to skip (default: 3)
    - hasCustomHeaders (bool, optional): Whether Excel has custom header rows (default: true)

    Returns:
    - ActionResult with ActionDocument containing parsed data and headers as JSON.
      On any error a failure ActionResult with a descriptive message is returned
      instead of raising.
    """
    try:
        excelContentParam = parameters.get("excelContent")
        if not excelContentParam:
            return ActionResult.isFailure(error="excelContent parameter is required")

        # Robustness: parameters often arrive JSON-decoded, so skipRows may be
        # a string or float. Coerce to int and clamp at 0 -- a negative value
        # would otherwise make df.iloc[skipRows:] slice from the END of the
        # frame, silently dropping almost all data.
        try:
            skipRows = max(int(parameters.get("skipRows", 3)), 0)
        except (TypeError, ValueError):
            return ActionResult.isFailure(error="skipRows parameter must be an integer")
        hasCustomHeaders = parameters.get("hasCustomHeaders", True)

        # Resolve the document reference into the raw Excel file content.
        excelBytes = self.documentParsing.getDocumentData(excelContentParam)
        if excelBytes is None:
            return ActionResult.isFailure(error="Could not get Excel content from document reference")

        # Convert to bytes if needed. latin-1 maps code points 0-255 one-to-one
        # onto bytes, so binary .xlsx content that was round-tripped through a
        # str survives unchanged.
        if isinstance(excelBytes, str):
            excelBytes = excelBytes.encode('latin-1')  # Excel might have binary data
        elif not isinstance(excelBytes, bytes):
            return ActionResult.isFailure(error="Excel content must be bytes or string")

        # Parse the workbook with header inference disabled; headers are
        # handled manually below.
        df = pd.read_excel(BytesIO(excelBytes), engine='openpyxl', header=None)

        # Placeholder headers, returned whenever no custom headers are extracted.
        headers = {"header1": "Header 1", "header2": "Header 2"}
        if hasCustomHeaders and len(df) >= 3:
            # Custom layout: rows 0 and 1 are free-text meta headers, row 2
            # holds the table column names, data starts at skipRows.
            # NOTE(review): the header positions are fixed at rows 0-2 even
            # when skipRows != 3, so headers and data can disagree for a
            # non-default skipRows -- confirm this is intended.
            headerRow1 = df.iloc[0]
            headerRow2 = df.iloc[1]
            tableHeaders = df.iloc[2]

            dfData = df.iloc[skipRows:].copy()
            dfData.columns = tableHeaders

            # Serialize each meta-header row as a comma-joined string; NaN
            # cells become empty fields.
            headers = {
                "header1": ",".join(str(x) if pd.notna(x) else "" for x in headerRow1.tolist()),
                "header2": ",".join(str(x) if pd.notna(x) else "" for x in headerRow2.tolist()),
            }
        else:
            # Standard parsing: skip the leading rows and use the last skipped
            # row as the header row. This branch is also reached when
            # hasCustomHeaders is true but the sheet has fewer than 3 rows.
            if skipRows > 0:
                dfData = df.iloc[skipRows:].copy()
                if len(df) > skipRows:
                    dfData.columns = df.iloc[skipRows - 1]
            else:
                dfData = df.copy()

        # Reset the index and replace NaN with empty strings so the JSON
        # output contains no nulls.
        dfData = dfData.reset_index(drop=True)
        for column in dfData.columns:
            # astype('object') first so fillna('') also works on numeric columns.
            dfData[column] = dfData[column].astype('object').fillna('')

        data = dfData.to_dict(orient='records')

        logger.info(f"Parsed Excel: {len(data)} rows, {len(dfData.columns)} columns")

        # Derive a meaningful output filename from the workflow context when
        # a chat service is available.
        workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
        filename = self._generateMeaningfulFileName(
            "parsed_excel_data",
            "json",
            workflowContext,
            "parseExcelContent"
        )

        result = {
            "data": data,
            "headers": headers,
            "rowCount": len(data),
            "columnCount": len(dfData.columns)
        }

        validationMetadata = self._createValidationMetadata(
            "parseExcelContent",
            rowCount=len(data),
            columnCount=len(dfData.columns),
            skipRows=skipRows
        )

        document = ActionDocument(
            documentName=filename,
            documentData=json.dumps(result, indent=2, ensure_ascii=False),
            mimeType="application/json",
            validationMetadata=validationMetadata
        )

        return ActionResult.isSuccess(documents=[document])

    except Exception as e:
        # Broad catch is deliberate: this is the action boundary, and every
        # failure must be converted into a structured failure result.
        # logger.exception preserves the traceback (logger.error dropped it).
        errorMsg = f"Error parsing Excel content: {str(e)}"
        logger.exception(errorMsg)
        return ActionResult.isFailure(error=errorMsg)
|
|
|