# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Parse Excel Content action for JIRA operations.

Parses Excel content with custom headers.
"""
import logging
import json
import pandas as pd
from io import BytesIO
from typing import Dict, Any

from modules.workflows.methods.methodBase import action
from modules.datamodels.datamodelChat import ActionResult, ActionDocument

logger = logging.getLogger(__name__)


@action
async def parseExcelContent(self, parameters: Dict[str, Any]) -> ActionResult:
    """
    Parse Excel content with custom headers.

    Parameters:
    - excelContent (str, required): Document reference containing Excel file content as bytes
    - skipRows (int, optional): Number of header rows to skip (default: 3)
    - hasCustomHeaders (bool, optional): Whether Excel has custom header rows (default: true)

    Returns:
    - ActionResult with ActionDocument containing parsed data and headers as JSON
    """
    try:
        excelContentParam = parameters.get("excelContent")
        if not excelContentParam:
            return ActionResult.isFailure(error="excelContent parameter is required")

        skipRows = parameters.get("skipRows", 3)
        hasCustomHeaders = parameters.get("hasCustomHeaders", True)

        # Reject negative or non-int skipRows: df.iloc[-n:] would silently
        # slice rows from the END of the sheet instead of skipping headers.
        if not isinstance(skipRows, int) or skipRows < 0:
            return ActionResult.isFailure(error="skipRows must be a non-negative integer")

        # Get Excel content from document
        excelBytes = self.documentParsing.getDocumentData(excelContentParam)
        if excelBytes is None:
            return ActionResult.isFailure(error="Could not get Excel content from document reference")

        # Convert to bytes if needed. latin-1 maps code points 0-255
        # one-to-one, so binary data stored as str round-trips intact.
        if isinstance(excelBytes, str):
            excelBytes = excelBytes.encode('latin-1')  # Excel might have binary data
        elif not isinstance(excelBytes, bytes):
            return ActionResult.isFailure(error="Excel content must be bytes or string")

        # Parse the workbook with header=None so the raw header rows stay
        # addressable by position.
        df = pd.read_excel(BytesIO(excelBytes), engine='openpyxl', header=None)

        # Placeholder headers, reported when no custom header rows are extracted.
        headers = {"header1": "Header 1", "header2": "Header 2"}

        if hasCustomHeaders and len(df) >= 3:
            # Layout assumption: rows 0 and 1 carry free-text header lines,
            # row 2 carries the table column names, data starts at skipRows.
            headerRow1 = df.iloc[0:1].copy()
            headerRow2 = df.iloc[1:2].copy()
            tableHeaders = df.iloc[2:3].copy()
            dfData = df.iloc[skipRows:].copy()
            dfData.columns = tableHeaders.iloc[0]
            headers = {
                "header1": ",".join([str(x) if pd.notna(x) else "" for x in headerRow1.iloc[0].tolist()]),
                "header2": ",".join([str(x) if pd.notna(x) else "" for x in headerRow2.iloc[0].tolist()]),
            }
        else:
            # No custom headers (or sheet shorter than 3 rows): standard parsing.
            if skipRows > 0:
                dfData = df.iloc[skipRows:].copy()
                if len(df) > skipRows:
                    # The row immediately before the data provides column names.
                    dfData.columns = df.iloc[skipRows - 1]
            else:
                dfData = df.copy()

        # Reset index and normalise NaN to '' for clean JSON output.
        # Whole-frame astype/fillna also tolerates duplicate column labels,
        # where the per-column assignment pattern would fail.
        dfData = dfData.reset_index(drop=True)
        dfData = dfData.astype(object).fillna('')

        data = dfData.to_dict(orient='records')
        logger.info(f"Parsed Excel: {len(data)} rows, {len(dfData.columns)} columns")

        # Generate a meaningful output filename from the workflow context.
        workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
        filename = self._generateMeaningfulFileName(
            "parsed_excel_data", "json", workflowContext, "parseExcelContent"
        )

        result = {
            "data": data,
            "headers": headers,
            "rowCount": len(data),
            "columnCount": len(dfData.columns)
        }

        validationMetadata = self._createValidationMetadata(
            "parseExcelContent",
            rowCount=len(data),
            columnCount=len(dfData.columns),
            skipRows=skipRows
        )

        document = ActionDocument(
            documentName=filename,
            documentData=json.dumps(result, indent=2, ensure_ascii=False),
            mimeType="application/json",
            validationMetadata=validationMetadata
        )
        return ActionResult.isSuccess(documents=[document])

    except Exception as e:
        errorMsg = f"Error parsing Excel content: {str(e)}"
        # logger.exception keeps the traceback attached to the log record.
        logger.exception(errorMsg)
        return ActionResult.isFailure(error=errorMsg)