# Copyright (c) 2025 Patrick Motsch
# All rights reserved.

import json
import logging
from io import BytesIO
from typing import Any, Dict

import pandas as pd

from modules.aichat.datamodelFeatureAiChat import ActionResult, ActionDocument

logger = logging.getLogger(__name__)

# Number of leading rows skipped when the caller does not supply "skipRows".
_DEFAULT_SKIP_ROWS = 3


def _coerceSkipRows(value: Any) -> int:
    """Return *value* as a non-negative int.

    ``None`` falls back to the default of 3. Integer-like strings and
    integral floats are accepted.

    Raises:
        TypeError, ValueError, OverflowError: if the value cannot be
            interpreted as a non-negative whole number.
    """
    if value is None:
        return _DEFAULT_SKIP_ROWS
    number = int(value)
    # int() truncates floats; refuse values such as 2.7 instead of guessing.
    if isinstance(value, float) and value != number:
        raise ValueError("skipRows must be a whole number")
    if number < 0:
        raise ValueError("skipRows must not be negative")
    return number


def _uniqueColumnLabels(labels: Any) -> list:
    """Turn raw header cells into unique, JSON-safe string labels.

    Missing cells become ``Column_<position>`` (1-based). Repeated labels get
    a numeric suffix (``_2``, ``_3``, ...), so no column is lost when rows are
    converted to dicts.
    """
    taken = set()
    uniqueLabels = []
    for position, label in enumerate(labels, start=1):
        base = f"Column_{position}" if pd.isna(label) else str(label)
        candidate = base
        suffix = 2
        while candidate in taken:
            candidate = f"{base}_{suffix}"
            suffix += 1
        taken.add(candidate)
        uniqueLabels.append(candidate)
    return uniqueLabels


def _joinHeaderRow(row: Any) -> str:
    """Join the cells of one header row with commas; empty cells become ''."""
    return ",".join("" if pd.isna(cell) else str(cell) for cell in row.tolist())


def _jsonFallback(value: Any) -> Any:
    """``json.dumps`` hook: serialize date/time-like cell values as ISO strings.

    Anything without an ``isoformat`` method is still rejected, so unexpected
    types are not silently stringified.
    """
    isoformat = getattr(value, "isoformat", None)
    if callable(isoformat):
        return isoformat()
    raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")


async def parseExcelContent(self, parameters: Dict[str, Any]) -> ActionResult:
    """Parse an Excel document into a JSON document of row records.

    Parameters (keys of *parameters*):
        excelContent: Reference passed to ``self.documentParsing.getDocumentData``
            to obtain the workbook content (required).
        skipRows: Number of leading rows that are not data rows (default 3).
        hasCustomHeaders: When true (default) and the sheet has at least three
            rows, rows 0 and 1 are returned as "header1"/"header2" and row 2
            supplies the column names. Otherwise the row just before the data
            (``skipRows - 1``) supplies the column names when it exists.

    Returns:
        ``ActionResult.isSuccess`` with one JSON ``ActionDocument`` containing
        "data", "headers", "rowCount" and "columnCount", or
        ``ActionResult.isFailure`` with an error message. No exception escapes.
    """
    try:
        excelContentParam = parameters.get("excelContent")
        if not excelContentParam:
            return ActionResult.isFailure(error="excelContent parameter is required")

        try:
            skipRows = _coerceSkipRows(parameters.get("skipRows", _DEFAULT_SKIP_ROWS))
        except (TypeError, ValueError, OverflowError):
            return ActionResult.isFailure(error="skipRows must be a non-negative integer")
        hasCustomHeaders = parameters.get("hasCustomHeaders", True)

        # Get Excel content from document
        excelBytes = self.documentParsing.getDocumentData(excelContentParam)
        if excelBytes is None:
            return ActionResult.isFailure(error="Could not get Excel content from document reference")

        # Convert to bytes if needed
        if isinstance(excelBytes, str):
            # latin-1 maps code points 0-255 one-to-one onto bytes, so binary
            # data carried in a str survives the conversion.
            excelBytes = excelBytes.encode('latin-1')
        elif not isinstance(excelBytes, bytes):
            return ActionResult.isFailure(error="Excel content must be bytes or string")

        # Parse without a header row; header handling is done manually below.
        df = pd.read_excel(BytesIO(excelBytes), engine='openpyxl', header=None)

        headers = {"header1": "Header 1", "header2": "Header 2"}
        rawLabels = None
        if hasCustomHeaders and len(df) >= 3:
            # Layout: row 0 / row 1 = free-form headers, row 2 = column names.
            dfData = df.iloc[skipRows:].copy()
            rawLabels = df.iloc[2].tolist()
            headers = {
                "header1": _joinHeaderRow(df.iloc[0]),
                "header2": _joinHeaderRow(df.iloc[1]),
            }
        elif skipRows > 0:
            # No custom headers: the row right before the data names the columns.
            dfData = df.iloc[skipRows:].copy()
            if len(df) > skipRows:
                rawLabels = df.iloc[skipRows - 1].tolist()
        else:
            dfData = df.copy()

        if rawLabels is None:
            # Keep the positional labels pandas assigned.
            rawLabels = list(dfData.columns)
        # Unique string labels keep to_dict() from dropping columns and keep
        # json.dumps() from failing on non-primitive dict keys.
        dfData.columns = _uniqueColumnLabels(rawLabels)

        # Reset index and blank out missing cells for the whole frame at once.
        dfData = dfData.reset_index(drop=True).astype('object').fillna('')

        data = dfData.to_dict(orient='records')
        rowCount = len(data)
        columnCount = len(dfData.columns)
        logger.info("Parsed Excel: %s rows, %s columns", rowCount, columnCount)

        # Generate filename
        workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
        filename = self._generateMeaningfulFileName(
            "parsed_excel_data",
            "json",
            workflowContext,
            "parseExcelContent"
        )

        result = {
            "data": data,
            "headers": headers,
            "rowCount": rowCount,
            "columnCount": columnCount
        }

        validationMetadata = self._createValidationMetadata(
            "parseExcelContent",
            rowCount=rowCount,
            columnCount=columnCount,
            skipRows=skipRows
        )

        document = ActionDocument(
            documentName=filename,
            documentData=json.dumps(result, indent=2, ensure_ascii=False, default=_jsonFallback),
            mimeType="application/json",
            validationMetadata=validationMetadata
        )

        return ActionResult.isSuccess(documents=[document])

    except Exception as e:
        # Action boundary: report every failure as an ActionResult, but keep
        # the traceback in the log for diagnosis.
        errorMsg = f"Error parsing Excel content: {str(e)}"
        logger.exception(errorMsg)
        return ActionResult.isFailure(error=errorMsg)