# Copyright (c) 2025 Patrick Motsch
# All rights reserved.

import logging
import json
import io
import pandas as pd
from typing import Dict, Any
from modules.datamodels.datamodelChatbot import ActionResult, ActionDocument

logger = logging.getLogger(__name__)


async def parseCsvContent(self, parameters: Dict[str, Any]) -> ActionResult:
    """Parse a CSV document into JSON records and return them as a document.

    Args:
        parameters: Action parameters. Recognised keys:
            - "csvContent" (required): reference handed to
              ``self.documentParsing.getDocumentData`` to fetch the CSV
              payload (must resolve to ``bytes`` or ``str``).
            - "skipRows" (default 2): forwarded to ``pandas.read_csv`` as
              ``skiprows``.
            - "hasCustomHeaders" (default True): when truthy, the first two
              physical lines of the payload are captured verbatim as
              ``header1`` / ``header2`` in the result.

    Returns:
        A successful ``ActionResult`` holding a single JSON
        ``ActionDocument`` with the keys ``data`` (list of row dicts),
        ``headers``, ``rowCount`` and ``columnCount``; or a failure
        ``ActionResult`` carrying an error message. Exceptions are not
        propagated to the caller.
    """
    try:
        csvContentParam = parameters.get("csvContent")
        if not csvContentParam:
            return ActionResult.isFailure(error="csvContent parameter is required")

        skipRows = parameters.get("skipRows", 2)
        hasCustomHeaders = parameters.get("hasCustomHeaders", True)

        # Get CSV content from document
        csvBytes = self.documentParsing.getDocumentData(csvContentParam)
        if csvBytes is None:
            return ActionResult.isFailure(error="Could not get CSV content from document reference")

        # Normalise to bytes so header parsing and pandas see the same payload
        if isinstance(csvBytes, str):
            csvBytes = csvBytes.encode('utf-8')
        elif not isinstance(csvBytes, bytes):
            return ActionResult.isFailure(error="CSV content must be bytes or string")

        # Capture the two leading lines as free-form headers when requested
        headers = {"header1": "Header 1", "header2": "Header 2"}
        if hasCustomHeaders:
            # 'utf-8-sig' drops a leading byte-order mark (if any) so it does
            # not end up inside header1; input without a BOM decodes the same
            # as plain UTF-8. Only the first two lines are needed, so the
            # split is capped instead of tokenising the whole payload.
            csvLines = csvBytes.decode('utf-8-sig').split('\n', 2)
            if len(csvLines) >= 2:
                headers["header1"] = csvLines[0].rstrip('\r\n')
                headers["header2"] = csvLines[1].rstrip('\r\n')

        # Parse CSV data
        df = pd.read_csv(
            io.BytesIO(csvBytes),
            skiprows=skipRows,
            quoting=1,  # numeric value of csv.QUOTE_ALL
            escapechar='\\',
            on_bad_lines='skip',  # malformed rows are dropped, not fatal
            engine='python'
        )

        # Convert to dict records; missing values become empty strings so the
        # JSON output never contains NaN
        for column in df.columns:
            df[column] = df[column].astype('object').fillna('')

        data = df.to_dict(orient='records')
        rowCount = len(data)
        columnCount = len(df.columns)

        logger.info("Parsed CSV: %s rows, %s columns", rowCount, columnCount)

        # Generate filename
        workflowContext = self.services.chat.getWorkflowContext() if hasattr(self.services, 'chat') else None
        filename = self._generateMeaningfulFileName(
            "parsed_csv_data", "json", workflowContext, "parseCsvContent"
        )

        result = {
            "data": data,
            "headers": headers,
            "rowCount": rowCount,
            "columnCount": columnCount
        }

        validationMetadata = self._createValidationMetadata(
            "parseCsvContent",
            rowCount=rowCount,
            columnCount=columnCount,
            skipRows=skipRows
        )

        document = ActionDocument(
            documentName=filename,
            documentData=json.dumps(result, indent=2, ensure_ascii=False),
            mimeType="application/json",
            validationMetadata=validationMetadata
        )

        return ActionResult.isSuccess(documents=[document])

    except Exception as e:
        # Action boundary: report every failure as an ActionResult, but keep
        # the traceback in the log for diagnosis.
        errorMsg = f"Error parsing CSV content: {str(e)}"
        logger.exception(errorMsg)
        return ActionResult.isFailure(error=errorMsg)