# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
from typing import Any, Dict, List

from modules.datamodels.datamodelExtraction import ContentPart
from ..subRegistry import Chunker


class TableChunker(Chunker):
    """Chunker that groups the lines of a part's text into size-bounded chunks.

    Lines are never split: the text is cut only at ``'\\n'`` boundaries.
    """

    def chunk(self, part: ContentPart, options: Dict[str, Any]) -> list[Dict[str, Any]]:
        """Split ``part.data`` into line-aligned chunks bounded by a byte budget.

        Args:
            part: Content part whose ``data`` attribute is split on ``'\\n'``.
            options: Chunking options. ``"tableChunkSize"`` is the byte budget
                per chunk; it is passed through ``int()`` and defaults to
                40000 when the key is absent.

        Returns:
            A list of dicts, one per chunk, each with the keys:
            ``"data"`` (the chunk's lines joined with ``'\\n'``),
            ``"size"`` (UTF-8 byte length of ``"data"``) and
            ``"order"`` (zero-based position of the chunk in the result).

        Notes:
            * Every line is charged its UTF-8 length plus one byte for a
              newline, including the last line of a chunk, so the budget is
              slightly conservative relative to the joined text.
            * A line that alone exceeds the budget is still emitted, as a
              chunk of its own, rather than being cut.
            * An empty ``part.data`` string yields a single chunk whose
              ``"data"`` is empty.
        """
        byteLimit = int(options.get("tableChunkSize", 40000))
        result: List[Dict[str, Any]] = []

        def emit(lines: List[str]) -> None:
            # "order" is taken before the append, so it equals the new index.
            text = '\n'.join(lines)
            result.append({"data": text, "size": len(text.encode('utf-8')), "order": len(result)})

        pending: List[str] = []
        pendingBytes = 0
        for row in part.data.split('\n'):
            # +1 accounts for the newline that joins this row to its chunk.
            rowBytes = len(row.encode('utf-8')) + 1

            # An empty chunk always accepts the row, even if it is oversized.
            if not pending or pendingBytes + rowBytes <= byteLimit:
                pending.append(row)
                pendingBytes += rowBytes
                continue

            # Budget exceeded: close the current chunk and start a new one
            # with this row.
            emit(pending)
            pending, pendingBytes = [row], rowBytes

        if pending:
            emit(pending)
        return result