diff --git a/modules/services/serviceExtraction/chunking/image_chunker.py b/modules/services/serviceExtraction/chunking/chunkerImage.py similarity index 100% rename from modules/services/serviceExtraction/chunking/image_chunker.py rename to modules/services/serviceExtraction/chunking/chunkerImage.py diff --git a/modules/services/serviceExtraction/chunking/structure_chunker.py b/modules/services/serviceExtraction/chunking/chunkerStructure.py similarity index 100% rename from modules/services/serviceExtraction/chunking/structure_chunker.py rename to modules/services/serviceExtraction/chunking/chunkerStructure.py diff --git a/modules/services/serviceExtraction/chunking/table_chunker.py b/modules/services/serviceExtraction/chunking/chunkerTable.py similarity index 100% rename from modules/services/serviceExtraction/chunking/table_chunker.py rename to modules/services/serviceExtraction/chunking/chunkerTable.py diff --git a/modules/services/serviceExtraction/chunking/text_chunker.py b/modules/services/serviceExtraction/chunking/chunkerText.py similarity index 100% rename from modules/services/serviceExtraction/chunking/text_chunker.py rename to modules/services/serviceExtraction/chunking/chunkerText.py diff --git a/modules/services/serviceExtraction/formats/__init__.py b/modules/services/serviceExtraction/extractors/__init__.py similarity index 100% rename from modules/services/serviceExtraction/formats/__init__.py rename to modules/services/serviceExtraction/extractors/__init__.py diff --git a/modules/services/serviceExtraction/formats/binary_extractor.py b/modules/services/serviceExtraction/extractors/extractorBinary.py similarity index 100% rename from modules/services/serviceExtraction/formats/binary_extractor.py rename to modules/services/serviceExtraction/extractors/extractorBinary.py diff --git a/modules/services/serviceExtraction/formats/csv_extractor.py b/modules/services/serviceExtraction/extractors/extractorCsv.py similarity index 100% rename from modules/services/serviceExtraction/formats/csv_extractor.py rename to modules/services/serviceExtraction/extractors/extractorCsv.py diff --git a/modules/services/serviceExtraction/formats/docx_extractor.py b/modules/services/serviceExtraction/extractors/extractorDocx.py similarity index 100% rename from modules/services/serviceExtraction/formats/docx_extractor.py rename to modules/services/serviceExtraction/extractors/extractorDocx.py diff --git a/modules/services/serviceExtraction/formats/html_extractor.py b/modules/services/serviceExtraction/extractors/extractorHtml.py similarity index 100% rename from modules/services/serviceExtraction/formats/html_extractor.py rename to modules/services/serviceExtraction/extractors/extractorHtml.py diff --git a/modules/services/serviceExtraction/formats/image_extractor.py b/modules/services/serviceExtraction/extractors/extractorImage.py similarity index 100% rename from modules/services/serviceExtraction/formats/image_extractor.py rename to modules/services/serviceExtraction/extractors/extractorImage.py diff --git a/modules/services/serviceExtraction/formats/json_extractor.py b/modules/services/serviceExtraction/extractors/extractorJson.py similarity index 100% rename from modules/services/serviceExtraction/formats/json_extractor.py rename to modules/services/serviceExtraction/extractors/extractorJson.py diff --git a/modules/services/serviceExtraction/formats/pdf_extractor.py b/modules/services/serviceExtraction/extractors/extractorPdf.py similarity index 100% rename from modules/services/serviceExtraction/formats/pdf_extractor.py rename to modules/services/serviceExtraction/extractors/extractorPdf.py diff --git a/modules/services/serviceExtraction/formats/pptx_extractor.py b/modules/services/serviceExtraction/extractors/extractorPptx.py similarity index 100% rename from modules/services/serviceExtraction/formats/pptx_extractor.py rename to modules/services/serviceExtraction/extractors/extractorPptx.py diff --git a/modules/services/serviceExtraction/formats/text_extractor.py b/modules/services/serviceExtraction/extractors/extractorText.py similarity index 100% rename from modules/services/serviceExtraction/formats/text_extractor.py rename to modules/services/serviceExtraction/extractors/extractorText.py diff --git a/modules/services/serviceExtraction/formats/xlsx_extractor.py b/modules/services/serviceExtraction/extractors/extractorXlsx.py similarity index 100% rename from modules/services/serviceExtraction/formats/xlsx_extractor.py rename to modules/services/serviceExtraction/extractors/extractorXlsx.py diff --git a/modules/services/serviceExtraction/formats/xml_extractor.py b/modules/services/serviceExtraction/extractors/extractorXml.py similarity index 100% rename from modules/services/serviceExtraction/formats/xml_extractor.py rename to modules/services/serviceExtraction/extractors/extractorXml.py diff --git a/modules/services/serviceExtraction/merging/default_merger.py b/modules/services/serviceExtraction/merging/mergerDefault.py similarity index 100% rename from modules/services/serviceExtraction/merging/default_merger.py rename to modules/services/serviceExtraction/merging/mergerDefault.py diff --git a/modules/services/serviceExtraction/merging/table_merger.py b/modules/services/serviceExtraction/merging/mergerTable.py similarity index 100% rename from modules/services/serviceExtraction/merging/table_merger.py rename to modules/services/serviceExtraction/merging/mergerTable.py diff --git a/modules/services/serviceExtraction/merging/text_merger.py b/modules/services/serviceExtraction/merging/mergerText.py similarity index 100% rename from modules/services/serviceExtraction/merging/text_merger.py rename to modules/services/serviceExtraction/merging/mergerText.py diff --git a/modules/services/serviceExtraction/intelligent_merger.py b/modules/services/serviceExtraction/subMerger.py similarity index 100% rename from modules/services/serviceExtraction/intelligent_merger.py rename to modules/services/serviceExtraction/subMerger.py diff --git a/modules/services/serviceExtraction/subPipeline.py b/modules/services/serviceExtraction/subPipeline.py index c3833fa7..515fd293 100644 --- a/modules/services/serviceExtraction/subPipeline.py +++ b/modules/services/serviceExtraction/subPipeline.py @@ -5,10 +5,10 @@ import os from modules.datamodels.datamodelExtraction import ContentExtracted, ContentPart from .subUtils import makeId from .subRegistry import ExtractorRegistry, ChunkerRegistry -from .merging.text_merger import TextMerger -from .merging.table_merger import TableMerger -from .merging.default_merger import DefaultMerger -from .intelligent_merger import IntelligentTokenAwareMerger +from .merging.mergerText import TextMerger +from .merging.mergerTable import TableMerger +from .merging.mergerDefault import DefaultMerger +from .subMerger import IntelligentTokenAwareMerger logger = logging.getLogger(__name__) @@ -248,13 +248,13 @@ def _applyMerging(parts: List[ContentPart], strategy: Dict[str, Any]) -> List[Co # Check if intelligent merging is enabled if strategy.get("useIntelligentMerging", False): model_capabilities = strategy.get("modelCapabilities", {}) - intelligent_merger = IntelligentTokenAwareMerger(model_capabilities) + subMerger = IntelligentTokenAwareMerger(model_capabilities) # Use intelligent merging for all parts - merged = intelligent_merger.merge_chunks_intelligently(parts, strategy.get("prompt", "")) + merged = subMerger.merge_chunks_intelligently(parts, strategy.get("prompt", "")) # Calculate and log optimization stats - stats = intelligent_merger.calculate_optimization_stats(parts, merged) + stats = subMerger.calculate_optimization_stats(parts, merged) logger.info(f"🧠 Intelligent merging stats: {stats}") print(f"🔍 DEBUG: Intelligent merging: {stats['original_ai_calls']} → {stats['optimized_ai_calls']} calls ({stats['reduction_percent']}% reduction)") diff --git a/modules/services/serviceExtraction/subRegistry.py b/modules/services/serviceExtraction/subRegistry.py index a6bd3445..7f4b9c11 100644 --- a/modules/services/serviceExtraction/subRegistry.py +++ b/modules/services/serviceExtraction/subRegistry.py @@ -22,17 +22,17 @@ class ExtractorRegistry: self._fallback: Optional[Extractor] = None # Register built-ins try: - from .formats.text_extractor import TextExtractor - from .formats.csv_extractor import CsvExtractor - from .formats.json_extractor import JsonExtractor - from .formats.xml_extractor import XmlExtractor - from .formats.html_extractor import HtmlExtractor - from .formats.pdf_extractor import PdfExtractor - from .formats.docx_extractor import DocxExtractor - from .formats.xlsx_extractor import XlsxExtractor - from .formats.pptx_extractor import PptxExtractor - from .formats.image_extractor import ImageExtractor - from .formats.binary_extractor import BinaryExtractor + from .extractors.extractorText import TextExtractor + from .extractors.extractorCsv import CsvExtractor + from .extractors.extractorJson import JsonExtractor + from .extractors.extractorXml import XmlExtractor + from .extractors.extractorHtml import HtmlExtractor + from .extractors.extractorPdf import PdfExtractor + from .extractors.extractorDocx import DocxExtractor + from .extractors.extractorXlsx import XlsxExtractor + from .extractors.extractorPptx import PptxExtractor + from .extractors.extractorImage import ImageExtractor + from .extractors.extractorBinary import BinaryExtractor self.register("text/plain", TextExtractor()) self.register("text/markdown", TextExtractor()) self.register("text/csv", CsvExtractor()) @@ -93,10 +93,10 @@ class ChunkerRegistry: self._noop = Chunker() # Register default chunkers try: - from .chunking.text_chunker import TextChunker - from .chunking.table_chunker import TableChunker - from .chunking.structure_chunker import StructureChunker - from .chunking.image_chunker import ImageChunker + from .chunking.chunkerText import TextChunker + from .chunking.chunkerTable import TableChunker + from .chunking.chunkerStructure import StructureChunker + from .chunking.chunkerImage import ImageChunker self.register("text", TextChunker()) self.register("table", TableChunker()) self.register("structure", StructureChunker()) diff --git a/modules/services/serviceGeneration/renderers/rendererExcel.py b/modules/services/serviceGeneration/renderers/rendererXlsx.py similarity index 99% rename from modules/services/serviceGeneration/renderers/rendererExcel.py rename to modules/services/serviceGeneration/renderers/rendererXlsx.py index 6ea4ff32..9885988d 100644 --- a/modules/services/serviceGeneration/renderers/rendererExcel.py +++ b/modules/services/serviceGeneration/renderers/rendererXlsx.py @@ -17,7 +17,7 @@ try: except ImportError: OPENPYXL_AVAILABLE = False -class RendererExcel(BaseRenderer): +class RendererXlsx(BaseRenderer): """Renders content to Excel format using openpyxl.""" @classmethod