# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
from typing import List
import logging

from modules.datamodels.datamodelExtraction import ContentExtracted, ContentPart, ExtractionOptions, MergeStrategy
from modules.datamodels.datamodelUdm import applyUdmOutputDetail
from .subUtils import makeId
from .subRegistry import ExtractorRegistry, ChunkerRegistry

logger = logging.getLogger(__name__)


# REMOVED: _mergeParts function - unused, functionality replaced by applyMerging in interfaceAiObjects.py


def runExtraction(
    extractorRegistry: ExtractorRegistry,
    chunkerRegistry: ChunkerRegistry,
    documentBytes: bytes,
    fileName: str,
    mimeType: str,
    options: ExtractionOptions,
) -> ContentExtracted:
    """Run the extractor registered for a document and wrap the result.

    The extractor is looked up via ``extractorRegistry.resolve(mimeType, fileName)``.
    When none is registered, a single placeholder "binary" part with empty data
    and a warning in its metadata is returned instead of raising.

    Args:
        extractorRegistry: Registry used to resolve the extractor.
        chunkerRegistry: Not referenced by this function (chunking is no longer
            done here); kept in the signature for existing callers.
        documentBytes: Raw document content handed to the extractor.
        fileName: File name, used for extractor resolution and passed to the
            extractor in its context dict.
        mimeType: MIME type, used for extractor resolution and passed to the
            extractor in its context dict.
        options: Extraction options; this function reads ``lazyContainer``,
            ``mergeStrategy``, ``outputFormat`` and ``outputDetail``.

    Returns:
        A ``ContentExtracted`` holding the (optionally merged) parts. Its
        ``udm`` attribute is additionally set when ``options.outputFormat``
        is ``"udm"`` or ``"both"``.
    """
    resolved = extractorRegistry.resolve(mimeType, fileName)

    if resolved is None:
        # No extractor available: hand back one opaque binary placeholder part.
        fallbackMime = mimeType if mimeType else "application/octet-stream"
        placeholder = ContentPart(
            id=makeId(),
            parentId=None,
            label="file",
            typeGroup="binary",
            mimeType=fallbackMime,
            data="",
            metadata={"warning": "No extractor registered"},
        )
        return ContentExtracted(id=makeId(), parts=[placeholder])

    # Context dict passed to the extractor (and reused for the UDM call below).
    context = {
        "fileName": fileName,
        "mimeType": mimeType,
        "lazyContainer": options.lazyContainer,
    }
    extractedParts = resolved.extract(documentBytes, context)

    # Pooling/limiting and chunking are no longer performed in this function.

    # Merge only when a (truthy) strategy is configured.
    strategy = options.mergeStrategy
    if strategy:
        # Imported at call time rather than at module top level.
        from .mainServiceExtraction import applyMerging
        extractedParts = applyMerging(extractedParts, strategy)

    extractionId = makeId()
    result = ContentExtracted(id=extractionId, parts=extractedParts)

    if options.outputFormat not in ("udm", "both"):
        return result

    # UDM output requested: reuse the already extracted parts and tag the
    # context with this extraction's id.
    udmContext = dict(context, extractionId=extractionId)
    rawUdm = resolved.extractToUdm(
        documentBytes,
        udmContext,
        precomputedParts=extractedParts,
    )
    result.udm = applyUdmOutputDetail(rawUdm, options.outputDetail)
    return result


# REMOVED: poolAndLimit function - chunking now handled in AI call phase
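# REMOVED: applyMerging function - moved to interfaceAiObjects.py for proper interface-level access
# NOTE(review): runExtraction above imports applyMerging from .mainServiceExtraction - confirm which location is current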