# Copyright (c) 2025 Patrick Motsch
# All rights reserved.

import logging
import time
from typing import Any, Dict, List, Optional

from modules.datamodels.datamodelChat import ActionResult, ActionDocument
from modules.datamodels.datamodelDocref import DocumentReferenceList
from modules.datamodels.datamodelExtraction import (
    ContentExtracted,
    ContentPart,
    ExtractionOptions,
    MergeStrategy,
)

logger = logging.getLogger(__name__)

# Prompt used whenever the caller does not supply one (pure content extraction).
_DEFAULT_PROMPT = "Extract all content from the document"


def _defaultMergeStrategy() -> MergeStrategy:
    """Return the merge strategy applied when the caller supplies none."""
    return MergeStrategy(mergeType="concatenate", groupBy="typeGroup", orderBy="id")


def _coerceDocumentList(documentListParam: Any) -> Optional[DocumentReferenceList]:
    """Normalize the raw ``documentList`` parameter into a DocumentReferenceList.

    Accepts an existing DocumentReferenceList, a single reference string, or a
    list of reference strings. Returns None for any other type so the caller
    can report the invalid input.
    """
    if isinstance(documentListParam, DocumentReferenceList):
        return documentListParam
    if isinstance(documentListParam, str):
        return DocumentReferenceList.from_string_list([documentListParam])
    if isinstance(documentListParam, list):
        return DocumentReferenceList.from_string_list(documentListParam)
    return None


def _buildExtractionOptions(extractionOptionsParam: Any) -> ExtractionOptions:
    """Build ExtractionOptions from the raw parameter, falling back to defaults.

    A dict parameter is copied before defaults are filled in, so the caller's
    dict is never mutated. Any construction failure or unrecognized type is
    logged and replaced by the default options (individual processing, no AI
    post-processing beyond the default prompt).
    """
    extractionOptions: Optional[ExtractionOptions] = None
    if extractionOptionsParam:
        if isinstance(extractionOptionsParam, dict):
            # Copy so we never mutate the caller-supplied dict in place.
            payload = dict(extractionOptionsParam)
            payload.setdefault("prompt", _DEFAULT_PROMPT)
            payload.setdefault("mergeStrategy", _defaultMergeStrategy())
            try:
                extractionOptions = ExtractionOptions(**payload)
            except Exception as e:
                logger.warning(
                    "Failed to create ExtractionOptions from dict: %s, using defaults", e
                )
        elif isinstance(extractionOptionsParam, ExtractionOptions):
            extractionOptions = extractionOptionsParam
        else:
            logger.warning(
                "Invalid extractionOptions type: %s, using defaults",
                type(extractionOptionsParam),
            )
    if extractionOptions is None:
        extractionOptions = ExtractionOptions(
            prompt=_DEFAULT_PROMPT,
            mergeStrategy=_defaultMergeStrategy(),
            processDocumentsIndividually=True,
        )
    return extractionOptions


def _neutralizeParts(neutralizationService: Any, parts: List[Any]) -> List[Any]:
    """Return a new parts list with each text ``data`` field neutralized.

    Dict entries are first coerced to ContentPart; entries that cannot be
    coerced, have no data, or fail neutralization are kept unchanged so the
    operation is best-effort and never loses content.
    """
    neutralizedParts: List[Any] = []
    for part in parts:
        if not isinstance(part, ContentPart):
            if isinstance(part, dict):
                try:
                    part = ContentPart(**part)
                except Exception as e:
                    logger.warning("Could not parse ContentPart: %s", e)
                    neutralizedParts.append(part)
                    continue
            else:
                # Unknown part type: pass through untouched.
                neutralizedParts.append(part)
                continue
        if not part.data:
            # Nothing to neutralize; keep the original part.
            neutralizedParts.append(part)
            continue
        try:
            neutralizationResult = neutralizationService.processText(part.data)
            if neutralizationResult and "neutralized_text" in neutralizationResult:
                # Build a fresh ContentPart rather than mutating the original.
                neutralizedParts.append(
                    ContentPart(
                        id=part.id,
                        parentId=part.parentId,
                        label=part.label,
                        typeGroup=part.typeGroup,
                        mimeType=part.mimeType,
                        data=neutralizationResult["neutralized_text"],
                        metadata=part.metadata.copy() if part.metadata else {},
                    )
                )
            else:
                logger.warning(
                    "Neutralization did not return neutralized_text for part %s", part.id
                )
                neutralizedParts.append(part)
        except Exception as e:
            logger.error("Error neutralizing part %s: %s", part.id, e)
            # On error, keep the original part.
            neutralizedParts.append(part)
    return neutralizedParts


async def extractContent(self, parameters: Dict[str, Any]) -> ActionResult:
    """Extract content from the documents named in ``parameters["documentList"]``.

    Workflow: resolve the document references to ChatDocuments, run the
    extraction service (with hierarchical per-document progress logging),
    optionally neutralize the extracted text when the neutralization service
    is enabled, and wrap each ContentExtracted result in an ActionDocument.

    Parameters (keys of ``parameters``):
        documentList: DocumentReferenceList, str, or list[str] (required).
        extractionOptions: ExtractionOptions or dict (optional; defaults used
            when missing or invalid).
        parentOperationId: optional parent id for hierarchical progress logs.

    Returns:
        ActionResult with one ActionDocument per extracted result on success,
        or a failure ActionResult with an error message.
    """
    operationId = None
    try:
        workflowId = (
            self.services.workflow.id
            if self.services.workflow
            else f"no-workflow-{int(time.time())}"
        )
        operationId = f"context_extract_{workflowId}_{int(time.time())}"

        documentListParam = parameters.get("documentList")
        if not documentListParam:
            return ActionResult.isFailure(error="documentList is required")

        documentList = _coerceDocumentList(documentListParam)
        if documentList is None:
            return ActionResult.isFailure(
                error=f"Invalid documentList type: {type(documentListParam)}"
            )

        # Start progress tracking (optionally nested under a parent operation).
        parentOperationId = parameters.get("parentOperationId")
        self.services.chat.progressLogStart(
            operationId,
            "Extracting content from documents",
            "Content Extraction",
            f"Documents: {len(documentList.references)}",
            parentOperationId=parentOperationId,
        )

        # Resolve document references to concrete ChatDocuments.
        self.services.chat.progressLogUpdate(operationId, 0.2, "Loading documents")
        chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(documentList)
        if not chatDocuments:
            self.services.chat.progressLogFinish(operationId, False)
            return ActionResult.isFailure(error="No documents found in documentList")

        logger.info("Extracting content from %d documents", len(chatDocuments))

        self.services.chat.progressLogUpdate(
            operationId, 0.3, "Preparing extraction options"
        )
        extractionOptions = _buildExtractionOptions(parameters.get("extractionOptions"))

        # Run the extraction; operationId enables hierarchical per-document
        # progress logging inside the extraction service.
        self.services.chat.progressLogUpdate(operationId, 0.4, "Initiating")
        self.services.chat.progressLogUpdate(
            operationId, 0.5, f"Extracting content from {len(chatDocuments)} documents"
        )
        extractedResults = self.services.extraction.extractContent(
            chatDocuments, extractionOptions, operationId=operationId
        )

        # Check whether neutralization should be applied automatically.
        # bool(...) so validationMetadata never carries None instead of False.
        neutralizationEnabled = False
        try:
            config = self.services.neutralization.getConfig()
            neutralizationEnabled = bool(config and config.enabled)
        except Exception as e:
            logger.debug("Could not check neutralization config: %s", e)

        # Dynamic mode: neutralize after extraction, before any AI processing.
        if neutralizationEnabled:
            self.services.chat.progressLogUpdate(
                operationId, 0.7, "Neutralizing extracted data"
            )
            logger.info("Neutralization enabled - neutralizing extracted content data")
            for extracted in extractedResults:
                if extracted.parts:
                    extracted.parts = _neutralizeParts(
                        self.services.neutralization, extracted.parts
                    )
                    logger.info("Neutralized %d content parts", len(extracted.parts))

        # Wrap each ContentExtracted result in an ActionDocument. Results are
        # in the same order as chatDocuments, so map back by index.
        self.services.chat.progressLogUpdate(
            operationId, 0.8, "Building result documents"
        )
        actionDocuments = []
        for i, extracted in enumerate(extractedResults):
            originalDoc = chatDocuments[i] if i < len(chatDocuments) else None
            originalFileName = getattr(originalDoc, "fileName", None) if originalDoc else None
            if originalFileName:
                # Original filename (extension stripped) plus an "_extracted_<id>"
                # suffix; rsplit returns the whole name when no '.' is present.
                baseName = originalFileName.rsplit(".", 1)[0]
                documentName = f"{baseName}_extracted_{extracted.id}.json"
            else:
                # Fallback to a generic, index-based name.
                documentName = f"document_{i+1:03d}_extracted_{extracted.id}.json"

            validationMetadata = {
                "actionType": "context.extractContent",
                "documentIndex": i,
                "extractedId": extracted.id,
                "partCount": len(extracted.parts) if extracted.parts else 0,
                "neutralized": neutralizationEnabled,
                "originalFileName": originalFileName,
            }
            actionDocuments.append(
                ActionDocument(
                    documentName=documentName,
                    documentData=extracted,  # ContentExtracted object
                    mimeType="application/json",
                    validationMetadata=validationMetadata,
                )
            )

        self.services.chat.progressLogFinish(operationId, True)
        return ActionResult.isSuccess(documents=actionDocuments)

    except Exception as e:
        # logger.exception preserves the traceback in the log record.
        logger.exception("Error in content extraction: %s", e)
        if operationId:
            # Best effort: never let progress-log cleanup mask the real error.
            try:
                self.services.chat.progressLogFinish(operationId, False)
            except Exception:
                pass
        return ActionResult.isFailure(error=str(e))