"""
Document processing method module.
Handles document operations using the document service.
"""

import logging
import os
from typing import Dict, Any, List, Optional
from datetime import datetime, UTC

from modules.workflows.methods.methodBase import MethodBase, action
from modules.datamodels.datamodelWorkflow import ActionResult, ChatDocument
from modules.datamodels.datamodelAi import AiCallOptions, OperationType, Priority

logger = logging.getLogger(__name__)

class MethodDocument(MethodBase):
    """Document method implementation for document operations"""
    
    def __init__(self, services):
        """Initialize the document method"""
        super().__init__(services)
        self.name = "document"
        self.description = "Handle document operations like extraction and analysis"

    def _format_timestamp_for_filename(self) -> str:
        """Format current timestamp as YYYYMMDD-hhmmss for filenames."""
        return datetime.now(UTC).strftime("%Y%m%d-%H%M%S")

    @action
    async def extract(self, parameters: Dict[str, Any]) -> ActionResult:
        """
        Extract content from any document using AI prompt.
        
        Parameters:
            documentList (list): Document list reference(s) - List of document references to extract content from
            prompt (str): AI prompt for extraction - Specific prompt describing what content to extract and how to process it
            operationType (str, optional): Type of operation - Use 'extract_content', 'analyze_document', 'summarize_content', etc. (default: 'extract_content')
            processDocumentsIndividually (bool, optional): Process each document separately - Set to True for individual processing, False for batch processing (default: True)
            chunkAllowed (bool, optional): Allow content chunking - Set to True to allow AI service to chunk large content, False to process as-is (default: True)
            mergeStrategy (dict, optional): Strategy for merging results - Specify how to merge chunked content: groupBy, orderBy, mergeType (default: concatenate)
            expectedDocumentFormats (list, optional): Expected output formats - List of format specifications with extension, mimeType, description
            includeMetadata (bool, optional): Include document metadata - Set to True to include file metadata in results (default: True)
        """
        try:
            documentList = parameters.get("documentList")
            if isinstance(documentList, str):
                documentList = [documentList]
            prompt = parameters.get("prompt")
            operationType = parameters.get("operationType", "extract_content")
            processDocumentsIndividually = parameters.get("processDocumentsIndividually", True)
            chunkAllowed = parameters.get("chunkAllowed", True)
            mergeStrategy = parameters.get("mergeStrategy", {
                "groupBy": "typeGroup",
                "orderBy": "id",
                "mergeType": "concatenate"
            })
            expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
            includeMetadata = parameters.get("includeMetadata", True)
            
            if not documentList:
                return ActionResult.isFailure(
                    error="Document list reference is required"
                )
            
            if not prompt:
                return ActionResult.isFailure(
                    error="Prompt is required"
                )
            
            chatDocuments = self.services.workflow.getChatDocumentsFromDocumentList(documentList)
            if not chatDocuments:
                return ActionResult.isFailure(
                    error="No documents found for the provided reference"
                )
            
            # Use new extraction service with ChatDocument objects
            try:
                # Build extraction options directly from AI planner parameters
                extraction_options = {
                    "prompt": prompt,
                    "operationType": operationType,
                    "processDocumentsIndividually": processDocumentsIndividually,
                    "chunkAllowed": chunkAllowed,
                    "mergeStrategy": mergeStrategy
                }
                
                # Add optional parameters if provided by AI planner
                if expectedDocumentFormats:
                    extraction_options["expectedDocumentFormats"] = expectedDocumentFormats
                
                if not includeMetadata:
                    extraction_options["includeMetadata"] = False
                
                # Use new extraction service API
                all_extracted_content = self.services.extraction.extractContent(
                    documents=chatDocuments,
                    options=extraction_options
                )
                
                logger.info(f"Extraction completed: {len(all_extracted_content)} documents processed")
                
            except Exception as e:
                logger.error(f"Extraction failed: {str(e)}")
                all_extracted_content = []
            
            if not all_extracted_content:
                return ActionResult.isFailure(
                    error="No content could be extracted from any documents"
                )
            
            # Process each document individually with its own format conversion
            output_documents = []
            
            for i, chatDocument in enumerate(chatDocuments):
                # Extract text content from this document using new ExtractedContent structure
                text_content = ""
                try:
                    ec = all_extracted_content[i] if i < len(all_extracted_content) else None
                    if ec and hasattr(ec, 'parts'):
                        text_parts = []
                        for part in ec.parts:
                            try:
                                if part.typeGroup in ("text", "table", "structure") and part.data:
                                    text_parts.append(part.data)
                            except Exception:
                                continue
                        text_content = "\n".join(text_parts)
                    else:
                        text_content = ""
                except Exception:
                    text_content = ""
                
                # Get the expected format for this document (or use default)
                target_format = None
                if expectedDocumentFormats and i < len(expectedDocumentFormats):
                    target_format = expectedDocumentFormats[i]
                elif expectedDocumentFormats and len(expectedDocumentFormats) > 0:
                    # If fewer formats than documents, use the last format for remaining documents
                    target_format = expectedDocumentFormats[-1]
                
                # Determine output format and fileName
                if target_format:
                    target_extension = target_format.get("extension", ".txt")
                    target_mime_type = target_format.get("mimeType", "text/plain")
                    
                    # Check if format conversion is needed
                    if target_extension not in [".txt", ".text"] or target_mime_type != "text/plain":
                        logger.info(f"Converting document {i+1} to format: {target_extension} ({target_mime_type})")
                        # Use AI to convert format
                        formatted_content = await self._convertContentToFormat(text_content, target_format)
                        final_content = formatted_content
                        final_mime_type = target_mime_type
                        final_extension = target_extension
                    else:
                        logger.info(f"Document {i+1}: No format conversion needed, using plain text")
                        final_content = text_content
                        final_mime_type = "text/plain"
                        final_extension = ".txt"
                else:
                    logger.info(f"Document {i+1}: No expected format specified, using plain text")
                    final_content = text_content
                    final_mime_type = "text/plain"
                    final_extension = ".txt"
                
                # Create output fileName based on original fileName and target format
                original_fileName = chatDocument.fileName
                base_name = original_fileName.rsplit('.', 1)[0] if '.' in original_fileName else original_fileName
                output_fileName = f"{base_name}_extracted_{self._format_timestamp_for_filename()}{final_extension}"
                
                # Create result data for this document using new structure
                result_data = {
                    "result": final_content,
                    "fileName": output_fileName,
                    "processedDocuments": 1,
                    "comment": f"Extracted from {original_fileName} using AI prompt"
                }
                
                logger.info(f"Created output document: {output_fileName} with {len(final_content)} characters")
                
                output_documents.append({
                    "documentName": output_fileName,
                    "documentData": result_data,
                    "mimeType": final_mime_type
                })
            
            return ActionResult.isSuccess(
                documents=output_documents
            )
        except Exception as e:
            logger.error(f"Error extracting content: {str(e)}")
            return ActionResult.isFailure(
                error=str(e)
            )

    @action
    async def generate(self, parameters: Dict[str, Any]) -> ActionResult:
        """
        Convert TEXT-ONLY documents to target formats (NO AI usage).
        
        Parameters:
            documentList (list): TEXT-ONLY documents only
            expectedDocumentFormats (list): Target formats
            originalDocuments (list, optional): Original names
            includeMetadata (bool, optional): Include metadata (default: True)
            mergeDocuments (bool, optional): Merge all documents into single output (default: False)
        """
        try:
            document_list = parameters.get("documentList", [])
            if isinstance(document_list, str):
                document_list = [document_list]
            expected_document_formats = parameters.get("expectedDocumentFormats", [])
            original_documents = parameters.get("originalDocuments", [])
            include_metadata = parameters.get("includeMetadata", True)
            merge_documents = parameters.get("mergeDocuments", False)
            
            if not document_list:
                return ActionResult.isFailure(
                    error="Document list is required for generation"
                )
            
            if not expected_document_formats or len(expected_document_formats) == 0:
                return ActionResult.isFailure(
                    error="Expected document formats specification is required"
                )
            
            # Get chat documents for original documents list
            chat_documents = self.services.workflow.getChatDocumentsFromDocumentList(document_list)
            logger.info(f"Found {len(chat_documents)} chat documents")
            
            if not chat_documents:
                return ActionResult.isFailure(
                    error="No documents found for the provided documentList reference"
                )
            
            # Update original documents list if not provided
            if not original_documents:
                original_documents = [doc.fileName if hasattr(doc, 'fileName') else str(doc.id) for doc in chat_documents]
            
            # Extract content from all documents first
            document_contents = []
            for i, chat_document in enumerate(chat_documents):
                # Extract content from this document directly - NO AI, just read the data as-is
                # This ensures we get the original text content for format conversion
                content = ""
                if hasattr(chat_document, 'fileId') and chat_document.fileId:
                    try:
                        # Get file data directly without AI processing
                        file_data = self.services.workflow.getFileData(chat_document.fileId)
                        if file_data:
                            # Check if it's text data and convert to string
                            if isinstance(file_data, bytes):
                                try:
                                    # Try to decode as UTF-8 to check if it's text
                                    content = file_data.decode('utf-8')
                                    logger.info(f"Document {i+1} ({chat_document.fileName}): Successfully decoded as UTF-8 text")
                                except UnicodeDecodeError:
                                    logger.info(f"Document {i+1} ({chat_document.fileName}): Binary data, not text - skipping")
                                    continue
                            else:
                                # Already a string
                                content = str(file_data)
                                logger.info(f"Document {i+1} ({chat_document.fileName}): Already text data")
                        else:
                            logger.warning(f"Document {i+1} ({chat_document.fileName}): No file data found")
                            continue
                        
                        if not content.strip():
                            logger.info(f"Document {i+1} ({chat_document.fileName}): Empty text content, skipping")
                            continue
                            
                    except Exception as e:
                        logger.warning(f"Error reading document {i+1} ({chat_document.fileName}): {str(e)}")
                        continue
                else:
                    logger.warning(f"Document {i+1} has no fileId, skipping")
                    continue
                
                logger.info(f"Extracted content from document {i+1}: {len(content)} characters")
                
                document_contents.append({
                    "document": chat_document,
                    "content": content,
                    "index": i,
                    "original_name": original_documents[i] if i < len(original_documents) else f"document_{i+1}"
                })
            
            if not document_contents:
                return ActionResult.isFailure(
                    error="No valid text content could be extracted from any documents"
                )
            
            if merge_documents and len(document_contents) > 1:
                # Merge all documents into single output
                logger.info("Merging all documents into single output")
                return await self._mergeDocuments(document_contents, expected_document_formats, include_metadata)
            else:
                # Process each document individually with its own format conversion
                logger.info("Processing documents individually")
                output_documents = []
            
                for item in document_contents:
                    chat_document = item["document"]
                    content = item["content"]
                    i = item["index"]
                    original_name = item["original_name"]
                    
                    # Get the expected format for this document (or use default)
                    target_format = None
                    if i < len(expected_document_formats):
                        target_format = expected_document_formats[i]
                    elif len(expected_document_formats) > 0:
                        # If fewer formats than documents, use the last format for remaining documents
                        target_format = expected_document_formats[-1]
                    
                    if not target_format:
                        logger.warning(f"No expected format for document {i+1}, skipping")
                        continue
                    
                    # Use AI to convert format
                    formatted_content = await self._convertContentToFormat(content, target_format)
                    if not formatted_content:
                        logger.warning(f"Failed to format document {i+1}, skipping")
                        continue
                    
                    target_extension = target_format.get("extension", ".txt")
                    target_mime_type = target_format.get("mimeType", "text/plain")
                    
                    # Create output fileName
                    base_name = original_name.rsplit('.', 1)[0] if '.' in original_name else original_name
                    output_fileName = f"{base_name}_generated_{self._format_timestamp_for_filename()}{target_extension}"
                    
                    # Create result data using new structure
                    result_data = {
                        "result": formatted_content,
                        "fileName": output_fileName,
                        "processedDocuments": 1,
                        "comment": f"Generated from {original_name} in {target_extension} format"
                    }
                    
                    logger.info(f"Generated document: {output_fileName} with {len(formatted_content)} characters")
                    
                    output_documents.append({
                        "documentName": output_fileName,
                        "documentData": result_data,
                        "mimeType": target_mime_type
                    })
            
            if not output_documents:
                return ActionResult.isFailure(
                    error="No documents could be generated"
                )
            
            return ActionResult.isSuccess(
                documents=output_documents
            )
        except Exception as e:
            logger.error(f"Error generating document: {str(e)}")
            return ActionResult.isFailure(
                error=str(e)
            )

    async def _mergeDocuments(self, document_contents: List[Dict[str, Any]], 
                            expected_document_formats: List[Dict[str, Any]], 
                            include_metadata: bool) -> ActionResult:
        """
        Merge all documents into a single output document.
        """
        try:
            # Combine all document content
            combined_content_parts = []
            original_file_names = []
            
            for item in document_contents:
                chat_document = item["document"]
                content = item["content"]
                original_name = item["original_name"]
                
                if content.strip():
                    combined_content_parts.append(f"=== Document: {original_name} ===\n{content}\n")
                    original_file_names.append(original_name)
            
            if not combined_content_parts:
                return ActionResult.isFailure(
                    error="No content could be extracted from any documents for merging"
                )
            
            # Combine all content
            combined_content = "\n".join(combined_content_parts)
            logger.info(f"Combined content from {len(original_file_names)} documents: {len(combined_content)} characters")
            
            # Get the expected format for the merged output
            target_format = None
            if expected_document_formats and len(expected_document_formats) > 0:
                target_format = expected_document_formats[0]  # Use first format for merged output
            
            if not target_format:
                logger.warning("No expected format specified for merged output, using plain text")
                target_format = {"extension": ".txt", "mimeType": "text/plain"}
            
            # Use AI to convert format
            formatted_content = await self._convertContentToFormat(combined_content, target_format)
            if not formatted_content:
                logger.warning("Failed to format merged content, using raw content")
                formatted_content = combined_content
            
            target_extension = target_format.get("extension", ".txt")
            target_mime_type = target_format.get("mimeType", "text/plain")
            
            # Create output fileName for merged document
            timestamp = self._format_timestamp_for_filename()
            output_fileName = f"merged_documents_{timestamp}{target_extension}"
            
            # Create result data for merged document using new structure
            result_data = {
                "result": formatted_content,
                "fileName": output_fileName,
                "processedDocuments": len(document_contents),
                "comment": f"Merged {len(original_file_names)} documents into {target_extension} format"
            }
            
            logger.info(f"Created merged document: {output_fileName} with {len(formatted_content)} characters")
            
            return ActionResult.isSuccess(
                documents=[{
                    "documentName": output_fileName,
                    "documentData": result_data,
                    "mimeType": target_mime_type
                }]
            )
            
        except Exception as e:
            logger.error(f"Error merging documents: {str(e)}")
            return ActionResult.isFailure(
                error=f"Failed to merge documents: {str(e)}"
            )

    async def _convertContentToFormat(self, content: str, target_format: Dict[str, Any]) -> str:
        """
        Helper function to convert content to the specified format using AI.
        """
        try:
            extension = target_format.get("extension", ".txt")
            mime_type = target_format.get("mimeType", "text/plain")
            
            logger.info(f"Converting content to format: {extension} ({mime_type})")
            
            # Create AI prompt for format conversion
            format_prompts = {
                ".csv": f"""
                Convert the following content into a proper CSV format.
                
                Requirements:
                1. Output ONLY the CSV data without any markdown, code blocks, or additional text
                2. Use appropriate headers based on the content
                3. Ensure proper CSV formatting with commas and quotes where needed
                4. Make the data easily readable and importable into spreadsheet applications
                
                Content to convert:
                {content}
                
                Generate ONLY the CSV data:
                """,
                
                ".json": f"""
                Convert the following content into a proper JSON format.
                
                Requirements:
                1. Output ONLY the JSON data without any markdown, code blocks, or additional text
                2. Structure the data logically with appropriate keys and values
                3. Ensure valid JSON syntax
                4. Make the data easily parseable and readable
                
                Content to convert:
                {content}
                
                Generate ONLY the JSON data:
                """,
                
                ".xml": f"""
                Convert the following content into a proper XML format.
                
                Requirements:
                1. Output ONLY the XML data without any markdown, code blocks, or additional text
                2. Use appropriate XML tags and structure
                3. Ensure valid XML syntax
                4. Make the data easily parseable and readable
                
                Content to convert:
                {content}
                
                Generate ONLY the XML data:
                """,
                
                ".html": f"""
                Convert the following content into a proper HTML format.
                
                Requirements:
                1. Output ONLY the HTML data without any markdown, code blocks, or additional text
                2. Use appropriate HTML tags and structure
                3. Ensure valid HTML syntax
                4. Make the data easily readable in web browsers
                
                Content to convert:
                {content}
                
                Generate ONLY the HTML data:
                """,
                
                ".md": f"""
                Convert the following content into a proper Markdown format.
                
                Requirements:
                1. Output ONLY the Markdown data without any code blocks or additional text
                2. Use appropriate Markdown syntax for headers, lists, emphasis, etc.
                3. Structure the content logically
                4. Make the data easily readable and convertible to other formats
                
                Content to convert:
                {content}
                
                Generate ONLY the Markdown data:
                """
            }
            
            # Get the appropriate prompt for the target format
            if extension in format_prompts:
                ai_prompt = format_prompts[extension]
            else:
                # Generic format conversion
                ai_prompt = f"""
                Convert the following content into {extension.upper()} format.
                
                Requirements:
                1. Output ONLY the {extension.upper()} data without any markdown, code blocks, or additional text
                2. Use appropriate formatting for {extension.upper()} files
                3. Ensure the output is valid and usable
                4. Make the data easily readable and importable
                
                Content to convert:
                {content}
                
                Generate ONLY the {extension.upper()} data:
                """
            
            # Call AI to generate the formatted content
            logger.info(f"Calling AI for {extension} format conversion")
            formatted_content = await self.services.ai.callAi(
                prompt=ai_prompt,
                documents=None,
                options=AiCallOptions(
                    operationType=OperationType.GENERATE_CONTENT,
                    priority=Priority.SPEED,
                    compressPrompt=True,
                    compressContext=False,
                    maxCost=0.02
                )
            )
            
            if not formatted_content or formatted_content.strip() == "":
                logger.warning("AI format conversion failed, using fallback")
                return self._generateFallbackFormattedContent(content, extension, mime_type)
            
            # Clean up the AI response
            formatted_content = formatted_content.strip()
            
            # Remove markdown code blocks if present
            if formatted_content.startswith("```") and formatted_content.endswith("```"):
                lines = formatted_content.split('\n')
                if len(lines) > 2:
                    formatted_content = '\n'.join(lines[1:-1])
            
            # For HTML format, check if AI returned complete HTML document
            if extension == ".html" and (formatted_content.startswith('<!DOCTYPE') or formatted_content.startswith('<html')):
                return formatted_content
            
            return formatted_content
            
        except Exception as e:
            logger.error(f"Error in AI format conversion: {str(e)}")
            return self._generateFallbackFormattedContent(content, extension, mime_type)

    def _generateFallbackFormattedContent(self, content: str, extension: str, mime_type: str) -> str:
        """
        Generate fallback formatted content when AI conversion fails.
        """
        try:
            if extension == ".csv":
                # Simple CSV fallback - split by lines and create basic CSV
                lines = content.strip().split('\n')
                if lines:
                    # Create a simple CSV with line numbers and content
                    csv_lines = ["Line,Content"]
                    for i, line in enumerate(lines, 1):
                        # Escape quotes and wrap in quotes if comma present
                        if ',' in line:
                            line = f'"{line.replace(chr(34), chr(34) + chr(34))}"'
                        csv_lines.append(f"{i},{line}")
                    return '\n'.join(csv_lines)
                return "Line,Content\n1,No content available"
                
            elif extension == ".json":
                # Simple JSON fallback
                content_escaped = content.replace('"', '\\"')
                timestamp = self.services.utils.getUtcTimestamp()
                return f'{{"content": "{content_escaped}", "format": "json", "timestamp": {timestamp}}}'
                
            elif extension == ".xml":
                # Simple XML fallback
                timestamp = self.services.utils.getUtcTimestamp()
                return f'<?xml version="1.0" encoding="UTF-8"?>\n<document>\n<content>{content}</content>\n<format>xml</format>\n<timestamp>{timestamp}</timestamp>\n</document>'
                
            elif extension == ".html":
                # Simple HTML fallback
                timestamp = int(self.services.utils.getUtcTimestamp())
                return f'<!DOCTYPE html>\n<html>\n<head><meta charset="UTF-8"><title>Generated Document</title></head>\n<body>\n<pre>{content}</pre>\n<p><em>Generated on {timestamp}</em></p>\n</body>\n</html>'
                
            elif extension == ".md":
                # Simple Markdown fallback
                timestamp = int(self.services.utils.getUtcTimestamp())
                return f"# Generated Document\n\n{content}\n\n---\n*Generated on {timestamp}*"
                
            else:
                # Generic fallback - return content as-is
                return content
                
        except Exception as e:
            logger.error(f"Error in fallback format conversion: {str(e)}")
            return content

    @action
    async def generateReport(self, parameters: Dict[str, Any]) -> ActionResult:
        """
        Generate HTML report from multiple documents using AI.
        
        Parameters:
            documentList (list): Document list reference(s) - List of document references to include in report
            prompt (str): AI prompt for report generation - Specific prompt describing what kind of report to generate
            title (str, optional): Report title - Title for the generated HTML report (default: "Summary Report")
            operationType (str, optional): Type of operation - Use 'generate_report', 'analyze_documents', etc. (default: 'generate_report')
            processDocumentsIndividually (bool, optional): Process each document separately - Set to True for individual processing (default: True)
            chunkAllowed (bool, optional): Allow content chunking - Set to True to allow AI service to chunk large content (default: True)
            mergeStrategy (dict, optional): Strategy for merging results - Specify how to merge content for report generation (default: concatenate)
            includeMetadata (bool, optional): Include document metadata - Set to True to include file metadata in results (default: True)
        """
        try:
            documentList = parameters.get("documentList")
            if isinstance(documentList, str):
                documentList = [documentList]
            prompt = parameters.get("prompt")
            title = parameters.get("title", "Summary Report")
            operationType = parameters.get("operationType", "generate_report")
            processDocumentsIndividually = parameters.get("processDocumentsIndividually", True)
            chunkAllowed = parameters.get("chunkAllowed", True)
            mergeStrategy = parameters.get("mergeStrategy", {
                "groupBy": "typeGroup",
                "orderBy": "id",
                "mergeType": "concatenate"
            })
            includeMetadata = parameters.get("includeMetadata", True)
            
            if not documentList:
                return ActionResult.isFailure(
                    error="Document list reference is required"
                )
            
            if not prompt:
                return ActionResult.isFailure(
                    error="Prompt is required to specify what kind of report to generate"
                )
            
            chatDocuments = self.services.workflow.getChatDocumentsFromDocumentList(documentList)
            logger.info(f"Retrieved {len(chatDocuments)} chat documents for report generation")
            
            if not chatDocuments:
                return ActionResult.isFailure(
                    error="No documents found for the provided reference"
                )
            
            # Generate HTML report
            html_content = await self._generateHtmlReport(chatDocuments, title, includeMetadata, prompt)
            
            # Create output fileName
            timestamp = int(self.services.utils.getUtcTimestamp())
            output_fileName = f"report_{self._format_timestamp_for_filename()}.html"
            
            result_data = {
                "result": html_content,
                "fileName": output_fileName,
                "processedDocuments": len(chatDocuments),
                "comment": f"HTML report generated with title: {title}"
            }
            
            logger.info(f"Generated HTML report: {output_fileName} with {len(html_content)} characters")
            
            return ActionResult.isSuccess(
                documents=[{
                    "documentName": output_fileName,
                    "documentData": result_data,
                    "mimeType": "text/html"
                }]
            )
        except Exception as e:
            logger.error(f"Error generating report: {str(e)}")
            return ActionResult.isFailure(
                error=str(e)
            )

    async def _generateHtmlReport(self, chatDocuments: List[Any], title: str, includeMetadata: bool, prompt: str) -> str:
        """
        Generate a comprehensive HTML report using AI from all input documents.
        """
        try:
            # Filter out empty documents and collect content
            validDocuments = []
            allContent = []
            
            for doc in chatDocuments:
                content = ""
                logger.info(f"Processing document: type={type(doc)}")
                
                # Use new extraction service for each document
                try:
                    # Build extraction options for report generation from AI planner parameters
                    extraction_options = {
                        "prompt": prompt,
                        "operationType": operationType,
                        "processDocumentsIndividually": processDocumentsIndividually,
                        "chunkAllowed": chunkAllowed,
                        "mergeStrategy": mergeStrategy
                    }
                    
                    # Add optional parameters if provided by AI planner
                    if not includeMetadata:
                        extraction_options["includeMetadata"] = False
                    
                    # Extract content using new service
                    extracted_list = self.services.extraction.extractContent(
                        documents=[doc],
                        options=extraction_options
                    )
                    
                    ec = extracted_list[0] if extracted_list else None
                    if ec and hasattr(ec, 'parts'):
                        for part in ec.parts:
                            try:
                                if part.typeGroup in ("text", "table", "structure") and part.data:
                                    content += part.data + " "
                            except Exception:
                                continue
                        if content.strip():
                            logger.info(f"  Retrieved content from file: {len(content)} characters")
                        else:
                            logger.info(f"  No readable text content found (binary file)")
                    else:
                        logger.info(f"  No content extracted (binary file)")
                except Exception as e:
                    logger.info(f"  Could not extract content (binary file): {str(e)}")
                
                # Skip empty documents
                if content and content.strip():
                    validDocuments.append(doc)
                    allContent.append(f"Document: {doc.fileName}\n{content}\n")
                    logger.info(f"  Added document to valid documents list")
                else:
                    logger.info(f"  Skipping document with no readable text content")
            
            if not validDocuments:
                # No readable content; return a minimal valid HTML document
                timestamp = int(self.services.utils.getUtcTimestamp())
                return f"<!DOCTYPE html><html><head><meta charset=\"UTF-8\"><title>{title}</title></head><body><h1>{title}</h1><p>Keine auswertbaren Inhalte gefunden.</p><p>Generated: {timestamp}</p></body></html>"
            
            # Create AI prompt for comprehensive report generation using user's prompt
            combinedContent = "\n\n".join(allContent)
            aiPrompt = f"""
{prompt}

Report Title: {title}

OUTPUT POLICY:
- Return ONLY a complete, raw HTML document.
- Start with: <!DOCTYPE html>
- Must include: <html>, <head> (with <meta charset="UTF-8"> and <title>), and <body>.
- The response must be valid, self-contained HTML suitable for saving as .html.

Structure:
- Title and short subtitle
- Executive summary
- Sections with clear headings
- Use tables for structured data when helpful
- Key findings and recommendations
- Generation date and number of documents

Quality and design requirements:
- Use clear, professional, and accessible styling in a <style> block
- Apply clean layout, spacing, and visual hierarchy for headings
- Keep HTML and CSS standards-compliant and lightweight

SOURCE DOCUMENT CONTENT:
---START---
{combinedContent}
---END---
            """
            
            # Call AI to generate the report
            logger.info(f"Generating AI report for {len(validDocuments)} documents")
            # Build ChatDocument list from chatDocuments
            documents = []
            try:
                for d in validDocuments:
                    try:
                        data = self.services.workflow.getFileData(d.fileId) if hasattr(d, 'fileId') else None
                        if data:
                            documents.append(ChatDocument(fileData=data, fileName=d.fileName, mimeType=d.mimeType))
                    except Exception:
                        continue
            except Exception:
                documents = None
            aiReport = await self.services.ai.callAi(
                prompt=aiPrompt,
                documents=documents or None,
                options=AiCallOptions(
                    operationType=OperationType.GENERATE_CONTENT,  # Using GENERATE_CONTENT for report generation
                    priority=Priority.QUALITY,
                    compressPrompt=False,
                    compressContext=True,
                    processDocumentsIndividually=True,
                    resultFormat="html",
                    processingMode="detailed",
                    maxCost=0.08,
                    maxProcessingTime=90
                )
            )
            
            # If AI call fails, return error - AI is crucial for report generation
            if not aiReport or aiReport.strip() == "":
                logger.error("AI report generation failed - AI is crucial for this action")
                raise Exception("AI report generation failed - AI is required for report generation")
            
            # Clean up the AI response and ensure it's valid HTML
            aiReport = aiReport.strip()

            # Normalize: strip code fences if present
            if aiReport.startswith("```") and aiReport.endswith("```"):
                lines = aiReport.split('\n')
                if len(lines) >= 2:
                    aiReport = '\n'.join(lines[1:-1]).strip()

            cleaned = aiReport.strip()

            # Return exactly what we have (no wrapping)
            return cleaned
                
        except Exception as e:
            logger.error(f"Error generating AI report: {str(e)}")
            # Re-raise the error - AI is crucial for report generation
            raise