"""
Prompt builder for AI document generation and extraction.
This module builds prompts for AI services to extract and generate documents.
"""

import json
import logging
from typing import Dict, Any, Optional, List, TYPE_CHECKING
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType

# Type alias used to annotate the `renderer` parameter of buildExtractionPrompt.
# BaseRenderer is imported only while type checking; at runtime the alias is
# plain `Any`, so this module does not import the renderer module when loaded.
if TYPE_CHECKING:
    from .renderers.rendererBaseTemplate import BaseRenderer
    _RendererLike = BaseRenderer
else:
    _RendererLike = Any

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

async def buildAdaptiveExtractionPrompt(
    outputFormat: str,
    userPrompt: str,
    title: str,
    promptAnalysis: Dict[str, Any],
    aiService=None,
    services=None
) -> str:
    """
    Build the extraction prompt that asks the model for a multi-document JSON result.

    The prompt always uses the unified "documents" array layout; a single-document
    request is expressed as a documents array with exactly one entry.

    Args:
        outputFormat: Accepted for interface compatibility; not used in the prompt text.
        userPrompt: User request placed at the very top of the prompt. It is passed
            through services.ai.sanitizePromptContent(userPrompt, 'userinput') when
            a services container is supplied.
        title: Accepted for interface compatibility; not used in the prompt text.
        promptAnalysis: Accepted for interface compatibility; not used in the prompt text.
        aiService: Accepted for interface compatibility; not used here.
        services: Optional service container exposing ai.sanitizePromptContent.
            When it is None the user prompt is embedded unsanitized and a warning
            is logged instead of failing with an AttributeError.

    Returns:
        The complete prompt text with leading and trailing whitespace removed.
    """
    # The signature allows services=None, so do not dereference it blindly.
    if services is not None:
        safeUserPrompt = services.ai.sanitizePromptContent(userPrompt, 'userinput')
    else:
        logging.getLogger(__name__).warning(
            "buildAdaptiveExtractionPrompt called without services; user prompt is not sanitized"
        )
        safeUserPrompt = userPrompt

    # Concrete example data (instead of a schema) showing the expected JSON layout
    multi_file_example = {
        "metadata": {
            "title": "Multi-Document Example",
            "splitStrategy": "by_section",
            "source_documents": ["doc_001"],
            "extraction_method": "ai_extraction"
        },
        "documents": [
            {
                "id": "doc_section_1",
                "title": "Section 1 Title",
                "filename": "section_1.xlsx",
                "sections": [
                    {
                        "id": "section_1",
                        "content_type": "heading",
                        "elements": [
                            {
                                "level": 1,
                                "text": "1. SECTION TITLE"
                            }
                        ],
                        "order": 1
                    },
                    {
                        "id": "section_2",
                        "content_type": "paragraph",
                        "elements": [
                            {
                                "text": "This is the actual content that should be extracted from the document."
                            }
                        ],
                        "order": 2
                    },
                    {
                        "id": "section_3",
                        "content_type": "table",
                        "elements": [
                            {
                                "headers": ["Column 1", "Column 2"],
                                "rows": [["Value 1", "Value 2"]]
                            }
                        ],
                        "order": 3
                    }
                ]
            }
        ]
    }

    # UNIFIED APPROACH: Always use multi-document format (single doc = multi with n=1)
    adaptive_prompt = f"""
{safeUserPrompt}

You are a document processing assistant that extracts and structures content from documents. Your task is to analyze the provided document content and create a structured JSON output.

TASK: Extract the actual content from the document and organize it into documents. For single documents, create one document entry. For multi-document requests, create multiple document entries.

REQUIREMENTS:
1. Analyze the document content provided in the context below
2. Identify distinct sections in the document (by headings, topics, or logical breaks)
3. Create one or more JSON document entries based on the content structure
4. Extract the real content from each section (headings, paragraphs, lists, etc.)
5. Generate appropriate filenames for each document

CRITICAL: You MUST return a JSON structure with a "documents" array, NOT a "sections" array.

OUTPUT FORMAT: Return only valid JSON in this exact structure:
{json.dumps(multi_file_example, indent=2)}

IMPORTANT: The JSON must have a "documents" key containing an array of document objects. Each document object must have:
- "id": unique identifier
- "title": document title
- "filename": appropriate filename for the document
- "sections": array of content sections

DO NOT return a JSON with "sections" at the root level. Return a JSON with "documents" at the root level.

INSTRUCTIONS:
- For single document requests: Create one document with all content in its sections
- For multi-document requests: Create multiple documents, each with relevant sections
- Use actual section titles, headings, and text from the document
- Create meaningful filenames based on content
- Ensure each section contains the complete content for that part
- Do not use generic placeholder text like "Section 1", "Section 2"
- Extract real headings, paragraphs, lists, and other content elements
- CRITICAL: Return JSON with "documents" array, not "sections" array

CONTEXT (Document Content):

Content Types to Extract:
1. Tables: Extract all rows and columns with proper headers
2. Lists: Extract all items with proper nesting  
3. Headings: Extract with appropriate levels
4. Paragraphs: Extract as structured text
5. Code: Extract code blocks with language identification
6. Images: Analyze images and describe all visible content including text, tables, logos, graphics, layout, and visual elements

Image Analysis Requirements:
- If you cannot analyze an image for any reason, explain why in the JSON response
- Describe everything you see in the image
- Include all text content, tables, logos, graphics, layout, and visual elements
- If the image is too small, corrupted, or unclear, explain this
- Always provide feedback - never return empty responses

Return only the JSON structure with actual data from the documents. Do not include any text before or after the JSON.

Extract the ACTUAL CONTENT from the source documents. Do not use placeholder text like "Section 1", "Section 2", etc. Extract the real headings, paragraphs, and content from the documents.
""".strip()

    return adaptive_prompt

async def buildGenerationPrompt(
    outputFormat: str,
    userPrompt: str,
    title: str,
    aiService=None,
    services=None
) -> str:
    """
    Build a prompt for structured JSON document output.

    When an aiService is supplied, the user request is first sent to the AI for a
    structural analysis; if that analysis can be parsed as JSON, the prompt built
    by buildAdaptiveExtractionPrompt is returned. In every other case (no
    aiService, empty response, or any error during analysis) the static
    generation template containing the LOOP_INSTRUCTION marker is returned.

    NOTE: a second function named buildGenerationPrompt is defined later in this
    module and rebinds the name, so this definition is replaced once the module
    has finished loading.

    Args:
        outputFormat: Target format; used in the template as the format label and
            as the example filename extension.
        userPrompt: User request, embedded verbatim in the prompts.
        title: Document title, embedded verbatim in the template.
        aiService: Optional AI service; its aiObjects.call coroutine is awaited
            for the analysis request.
        services: Optional service container; services.utils.debugLogToFile is
            used for debug output when present.

    Returns:
        The prompt text with surrounding whitespace stripped.
    """

    # Use AI to determine the best approach
    if aiService:
        try:
            analysis_prompt = f"""
Analyze this user request and determine the best JSON structure for document extraction.

User request: "{userPrompt}"

Respond with JSON only:
{{
    "requires_multi_file": true/false,
    "recommended_schema": "single_document|multi_document",
    "split_approach": "description of how to organize content",
    "file_naming": "suggested naming pattern"
}}

Consider the user's intent and the most logical way to organize the extracted content.
"""

            from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType
            request_options = AiCallOptions()
            request_options.operationType = OperationType.GENERAL

            request = AiCallRequest(prompt=analysis_prompt, context="", options=request_options)
            response = await aiService.aiObjects.call(request)

            if response and response.content:
                import re

                # The model may wrap the JSON in extra text; keep the outermost {...} span.
                result = response.content.strip()
                json_match = re.search(r'\{.*\}', result, re.DOTALL)
                if json_match:
                    result = json_match.group(0)

                analysis = json.loads(result)

                # Use analysis to build appropriate prompt
                return await buildAdaptiveExtractionPrompt(
                    outputFormat, userPrompt, title, analysis, aiService, services
                )
        except Exception as e:
            # Analysis is best-effort: report the failure and fall through to the
            # static template. services is optional, so it must not be
            # dereferenced unguarded here.
            failure_message = f"Generic prompt analysis failed: {str(e)}"
            if services:
                services.utils.debugLogToFile(failure_message, "PROMPT_BUILDER")
            else:
                logging.getLogger(__name__).warning(failure_message)

    # Always use the proper generation prompt template with LOOP_INSTRUCTION
    result = f"""Generate structured JSON content for document creation.

USER REQUEST: "{userPrompt}"
DOCUMENT TITLE: "{title}"
TARGET FORMAT: {outputFormat}

Return ONLY this JSON structure:
{{
  "metadata": {{
    "title": "{title}",
    "splitStrategy": "single_document",
    "source_documents": [],
    "extraction_method": "ai_generation"
  }},
  "documents": [
    {{
      "id": "doc_1",
      "title": "{title}",
      "filename": "document.{outputFormat}",
      "sections": [
        {{
          "id": "section_1",
          "content_type": "heading",
          "elements": [
            {{
              "level": 1,
              "text": "1. SECTION TITLE"
            }}
          ],
          "order": 1
        }},
        {{
          "id": "section_2",
          "content_type": "paragraph",
          "elements": [
            {{
              "text": "This is the actual content that should be generated."
            }}
          ],
          "order": 2
        }}
      ]
    }}
  ],
  "continuation": null
}}

RULES:
- Fill sections with content based on the user request
- Use appropriate content_type: "heading", "paragraph", "table", "list"
- If content is too long: deliver partial result and set "continuation": "description of remaining content"
- If content fits: deliver complete result and set "continuation": null
- Split large content into multiple sections if needed

LOOP_INSTRUCTION
"""

    # Debug output
    if services:
        services.utils.debugLogToFile("GENERATION PROMPT: Generated successfully", "PROMPT_BUILDER")

    return result.strip()

async def buildExtractionPrompt(
    outputFormat: str,
    renderer: _RendererLike,
    userPrompt: str,
    title: str,
    aiService=None,
    services=None
) -> str:
    """
    Build the final extraction prompt by combining:
    - The extraction intent derived from the user prompt (via _parseExtractionIntent)
    - Generic instructions with an example of the expected JSON structure
    - Format-specific guidelines provided by the renderer, if it offers any

    The prompt instructs the model to return only JSON with a root-level
    "sections" array that mirrors the embedded example.

    Args:
        outputFormat: Target output format, forwarded to _parseExtractionIntent.
        renderer: Renderer object; when it has a getExtractionGuidelines method,
            its result is appended to the prompt.
        userPrompt: Raw user request, forwarded to _parseExtractionIntent.
        title: Accepted for interface compatibility; not used in the prompt text.
        aiService: Optional AI service, forwarded to _parseExtractionIntent.
        services: Optional service container; when present, the final prompt is
            written via services.utils.writeDebugFile.

    Returns:
        The combined prompt text with surrounding whitespace stripped.
    """

    # Parse user prompt to separate extraction intent from generation format using AI
    extractionIntent = await _parseExtractionIntent(userPrompt, outputFormat, aiService, services)

    # Generic block for JSON extraction - use mixed example data showing different content types
    example_data = {
        "metadata": {
            "title": "Example Document",
            "author": "AI Assistant",
            "source_documents": ["document_001"],
            "extraction_method": "ai_extraction"
        },
        "sections": [
            {
                "id": "section_001",
                "content_type": "heading",
                "elements": [
                    {
                        "level": 1,
                        "text": "1. INTRODUCTION"
                    }
                ],
                "order": 1,
                "metadata": {}
            },
            {
                "id": "section_002",
                "content_type": "paragraph",
                "elements": [
                    {
                        "text": "This is a sample paragraph with actual content that should be extracted from the document."
                    }
                ],
                "order": 2,
                "metadata": {}
            },
            {
                "id": "section_003",
                "content_type": "table",
                "elements": [
                    {
                        "headers": ["Column 1", "Column 2", "Column 3"],
                        "rows": [
                            ["Value 1", "Value 2", "Value 3"],
                            ["Value 4", "Value 5", "Value 6"]
                        ]
                    }
                ],
                "order": 3,
                "metadata": {}
            }
        ],
        "summary": "",
        "tags": []
    }

    genericIntro = f"""
{extractionIntent}

You are a document processing assistant that extracts and structures content from documents. Your task is to analyze the provided document content and create a structured JSON output.

TASK: Extract the actual content from the document and organize it into structured sections.

REQUIREMENTS:
1. Analyze the document content provided in the context below
2. Extract all content and organize it into logical sections
3. Create structured JSON with sections containing the extracted content
4. Preserve the original structure and data

OUTPUT FORMAT: Return only valid JSON in this exact structure:
{json.dumps(example_data, indent=2)}

Requirements:
- Preserve all original data - do not summarize or interpret
- Use the exact JSON format shown above
- Maintain data integrity and structure

Content Types to Extract:
1. Tables: Extract all rows and columns with proper headers
2. Lists: Extract all items with proper nesting  
3. Headings: Extract with appropriate levels
4. Paragraphs: Extract as structured text
5. Code: Extract code blocks with language identification
6. Images: Analyze images and describe all visible content including text, tables, logos, graphics, layout, and visual elements

Image Analysis Requirements:
- If you cannot analyze an image for any reason, explain why in the JSON response
- Describe everything you see in the image
- Include all text content, tables, logos, graphics, layout, and visual elements
- If the image is too small, corrupted, or unclear, explain this
- Always provide feedback - never return empty responses

Return only the JSON structure with actual data from the documents. Do not include any text before or after the JSON.

Extract the ACTUAL CONTENT from the source documents. Do not use placeholder text like "Section 1", "Section 2", etc. Extract the real headings, paragraphs, and content from the documents.

DO NOT return a schema description - return actual extracted content in the JSON format shown above.
"""

    # Get format-specific guidelines from renderer. This stays best-effort (a
    # failing renderer must not prevent prompt creation), but the failure is
    # logged instead of being silently discarded.
    formatGuidelines = ""
    try:
        if hasattr(renderer, 'getExtractionGuidelines'):
            # "or ''" keeps a None result from being rendered as the text "None".
            formatGuidelines = renderer.getExtractionGuidelines() or ""
    except Exception as e:
        logging.getLogger(__name__).warning(
            "Renderer extraction guidelines unavailable: %s", e
        )
        formatGuidelines = ""

    # Combine all parts
    finalPrompt = f"{genericIntro}\n\n{formatGuidelines}".strip()

    # Save extraction prompt to debug file (services is optional)
    if services is not None:
        services.utils.writeDebugFile(finalPrompt, "extraction_prompt")

    return finalPrompt


async def buildGenerationPrompt(
    outputFormat: str,
    userPrompt: str,
    title: str,
    aiService=None,
    services=None
) -> str:
    """
    Build the generation prompt from a static JSON template.

    No AI call is made here. aiService only selects the output: without it a
    short one-sentence fallback prompt is returned, with it the full template
    containing the LOOP_INSTRUCTION marker is returned.

    Args:
        outputFormat: Target format; used as the format label and as the example
            filename extension inside the template.
        userPrompt: User request. Quotes are backslash-escaped and line breaks
            replaced by spaces before it is embedded in the template.
        title: Document title, embedded verbatim.
        aiService: Optional AI service; only checked for truthiness.
        services: Optional service container; services.utils.debugLogToFile is
            used for debug output when present, otherwise the standard logger
            receives the messages.

    Returns:
        The prompt text. If building the template raises, a plain-sentence
        fallback that still carries the original user prompt is returned.
    """
    if not aiService:
        # Fallback if no AI service available
        return f"Generate a comprehensive {outputFormat} document titled '{title}' based on the extracted content."

    def debugLog(message: str) -> None:
        # services is optional; never let debug output raise an AttributeError.
        if services is not None:
            services.utils.debugLogToFile(message, "PROMPT_BUILDER")
        else:
            logging.getLogger(__name__).debug(message)

    try:
        # Protect userPrompt from injection: escape quotes, flatten line breaks
        safeUserPrompt = userPrompt.replace('"', '\\"').replace("'", "\\'").replace('\n', ' ').replace('\r', ' ')

        # Debug output
        debugLog(f"GENERATION PROMPT REQUEST: buildGenerationPrompt called with outputFormat='{outputFormat}', title='{title}'")
        debugLog("GENERATION PROMPT REQUEST: Using static template instead of AI call")

        # Static generation prompt template
        result = f"""Generate structured JSON content for document creation.

USER REQUEST: "{safeUserPrompt}"
DOCUMENT TITLE: "{title}"
TARGET FORMAT: {outputFormat}

Return ONLY this JSON structure:
{{
  "metadata": {{
    "title": "{title}",
    "splitStrategy": "single_document",
    "source_documents": [],
    "extraction_method": "ai_generation"
  }},
  "documents": [
    {{
      "id": "doc_1",
      "title": "{title}",
      "filename": "document.{outputFormat}",
      "sections": [
        {{
          "id": "section_1",
          "content_type": "heading",
          "elements": [
            {{
              "level": 1,
              "text": "1. SECTION TITLE"
            }}
          ],
          "order": 1
        }},
        {{
          "id": "section_2",
          "content_type": "paragraph",
          "elements": [
            {{
              "text": "This is the actual content that should be generated."
            }}
          ],
          "order": 2
        }}
      ]
    }}
  ],
  "continuation": null
}}

RULES:
- Fill sections with content based on the user request
- Use appropriate content_type: "heading", "paragraph", "table", "list"
- Split large content into multiple sections if needed

LOOP_INSTRUCTION
"""

        # Debug output
        debugLog("GENERATION PROMPT: Generated successfully")

        return result.strip()

    except Exception as e:
        # Fallback on any error - preserve user prompt for language instructions
        debugLog(f"DEBUG: AI generation prompt failed: {str(e)}")
        return f"Generate a comprehensive {outputFormat} document titled '{title}' based on the extracted content. User requirements: {userPrompt}"


async def _parseExtractionIntent(userPrompt: str, outputFormat: str, aiService=None, services=None) -> str:
    """
    Parse user prompt to extract the core extraction intent.

    Args:
        userPrompt: Raw user request, embedded verbatim in the analysis prompt.
        outputFormat: Target format; named in the analysis prompt and in the
            default intent sentence.
        aiService: Optional AI service; its aiObjects.call coroutine is awaited
            with the analysis request.
        services: Optional service container; services.utils.debugLogToFile
            receives the failure message when present, otherwise the standard
            logger is used.

    Returns:
        The stripped AI response, or a generic default intent sentence when no
        aiService is given, the response is empty or blank, or the call fails.
    """
    # Single source for the sentence used by every fallback path.
    defaultIntent = f"Extract content from the provided documents and create a {outputFormat} report."

    if not aiService:
        return defaultIntent

    try:
        analysis_prompt = f"""
Analyze this user request and extract the core extraction intent:

User request: "{userPrompt}"
Target format: {outputFormat}

Extract the main intent and requirements for document processing. Focus on:
1. What content needs to be extracted
2. How it should be organized
3. Any specific requirements or preferences

Respond with a clear, concise statement of the extraction intent.
"""

        from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType
        request_options = AiCallOptions()
        request_options.operationType = OperationType.GENERAL

        request = AiCallRequest(prompt=analysis_prompt, context="", options=request_options)
        response = await aiService.aiObjects.call(request)

        if response and response.content:
            intent = response.content.strip()
            # A whitespace-only answer carries no intent; use the default instead.
            if intent:
                return intent
        return defaultIntent

    except Exception as e:
        # Intent parsing is best-effort; services is optional and must not be
        # dereferenced unguarded inside the error handler.
        failure_message = f"Extraction intent analysis failed: {str(e)}"
        if services is not None:
            services.utils.debugLogToFile(failure_message, "PROMPT_BUILDER")
        else:
            logging.getLogger(__name__).warning(failure_message)
        return defaultIntent