gateway/modules/services/serviceGeneration/subDocumentPurposeAnalyzer.py
2025-12-23 00:34:15 +01:00

316 lines
13 KiB
Python

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Document Purpose Analyzer for hierarchical document generation.
Uses AI to analyze user prompt and determine purpose for each document.
"""
import logging
import json
from typing import Dict, Any, List, Optional
from modules.datamodels.datamodelChat import ChatDocument
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum
logger = logging.getLogger(__name__)
class DocumentPurposeAnalyzer:
    """Analyzes the user prompt and attached documents to decide, per document,
    how each one should be used during hierarchical document generation.

    All failures are soft: if AI analysis is unavailable or returns garbage,
    deterministic defaults are produced instead.
    """

    def __init__(self, services: Any):
        # Service container; expected to expose `ai` (AI calls), `utils`
        # (JSON extraction / debug files) and user-context attributes.
        self.services = services

    async def analyzeDocumentPurposes(
        self,
        userPrompt: str,
        chatDocuments: "List[ChatDocument]",
        actionContext: str = "generateDocument"
    ) -> Dict[str, Any]:
        """
        Use AI to analyze user prompt and determine purpose for each document.

        Args:
            userPrompt: User's original prompt
            chatDocuments: List of ChatDocument objects to analyze
            actionContext: Action name (e.g., "generateDocument", "extractData")

        Returns:
            {
                "document_purposes": [
                    {
                        "document_id": "...",
                        "purpose": "extract_text_content" | "include_image" | ...,
                        "reasoning": "...",
                        "extractionPrompt": "..." (if purpose requires extraction),
                        "processingNotes": "..."
                    }
                ],
                "overall_intent": "..."
            }

        Never raises: any failure falls back to `_createDefaultPurposes`.
        """
        try:
            # Nothing to analyze — short-circuit with an empty result.
            if not chatDocuments:
                return {
                    "document_purposes": [],
                    "overall_intent": "No documents provided"
                }

            # Compact metadata view of each document for the analysis prompt.
            documentMetadata = [
                {
                    "document_id": doc.id,
                    "fileName": doc.fileName,
                    "mimeType": doc.mimeType,
                    "fileSize": doc.fileSize
                }
                for doc in chatDocuments
            ]

            analysisPrompt = self._createAnalysisPrompt(
                userPrompt=userPrompt,
                actionContext=actionContext,
                documentMetadata=documentMetadata
            )

            # Debug: log purpose analysis prompt (best effort, never raises).
            self._writeDebugFile(
                "document_purpose_analysis_prompt",
                "purpose analysis prompt",
                lambda: analysisPrompt
            )

            # Call AI for analysis; JSON output is requested explicitly.
            options = AiCallOptions(
                operationType=OperationTypeEnum.DATA_GENERATE,
                resultFormat="json"
            )
            aiResponse = await self.services.ai.callAiContent(
                prompt=analysisPrompt,
                options=options,
                outputFormat="json"
            )

            # Debug: log purpose analysis response. Content is built lazily so
            # any attribute error is swallowed by the debug helper, matching
            # the original inline try/except behavior.
            def _responseDebugContent() -> str:
                responseContent = aiResponse.content if aiResponse and aiResponse.content else ''
                responseMetadata = {
                    "status": aiResponse.status if aiResponse else "N/A",
                    "error": aiResponse.error if aiResponse else "N/A",
                    "documents_count": len(aiResponse.documents) if aiResponse and aiResponse.documents else 0
                }
                return (
                    f"Response Content:\n{responseContent}\n\n"
                    f"Response Metadata:\n{json.dumps(responseMetadata, indent=2)}"
                )

            self._writeDebugFile(
                "document_purpose_analysis_response",
                "purpose analysis response",
                _responseDebugContent
            )

            if not aiResponse or not aiResponse.content:
                logger.warning("AI purpose analysis returned empty response, using defaults")
                return self._createDefaultPurposes(chatDocuments, actionContext)

            # Extract the JSON payload from the (possibly chatty) AI response.
            extractedJson = self.services.utils.jsonExtractString(aiResponse.content)
            if not extractedJson:
                logger.warning("No JSON found in purpose analysis response, using defaults")
                return self._createDefaultPurposes(chatDocuments, actionContext)

            try:
                analysisResult = json.loads(extractedJson)

                # Validate structure
                if "document_purposes" not in analysisResult:
                    logger.warning("Invalid analysis result structure, using defaults")
                    return self._createDefaultPurposes(chatDocuments, actionContext)

                # Ensure every provided document received a purpose; backfill
                # defaults for any the AI skipped.
                analyzedIds = {dp.get("document_id") for dp in analysisResult.get("document_purposes", [])}
                for doc in chatDocuments:
                    if doc.id not in analyzedIds:
                        logger.warning(f"Document {doc.id} not in analysis result, adding default purpose")
                        analysisResult["document_purposes"].append({
                            "document_id": doc.id,
                            "purpose": self._determineDefaultPurpose(doc, actionContext),
                            # NOTE: was a placeholder-free f-string; plain literal now.
                            "reasoning": "Default purpose based on document type and action context",
                            "extractionPrompt": None,
                            "processingNotes": None
                        })
                return analysisResult
            except json.JSONDecodeError as e:
                logger.error(f"Failed to parse purpose analysis JSON: {str(e)}")
                logger.error(f"Extracted JSON (first 500 chars): {extractedJson[:500]}")
                return self._createDefaultPurposes(chatDocuments, actionContext)
        except Exception as e:
            # Boundary catch-all: purpose analysis is best-effort and must not
            # break the surrounding generation flow.
            logger.error(f"Error analyzing document purposes: {str(e)}")
            return self._createDefaultPurposes(chatDocuments, actionContext)

    def _writeDebugFile(self, debugName: str, label: str, makeContent) -> None:
        """Best-effort debug-file write via services.utils; never raises.

        Args:
            debugName: Debug file identifier passed to writeDebugFile.
            label: Human-readable label used in the failure log message.
            makeContent: Zero-arg callable producing the content to write;
                invoked inside the try so content-building errors are also
                logged at debug level rather than propagated.
        """
        if not (self.services and hasattr(self.services, 'utils')
                and hasattr(self.services.utils, 'writeDebugFile')):
            return
        try:
            self.services.utils.writeDebugFile(makeContent(), debugName)
        except Exception as e:
            logger.debug(f"Could not write debug file for {label}: {e}")

    def _createAnalysisPrompt(
        self,
        userPrompt: str,
        actionContext: str,
        documentMetadata: List[Dict[str, Any]]
    ) -> str:
        """Create the AI prompt for document purpose analysis."""
        # Format the numbered document list.
        docListText = ""
        for i, docInfo in enumerate(documentMetadata, 1):
            docListText += f"\n{i}. Document ID: {docInfo['document_id']}\n"
            docListText += f" File Name: {docInfo['fileName']}\n"
            docListText += f" MIME Type: {docInfo['mimeType']}\n"
            docListText += f" File Size: {docInfo['fileSize']} bytes\n"

        # The AI is instructed to answer in the user's language.
        userLanguage = self._getUserLanguage()

        prompt = f"""{'='*80}
DOCUMENT PURPOSE ANALYSIS
{'='*80}

USER PROMPT:
{userPrompt}

ACTION CONTEXT: {actionContext}

DOCUMENTS PROVIDED:
{docListText}

{'='*80}

TASK: For each document, determine its purpose based on:
1. User prompt intent (what the user wants to do)
2. Action context (what action is being performed)
3. Document type (mimeType - is it text, image, etc.)
4. Document metadata (fileName, size)

AVAILABLE PURPOSES:
- "extract_text_content": Extract text content for use in document generation
- "include_image": Include the image directly in the generated document (for images)
- "analyze_image_vision": Analyze image with vision AI to extract text/information (for images with text/charts)
- "use_as_template": Use document structure/layout as template for generation
- "use_as_reference": Use as background context/reference without detailed extraction
- "extract_data": Extract structured data (key-value pairs, entities, fields)
- "attach": Document is an attachment - don't process, just attach to output
- "convert_format": Convert document format (for convert actions)
- "translate": Translate document content (for translate actions)
- "summarize": Create summary of document (for summarize actions)
- "compare": Compare documents (for comparison actions)
- "merge": Merge documents (for merge actions)
- "extract_tables_charts": Extract tables and charts specifically
- "use_for_styling": Use document for styling/formatting reference only
- "extract_metadata": Extract only document metadata

CRITICAL RULES:
1. For images (mimeType starts with "image/"):
   - If user wants to "include" or "show" images → "include_image"
   - If user wants to "analyze", "read text", or "extract text" from images → "analyze_image_vision"
   - Default for images in generateDocument → "include_image"
2. For text documents in generateDocument:
   - If user mentions "template" or "structure" → "use_as_template"
   - If user mentions "reference" or "context" → "use_as_reference"
   - Default → "extract_text_content"
3. Consider action context:
   - generateDocument: Usually "extract_text_content" or "include_image"
   - extractData: Usually "extract_data"
   - translateDocument: Usually "translate"
   - summarizeDocument: Usually "summarize"
4. Return ONLY valid JSON following this structure:
{{
  "document_purposes": [
    {{
      "document_id": "document_id_here",
      "purpose": "extract_text_content",
      "reasoning": "Brief explanation in language '{userLanguage}'",
      "extractionPrompt": "Specific extraction prompt if purpose requires extraction, otherwise null",
      "processingNotes": "Any special processing requirements or null"
    }}
  ],
  "overall_intent": "Summary of how documents should be used together in language '{userLanguage}'"
}}
5. All content must be in the language '{userLanguage}'
6. Return ONLY the JSON structure. No explanations before or after.

Return ONLY the JSON structure.
"""
        return prompt

    def _createDefaultPurposes(
        self,
        chatDocuments: "List[ChatDocument]",
        actionContext: str
    ) -> Dict[str, Any]:
        """Create fallback purposes when AI analysis fails or is unusable."""
        purposes = [
            {
                "document_id": doc.id,
                "purpose": self._determineDefaultPurpose(doc, actionContext),
                "reasoning": f"Default purpose based on document type ({doc.mimeType}) and action context ({actionContext})",
                "extractionPrompt": None,
                "processingNotes": None
            }
            for doc in chatDocuments
        ]
        return {
            "document_purposes": purposes,
            "overall_intent": f"Default processing for {len(chatDocuments)} document(s) in {actionContext} action"
        }

    def _determineDefaultPurpose(
        self,
        doc: "ChatDocument",
        actionContext: str
    ) -> str:
        """Determine the default purpose from the document MIME type and action context."""
        mimeType = doc.mimeType or ""

        # Images: include by default; vision analysis only for extraction-style actions.
        if mimeType.startswith("image/"):
            if actionContext in ("extractData", "process"):
                return "analyze_image_vision"
            return "include_image"

        # Non-image documents: map the action to its natural purpose.
        actionDefaults = {
            "extractData": "extract_data",
            "translateDocument": "translate",
            "summarizeDocument": "summarize",
            "convertDocument": "convert_format",
            "convert": "convert_format",
            "generateDocument": "extract_text_content",
        }
        return actionDefaults.get(actionContext, "extract_text_content")

    def _getUserLanguage(self) -> str:
        """Resolve the user's language from services; fall back to English."""
        try:
            if self.services:
                if hasattr(self.services, 'currentUserLanguage') and self.services.currentUserLanguage:
                    return self.services.currentUserLanguage
                elif hasattr(self.services, 'user') and self.services.user and hasattr(self.services.user, 'language'):
                    return self.services.user.language
        except Exception:
            # Language resolution is cosmetic; never let it break analysis.
            pass
        return 'en'  # Default fallback