# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Document Purpose Analyzer for hierarchical document generation.
Uses AI to analyze user prompt and determine purpose for each document.
"""
|
|
|
|
import logging
|
|
import json
|
|
from typing import Dict, Any, List, Optional
|
|
from modules.datamodels.datamodelChat import ChatDocument
|
|
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum
|
|
|
|
# Module-level logger named after this module; used by DocumentPurposeAnalyzer below.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class DocumentPurposeAnalyzer:
    """Analyzes user prompt and documents to determine document purposes.

    The analyzer sends document metadata (id, file name, MIME type, size)
    together with the user prompt to the AI service exposed by ``services``
    and turns the JSON answer into a per-document purpose list. Whenever the
    AI answer is missing, unparsable or structurally invalid, rule-based
    defaults derived from the MIME type and the action context are returned
    instead.
    """

    # Default purpose per action context for non-image documents.
    _ACTION_DEFAULT_PURPOSES = {
        "extractData": "extract_data",
        "translateDocument": "translate",
        "summarizeDocument": "summarize",
        "convertDocument": "convert_format",
        "convert": "convert_format",
        "generateDocument": "extract_text_content",
    }

    # Purpose used for non-image documents when the action context is unknown.
    _FALLBACK_PURPOSE = "extract_text_content"

    # Action contexts in which an image is analyzed instead of embedded.
    _IMAGE_ANALYSIS_ACTIONS = ("extractData", "process")

    # Language code used when the services object provides no usable language.
    _DEFAULT_LANGUAGE = "en"

    def __init__(self, services: Any):
        """Store the service container.

        Args:
            services: Object providing ``ai.callAiContent`` and
                ``utils.jsonExtractString``; ``utils.writeDebugFile``,
                ``currentUserLanguage`` and ``user.language`` are used when
                present.
        """
        self.services = services

    async def analyzeDocumentPurposes(
        self,
        userPrompt: str,
        chatDocuments: List[ChatDocument],
        actionContext: str = "generateDocument"
    ) -> Dict[str, Any]:
        """
        Use AI to analyze user prompt and determine purpose for each document.

        Any ``Exception`` raised along the way is logged and answered with
        rule-based default purposes, so callers always receive the structure
        described below.

        Args:
            userPrompt: User's original prompt
            chatDocuments: List of ChatDocument objects to analyze
            actionContext: Action name (e.g., "generateDocument", "extractData")

        Returns:
            {
                "document_purposes": [
                    {
                        "document_id": "...",
                        "purpose": "extract_text_content" | "include_image" | ...,
                        "reasoning": "...",
                        "extractionPrompt": "..." (if purpose requires extraction),
                        "processingNotes": "..."
                    }
                ],
                "overall_intent": "..."
            }
        """
        try:
            if not chatDocuments:
                return {
                    "document_purposes": [],
                    "overall_intent": "No documents provided"
                }

            # Only metadata is sent to the AI, never document content.
            documentMetadata = [
                {
                    "document_id": doc.id,
                    "fileName": doc.fileName,
                    "mimeType": doc.mimeType,
                    "fileSize": doc.fileSize
                }
                for doc in chatDocuments
            ]

            analysisPrompt = self._createAnalysisPrompt(
                userPrompt=userPrompt,
                actionContext=actionContext,
                documentMetadata=documentMetadata
            )
            self._writeDebugFile(
                analysisPrompt,
                "document_purpose_analysis_prompt",
                "prompt"
            )

            options = AiCallOptions(
                operationType=OperationTypeEnum.DATA_GENERATE,
                resultFormat="json"
            )
            aiResponse = await self.services.ai.callAiContent(
                prompt=analysisPrompt,
                options=options,
                outputFormat="json"
            )
            self._writeResponseDebugFile(aiResponse)

            if not aiResponse or not aiResponse.content:
                logger.warning("AI purpose analysis returned empty response, using defaults")
                return self._createDefaultPurposes(chatDocuments, actionContext)

            extractedJson = self.services.utils.jsonExtractString(aiResponse.content)
            if not extractedJson:
                logger.warning("No JSON found in purpose analysis response, using defaults")
                return self._createDefaultPurposes(chatDocuments, actionContext)

            try:
                analysisResult = json.loads(extractedJson)
            except json.JSONDecodeError as e:
                logger.error("Failed to parse purpose analysis JSON: %s", e)
                logger.error("Extracted JSON (first 500 chars): %s", extractedJson[:500])
                return self._createDefaultPurposes(chatDocuments, actionContext)

            # Reject malformed shapes explicitly instead of letting them
            # surface later as an AttributeError/TypeError.
            if not self._isValidAnalysisResult(analysisResult):
                logger.warning("Invalid analysis result structure, using defaults")
                return self._createDefaultPurposes(chatDocuments, actionContext)

            self._addMissingPurposes(analysisResult, chatDocuments, actionContext)
            return analysisResult

        except Exception as e:
            # Boundary handler: purpose analysis is best-effort, so fall back
            # to defaults but keep the traceback in the log.
            logger.exception("Error analyzing document purposes: %s", e)
            return self._createDefaultPurposes(chatDocuments, actionContext)

    @staticmethod
    def _isValidAnalysisResult(analysisResult: Any) -> bool:
        """Return True if the parsed AI answer is a dict holding a list of dict entries."""
        if not isinstance(analysisResult, dict):
            return False
        purposes = analysisResult.get("document_purposes")
        if not isinstance(purposes, list):
            return False
        return all(isinstance(entry, dict) for entry in purposes)

    def _addMissingPurposes(
        self,
        analysisResult: Dict[str, Any],
        chatDocuments: List[ChatDocument],
        actionContext: str
    ) -> None:
        """Append a default purpose entry for every document the AI answer omitted.

        Mutates ``analysisResult["document_purposes"]`` in place.
        """
        analyzedIds = {entry.get("document_id") for entry in analysisResult["document_purposes"]}
        for doc in chatDocuments:
            if doc.id in analyzedIds:
                continue
            logger.warning("Document %s not in analysis result, adding default purpose", doc.id)
            analysisResult["document_purposes"].append({
                "document_id": doc.id,
                "purpose": self._determineDefaultPurpose(doc, actionContext),
                "reasoning": "Default purpose based on document type and action context",
                "extractionPrompt": None,
                "processingNotes": None
            })

    def _getDebugWriter(self) -> Any:
        """Return ``services.utils.writeDebugFile`` if available, otherwise None."""
        if not self.services:
            return None
        utils = getattr(self.services, "utils", None)
        return getattr(utils, "writeDebugFile", None)

    def _writeDebugFile(self, content: str, fileName: str, label: str) -> None:
        """Best-effort debug dump; a failure is logged at debug level and ignored.

        Args:
            content: Text to write.
            fileName: Debug file name passed to ``writeDebugFile``.
            label: Short description used in the failure log message.
        """
        writer = self._getDebugWriter()
        if writer is None:
            return
        try:
            writer(content, fileName)
        except Exception as e:
            logger.debug("Could not write debug file for purpose analysis %s: %s", label, e)

    def _writeResponseDebugFile(self, aiResponse: Any) -> None:
        """Dump the AI response content and status metadata to the debug file, best-effort."""
        if self._getDebugWriter() is None:
            return
        try:
            responseContent = aiResponse.content if aiResponse and aiResponse.content else ''
            responseMetadata = {
                "status": aiResponse.status if aiResponse else "N/A",
                "error": aiResponse.error if aiResponse else "N/A",
                "documents_count": len(aiResponse.documents) if aiResponse and aiResponse.documents else 0
            }
            debugText = (
                f"Response Content:\n{responseContent}\n\n"
                f"Response Metadata:\n{json.dumps(responseMetadata, indent=2)}"
            )
        except Exception as e:
            # Attribute access or JSON serialization of the metadata failed.
            logger.debug("Could not write debug file for purpose analysis response: %s", e)
            return
        self._writeDebugFile(debugText, "document_purpose_analysis_response", "response")

    def _createAnalysisPrompt(
        self,
        userPrompt: str,
        actionContext: str,
        documentMetadata: List[Dict[str, Any]]
    ) -> str:
        """Create AI prompt for document purpose analysis.

        Args:
            userPrompt: User's original prompt, embedded verbatim.
            actionContext: Action name, embedded verbatim.
            documentMetadata: Dicts with the keys ``document_id``,
                ``fileName``, ``mimeType`` and ``fileSize``.

        Returns:
            The complete prompt text, asking for a JSON-only answer in the
            user's language.
        """
        # One numbered entry per document.
        docListText = "".join(
            f"\n{index}. Document ID: {docInfo['document_id']}\n"
            f" File Name: {docInfo['fileName']}\n"
            f" MIME Type: {docInfo['mimeType']}\n"
            f" File Size: {docInfo['fileSize']} bytes\n"
            for index, docInfo in enumerate(documentMetadata, 1)
        )

        userLanguage = self._getUserLanguage()
        separator = "=" * 80

        return f"""{separator}
DOCUMENT PURPOSE ANALYSIS
{separator}

USER PROMPT:
{userPrompt}

ACTION CONTEXT: {actionContext}

DOCUMENTS PROVIDED:
{docListText}
{separator}

TASK: For each document, determine its purpose based on:
1. User prompt intent (what the user wants to do)
2. Action context (what action is being performed)
3. Document type (mimeType - is it text, image, etc.)
4. Document metadata (fileName, size)

AVAILABLE PURPOSES:
- "extract_text_content": Extract text content for use in document generation
- "include_image": Include the image directly in the generated document (for images)
- "analyze_image_vision": Analyze image with vision AI to extract text/information (for images with text/charts)
- "use_as_template": Use document structure/layout as template for generation
- "use_as_reference": Use as background context/reference without detailed extraction
- "extract_data": Extract structured data (key-value pairs, entities, fields)
- "attach": Document is an attachment - don't process, just attach to output
- "convert_format": Convert document format (for convert actions)
- "translate": Translate document content (for translate actions)
- "summarize": Create summary of document (for summarize actions)
- "compare": Compare documents (for comparison actions)
- "merge": Merge documents (for merge actions)
- "extract_tables_charts": Extract tables and charts specifically
- "use_for_styling": Use document for styling/formatting reference only
- "extract_metadata": Extract only document metadata

CRITICAL RULES:
1. For images (mimeType starts with "image/"):
- If user wants to "include" or "show" images → "include_image"
- If user wants to "analyze", "read text", or "extract text" from images → "analyze_image_vision"
- Default for images in generateDocument → "include_image"

2. For text documents in generateDocument:
- If user mentions "template" or "structure" → "use_as_template"
- If user mentions "reference" or "context" → "use_as_reference"
- Default → "extract_text_content"

3. Consider action context:
- generateDocument: Usually "extract_text_content" or "include_image"
- extractData: Usually "extract_data"
- translateDocument: Usually "translate"
- summarizeDocument: Usually "summarize"

4. Return ONLY valid JSON following this structure:
{{
"document_purposes": [
{{
"document_id": "document_id_here",
"purpose": "extract_text_content",
"reasoning": "Brief explanation in language '{userLanguage}'",
"extractionPrompt": "Specific extraction prompt if purpose requires extraction, otherwise null",
"processingNotes": "Any special processing requirements or null"
}}
],
"overall_intent": "Summary of how documents should be used together in language '{userLanguage}'"
}}

5. All content must be in the language '{userLanguage}'
6. Return ONLY the JSON structure. No explanations before or after.

Return ONLY the JSON structure.
"""

    def _createDefaultPurposes(
        self,
        chatDocuments: List[ChatDocument],
        actionContext: str
    ) -> Dict[str, Any]:
        """Create default purposes when AI analysis fails.

        Args:
            chatDocuments: Documents that need a purpose.
            actionContext: Action name used to pick the default purpose.

        Returns:
            A result with the same shape as ``analyzeDocumentPurposes``,
            holding one rule-based entry per document.
        """
        purposes = [
            {
                "document_id": doc.id,
                "purpose": self._determineDefaultPurpose(doc, actionContext),
                "reasoning": f"Default purpose based on document type ({doc.mimeType}) and action context ({actionContext})",
                "extractionPrompt": None,
                "processingNotes": None
            }
            for doc in chatDocuments
        ]
        return {
            "document_purposes": purposes,
            "overall_intent": f"Default processing for {len(chatDocuments)} document(s) in {actionContext} action"
        }

    def _determineDefaultPurpose(
        self,
        doc: ChatDocument,
        actionContext: str
    ) -> str:
        """Determine default purpose based on document type and action context.

        Args:
            doc: Document whose ``mimeType`` is inspected (``None`` is treated
                as an unknown, non-image type).
            actionContext: Action name.

        Returns:
            For images: "analyze_image_vision" in the "extractData" and
            "process" contexts, otherwise "include_image". For all other
            documents: the purpose mapped to the action context, or
            "extract_text_content" for unknown contexts.
        """
        # MIME type names are case-insensitive, so normalize before matching.
        mimeType = (doc.mimeType or "").lower()

        if mimeType.startswith("image/"):
            if actionContext in self._IMAGE_ANALYSIS_ACTIONS:
                return "analyze_image_vision"
            return "include_image"

        return self._ACTION_DEFAULT_PURPOSES.get(actionContext, self._FALLBACK_PURPOSE)

    def _getUserLanguage(self) -> str:
        """Get user language for document generation.

        Returns:
            ``services.currentUserLanguage`` if set, else
            ``services.user.language`` if set, else 'en'. Empty or ``None``
            values are never returned.
        """
        try:
            services = self.services
            if services:
                language = getattr(services, "currentUserLanguage", None)
                if language:
                    return language
                user = getattr(services, "user", None)
                language = getattr(user, "language", None) if user else None
                if language:
                    return language
        except Exception as e:
            # Language lookup is best-effort; fall through to the default.
            logger.debug("Could not determine user language: %s", e)
        return self._DEFAULT_LANGUAGE
|
|
|