# gateway/modules/services/serviceGeneration/mainServiceGeneration.py
import logging
import uuid
import time
from typing import Any, Dict, List, Optional, Union, Tuple
from modules.datamodels.datamodelChat import ChatDocument
from modules.datamodels.datamodelAi import AiCallResponse
from modules.interfaces.interfaceAiObjects import aiModels
from modules.services.serviceGeneration.subDocumentUtility import (
getFileExtension,
getMimeTypeFromExtension,
detectMimeTypeFromContent,
detectMimeTypeFromData,
convertDocumentDataToString
)
logger = logging.getLogger(__name__)
class GenerationService:
    """Service for creating, rendering and post-processing AI-generated documents."""

    def __init__(self, serviceCenter=None):
        """Capture the service center and resolve the interfaces used directly.

        Interfaces are resolved once here so later calls never go through
        ``self.services`` indirection; each falls back to None when the
        service center is absent or lacks the attribute.
        """
        self.services = serviceCenter
        if serviceCenter:
            self.interfaceDbComponent = getattr(serviceCenter, 'interfaceDbComponent', None)
            self.interfaceDbChat = getattr(serviceCenter, 'interfaceDbChat', None)
            self.workflow = getattr(serviceCenter, 'workflow', None)
        else:
            self.interfaceDbComponent = None
            self.interfaceDbChat = None
            self.workflow = None
def processActionResultDocuments(self, action_result, action, workflow) -> List[Dict[str, Any]]:
"""
Process documents produced by AI actions and convert them to ChatDocument format.
This function handles AI-generated document data, not document references.
Returns a list of processed document dictionaries.
"""
try:
# Read documents from the standard documents field (not data.documents)
documents = action_result.documents if action_result and hasattr(action_result, 'documents') else []
if not documents:
logger.info(f"No documents found in action_result.documents for {action.execMethod}.{action.execAction}")
return []
logger.info(f"Processing {len(documents)} documents from action_result.documents")
# Process each document from the AI action result
processed_documents = []
for doc in documents:
processed_doc = self.processSingleDocument(doc, action)
if processed_doc:
processed_documents.append(processed_doc)
logger.info(f"Successfully processed {len(processed_documents)} documents")
return processed_documents
except Exception as e:
logger.error(f"Error processing action result documents: {str(e)}")
return []
def processSingleDocument(self, doc: Any, action) -> Optional[Dict[str, Any]]:
"""Process a single document from action result with simplified logic"""
try:
# ActionDocument objects have documentName, documentData, and mimeType
mime_type = doc.mimeType
if mime_type == "application/octet-stream":
content = doc.documentData
# Detect MIME without relying on a service center
mime_type = detectMimeTypeFromContent(content, doc.documentName)
return {
'fileName': doc.documentName,
'fileSize': len(str(doc.documentData)),
'mimeType': mime_type,
'content': doc.documentData,
'document': doc
}
except Exception as e:
logger.error(f"Error processing single document: {str(e)}")
return None
def createDocumentsFromActionResult(self, action_result, action, workflow, message_id=None) -> List[Any]:
"""
Create actual document objects from action result and store them in the system.
Returns a list of created document objects with proper workflow context.
"""
try:
logger.info(f"Creating documents from action result for {action.execMethod}.{action.execAction}")
logger.info(f"Action result documents count: {len(action_result.documents) if action_result.documents else 0}")
processed_docs = self.processActionResultDocuments(action_result, action, workflow)
logger.info(f"Processed {len(processed_docs)} documents")
created_documents = []
for i, doc_data in enumerate(processed_docs):
try:
document_name = doc_data['fileName']
document_data = doc_data['content']
mime_type = doc_data['mimeType']
logger.info(f"Creating document {i+1}: {document_name} (mime: {mime_type}, content length: {len(str(document_data))})")
# Convert document data to string content
content = convertDocumentDataToString(document_data, getFileExtension(document_name))
# Skip empty or minimal content
minimal_content_patterns = ['{}', '[]', 'null', '""', "''"]
if not content or content.strip() == "" or content.strip() in minimal_content_patterns:
logger.warning(f"Empty or minimal content for document {document_name}, skipping")
continue
logger.info(f"Document {document_name} has content: {len(content)} characters")
# Normalize file extension based on mime type if missing or incorrect
try:
mime_to_ext = {
"application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx",
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",
"application/vnd.openxmlformats-officedocument.presentationml.presentation": ".pptx",
"application/pdf": ".pdf",
"text/html": ".html",
"text/markdown": ".md",
"text/plain": ".txt",
"application/json": ".json",
}
expected_ext = mime_to_ext.get(mime_type)
if expected_ext:
if not document_name.lower().endswith(expected_ext):
# Append/replace extension to match mime type
if "." in document_name:
document_name = document_name.rsplit(".", 1)[0] + expected_ext
else:
document_name = document_name + expected_ext
except Exception:
pass
# Decide if content is base64-encoded binary (e.g., docx/pdf) or plain text
base64encoded = False
try:
binary_mime_types = {
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
"application/vnd.openxmlformats-officedocument.presentationml.presentation",
"application/pdf",
}
if isinstance(document_data, str) and mime_type in binary_mime_types:
base64encoded = True
except Exception:
base64encoded = False
# Create document with file in one step using interfaces directly
document = self._createDocument(
fileName=document_name,
mimeType=mime_type,
content=content,
base64encoded=base64encoded,
messageId=message_id
)
if document:
# Set workflow context on the document if possible
self._setDocumentWorkflowContext(document, action, workflow)
created_documents.append(document)
logger.info(f"Successfully created ChatDocument: {document_name} (ID: {document.id if hasattr(document, 'id') else 'N/A'}, fileId: {document.fileId if hasattr(document, 'fileId') else 'N/A'})")
else:
logger.error(f"Failed to create ChatDocument object for {document_name}")
except Exception as e:
logger.error(f"Error creating document {doc_data.get('fileName', 'unknown')}: {str(e)}")
continue
logger.info(f"Successfully created {len(created_documents)} documents")
return created_documents
except Exception as e:
logger.error(f"Error creating documents from action result: {str(e)}")
return []
def _setDocumentWorkflowContext(self, document, action, workflow):
"""Set workflow context on a document for proper routing and labeling"""
try:
# Get current workflow context directly from workflow object
workflow_context = self._getWorkflowContext(workflow)
workflow_stats = self._getWorkflowStats(workflow)
current_round = workflow_context.get('currentRound', 0)
current_task = workflow_context.get('currentTask', 0)
current_action = workflow_context.get('currentAction', 0)
# Try to set workflow context attributes if they exist
if hasattr(document, 'roundNumber'):
document.roundNumber = current_round
if hasattr(document, 'taskNumber'):
document.taskNumber = current_task
if hasattr(document, 'actionNumber'):
document.actionNumber = current_action
if hasattr(document, 'actionId'):
document.actionId = action.id if hasattr(action, 'id') else None
# Set additional workflow metadata if available
if hasattr(document, 'workflowId'):
document.workflowId = workflow_stats.get('workflowId', workflow.id if hasattr(workflow, 'id') else None)
if hasattr(document, 'workflowStatus'):
document.workflowStatus = workflow_stats.get('workflowStatus', workflow.status if hasattr(workflow, 'status') else 'unknown')
logger.debug(f"Set workflow context on document: Round {current_round}, Task {current_task}, Action {current_action}")
logger.debug(f"Document workflow metadata: ID={document.workflowId if hasattr(document, 'workflowId') else 'N/A'}, Status={document.workflowStatus if hasattr(document, 'workflowStatus') else 'N/A'}")
except Exception as e:
logger.warning(f"Could not set workflow context on document: {str(e)}")
def _createDocument(self, fileName: str, mimeType: str, content: str, base64encoded: bool = True, messageId: str = None) -> Optional[ChatDocument]:
"""Create file and ChatDocument using interfaces without service indirection."""
try:
if not self.interfaceDbComponent:
logger.error("Component interface not available for document creation")
return None
# Convert content to bytes
if base64encoded:
import base64
content_bytes = base64.b64decode(content)
else:
content_bytes = content.encode('utf-8')
# Create file and store data
file_item = self.interfaceDbComponent.createFile(
name=fileName,
mimeType=mimeType,
content=content_bytes
)
self.interfaceDbComponent.createFileData(file_item.id, content_bytes)
# Collect file info
file_info = self._getFileInfo(file_item.id)
if not file_info:
logger.error(f"Could not get file info for fileId: {file_item.id}")
return None
# Build ChatDocument
document = ChatDocument(
id=str(uuid.uuid4()),
messageId=messageId or "",
fileId=file_item.id,
fileName=file_info.get("fileName", fileName),
fileSize=file_info.get("size", 0),
mimeType=file_info.get("mimeType", mimeType)
)
# Ensure document can access component interface later
if hasattr(document, 'setComponentInterface') and self.interfaceDbComponent:
try:
document.setComponentInterface(self.interfaceDbComponent)
except Exception:
pass
return document
except Exception as e:
logger.error(f"Error creating document: {str(e)}")
return None
def _getFileInfo(self, fileId: str) -> Optional[Dict[str, Any]]:
try:
if not self.interfaceDbComponent:
return None
file_item = self.interfaceDbComponent.getFile(fileId)
if file_item:
return {
"id": file_item.id,
"fileName": file_item.fileName,
"size": file_item.fileSize,
"mimeType": file_item.mimeType,
"fileHash": getattr(file_item, 'fileHash', None),
"creationDate": getattr(file_item, 'creationDate', None)
}
return None
except Exception as e:
logger.error(f"Error getting file info for {fileId}: {str(e)}")
return None
def _getWorkflowContext(self, workflow) -> Dict[str, int]:
try:
return {
'currentRound': getattr(workflow, 'currentRound', 0),
'currentTask': getattr(workflow, 'currentTask', 0),
'currentAction': getattr(workflow, 'currentAction', 0)
}
except Exception:
return {'currentRound': 0, 'currentTask': 0, 'currentAction': 0}
def _getWorkflowStats(self, workflow) -> Dict[str, Any]:
try:
context = self._getWorkflowContext(workflow)
return {
'currentRound': context['currentRound'],
'currentTask': context['currentTask'],
'currentAction': context['currentAction'],
'totalTasks': getattr(workflow, 'totalTasks', 0),
'totalActions': getattr(workflow, 'totalActions', 0),
'workflowStatus': getattr(workflow, 'status', 'unknown'),
'workflowId': getattr(workflow, 'id', 'unknown')
}
except Exception:
return {
'currentRound': 0,
'currentTask': 0,
'currentAction': 0,
'totalTasks': 0,
'totalActions': 0,
'workflowStatus': 'unknown',
'workflowId': 'unknown'
}
async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, title: str, userPrompt: str = None, aiService=None) -> tuple[str, str]:
"""
Render extracted JSON content to the specified output format.
Args:
extractedContent: Structured JSON document from AI extraction
outputFormat: Target format (html, pdf, docx, txt, md, json, csv, xlsx)
title: Report title
userPrompt: User's original prompt for report generation
aiService: AI service instance for generation prompt creation
Returns:
tuple: (rendered_content, mime_type)
"""
try:
# Validate JSON input
if not isinstance(extractedContent, dict):
raise ValueError("extractedContent must be a JSON dictionary")
if "sections" not in extractedContent:
raise ValueError("extractedContent must contain 'sections' field")
# Remove extra debug file writes for render inputs per simplification
# Get the appropriate renderer for the format
renderer = self._getFormatRenderer(outputFormat)
if not renderer:
raise ValueError(f"Unsupported output format: {outputFormat}")
# Render the JSON content directly (AI generation handled by main service)
renderedContent, mimeType = await renderer.render(extractedContent, title, userPrompt, aiService)
# Remove extra debug output file writes
logger.info(f"Successfully rendered JSON report to {outputFormat} format: {len(renderedContent)} characters")
return renderedContent, mimeType
except Exception as e:
logger.error(f"Error rendering JSON report to {outputFormat}: {str(e)}")
raise
async def getAdaptiveExtractionPrompt(
self,
outputFormat: str,
userPrompt: str,
title: str,
promptAnalysis: Dict[str, Any],
aiService=None
) -> str:
"""Get adaptive extraction prompt based on AI analysis."""
from .subPromptBuilder import buildAdaptiveExtractionPrompt
return await buildAdaptiveExtractionPrompt(
outputFormat=outputFormat,
userPrompt=userPrompt,
title=title,
promptAnalysis=promptAnalysis,
aiService=aiService,
services=self.services
)
async def getGenerationPrompt(
self,
outputFormat: str,
userPrompt: str,
title: str,
aiService=None
) -> str:
"""Get generation prompt for enhancing extracted JSON content."""
from .subPromptBuilder import buildGenerationPrompt
return await buildGenerationPrompt(
outputFormat=outputFormat,
userPrompt=userPrompt,
title=title,
aiService=aiService,
services=self.services
)
async def getGenericExtractionPrompt(
self,
outputFormat: str,
userPrompt: str,
title: str,
aiService=None
) -> str:
"""Get generic extraction prompt that works for both single and multi-file."""
from .subPromptBuilder import buildGenericExtractionPrompt
return await buildGenericExtractionPrompt(
outputFormat=outputFormat,
userPrompt=userPrompt,
title=title,
aiService=aiService,
services=self.services
)
async def getExtractionPrompt(self, outputFormat: str, userPrompt: str, title: str, aiService=None) -> str:
"""
Get the format-specific extraction prompt for AI content extraction.
Args:
outputFormat: Target format (html, pdf, docx, txt, md, json, csv, xlsx)
userPrompt: User's original prompt for report generation
title: Report title
aiService: AI service instance for intent extraction
Returns:
str: Format-specific prompt for AI extraction
"""
try:
# Get the appropriate renderer for the format
renderer = self._getFormatRenderer(outputFormat)
if not renderer:
raise ValueError(f"Unsupported output format: {outputFormat}")
# Build centralized prompt with generic rules + format-specific guidelines
from .subPromptBuilder import buildExtractionPrompt
extractionPrompt = await buildExtractionPrompt(
outputFormat=outputFormat,
renderer=renderer,
userPrompt=userPrompt,
title=title,
aiService=aiService,
services=self.services
)
logger.info(f"Generated {outputFormat}-specific extraction prompt: {len(extractionPrompt)} characters")
return extractionPrompt
except Exception as e:
logger.error(f"Error getting extraction prompt for {outputFormat}: {str(e)}")
raise
async def renderAdaptiveReport(
self,
extractedContent: Dict[str, Any],
outputFormat: str,
title: str,
userPrompt: str = None,
aiService=None,
isMultiFile: bool = False
) -> Union[Tuple[str, str], List[Dict[str, Any]]]:
"""Render report adaptively based on content structure."""
# Start timing for generation
startTime = time.time()
try:
if isMultiFile and "documents" in extractedContent:
result = await self._renderMultiFileReport(
extractedContent, outputFormat, title, userPrompt, aiService
)
else:
result = await self._renderSingleFileReport(
extractedContent, outputFormat, title, userPrompt, aiService
)
# Calculate timing and emit stats
endTime = time.time()
processingTime = endTime - startTime
# Calculate bytes (rough estimation)
if isinstance(result, tuple):
content, mime_type = result
bytesReceived = len(content.encode('utf-8')) if isinstance(content, str) else len(content)
elif isinstance(result, list):
bytesReceived = sum(len(str(doc).encode('utf-8')) for doc in result)
else:
bytesReceived = len(str(result).encode('utf-8'))
# Use internal generation model for pricing
modelName = "internal_generation"
priceUsd = aiModels[modelName]["calculatePriceUsd"](processingTime, 0, bytesReceived)
aiResponse = AiCallResponse(
content="", # No content for generation stats needed
modelName=modelName,
priceUsd=priceUsd,
processingTime=processingTime,
bytesSent=0, # Input is already processed
bytesReceived=bytesReceived,
errorCount=0
)
self.services.workflow.storeWorkflowStat(
self.services.workflow,
aiResponse,
f"generation.render.{outputFormat}"
)
return result
except Exception as e:
# Calculate timing for error case
endTime = time.time()
processingTime = endTime - startTime
# Use internal generation model for pricing
modelName = "internal_generation"
priceUsd = aiModels[modelName]["calculatePriceUsd"](processingTime, 0, 0)
aiResponse = AiCallResponse(
content="", # No content for generation stats needed
modelName=modelName,
priceUsd=priceUsd,
processingTime=processingTime,
bytesSent=0,
bytesReceived=0,
errorCount=1
)
self.services.workflow.storeWorkflowStat(
self.services.workflow,
aiResponse,
f"generation.render.{outputFormat}"
)
raise
async def _renderMultiFileReport(
self,
extractedContent: Dict[str, Any],
outputFormat: str,
title: str,
userPrompt: str = None,
aiService=None
) -> List[Dict[str, Any]]:
"""Render multiple documents from extracted content."""
generated_documents = []
for doc_data in extractedContent.get("documents", []):
# Use existing single-file renderer for each document
renderer = self._getFormatRenderer(outputFormat)
if not renderer:
continue
# Render individual document
rendered_content, mime_type = await renderer.render(
extractedContent={"sections": doc_data["sections"]},
title=doc_data["title"],
userPrompt=userPrompt,
aiService=aiService
)
generated_documents.append({
"filename": doc_data["filename"],
"content": rendered_content,
"mime_type": mime_type,
"title": doc_data["title"]
})
return generated_documents
async def _renderSingleFileReport(
self,
extractedContent: Dict[str, Any],
outputFormat: str,
title: str,
userPrompt: str = None,
aiService=None
) -> Tuple[str, str]:
"""Render single file report (existing functionality)."""
# Use existing renderReport method
return await self.renderReport(
extractedContent, outputFormat, title, userPrompt, aiService
)
def _getFormatRenderer(self, output_format: str):
"""Get the appropriate renderer for the specified format using auto-discovery."""
try:
from .renderers.registry import get_renderer
renderer = get_renderer(output_format, services=self.services)
if renderer:
return renderer
# Fallback to text renderer if no specific renderer found
logger.warning(f"No renderer found for format {output_format}, falling back to text")
fallback_renderer = get_renderer('text', services=self.services)
if fallback_renderer:
return fallback_renderer
logger.error("Even text renderer fallback failed")
return None
except Exception as e:
logger.error(f"Error getting renderer for {output_format}: {str(e)}")
return None