refactored document handling
This commit is contained in:
parent
02d34b914e
commit
4e15be8296
9 changed files with 483 additions and 493 deletions
|
|
@ -1,124 +0,0 @@
|
||||||
# Contains all document creation functions extracted from managerChat.py
|
|
||||||
|
|
||||||
import logging
|
|
||||||
import json
|
|
||||||
from typing import Dict, Any, Optional, List, Union
|
|
||||||
from datetime import datetime, UTC
|
|
||||||
|
|
||||||
class DocumentCreator:
    """Document helpers that delegate file-type lookups to a service object.

    The service passed to the constructor is expected to provide
    ``getFileExtension``, ``getMimeTypeFromExtension`` and
    ``detectContentTypeFromData``; these are the only service methods this
    class calls.
    """

    def __init__(self, service):
        """Store the service used for extension and MIME type lookups."""
        self.service = service

    def getFileExtension(self, filename: str) -> str:
        """Extract file extension from filename (delegates to the service)."""
        return self.service.getFileExtension(filename)

    def getMimeType(self, extension: str) -> str:
        """Get MIME type based on file extension (delegates to the service)."""
        return self.service.getMimeTypeFromExtension(extension)

    @staticmethod
    def _toBytes(content: Any) -> bytes:
        """Return ``content`` as bytes without mangling data that is already binary."""
        if isinstance(content, (bytes, bytearray)):
            # str(b"...") would yield the "b'...'" repr, which defeats detection.
            return bytes(content)
        if isinstance(content, str):
            return content.encode('utf-8')
        return str(content).encode('utf-8')

    def _detectFromBytes(self, file_bytes: bytes, filename: str) -> str:
        """Ask the service for a MIME type; a falsy answer becomes the generic type."""
        detected_mime_type = self.service.detectContentTypeFromData(file_bytes, filename)
        return detected_mime_type or "application/octet-stream"

    def detectMimeTypeFromContent(self, content: Any, filename: str) -> str:
        """Detect the MIME type of ``content`` using the service.

        Args:
            content: ``str``, ``bytes``/``bytearray``, ``dict`` (serialised as
                JSON before detection) or any other object (converted with
                ``str``).
            filename: Name passed to the service alongside the data.

        Returns:
            The MIME type reported by the service, or
            ``'application/octet-stream'`` when the service reports nothing
            or any error occurs (the error is logged as a warning).
        """
        try:
            if isinstance(content, dict):
                file_bytes = json.dumps(content, ensure_ascii=False).encode('utf-8')
            else:
                file_bytes = self._toBytes(content)
            return self._detectFromBytes(file_bytes, filename)
        except Exception as e:
            logging.warning(f"Error in MIME type detection for {filename}: {str(e)}")
            return 'application/octet-stream'

    def detectMimeTypeFromDocument(self, document: Any, filename: str) -> str:
        """Detect the MIME type of ``document.content`` using the service.

        A document without a ``content`` attribute is treated as empty text.

        Returns:
            The MIME type reported by the service, or
            ``'application/octet-stream'`` when the service reports nothing
            or any error occurs (the error is logged as a warning).
        """
        try:
            content = getattr(document, 'content', '')
            return self._detectFromBytes(self._toBytes(content), filename)
        except Exception as e:
            logging.warning(f"Error in MIME type detection for document {filename}: {str(e)}")
            return 'application/octet-stream'

    @staticmethod
    def _rowsToCsv(rows: list) -> str:
        """Serialise a list of rows (dicts, sequences or scalars) to CSV text."""
        import csv
        import io

        output = io.StringIO()
        if rows and all(isinstance(row, dict) for row in rows):
            # Use the union of keys in first-seen order: DictWriter raises
            # ValueError when a row has a key missing from fieldnames.
            fieldnames = list(dict.fromkeys(key for row in rows for key in row))
            writer = csv.DictWriter(output, fieldnames=fieldnames, restval='')
            writer.writeheader()
            writer.writerows(rows)
        else:
            writer = csv.writer(output)
            for row in rows:
                # A bare scalar becomes a one-cell row; otherwise a string
                # would be split into one cell per character.
                writer.writerow(row if isinstance(row, (list, tuple)) else [row])
        return output.getvalue()

    def convertDocumentDataToString(self, document_data: Dict[str, Any], file_extension: str) -> str:
        """Convert document data to string content based on file type.

        Args:
            document_data: ``None``, ``str``, ``bytes``, ``dict``, ``list`` or
                any other object.
            file_extension: Extension such as ``'json'`` or ``'csv'``; case
                and a leading dot are ignored.

        Returns:
            ``""`` for ``None``; strings unchanged; bytes decoded as UTF-8;
            for dicts the first usable well-known field (text-like and CSV
            extensions) or the whole dict as indented JSON; for lists CSV
            text or indented JSON; ``str(document_data)`` otherwise and on
            any error (the error is logged).
        """
        try:
            if document_data is None:
                return ""
            if isinstance(document_data, str):
                return document_data
            if isinstance(document_data, (bytes, bytearray)):
                return bytes(document_data).decode('utf-8', errors='replace')

            extension = (file_extension or '').strip().lstrip('.').lower()

            if isinstance(document_data, dict):
                if extension in ('txt', 'md', 'html', 'css', 'js', 'py'):
                    text_fields = ('content', 'text', 'data', 'result', 'summary', 'extracted_content', 'table_data')
                    for field in text_fields:
                        content = document_data.get(field)
                        if isinstance(content, str):
                            return content
                        if isinstance(content, (dict, list)):
                            return json.dumps(content, indent=2, ensure_ascii=False)
                elif extension == 'csv':
                    csv_fields = ('table_data', 'csv_data', 'rows', 'data', 'content', 'text')
                    for field in csv_fields:
                        content = document_data.get(field)
                        if isinstance(content, str):
                            return content
                        if isinstance(content, list) and content and isinstance(content[0], (list, dict)):
                            return self._rowsToCsv(content)
                # 'json', unknown extensions and dicts without a usable field
                # all serialise the whole dict.
                return json.dumps(document_data, indent=2, ensure_ascii=False)

            if isinstance(document_data, list):
                if extension == 'csv':
                    return self._rowsToCsv(document_data)
                return json.dumps(document_data, indent=2, ensure_ascii=False)

            return str(document_data)
        except Exception as e:
            logging.error(f"Error converting document data to string: {str(e)}")
            return str(document_data)
|
|
||||||
|
|
@ -9,6 +9,13 @@ from pathlib import Path
|
||||||
import xml.etree.ElementTree as ET
|
import xml.etree.ElementTree as ET
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
import uuid
|
import uuid
|
||||||
|
from .documentUtility import (
|
||||||
|
getFileExtension,
|
||||||
|
getMimeTypeFromExtension,
|
||||||
|
detectMimeTypeFromContent,
|
||||||
|
detectMimeTypeFromData,
|
||||||
|
convertDocumentDataToString
|
||||||
|
)
|
||||||
|
|
||||||
from modules.interfaces.interfaceChatModel import (
|
from modules.interfaces.interfaceChatModel import (
|
||||||
ExtractedContent,
|
ExtractedContent,
|
||||||
|
|
@ -29,7 +36,7 @@ class FileProcessingError(Exception):
|
||||||
"""Custom exception for file processing errors."""
|
"""Custom exception for file processing errors."""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
class DocumentProcessor:
|
class DocumentExtraction:
|
||||||
"""Processor for handling document operations and content extraction."""
|
"""Processor for handling document operations and content extraction."""
|
||||||
|
|
||||||
def __init__(self, serviceCenter=None):
|
def __init__(self, serviceCenter=None):
|
||||||
|
|
@ -133,17 +140,13 @@ class DocumentProcessor:
|
||||||
# Decode base64 if needed
|
# Decode base64 if needed
|
||||||
if base64Encoded:
|
if base64Encoded:
|
||||||
fileData = base64.b64decode(fileData)
|
fileData = base64.b64decode(fileData)
|
||||||
|
# Use documentUtility for mime type detection
|
||||||
# Detect content type if needed
|
|
||||||
if mimeType == "application/octet-stream":
|
if mimeType == "application/octet-stream":
|
||||||
mimeType = self._serviceCenter.detectContentTypeFromData(fileData, filename)
|
mimeType = detectMimeTypeFromData(fileData, filename, self._serviceCenter)
|
||||||
|
|
||||||
# Process document based on type
|
# Process document based on type
|
||||||
if mimeType not in self.supportedTypes:
|
if mimeType not in self.supportedTypes:
|
||||||
# Fallback to binary processing
|
|
||||||
contentItems = await self._processBinary(fileData, filename, mimeType)
|
contentItems = await self._processBinary(fileData, filename, mimeType)
|
||||||
else:
|
else:
|
||||||
# Process document based on type
|
|
||||||
processor = self.supportedTypes[mimeType]
|
processor = self.supportedTypes[mimeType]
|
||||||
contentItems = await processor(fileData, filename, mimeType)
|
contentItems = await processor(fileData, filename, mimeType)
|
||||||
|
|
||||||
|
|
@ -171,13 +174,15 @@ class DocumentProcessor:
|
||||||
"""Process text document"""
|
"""Process text document"""
|
||||||
try:
|
try:
|
||||||
content = fileData.decode('utf-8')
|
content = fileData.decode('utf-8')
|
||||||
|
# Use documentUtility for mime type
|
||||||
|
mime_type = getMimeTypeFromExtension(getFileExtension(filename), self._serviceCenter)
|
||||||
return [ContentItem(
|
return [ContentItem(
|
||||||
label="main",
|
label="main",
|
||||||
data=content,
|
data=content,
|
||||||
metadata=ContentMetadata(
|
metadata=ContentMetadata(
|
||||||
size=len(content.encode('utf-8')),
|
size=len(content.encode('utf-8')),
|
||||||
pages=1,
|
pages=1,
|
||||||
mimeType="text/plain",
|
mimeType=mime_type,
|
||||||
base64Encoded=False
|
base64Encoded=False
|
||||||
)
|
)
|
||||||
)]
|
)]
|
||||||
|
|
@ -189,13 +194,14 @@ class DocumentProcessor:
|
||||||
"""Process CSV document"""
|
"""Process CSV document"""
|
||||||
try:
|
try:
|
||||||
content = fileData.decode('utf-8')
|
content = fileData.decode('utf-8')
|
||||||
|
mime_type = getMimeTypeFromExtension(getFileExtension(filename), self._serviceCenter)
|
||||||
return [ContentItem(
|
return [ContentItem(
|
||||||
label="main",
|
label="main",
|
||||||
data=content,
|
data=content,
|
||||||
metadata=ContentMetadata(
|
metadata=ContentMetadata(
|
||||||
size=len(content.encode('utf-8')),
|
size=len(content.encode('utf-8')),
|
||||||
pages=1,
|
pages=1,
|
||||||
mimeType="text/csv",
|
mimeType=mime_type,
|
||||||
base64Encoded=False
|
base64Encoded=False
|
||||||
)
|
)
|
||||||
)]
|
)]
|
||||||
|
|
@ -207,16 +213,15 @@ class DocumentProcessor:
|
||||||
"""Process JSON document"""
|
"""Process JSON document"""
|
||||||
try:
|
try:
|
||||||
content = fileData.decode('utf-8')
|
content = fileData.decode('utf-8')
|
||||||
# Parse JSON to validate
|
|
||||||
jsonData = json.loads(content)
|
jsonData = json.loads(content)
|
||||||
|
mime_type = getMimeTypeFromExtension(getFileExtension(filename), self._serviceCenter)
|
||||||
return [ContentItem(
|
return [ContentItem(
|
||||||
label="main",
|
label="main",
|
||||||
data=content,
|
data=content,
|
||||||
metadata=ContentMetadata(
|
metadata=ContentMetadata(
|
||||||
size=len(content.encode('utf-8')),
|
size=len(content.encode('utf-8')),
|
||||||
pages=1,
|
pages=1,
|
||||||
mimeType="application/json",
|
mimeType=mime_type,
|
||||||
base64Encoded=False
|
base64Encoded=False
|
||||||
)
|
)
|
||||||
)]
|
)]
|
||||||
|
|
@ -228,13 +233,14 @@ class DocumentProcessor:
|
||||||
"""Process XML document"""
|
"""Process XML document"""
|
||||||
try:
|
try:
|
||||||
content = fileData.decode('utf-8')
|
content = fileData.decode('utf-8')
|
||||||
|
mime_type = getMimeTypeFromExtension(getFileExtension(filename), self._serviceCenter)
|
||||||
return [ContentItem(
|
return [ContentItem(
|
||||||
label="main",
|
label="main",
|
||||||
data=content,
|
data=content,
|
||||||
metadata=ContentMetadata(
|
metadata=ContentMetadata(
|
||||||
size=len(content.encode('utf-8')),
|
size=len(content.encode('utf-8')),
|
||||||
pages=1,
|
pages=1,
|
||||||
mimeType="application/xml",
|
mimeType=mime_type,
|
||||||
base64Encoded=False
|
base64Encoded=False
|
||||||
)
|
)
|
||||||
)]
|
)]
|
||||||
|
|
@ -246,13 +252,14 @@ class DocumentProcessor:
|
||||||
"""Process HTML document"""
|
"""Process HTML document"""
|
||||||
try:
|
try:
|
||||||
content = fileData.decode('utf-8')
|
content = fileData.decode('utf-8')
|
||||||
|
mime_type = getMimeTypeFromExtension(getFileExtension(filename), self._serviceCenter)
|
||||||
return [ContentItem(
|
return [ContentItem(
|
||||||
label="main",
|
label="main",
|
||||||
data=content,
|
data=content,
|
||||||
metadata=ContentMetadata(
|
metadata=ContentMetadata(
|
||||||
size=len(content.encode('utf-8')),
|
size=len(content.encode('utf-8')),
|
||||||
pages=1,
|
pages=1,
|
||||||
mimeType="text/html",
|
mimeType=mime_type,
|
||||||
base64Encoded=False
|
base64Encoded=False
|
||||||
)
|
)
|
||||||
)]
|
)]
|
||||||
|
|
@ -264,15 +271,14 @@ class DocumentProcessor:
|
||||||
"""Process SVG document"""
|
"""Process SVG document"""
|
||||||
try:
|
try:
|
||||||
content = fileData.decode('utf-8')
|
content = fileData.decode('utf-8')
|
||||||
# Check if it's actually SVG
|
|
||||||
isSvg = "<svg" in content.lower()
|
isSvg = "<svg" in content.lower()
|
||||||
|
mime_type = getMimeTypeFromExtension(getFileExtension(filename), self._serviceCenter)
|
||||||
return [ContentItem(
|
return [ContentItem(
|
||||||
label="main",
|
label="main",
|
||||||
data=content if isSvg else None,
|
data=content if isSvg else None,
|
||||||
metadata=ContentMetadata(
|
metadata=ContentMetadata(
|
||||||
size=len(content.encode('utf-8')),
|
size=len(content.encode('utf-8')),
|
||||||
mimeType="image/svg+xml",
|
mimeType=mime_type,
|
||||||
base64Encoded=False,
|
base64Encoded=False,
|
||||||
error=None if isSvg else "Invalid SVG content"
|
error=None if isSvg else "Invalid SVG content"
|
||||||
)
|
)
|
||||||
163
modules/chat/documents/documentGeneration.py
Normal file
163
modules/chat/documents/documentGeneration.py
Normal file
|
|
@ -0,0 +1,163 @@
|
||||||
|
import logging
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
from datetime import datetime, UTC
|
||||||
|
from .documentUtility import (
|
||||||
|
getFileExtension,
|
||||||
|
getMimeTypeFromExtension,
|
||||||
|
detectMimeTypeFromContent,
|
||||||
|
detectMimeTypeFromData,
|
||||||
|
convertDocumentDataToString
|
||||||
|
)
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
class DocumentGenerator:
|
||||||
|
def __init__(self, service):
|
||||||
|
self.service = service
|
||||||
|
|
||||||
|
def processActionResultDocuments(self, action_result, action, workflow) -> List[Dict[str, Any]]:
|
||||||
|
"""
|
||||||
|
Main function to process documents from an action result.
|
||||||
|
Returns a list of processed document dictionaries.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
documents = action_result.data.get("documents", [])
|
||||||
|
processed_documents = []
|
||||||
|
for doc in documents:
|
||||||
|
processed_doc = self.processSingleDocument(doc, action)
|
||||||
|
if processed_doc:
|
||||||
|
processed_documents.append(processed_doc)
|
||||||
|
return processed_documents
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error processing action result documents: {str(e)}")
|
||||||
|
return []
|
||||||
|
|
||||||
|
def processSingleDocument(self, doc: Any, action) -> Optional[Dict[str, Any]]:
|
||||||
|
"""Process a single document from action result"""
|
||||||
|
try:
|
||||||
|
if hasattr(doc, 'filename') and doc.filename:
|
||||||
|
# Document object with filename attribute
|
||||||
|
mime_type = getattr(doc, 'mimeType', 'application/octet-stream')
|
||||||
|
if mime_type == "application/octet-stream":
|
||||||
|
content = getattr(doc, 'content', '')
|
||||||
|
mime_type = detectMimeTypeFromContent(content, doc.filename, self.service)
|
||||||
|
return {
|
||||||
|
'filename': doc.filename,
|
||||||
|
'fileSize': getattr(doc, 'fileSize', 0),
|
||||||
|
'mimeType': mime_type,
|
||||||
|
'content': getattr(doc, 'content', ''),
|
||||||
|
'document': doc
|
||||||
|
}
|
||||||
|
elif isinstance(doc, dict):
|
||||||
|
# Dictionary format document
|
||||||
|
filename = doc.get('documentName', doc.get('filename', \
|
||||||
|
f"{action.execMethod}_{action.execAction}_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}"))
|
||||||
|
fileSize = doc.get('fileSize', len(str(doc.get('documentData', ''))))
|
||||||
|
mimeType = doc.get('mimeType', 'application/octet-stream')
|
||||||
|
if mimeType == "application/octet-stream":
|
||||||
|
document_data = doc.get('documentData', '')
|
||||||
|
mimeType = detectMimeTypeFromContent(document_data, filename, self.service)
|
||||||
|
return {
|
||||||
|
'filename': filename,
|
||||||
|
'fileSize': fileSize,
|
||||||
|
'mimeType': mimeType,
|
||||||
|
'content': doc.get('documentData', ''),
|
||||||
|
'document': doc
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
# Unknown document type
|
||||||
|
logger.warning(f"Unknown document type for action {action.execMethod}.{action.execAction}: {type(doc)}")
|
||||||
|
filename = f"{action.execMethod}_{action.execAction}_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}"
|
||||||
|
mimeType = detectMimeTypeFromContent(doc, filename, self.service)
|
||||||
|
return {
|
||||||
|
'filename': filename,
|
||||||
|
'fileSize': 0,
|
||||||
|
'mimeType': mimeType,
|
||||||
|
'content': str(doc),
|
||||||
|
'document': doc
|
||||||
|
}
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error processing single document: {str(e)}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
def createDocumentsFromActionResult(self, action_result, action, workflow) -> List[Any]:
|
||||||
|
"""
|
||||||
|
Create actual document objects from action result and store them in the system.
|
||||||
|
Returns a list of created document objects.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
processed_docs = self.processActionResultDocuments(action_result, action, workflow)
|
||||||
|
created_documents = []
|
||||||
|
for doc_data in processed_docs:
|
||||||
|
try:
|
||||||
|
document_name = doc_data['filename']
|
||||||
|
document_data = doc_data['content']
|
||||||
|
mime_type = doc_data['mimeType']
|
||||||
|
# Convert document data to string content
|
||||||
|
content = convertDocumentDataToString(document_data, getFileExtension(document_name))
|
||||||
|
# Skip empty or minimal content
|
||||||
|
minimal_content_patterns = ['{}', '[]', 'null', '""', "''"]
|
||||||
|
if not content or content.strip() == "" or content.strip() in minimal_content_patterns:
|
||||||
|
logger.warning(f"Empty or minimal content for document {document_name}, skipping")
|
||||||
|
continue
|
||||||
|
# Create file in system
|
||||||
|
file_id = self.service.createFile(
|
||||||
|
fileName=document_name,
|
||||||
|
mimeType=mime_type,
|
||||||
|
content=content,
|
||||||
|
base64encoded=False
|
||||||
|
)
|
||||||
|
if not file_id:
|
||||||
|
logger.error(f"Failed to create file for document {document_name}")
|
||||||
|
continue
|
||||||
|
# Create document object
|
||||||
|
document = self.service.createDocument(
|
||||||
|
fileName=document_name,
|
||||||
|
mimeType=mime_type,
|
||||||
|
content=content,
|
||||||
|
base64encoded=False
|
||||||
|
)
|
||||||
|
if document:
|
||||||
|
created_documents.append(document)
|
||||||
|
logger.info(f"Created document: {document_name} with file ID: {file_id} and MIME type: {mime_type}")
|
||||||
|
else:
|
||||||
|
logger.error(f"Failed to create ChatDocument object for {document_name}")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error creating document {doc_data.get('filename', 'unknown')}: {str(e)}")
|
||||||
|
continue
|
||||||
|
return created_documents
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error creating documents from action result: {str(e)}")
|
||||||
|
return []
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def get_delivered_files_and_formats(documents):
|
||||||
|
delivered_files = []
|
||||||
|
delivered_formats = []
|
||||||
|
for doc in documents:
|
||||||
|
if hasattr(doc, 'filename'):
|
||||||
|
delivered_files.append(doc.filename)
|
||||||
|
file_extension = getFileExtension(doc.filename)
|
||||||
|
mime_type = getattr(doc, 'mimeType', 'application/octet-stream')
|
||||||
|
delivered_formats.append({
|
||||||
|
'filename': doc.filename,
|
||||||
|
'extension': file_extension,
|
||||||
|
'mimeType': mime_type
|
||||||
|
})
|
||||||
|
elif isinstance(doc, dict) and 'filename' in doc:
|
||||||
|
delivered_files.append(doc['filename'])
|
||||||
|
file_extension = getFileExtension(doc['filename'])
|
||||||
|
mime_type = doc.get('mimeType', 'application/octet-stream')
|
||||||
|
delivered_formats.append({
|
||||||
|
'filename': doc['filename'],
|
||||||
|
'extension': file_extension,
|
||||||
|
'mimeType': mime_type
|
||||||
|
})
|
||||||
|
else:
|
||||||
|
delivered_files.append(f"document_{len(delivered_files)}")
|
||||||
|
delivered_formats.append({
|
||||||
|
'filename': f"document_{len(delivered_files)}",
|
||||||
|
'extension': 'unknown',
|
||||||
|
'mimeType': 'application/octet-stream'
|
||||||
|
})
|
||||||
|
return delivered_files, delivered_formats
|
||||||
132
modules/chat/documents/documentUtility.py
Normal file
132
modules/chat/documents/documentUtility.py
Normal file
|
|
@ -0,0 +1,132 @@
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
from typing import Any, Dict
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
def getFileExtension(filename: str) -> str:
    """Extract the lower-cased file extension (without the dot) from a filename.

    Only the final path component is inspected, so a dot inside a directory
    name (``"reports.v2/summary"``) is not mistaken for an extension.

    Args:
        filename: File name or path. ``None`` or an empty value yields ``''``.

    Returns:
        The text after the last dot of the final path component, lower-cased,
        or ``''`` when that component contains no dot.
    """
    if not filename:
        return ''
    # Treat both separator styles alike before isolating the last component.
    base_name = str(filename).replace('\\', '/').rsplit('/', 1)[-1]
    if '.' not in base_name:
        return ''
    return base_name.rsplit('.', 1)[-1].lower()
|
||||||
|
|
||||||
|
# Built once at import time instead of on every call.
_FALLBACK_MIME_TYPES = {
    'txt': 'text/plain',
    'md': 'text/markdown',
    'html': 'text/html',
    'css': 'text/css',
    'js': 'application/javascript',
    'json': 'application/json',
    'csv': 'text/csv',
    'xml': 'application/xml',
    'py': 'text/x-python',
    'pdf': 'application/pdf',
    'docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
    'xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
    'png': 'image/png',
    'jpg': 'image/jpeg',
    'jpeg': 'image/jpeg',
    'gif': 'image/gif',
    'svg': 'image/svg+xml',
}


def getMimeTypeFromExtension(extension: str, service=None) -> str:
    """Get MIME type based on file extension. Optionally use a service for mapping.

    Args:
        extension: Extension such as ``'txt'``. In the fallback path, case,
            surrounding whitespace and a leading dot are ignored, and ``None``
            is treated as "no extension".
        service: Optional object exposing ``getMimeTypeFromExtension``. When
            truthy, its answer is returned unchanged and the built-in table
            is not consulted.

    Returns:
        The MIME type, or ``'application/octet-stream'`` when the built-in
        table has no entry for the extension.
    """
    if service:
        return service.getMimeTypeFromExtension(extension)
    # Fallback mapping
    normalized = (extension or '').strip().lstrip('.').lower()
    return _FALLBACK_MIME_TYPES.get(normalized, 'application/octet-stream')
|
||||||
|
|
||||||
|
def detectMimeTypeFromData(file_bytes: bytes, filename: str, service=None) -> str:
    """Detect MIME type from file bytes and filename using a service if provided.

    The service's ``detectContentTypeFromData`` is tried first. If it is
    absent, fails, or only reports ``'application/octet-stream'``, the type is
    guessed from the filename's extension instead.

    Args:
        file_bytes: Raw file content handed to the service.
        filename: File name used by the service and for the extension guess.
        service: Optional object exposing ``detectContentTypeFromData`` (and
            ``getMimeTypeFromExtension``, used by the extension fallback).

    Returns:
        The detected or guessed MIME type; ``'application/octet-stream'`` if
        the extension fallback itself fails.
    """
    if service:
        try:
            detected = service.detectContentTypeFromData(file_bytes, filename)
        except Exception as e:
            # A failing detector must not discard the extension-based guess.
            logger.warning(f"Error in MIME type detection for {filename}: {str(e)}")
            detected = None
        if detected and detected != 'application/octet-stream':
            return detected
    try:
        # Fallback: guess from extension
        ext = getFileExtension(filename)
        return getMimeTypeFromExtension(ext, service)
    except Exception as e:
        logger.warning(f"Error in MIME type detection for {filename}: {str(e)}")
        return 'application/octet-stream'
|
||||||
|
|
||||||
|
def detectMimeTypeFromContent(content: Any, filename: str, service=None) -> str:
    """Detect MIME type from content and filename using a service if provided.

    The content is converted to bytes and passed to ``detectMimeTypeFromData``.

    Args:
        content: ``bytes``/``bytearray`` (used as-is), ``str`` (UTF-8
            encoded), ``dict`` (serialised as JSON) or any other object
            (converted with ``str``).
        filename: File name forwarded to the detection routine.
        service: Optional service forwarded to the detection routine.

    Returns:
        The detected MIME type, or ``'application/octet-stream'`` when the
        conversion or detection raises (the error is logged as a warning).
    """
    try:
        if isinstance(content, (bytes, bytearray)):
            # str(b"...") would produce the "b'...'" repr and hide the real
            # file signature from the detector.
            file_bytes = bytes(content)
        elif isinstance(content, str):
            file_bytes = content.encode('utf-8')
        elif isinstance(content, dict):
            file_bytes = json.dumps(content, ensure_ascii=False).encode('utf-8')
        else:
            file_bytes = str(content).encode('utf-8')
        return detectMimeTypeFromData(file_bytes, filename, service)
    except Exception as e:
        logger.warning(f"Error in MIME type detection for {filename}: {str(e)}")
        return 'application/octet-stream'
|
||||||
|
|
||||||
|
def _rowsToCsv(rows: list) -> str:
    """Serialise a list of rows (dicts, sequences or scalars) to CSV text."""
    import csv
    import io

    output = io.StringIO()
    if rows and all(isinstance(row, dict) for row in rows):
        # Use the union of keys in first-seen order: DictWriter raises
        # ValueError when a row has a key missing from fieldnames.
        fieldnames = list(dict.fromkeys(key for row in rows for key in row))
        writer = csv.DictWriter(output, fieldnames=fieldnames, restval='')
        writer.writeheader()
        writer.writerows(rows)
    else:
        writer = csv.writer(output)
        for row in rows:
            # A bare scalar becomes a one-cell row; otherwise a string would
            # be split into one cell per character.
            writer.writerow(row if isinstance(row, (list, tuple)) else [row])
    return output.getvalue()


def convertDocumentDataToString(document_data: Any, file_extension: str) -> str:
    """Convert document data to string content based on file type.

    Args:
        document_data: ``None``, ``str``, ``bytes``, ``dict``, ``list`` or any
            other object.
        file_extension: Extension such as ``'json'`` or ``'csv'``; case and a
            leading dot are ignored.

    Returns:
        ``""`` for ``None``; strings unchanged; bytes decoded as UTF-8.
        For dicts: with a text-like extension, the first well-known field
        holding a string (returned as-is) or a dict/list (as indented JSON);
        with ``csv``, the first well-known field holding a string or a
        non-empty list of rows; otherwise the whole dict as indented JSON.
        For lists: CSV text for ``csv``, indented JSON otherwise.
        Any other object, and any error (which is logged), yields
        ``str(document_data)``.
    """
    try:
        if document_data is None:
            return ""
        if isinstance(document_data, str):
            return document_data
        if isinstance(document_data, (bytes, bytearray)):
            # str() on bytes would return the "b'...'" repr, not the text.
            return bytes(document_data).decode('utf-8', errors='replace')

        extension = (file_extension or '').strip().lstrip('.').lower()

        if isinstance(document_data, dict):
            if extension in ('txt', 'md', 'html', 'css', 'js', 'py'):
                text_fields = ('content', 'text', 'data', 'result', 'summary', 'extracted_content', 'table_data')
                for field in text_fields:
                    content = document_data.get(field)
                    if isinstance(content, str):
                        return content
                    if isinstance(content, (dict, list)):
                        return json.dumps(content, indent=2, ensure_ascii=False)
            elif extension == 'csv':
                csv_fields = ('table_data', 'csv_data', 'rows', 'data', 'content', 'text')
                for field in csv_fields:
                    content = document_data.get(field)
                    if isinstance(content, str):
                        return content
                    if isinstance(content, list) and content and isinstance(content[0], (list, dict)):
                        return _rowsToCsv(content)
            # 'json', unknown extensions and dicts without a usable field all
            # serialise the whole dict.
            return json.dumps(document_data, indent=2, ensure_ascii=False)

        if isinstance(document_data, list):
            if extension == 'csv':
                return _rowsToCsv(document_data)
            return json.dumps(document_data, indent=2, ensure_ascii=False)

        return str(document_data)
    except Exception as e:
        logger.error(f"Error converting document data to string: {str(e)}")
        return str(document_data)
|
||||||
|
|
@ -7,8 +7,8 @@ import time
|
||||||
from typing import Dict, Any, Optional, List, Union
|
from typing import Dict, Any, Optional, List, Union
|
||||||
from datetime import datetime, UTC
|
from datetime import datetime, UTC
|
||||||
from modules.interfaces.interfaceChatModel import ReviewResult, ActionResult
|
from modules.interfaces.interfaceChatModel import ReviewResult, ActionResult
|
||||||
from modules.chat.documents.documentCreation import DocumentCreator
|
|
||||||
from .promptFactory import createResultReviewPrompt
|
from .promptFactory import createResultReviewPrompt
|
||||||
|
from modules.chat.documents.documentGeneration import DocumentGenerator
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
@ -16,12 +16,72 @@ class HandlingActions:
|
||||||
def __init__(self, service, chatInterface):
|
def __init__(self, service, chatInterface):
|
||||||
self.service = service
|
self.service = service
|
||||||
self.chatInterface = chatInterface
|
self.chatInterface = chatInterface
|
||||||
self.documentCreator = DocumentCreator(self.service)
|
self.documentGenerator = DocumentGenerator(service)
|
||||||
|
|
||||||
|
async def executeSingleAction(self, action, workflow):
    """Execute a single action and return ActionResult with enhanced document processing.

    The action's parameters are copied, extended with
    ``expectedDocumentFormats`` when the action declares any, and passed to
    ``self.service.executeAction``. On success the action is marked
    successful and a workflow message is created; on failure the action is
    marked with the reported error. Documents in the result are processed in
    both cases.

    Args:
        action: Action object; this method reads ``execMethod``,
            ``execAction``, ``execParameters``, ``expectedDocumentFormats``,
            ``execResultLabel`` and ``id`` and calls ``setSuccess`` /
            ``setError`` on it.
        workflow: Workflow forwarded to message and document processing.

    Returns:
        An ``ActionResult`` describing the outcome. Exceptions are caught,
        logged and reported as a failed ``ActionResult``.
    """
    try:
        # Tolerate an action without parameters instead of failing on .copy().
        enhanced_parameters = (action.execParameters or {}).copy()
        if action.expectedDocumentFormats:
            enhanced_parameters['expectedDocumentFormats'] = action.expectedDocumentFormats
            logger.info(f"Action {action.execMethod}.{action.execAction} expects formats: {action.expectedDocumentFormats}")

        result = await self.service.executeAction(
            methodName=action.execMethod,
            actionName=action.execAction,
            parameters=enhanced_parameters
        )
        result_label = action.execResultLabel

        # A result may carry no data (e.g. on failure). Reading it through a
        # dict keeps the real error from being replaced by an AttributeError.
        result_data = result.data or {}
        result_text = result_data.get("result", "")

        if result.success:
            action.setSuccess()
            action.result = result_text
            action.execResultLabel = result_label
            await self.createActionMessage(action, result, workflow, result_label)
        else:
            action.setError(result.error or "Action execution failed")

        processed_documents = self.documentGenerator.processActionResultDocuments(result, action, workflow)
        return ActionResult(
            success=result.success,
            data={
                "result": result_text,
                "documents": processed_documents,
                "actionId": action.id,
                "actionMethod": action.execMethod,
                "actionName": action.execAction,
                "resultLabel": result_label
            },
            metadata={
                "actionId": action.id,
                "actionMethod": action.execMethod,
                "actionName": action.execAction,
                "resultLabel": result_label
            },
            validation=[],
            error=result.error or ""
        )
    except Exception as e:
        logger.error(f"Error executing single action: {str(e)}")
        action.setError(str(e))
        return ActionResult(
            success=False,
            data={
                "actionId": action.id,
                "actionMethod": action.execMethod,
                "actionName": action.execAction,
                "documents": []
            },
            metadata={
                "actionId": action.id,
                "actionMethod": action.execMethod,
                "actionName": action.execAction
            },
            validation=[],
            error=str(e)
        )
|
||||||
|
|
||||||
async def validateActionResult(self, action_result, action, context) -> dict:
|
async def validateActionResult(self, action_result, action, context) -> dict:
|
||||||
try:
|
try:
|
||||||
prompt = self._createGenericValidationPrompt(action_result, action, context)
|
prompt = self._createGenericValidationPrompt(action_result, action, context)
|
||||||
response = await self._callAIWithCircuitBreaker(prompt, "action_validation")
|
response = await self.service.callAiTextAdvanced(prompt, "action_validation")
|
||||||
validation = self._parseValidationResponse(response)
|
validation = self._parseValidationResponse(response)
|
||||||
validation['action_id'] = action.id
|
validation['action_id'] = action.id
|
||||||
validation['action_method'] = action.execMethod
|
validation['action_method'] = action.execMethod
|
||||||
|
|
@ -41,6 +101,73 @@ class HandlingActions:
|
||||||
'result_label': action.execResultLabel
|
'result_label': action.execResultLabel
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async def createActionMessage(self, action, result, workflow, result_label=None):
    """Record an executed action as an assistant "step" message on the workflow.

    The message's documents come from
    ``self.documentGenerator.createDocumentsFromActionResult`` and the message
    itself is created through ``self.chatInterface.createWorkflowMessage``.
    On success the message is appended to ``workflow.messages``. Every
    exception is logged and suppressed; the method returns ``None``.

    Args:
        action: Executed action; supplies id, method, action name and the
            default result label.
        result: Action result handed to the document generator.
        workflow: Workflow whose ``messages`` list receives the new message.
        result_label: Label for the documents; defaults to
            ``action.execResultLabel`` when ``None``.
    """
    try:
        # Fall back to the label configured on the action itself.
        documents_label = action.execResultLabel if result_label is None else result_label
        payload = dict(
            workflowId=workflow.id,
            role="assistant",
            message=f"Executed action {action.execMethod}.{action.execAction}",
            status="step",
            sequenceNr=len(workflow.messages) + 1,
            publishedAt=datetime.now(UTC).isoformat(),
            actionId=action.id,
            actionMethod=action.execMethod,
            actionName=action.execAction,
            documentsLabel=documents_label,
            documents=[],
        )
        # Document creation is delegated to the document generator.
        generated = self.documentGenerator.createDocumentsFromActionResult(result, action, workflow)
        payload["documents"] = generated

        stored_message = self.chatInterface.createWorkflowMessage(payload)
        if not stored_message:
            logger.error(f"Failed to create workflow message for action {action.execMethod}.{action.execAction}")
            return

        workflow.messages.append(stored_message)
        logger.info(f"Created action message for {action.execMethod}.{action.execAction} with {len(generated)} documents")
        logger.debug(f"WORKFLOW STATE after createActionMessage: id={id(workflow)}, message_count={len(workflow.messages)}")
        # Dump a per-message overview to help trace document labels.
        for position, entry in enumerate(workflow.messages):
            entry_label = getattr(entry, 'documentsLabel', None)
            entry_docs = getattr(entry, 'documents', None)
            logger.debug(f" Message {position}: label='{entry_label}', documents_count={len(entry_docs) if entry_docs else 0}")
    except Exception as e:
        logger.error(f"Error creating action message: {str(e)}")
|
||||||
|
|
||||||
|
def parseActionResponse(self, response: str) -> list:
    """Extract the ``actions`` value from an AI response that embeds a JSON object.

    Decoding starts at the first ``{`` and stops at the end of the first
    complete JSON object, so text before the object and trailing commentary
    (even if it contains braces) are ignored.

    Args:
        response: Raw AI response text.

    Returns:
        The value stored under ``actions``, or an empty list when no JSON
        object can be decoded or the ``actions`` key is missing. Errors are
        logged, never raised.
    """
    try:
        json_start = response.find('{')
        if json_start == -1 or response.rfind('}') == -1:
            raise ValueError("No JSON found in response")
        # raw_decode parses exactly one object and reports where it ended,
        # instead of requiring the slice up to the LAST '}' to be valid JSON.
        action_data, _ = json.JSONDecoder().raw_decode(response, json_start)
        if 'actions' not in action_data:
            raise ValueError("Action response missing 'actions' field")
        return action_data['actions']
    except Exception as e:
        logger.error(f"Error parsing action response: {str(e)}")
        return []
|
||||||
|
|
||||||
|
def parseReviewResponse(self, response: str) -> dict:
    """Extract the review dictionary from an AI response that embeds a JSON object.

    Decoding starts at the first ``{`` and stops at the end of the first
    complete JSON object, so surrounding prose (including trailing text that
    contains braces) does not break parsing.

    Args:
        response: Raw AI response text.

    Returns:
        The decoded review dict (guaranteed to contain ``status``), or
        ``{'status': 'failed', 'reason': 'Parse error: ...'}`` when the
        response cannot be decoded or lacks ``status``. Errors are logged,
        never raised.
    """
    try:
        json_start = response.find('{')
        if json_start == -1 or response.rfind('}') == -1:
            raise ValueError("No JSON found in response")
        # raw_decode parses exactly one object and ignores whatever follows it.
        review, _ = json.JSONDecoder().raw_decode(response, json_start)
        if 'status' not in review:
            raise ValueError("Review response missing 'status' field")
        return review
    except Exception as e:
        logger.error(f"Error parsing review response: {str(e)}")
        return {'status': 'failed', 'reason': f'Parse error: {str(e)}'}
|
||||||
|
|
||||||
|
# Internal helper methods
|
||||||
|
|
||||||
def _createGenericValidationPrompt(self, action_result, action, context) -> str:
|
def _createGenericValidationPrompt(self, action_result, action, context) -> str:
|
||||||
success = action_result.success
|
success = action_result.success
|
||||||
result_data = action_result.data
|
result_data = action_result.data
|
||||||
|
|
@ -54,35 +181,9 @@ class HandlingActions:
|
||||||
expected_document_formats = action.expectedDocumentFormats or []
|
expected_document_formats = action.expectedDocumentFormats or []
|
||||||
actual_result_label = result_data.get("resultLabel", "") if isinstance(result_data, dict) else ""
|
actual_result_label = result_data.get("resultLabel", "") if isinstance(result_data, dict) else ""
|
||||||
result_label_match = actual_result_label == expected_result_label
|
result_label_match = actual_result_label == expected_result_label
|
||||||
delivered_files = []
|
# Use DocumentGenerator for file/format extraction
|
||||||
delivered_formats = []
|
delivered_files, delivered_formats = DocumentGenerator.get_delivered_files_and_formats(documents)
|
||||||
content_items = []
|
content_items = []
|
||||||
for doc in documents:
|
|
||||||
if hasattr(doc, 'filename'):
|
|
||||||
delivered_files.append(doc.filename)
|
|
||||||
file_extension = self._getFileExtension(doc.filename)
|
|
||||||
mime_type = getattr(doc, 'mimeType', 'application/octet-stream')
|
|
||||||
delivered_formats.append({
|
|
||||||
'filename': doc.filename,
|
|
||||||
'extension': file_extension,
|
|
||||||
'mimeType': mime_type
|
|
||||||
})
|
|
||||||
elif isinstance(doc, dict) and 'filename' in doc:
|
|
||||||
delivered_files.append(doc['filename'])
|
|
||||||
file_extension = self._getFileExtension(doc['filename'])
|
|
||||||
mime_type = doc.get('mimeType', 'application/octet-stream')
|
|
||||||
delivered_formats.append({
|
|
||||||
'filename': doc['filename'],
|
|
||||||
'extension': file_extension,
|
|
||||||
'mimeType': mime_type
|
|
||||||
})
|
|
||||||
else:
|
|
||||||
delivered_files.append(f"document_{len(delivered_files)}")
|
|
||||||
delivered_formats.append({
|
|
||||||
'filename': f"document_{len(delivered_files)}",
|
|
||||||
'extension': 'unknown',
|
|
||||||
'mimeType': 'application/octet-stream'
|
|
||||||
})
|
|
||||||
if isinstance(result_data, dict):
|
if isinstance(result_data, dict):
|
||||||
if 'extractedContent' in result_data:
|
if 'extractedContent' in result_data:
|
||||||
extracted_content = result_data['extractedContent']
|
extracted_content = result_data['extractedContent']
|
||||||
|
|
@ -128,305 +229,4 @@ class HandlingActions:
|
||||||
'quality_score': 5,
|
'quality_score': 5,
|
||||||
'missing_elements': [],
|
'missing_elements': [],
|
||||||
'suggested_retry_approach': ''
|
'suggested_retry_approach': ''
|
||||||
}
|
}
|
||||||
|
|
||||||
async def executeSingleAction(self, action, workflow):
    """Execute a single action and return an ActionResult describing the outcome.

    The action is run through ``self.service.executeAction``. On success the
    action is marked successful and a workflow message is created via
    ``createActionMessage``; on failure the action's error is set. Every
    document in the result is normalized into a plain dict (see
    ``_processActionDocument``). Any exception is logged, stored on the
    action and reported as a failed ActionResult instead of being raised.

    Args:
        action: Action to execute; supplies method, action name, parameters,
            expected document formats and result label.
        workflow: Workflow that receives the action message on success.

    Returns:
        ActionResult carrying the normalized documents and action metadata.
    """
    try:
        # Work on a copy so the action's own parameters are never mutated;
        # a missing parameter mapping is treated as empty.
        enhanced_parameters = (action.execParameters or {}).copy()
        if action.expectedDocumentFormats:
            enhanced_parameters['expectedDocumentFormats'] = action.expectedDocumentFormats
            logger.info(f"Action {action.execMethod}.{action.execAction} expects formats: {action.expectedDocumentFormats}")

        result = await self.service.executeAction(
            methodName=action.execMethod,
            actionName=action.execAction,
            parameters=enhanced_parameters
        )
        result_label = action.execResultLabel
        # A result without a data payload must not turn a success into a failure.
        result_data = result.data or {}

        if result.success:
            action.setSuccess()
            action.result = result_data.get("result", "")
            await self.createActionMessage(action, result, workflow, result_label)
        else:
            action.setError(result.error or "Action execution failed")

        processed_documents = [
            self._processActionDocument(doc, action)
            for doc in (result_data.get("documents") or [])
        ]

        return ActionResult(
            success=result.success,
            data={
                "result": result_data.get("result", ""),
                "documents": processed_documents,
                "actionId": action.id,
                "actionMethod": action.execMethod,
                "actionName": action.execAction,
                "resultLabel": result_label
            },
            metadata={
                "actionId": action.id,
                "actionMethod": action.execMethod,
                "actionName": action.execAction,
                "resultLabel": result_label
            },
            validation=[],
            error=result.error or ""
        )
    except Exception as e:
        logger.error(f"Error executing single action: {str(e)}")
        action.setError(str(e))
        return ActionResult(
            success=False,
            data={
                "actionId": action.id,
                "actionMethod": action.execMethod,
                "actionName": action.execAction,
                "documents": []
            },
            metadata={
                "actionId": action.id,
                "actionMethod": action.execMethod,
                "actionName": action.execAction
            },
            validation=[],
            error=str(e)
        )

def _processActionDocument(self, doc, action):
    """Normalize one raw action document into the dict layout used by executeSingleAction.

    Handles three shapes: an object with a non-empty ``filename`` attribute,
    a dict (``documentName``/``filename``, ``documentData``, ``fileSize``,
    ``mimeType`` keys), and any other value (stringified). A generic
    ``application/octet-stream`` MIME type is refined through
    ``self.documentCreator``.
    """
    generic_mime = 'application/octet-stream'

    if hasattr(doc, 'filename') and doc.filename:
        mime_type = getattr(doc, 'mimeType', generic_mime)
        if mime_type == generic_mime:
            mime_type = self.documentCreator.detectMimeTypeFromDocument(doc, doc.filename)
        return {
            'filename': doc.filename,
            'fileSize': getattr(doc, 'fileSize', 0),
            'mimeType': mime_type,
            'content': getattr(doc, 'content', ''),
            'document': doc
        }

    # Name used when the document carries no name of its own.
    fallback_name = f"{action.execMethod}_{action.execAction}_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}"

    if isinstance(doc, dict):
        document_data = doc.get('documentData', '')
        filename = doc.get('documentName', doc.get('filename', fallback_name))
        mime_type = doc.get('mimeType', generic_mime)
        if mime_type == generic_mime:
            mime_type = self.documentCreator.detectMimeTypeFromContent(document_data, filename)
        return {
            'filename': filename,
            'fileSize': doc.get('fileSize', len(str(document_data))),
            'mimeType': mime_type,
            'content': document_data,
            'document': doc
        }

    logger.warning(f"Unknown document type for action {action.execMethod}.{action.execAction}: {type(doc)}")
    content = str(doc)
    return {
        'filename': fallback_name,
        # Size of the stringified content, matching how createActionMessage sizes such values.
        'fileSize': len(content),
        'mimeType': self.documentCreator.detectMimeTypeFromContent(doc, fallback_name),
        'content': content,
        'document': doc
    }
|
|
||||||
|
|
||||||
async def createActionMessage(self, action, result, workflow, result_label=None):
    """Create and store a workflow message for an action result, persisting its documents.

    Each entry under ``result.data["documents"]`` is unpacked (dict, object
    with a ``filename`` attribute, or arbitrary value), converted to a string
    via ``self.documentCreator`` and stored through
    ``self.service.createFile`` and ``self.service.createDocument``.
    Documents with empty or placeholder content are skipped, and a failure on
    one document does not stop the others. The message is created through
    ``self.chatInterface.createWorkflowMessage`` and appended to
    ``workflow.messages``. All exceptions are logged and suppressed.

    Args:
        action: Executed action; supplies id, method, action name and the
            default result label.
        result: Action result whose ``data`` may contain ``documents``.
        workflow: Workflow whose ``messages`` list receives the new message.
        result_label: Label for the documents; defaults to
            ``action.execResultLabel`` when ``None``.
    """
    try:
        # Tolerate results without a data attribute or with data set to None.
        result_data = (result.data if hasattr(result, 'data') else {}) or {}
        documents_data = result_data.get("documents", []) or []
        if result_label is None:
            result_label = action.execResultLabel

        message_data = {
            "workflowId": workflow.id,
            "role": "assistant",
            "message": f"Executed action {action.execMethod}.{action.execAction}",
            "status": "step",
            "sequenceNr": len(workflow.messages) + 1,
            "publishedAt": datetime.now(UTC).isoformat(),
            "actionId": action.id,
            "actionMethod": action.execMethod,
            "actionName": action.execAction,
            "documentsLabel": result_label,  # Use intent label from action definition
            "documents": []
        }

        # Serialized forms that carry no real content.
        minimal_content_patterns = ['{}', '[]', 'null', '""', "''"]
        processed_documents = []
        for doc_data in documents_data:
            try:
                if isinstance(doc_data, dict):
                    document_name = doc_data.get("documentName", doc_data.get("filename", f"{action.execMethod}_{action.execAction}_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}"))
                    document_data = doc_data.get("documentData", {})
                    file_size = doc_data.get("fileSize", 0)
                    mime_type = doc_data.get("mimeType", "application/octet-stream")
                elif hasattr(doc_data, 'filename'):
                    document_name = doc_data.filename
                    document_data = getattr(doc_data, 'content', {})
                    file_size = getattr(doc_data, 'fileSize', 0)
                    mime_type = getattr(doc_data, 'mimeType', "application/octet-stream")
                else:
                    document_name = f"{action.execMethod}_{action.execAction}_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}"
                    document_data = doc_data
                    file_size = len(str(doc_data))
                    mime_type = "application/octet-stream"

                # Only refine the MIME type when nothing better than the generic one is known.
                if mime_type == "application/octet-stream":
                    mime_type = self.documentCreator.detectMimeTypeFromContent(document_data, document_name)

                content = self.documentCreator.convertDocumentDataToString(document_data, self.documentCreator.getFileExtension(document_name))
                if not content or content.strip() == "" or content.strip() in minimal_content_patterns:
                    logger.warning(f"Empty or minimal content for document {document_name}, skipping")
                    continue

                file_id = self.service.createFile(
                    fileName=document_name,
                    mimeType=mime_type,
                    content=content,
                    base64encoded=False
                )
                if not file_id:
                    logger.error(f"Failed to create file for document {document_name}")
                    continue

                document = self.service.createDocument(
                    fileName=document_name,
                    mimeType=mime_type,
                    content=content,
                    base64encoded=False
                )
                if document:
                    processed_documents.append(document)
                    logger.info(f"Created document: {document_name} with file ID: {file_id} and MIME type: {mime_type}")
                else:
                    logger.error(f"Failed to create ChatDocument object for {document_name}")
            except Exception as e:
                # Look the name up according to the document's shape; getattr()
                # on a dict never finds its keys and would always log 'unknown'.
                if isinstance(doc_data, dict):
                    failed_name = doc_data.get('documentName', doc_data.get('filename', 'unknown'))
                else:
                    failed_name = getattr(doc_data, 'filename', 'unknown')
                logger.error(f"Error processing document {failed_name}: {str(e)}")
                continue
        message_data["documents"] = processed_documents

        message = self.chatInterface.createWorkflowMessage(message_data)
        if message:
            workflow.messages.append(message)
            logger.info(f"Created action message for {action.execMethod}.{action.execAction} with {len(message_data.get('documents', []))} documents")
            logger.debug(f"WORKFLOW STATE after createActionMessage: id={id(workflow)}, message_count={len(workflow.messages)}")
            for idx, msg in enumerate(workflow.messages):
                label = getattr(msg, 'documentsLabel', None)
                docs = getattr(msg, 'documents', None)
                logger.debug(f" Message {idx}: label='{label}', documents_count={len(docs) if docs else 0}")
        else:
            logger.error(f"Failed to create workflow message for action {action.execMethod}.{action.execAction}")
    except Exception as e:
        logger.error(f"Error creating action message: {str(e)}")
|
|
||||||
|
|
||||||
async def performTaskReview(self, review_context) -> 'ReviewResult':
    """Perform an AI-based task review and enrich 'retry' verdicts with guidance.

    A review prompt is built with ``createResultReviewPrompt``, sent through
    ``_callAIWithCircuitBreaker`` and parsed into a dict. When the verdict is
    ``retry``, successful action results are inspected for empty output and
    for documents named ``'unknown'``; matching hints are added to
    ``improvements`` and the quality score is lowered by 2 (minimum 1).

    Args:
        review_context: Review context; its ``action_results`` are inspected.

    Returns:
        ReviewResult built from the parsed review. If anything fails, a
        ReviewResult with status ``'success'`` is returned so the workflow is
        not blocked.
    """
    try:
        # Prepare prompt for result review
        prompt = await createResultReviewPrompt(self, review_context)

        # Call AI with circuit breaker
        response = await self._callAIWithCircuitBreaker(prompt, "result_review")

        # NOTE(review): only `parseReviewResponse` (no leading underscore) is
        # visible nearby - confirm `_parseReviewResponse` exists on this class.
        review_dict = self._parseReviewResponse(response)

        # Add default values for missing fields
        review_dict.setdefault('status', 'unknown')
        review_dict.setdefault('reason', 'No reason provided')
        review_dict.setdefault('quality_score', 5)

        def add_improvement(hint):
            # The AI may return improvements as a list or as a string; extend
            # whichever form is present instead of raising on list + str.
            current = review_dict.get('improvements')
            if isinstance(current, list):
                review_dict['improvements'] = current + [hint.strip()]
            else:
                review_dict['improvements'] = (current or '') + hint

        def document_filename(doc):
            # Documents may be dicts or objects with a filename attribute.
            if isinstance(doc, dict):
                return doc.get('filename')
            return getattr(doc, 'filename', None)

        # Enhanced retry logic based on result quality
        if review_dict.get('status') == 'retry':
            successful_data = [
                (result.data or {})
                for result in (review_context.action_results or [])
                if result.success
            ]

            # Empty results are only a problem if no documents were produced either.
            has_empty_results = any(
                not str(data.get('result', '') or '').strip() and not data.get('documents')
                for data in successful_data
            )
            has_incomplete_metadata = any(
                any(document_filename(doc) == 'unknown' for doc in (data.get('documents') or []))
                for data in successful_data
            )

            if has_empty_results:
                add_improvement(
                    " Ensure the document extraction returns actual content, not empty results. " +
                    "Check if the AI prompt is specific enough to extract meaningful data."
                )
            if has_incomplete_metadata:
                add_improvement(
                    " Ensure proper document metadata is extracted including filename, size, and mime type. " +
                    "The document processing should provide complete file information."
                )

            # If we have specific issues, adjust quality score
            if has_empty_results or has_incomplete_metadata:
                review_dict['quality_score'] = max(1, review_dict.get('quality_score', 5) - 2)

        # Create ReviewResult model
        return ReviewResult(
            status=review_dict.get('status', 'unknown'),
            reason=review_dict.get('reason', 'No reason provided'),
            improvements=review_dict.get('improvements', []),
            quality_score=review_dict.get('quality_score', 5),
            missing_outputs=review_dict.get('missing_outputs', []),
            met_criteria=review_dict.get('met_criteria', []),
            unmet_criteria=review_dict.get('unmet_criteria', []),
            confidence=review_dict.get('confidence', 0.5)
        )
    except Exception as e:
        logger.error(f"Error performing task review: {str(e)}")
        return ReviewResult(
            status='success',  # Default to success to avoid blocking workflow
            reason=f'Review failed: {str(e)}',
            quality_score=5,
            confidence=0.5
        )
|
|
||||||
|
|
||||||
def parseActionResponse(self, response: str) -> list:
    """Parse an AI response into the list stored under its ``actions`` key.

    The first JSON object in ``response`` is decoded. Text before the first
    ``{`` and anything after the object's closing brace is ignored, so
    trailing remarks that contain braces do not break parsing.

    Args:
        response: Raw AI response text.

    Returns:
        The ``actions`` value, or ``[]`` if no JSON object can be decoded or
        the key is absent. Failures are logged, not raised.
    """
    try:
        json_start = response.find('{')
        if json_start == -1 or response.rfind('}') == -1:
            raise ValueError("No JSON found in response")
        # Decode one object starting at the first '{' rather than trusting
        # that everything up to the last '}' forms a single JSON document.
        decoder = json.JSONDecoder()
        action_data, _ = decoder.raw_decode(response, json_start)
        if 'actions' not in action_data:
            raise ValueError("Action response missing 'actions' field")
        return action_data['actions']
    except Exception as e:
        logger.error(f"Error parsing action response: {str(e)}")
        return []
|
|
||||||
|
|
||||||
def parseReviewResponse(self, response: str) -> dict:
    """Parse an AI response into a review result dictionary.

    The first JSON object in ``response`` is decoded. Text before the first
    ``{`` and anything after the object's closing brace is ignored, so
    trailing remarks that contain braces do not break parsing.

    Args:
        response: Raw AI response text.

    Returns:
        The decoded dict, which always contains ``status``. On any failure a
        dict with ``status`` set to ``'failed'`` and a ``reason`` describing
        the parse error is returned instead. Failures are logged, not raised.
    """
    try:
        json_start = response.find('{')
        if json_start == -1 or response.rfind('}') == -1:
            raise ValueError("No JSON found in response")
        # Decode one object starting at the first '{' rather than trusting
        # that everything up to the last '}' forms a single JSON document.
        decoder = json.JSONDecoder()
        review, _ = decoder.raw_decode(response, json_start)
        if 'status' not in review:
            raise ValueError("Review response missing 'status' field")
        return review
    except Exception as e:
        logger.error(f"Error parsing review response: {str(e)}")
        return {'status': 'failed', 'reason': f'Parse error: {str(e)}'}
|
|
||||||
|
|
||||||
# Utility method for file extension
|
|
||||||
def _getFileExtension(self, filename):
    """Return the lowercased extension of ``filename`` without the leading dot.

    Only the last path component is inspected, so a dot inside a directory
    name is never mistaken for an extension separator.

    Args:
        filename: File name or path; may be empty or ``None``.

    Returns:
        The text after the last dot of the final path component, lowercased,
        or ``''`` when there is no dot or no filename at all.
    """
    if not filename:
        return ''
    # Strip directories first (both separator styles) so that a path such as
    # "dir.v1/README" does not yield the bogus extension "v1/readme".
    basename = str(filename).replace('\\', '/').rsplit('/', 1)[-1]
    _, dot, extension = basename.rpartition('.')
    return extension.lower() if dot else ''
|
|
||||||
|
|
||||||
# Placeholder methods for AI and prompt logic (to be implemented or injected)
|
|
||||||
async def _callAIWithCircuitBreaker(self, prompt, purpose):
    """Placeholder AI entry point that a subclass or injected callable must replace.

    Args:
        prompt: Prompt text that would be sent to the AI (unused here).
        purpose: Label describing the call (unused here).

    Raises:
        NotImplementedError: Always.
    """
    reason = "_callAIWithCircuitBreaker must be implemented in the subclass or injected."
    raise NotImplementedError(reason)
|
|
||||||
|
|
|
||||||
|
|
@ -30,7 +30,7 @@ class HandlingTasks:
|
||||||
prompt = await self.service.callAiTextAdvanced(
|
prompt = await self.service.callAiTextAdvanced(
|
||||||
createTaskPlanningPrompt(self, {
|
createTaskPlanningPrompt(self, {
|
||||||
'user_request': userInput,
|
'user_request': userInput,
|
||||||
'available_documents': self._getAvailableDocuments(workflow),
|
'available_documents': self.service.getAvailableDocuments(workflow),
|
||||||
'workflow_id': workflow.id
|
'workflow_id': workflow.id
|
||||||
})
|
})
|
||||||
)
|
)
|
||||||
|
|
@ -55,7 +55,7 @@ class HandlingTasks:
|
||||||
task_step=task_step,
|
task_step=task_step,
|
||||||
workflow=workflow,
|
workflow=workflow,
|
||||||
workflow_id=workflow.id,
|
workflow_id=workflow.id,
|
||||||
available_documents=self._getAvailableDocuments(workflow),
|
available_documents=self.service.getAvailableDocuments(workflow),
|
||||||
previous_results=previous_results or [],
|
previous_results=previous_results or [],
|
||||||
improvements=[],
|
improvements=[],
|
||||||
retry_count=0,
|
retry_count=0,
|
||||||
|
|
@ -205,13 +205,7 @@ class HandlingTasks:
|
||||||
return {'error': str(e)}
|
return {'error': str(e)}
|
||||||
|
|
||||||
# --- Helper and validation methods (unchanged, but can be inlined or made private) ---
|
# --- Helper and validation methods (unchanged, but can be inlined or made private) ---
|
||||||
def _getAvailableDocuments(self, workflow):
    """Return the filenames of all documents attached to the workflow's messages.

    Filenames are listed in message order, then in document order within
    each message.
    """
    return [
        attachment.filename
        for entry in workflow.messages
        for attachment in entry.documents
    ]
|
|
||||||
|
|
||||||
def _parseTaskPlanResponse(self, response: str) -> dict:
|
def _parseTaskPlanResponse(self, response: str) -> dict:
|
||||||
try:
|
try:
|
||||||
json_start = response.find('{')
|
json_start = response.find('{')
|
||||||
|
|
|
||||||
|
|
@ -13,7 +13,7 @@ from modules.interfaces.interfaceChatObjects import getInterface as getChatObjec
|
||||||
from modules.interfaces.interfaceChatModel import ActionResult
|
from modules.interfaces.interfaceChatModel import ActionResult
|
||||||
from modules.interfaces.interfaceComponentObjects import getInterface as getComponentObjects
|
from modules.interfaces.interfaceComponentObjects import getInterface as getComponentObjects
|
||||||
from modules.interfaces.interfaceAppObjects import getInterface as getAppObjects
|
from modules.interfaces.interfaceAppObjects import getInterface as getAppObjects
|
||||||
from gateway.modules.chat.documents.documentProcessing import DocumentProcessor
|
from modules.chat.documents.documentExtraction import DocumentExtraction
|
||||||
from modules.chat.methodBase import MethodBase
|
from modules.chat.methodBase import MethodBase
|
||||||
import uuid
|
import uuid
|
||||||
|
|
||||||
|
|
@ -37,7 +37,7 @@ class ServiceCenter:
|
||||||
self.interfaceComponent = getComponentObjects(currentUser)
|
self.interfaceComponent = getComponentObjects(currentUser)
|
||||||
self.interfaceApp = getAppObjects(currentUser)
|
self.interfaceApp = getAppObjects(currentUser)
|
||||||
self.interfaceAiCalls = AiCalls()
|
self.interfaceAiCalls = AiCalls()
|
||||||
self.documentProcessor = DocumentProcessor(self)
|
self.documentProcessor = DocumentExtraction(self)
|
||||||
|
|
||||||
# Initialize methods catalog
|
# Initialize methods catalog
|
||||||
self.methods = {}
|
self.methods = {}
|
||||||
|
|
@ -259,6 +259,15 @@ class ServiceCenter:
|
||||||
return filename.split('.')[-1].lower()
|
return filename.split('.')[-1].lower()
|
||||||
return "txt" # Default to text
|
return "txt" # Default to text
|
||||||
|
|
||||||
|
def getFileExtension(self, filename):
    """
    Extract file extension from filename (without dot, lowercased).

    Only the last path component is inspected, so a dot inside a directory
    name is not treated as an extension separator.

    Args:
        filename: File name or path; may be empty or None.

    Returns:
        str: The extension, or an empty string if no extension is found or
        no filename is given.
    """
    if not filename:
        return ''
    # Drop any directory part (either separator style) before looking for the dot.
    last_component = str(filename).replace('\\', '/').rsplit('/', 1)[-1]
    if '.' not in last_component:
        return ''
    return last_component.rsplit('.', 1)[-1].lower()
|
||||||
|
|
||||||
# ===== Functions =====
|
# ===== Functions =====
|
||||||
|
|
||||||
def extractContent(self, prompt: str, document: ChatDocument) -> ExtractedContent:
|
def extractContent(self, prompt: str, document: ChatDocument) -> ExtractedContent:
|
||||||
|
|
@ -859,6 +868,22 @@ Please provide a clear summary of this message."""
|
||||||
logger.error(f"Error calculating user input size: {str(e)}")
|
logger.error(f"Error calculating user input size: {str(e)}")
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
def getAvailableDocuments(self, workflow) -> List[str]:
    """
    Get list of available document filenames from workflow.

    Messages whose ``documents`` attribute is missing, None or empty
    contribute nothing instead of raising.

    Args:
        workflow: ChatWorkflow object

    Returns:
        List[str]: Document filenames in message order, then document order
    """
    return [
        doc.filename
        for message in (workflow.messages or [])
        for doc in (getattr(message, 'documents', None) or [])
    ]
|
||||||
|
|
||||||
async def executeAction(self, methodName: str, actionName: str, parameters: Dict[str, Any]) -> ActionResult:
|
async def executeAction(self, methodName: str, actionName: str, parameters: Dict[str, Any]) -> ActionResult:
|
||||||
"""Execute a method action"""
|
"""Execute a method action"""
|
||||||
try:
|
try:
|
||||||
|
|
|
||||||
|
|
@ -1,11 +1,5 @@
|
||||||
|
|
||||||
TODO
|
TODO
|
||||||
- refactoring of chat manager
|
|
||||||
|
|
||||||
|
|
||||||
- move document modules into documents/ (creation, extraction), then adapt references via global search
|
|
||||||
|
|
||||||
|
|
||||||
- neutralizer to activate AND put back placeholders to the returned data
|
- neutralizer to activate AND put back placeholders to the returned data
|
||||||
- referenceHandling and authentication for connections in the method actions
|
- referenceHandling and authentication for connections in the method actions
|
||||||
- check methods
|
- check methods
|
||||||
|
|
|
||||||
|
|
@ -177,7 +177,7 @@ class ServiceCenter:
|
||||||
self.tasks: Dict[str, AgentTask] = {}
|
self.tasks: Dict[str, AgentTask] = {}
|
||||||
self.promptManager = AIPromptManager()
|
self.promptManager = AIPromptManager()
|
||||||
self.taskStateManager = TaskStateManager()
|
self.taskStateManager = TaskStateManager()
|
||||||
self.documentProcessor = DocumentProcessor()
|
self.documentProcessor = DocumentExtraction()
|
||||||
|
|
||||||
async def execute_task(self, task: AgentTask) -> None:
|
async def execute_task(self, task: AgentTask) -> None:
|
||||||
"""Execute task with improved error handling and timeout"""
|
"""Execute task with improved error handling and timeout"""
|
||||||
|
|
@ -304,7 +304,7 @@ class DocumentContext(BaseModel):
|
||||||
relevantSections: List[str]
|
relevantSections: List[str]
|
||||||
processingStatus: Dict[str, str]
|
processingStatus: Dict[str, str]
|
||||||
|
|
||||||
class DocumentProcessor:
|
class DocumentExtraction:
|
||||||
"""Processes documents with context awareness"""
|
"""Processes documents with context awareness"""
|
||||||
|
|
||||||
def process_with_context(self, doc: Dict, context: DocumentContext) -> Dict:
|
def process_with_context(self, doc: Dict, context: DocumentContext) -> Dict:
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue