refactored document handling
This commit is contained in:
parent
02d34b914e
commit
4e15be8296
9 changed files with 483 additions and 493 deletions
|
|
@ -1,124 +0,0 @@
|
|||
# Contains all document creation functions extracted from managerChat.py
|
||||
|
||||
import logging
|
||||
import json
|
||||
from typing import Dict, Any, Optional, List, Union
|
||||
from datetime import datetime, UTC
|
||||
|
||||
class DocumentCreator:
|
||||
def __init__(self, service):
|
||||
self.service = service
|
||||
|
||||
def getFileExtension(self, filename: str) -> str:
|
||||
"""Extract file extension from filename"""
|
||||
return self.service.getFileExtension(filename)
|
||||
|
||||
def getMimeType(self, extension: str) -> str:
|
||||
"""Get MIME type based on file extension"""
|
||||
return self.service.getMimeTypeFromExtension(extension)
|
||||
|
||||
def detectMimeTypeFromContent(self, content: Any, filename: str) -> str:
|
||||
"""
|
||||
Detect MIME type from content and filename using service center.
|
||||
Only returns a detected MIME type if it's better than application/octet-stream.
|
||||
"""
|
||||
try:
|
||||
if isinstance(content, str):
|
||||
file_bytes = content.encode('utf-8')
|
||||
elif isinstance(content, dict):
|
||||
file_bytes = json.dumps(content, ensure_ascii=False).encode('utf-8')
|
||||
else:
|
||||
file_bytes = str(content).encode('utf-8')
|
||||
detected_mime_type = self.service.detectContentTypeFromData(file_bytes, filename)
|
||||
if detected_mime_type != "application/octet-stream":
|
||||
return detected_mime_type
|
||||
return "application/octet-stream"
|
||||
except Exception as e:
|
||||
logging.warning(f"Error in MIME type detection for {filename}: {str(e)}")
|
||||
return 'application/octet-stream'
|
||||
|
||||
def detectMimeTypeFromDocument(self, document: Any, filename: str) -> str:
|
||||
"""
|
||||
Detect MIME type from document object using service center.
|
||||
Only returns a detected MIME type if it's better than application/octet-stream.
|
||||
"""
|
||||
try:
|
||||
content = getattr(document, 'content', '')
|
||||
if isinstance(content, str):
|
||||
file_bytes = content.encode('utf-8')
|
||||
else:
|
||||
file_bytes = str(content).encode('utf-8')
|
||||
detected_mime_type = self.service.detectContentTypeFromData(file_bytes, filename)
|
||||
if detected_mime_type != "application/octet-stream":
|
||||
return detected_mime_type
|
||||
return "application/octet-stream"
|
||||
except Exception as e:
|
||||
logging.warning(f"Error in MIME type detection for document {filename}: {str(e)}")
|
||||
return 'application/octet-stream'
|
||||
|
||||
def convertDocumentDataToString(self, document_data: Dict[str, Any], file_extension: str) -> str:
|
||||
"""Convert document data to string content based on file type with enhanced processing"""
|
||||
try:
|
||||
if document_data is None:
|
||||
return ""
|
||||
if isinstance(document_data, str):
|
||||
return document_data
|
||||
if isinstance(document_data, dict):
|
||||
if file_extension == 'json':
|
||||
return json.dumps(document_data, indent=2, ensure_ascii=False)
|
||||
elif file_extension in ['txt', 'md', 'html', 'css', 'js', 'py']:
|
||||
text_fields = ['content', 'text', 'data', 'result', 'summary', 'extracted_content', 'table_data']
|
||||
for field in text_fields:
|
||||
if field in document_data:
|
||||
content = document_data[field]
|
||||
if isinstance(content, str):
|
||||
return content
|
||||
elif isinstance(content, (dict, list)):
|
||||
return json.dumps(content, indent=2, ensure_ascii=False)
|
||||
return json.dumps(document_data, indent=2, ensure_ascii=False)
|
||||
elif file_extension == 'csv':
|
||||
csv_fields = ['table_data', 'csv_data', 'rows', 'data', 'content', 'text']
|
||||
for field in csv_fields:
|
||||
if field in document_data:
|
||||
content = document_data[field]
|
||||
if isinstance(content, str):
|
||||
return content
|
||||
elif isinstance(content, list):
|
||||
if content and isinstance(content[0], (list, dict)):
|
||||
import csv
|
||||
import io
|
||||
output = io.StringIO()
|
||||
if isinstance(content[0], dict):
|
||||
if content:
|
||||
fieldnames = content[0].keys()
|
||||
writer = csv.DictWriter(output, fieldnames=fieldnames)
|
||||
writer.writeheader()
|
||||
writer.writerows(content)
|
||||
else:
|
||||
writer = csv.writer(output)
|
||||
writer.writerows(content)
|
||||
return output.getvalue()
|
||||
return json.dumps(document_data, indent=2, ensure_ascii=False)
|
||||
else:
|
||||
return json.dumps(document_data, indent=2, ensure_ascii=False)
|
||||
elif isinstance(document_data, list):
|
||||
if file_extension == 'csv':
|
||||
import csv
|
||||
import io
|
||||
output = io.StringIO()
|
||||
if document_data and isinstance(document_data[0], dict):
|
||||
fieldnames = document_data[0].keys()
|
||||
writer = csv.DictWriter(output, fieldnames=fieldnames)
|
||||
writer.writeheader()
|
||||
writer.writerows(document_data)
|
||||
else:
|
||||
writer = csv.writer(output)
|
||||
writer.writerows(document_data)
|
||||
return output.getvalue()
|
||||
else:
|
||||
return json.dumps(document_data, indent=2, ensure_ascii=False)
|
||||
else:
|
||||
return str(document_data)
|
||||
except Exception as e:
|
||||
logging.error(f"Error converting document data to string: {str(e)}")
|
||||
return str(document_data)
|
||||
|
|
@ -9,6 +9,13 @@ from pathlib import Path
|
|||
import xml.etree.ElementTree as ET
|
||||
from bs4 import BeautifulSoup
|
||||
import uuid
|
||||
from .documentUtility import (
|
||||
getFileExtension,
|
||||
getMimeTypeFromExtension,
|
||||
detectMimeTypeFromContent,
|
||||
detectMimeTypeFromData,
|
||||
convertDocumentDataToString
|
||||
)
|
||||
|
||||
from modules.interfaces.interfaceChatModel import (
|
||||
ExtractedContent,
|
||||
|
|
@ -29,7 +36,7 @@ class FileProcessingError(Exception):
|
|||
"""Custom exception for file processing errors."""
|
||||
pass
|
||||
|
||||
class DocumentProcessor:
|
||||
class DocumentExtraction:
|
||||
"""Processor for handling document operations and content extraction."""
|
||||
|
||||
def __init__(self, serviceCenter=None):
|
||||
|
|
@ -133,17 +140,13 @@ class DocumentProcessor:
|
|||
# Decode base64 if needed
|
||||
if base64Encoded:
|
||||
fileData = base64.b64decode(fileData)
|
||||
|
||||
# Detect content type if needed
|
||||
# Use documentUtility for mime type detection
|
||||
if mimeType == "application/octet-stream":
|
||||
mimeType = self._serviceCenter.detectContentTypeFromData(fileData, filename)
|
||||
|
||||
mimeType = detectMimeTypeFromData(fileData, filename, self._serviceCenter)
|
||||
# Process document based on type
|
||||
if mimeType not in self.supportedTypes:
|
||||
# Fallback to binary processing
|
||||
contentItems = await self._processBinary(fileData, filename, mimeType)
|
||||
else:
|
||||
# Process document based on type
|
||||
processor = self.supportedTypes[mimeType]
|
||||
contentItems = await processor(fileData, filename, mimeType)
|
||||
|
||||
|
|
@ -171,13 +174,15 @@ class DocumentProcessor:
|
|||
"""Process text document"""
|
||||
try:
|
||||
content = fileData.decode('utf-8')
|
||||
# Use documentUtility for mime type
|
||||
mime_type = getMimeTypeFromExtension(getFileExtension(filename), self._serviceCenter)
|
||||
return [ContentItem(
|
||||
label="main",
|
||||
data=content,
|
||||
metadata=ContentMetadata(
|
||||
size=len(content.encode('utf-8')),
|
||||
pages=1,
|
||||
mimeType="text/plain",
|
||||
mimeType=mime_type,
|
||||
base64Encoded=False
|
||||
)
|
||||
)]
|
||||
|
|
@ -189,13 +194,14 @@ class DocumentProcessor:
|
|||
"""Process CSV document"""
|
||||
try:
|
||||
content = fileData.decode('utf-8')
|
||||
mime_type = getMimeTypeFromExtension(getFileExtension(filename), self._serviceCenter)
|
||||
return [ContentItem(
|
||||
label="main",
|
||||
data=content,
|
||||
metadata=ContentMetadata(
|
||||
size=len(content.encode('utf-8')),
|
||||
pages=1,
|
||||
mimeType="text/csv",
|
||||
mimeType=mime_type,
|
||||
base64Encoded=False
|
||||
)
|
||||
)]
|
||||
|
|
@ -207,16 +213,15 @@ class DocumentProcessor:
|
|||
"""Process JSON document"""
|
||||
try:
|
||||
content = fileData.decode('utf-8')
|
||||
# Parse JSON to validate
|
||||
jsonData = json.loads(content)
|
||||
|
||||
mime_type = getMimeTypeFromExtension(getFileExtension(filename), self._serviceCenter)
|
||||
return [ContentItem(
|
||||
label="main",
|
||||
data=content,
|
||||
metadata=ContentMetadata(
|
||||
size=len(content.encode('utf-8')),
|
||||
pages=1,
|
||||
mimeType="application/json",
|
||||
mimeType=mime_type,
|
||||
base64Encoded=False
|
||||
)
|
||||
)]
|
||||
|
|
@ -228,13 +233,14 @@ class DocumentProcessor:
|
|||
"""Process XML document"""
|
||||
try:
|
||||
content = fileData.decode('utf-8')
|
||||
mime_type = getMimeTypeFromExtension(getFileExtension(filename), self._serviceCenter)
|
||||
return [ContentItem(
|
||||
label="main",
|
||||
data=content,
|
||||
metadata=ContentMetadata(
|
||||
size=len(content.encode('utf-8')),
|
||||
pages=1,
|
||||
mimeType="application/xml",
|
||||
mimeType=mime_type,
|
||||
base64Encoded=False
|
||||
)
|
||||
)]
|
||||
|
|
@ -246,13 +252,14 @@ class DocumentProcessor:
|
|||
"""Process HTML document"""
|
||||
try:
|
||||
content = fileData.decode('utf-8')
|
||||
mime_type = getMimeTypeFromExtension(getFileExtension(filename), self._serviceCenter)
|
||||
return [ContentItem(
|
||||
label="main",
|
||||
data=content,
|
||||
metadata=ContentMetadata(
|
||||
size=len(content.encode('utf-8')),
|
||||
pages=1,
|
||||
mimeType="text/html",
|
||||
mimeType=mime_type,
|
||||
base64Encoded=False
|
||||
)
|
||||
)]
|
||||
|
|
@ -264,15 +271,14 @@ class DocumentProcessor:
|
|||
"""Process SVG document"""
|
||||
try:
|
||||
content = fileData.decode('utf-8')
|
||||
# Check if it's actually SVG
|
||||
isSvg = "<svg" in content.lower()
|
||||
|
||||
mime_type = getMimeTypeFromExtension(getFileExtension(filename), self._serviceCenter)
|
||||
return [ContentItem(
|
||||
label="main",
|
||||
data=content if isSvg else None,
|
||||
metadata=ContentMetadata(
|
||||
size=len(content.encode('utf-8')),
|
||||
mimeType="image/svg+xml",
|
||||
mimeType=mime_type,
|
||||
base64Encoded=False,
|
||||
error=None if isSvg else "Invalid SVG content"
|
||||
)
|
||||
163
modules/chat/documents/documentGeneration.py
Normal file
163
modules/chat/documents/documentGeneration.py
Normal file
|
|
@ -0,0 +1,163 @@
|
|||
import logging
|
||||
from typing import Any, Dict, List, Optional
|
||||
from datetime import datetime, UTC
|
||||
from .documentUtility import (
|
||||
getFileExtension,
|
||||
getMimeTypeFromExtension,
|
||||
detectMimeTypeFromContent,
|
||||
detectMimeTypeFromData,
|
||||
convertDocumentDataToString
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class DocumentGenerator:
|
||||
def __init__(self, service):
|
||||
self.service = service
|
||||
|
||||
def processActionResultDocuments(self, action_result, action, workflow) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Main function to process documents from an action result.
|
||||
Returns a list of processed document dictionaries.
|
||||
"""
|
||||
try:
|
||||
documents = action_result.data.get("documents", [])
|
||||
processed_documents = []
|
||||
for doc in documents:
|
||||
processed_doc = self.processSingleDocument(doc, action)
|
||||
if processed_doc:
|
||||
processed_documents.append(processed_doc)
|
||||
return processed_documents
|
||||
except Exception as e:
|
||||
logger.error(f"Error processing action result documents: {str(e)}")
|
||||
return []
|
||||
|
||||
def processSingleDocument(self, doc: Any, action) -> Optional[Dict[str, Any]]:
|
||||
"""Process a single document from action result"""
|
||||
try:
|
||||
if hasattr(doc, 'filename') and doc.filename:
|
||||
# Document object with filename attribute
|
||||
mime_type = getattr(doc, 'mimeType', 'application/octet-stream')
|
||||
if mime_type == "application/octet-stream":
|
||||
content = getattr(doc, 'content', '')
|
||||
mime_type = detectMimeTypeFromContent(content, doc.filename, self.service)
|
||||
return {
|
||||
'filename': doc.filename,
|
||||
'fileSize': getattr(doc, 'fileSize', 0),
|
||||
'mimeType': mime_type,
|
||||
'content': getattr(doc, 'content', ''),
|
||||
'document': doc
|
||||
}
|
||||
elif isinstance(doc, dict):
|
||||
# Dictionary format document
|
||||
filename = doc.get('documentName', doc.get('filename', \
|
||||
f"{action.execMethod}_{action.execAction}_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}"))
|
||||
fileSize = doc.get('fileSize', len(str(doc.get('documentData', ''))))
|
||||
mimeType = doc.get('mimeType', 'application/octet-stream')
|
||||
if mimeType == "application/octet-stream":
|
||||
document_data = doc.get('documentData', '')
|
||||
mimeType = detectMimeTypeFromContent(document_data, filename, self.service)
|
||||
return {
|
||||
'filename': filename,
|
||||
'fileSize': fileSize,
|
||||
'mimeType': mimeType,
|
||||
'content': doc.get('documentData', ''),
|
||||
'document': doc
|
||||
}
|
||||
else:
|
||||
# Unknown document type
|
||||
logger.warning(f"Unknown document type for action {action.execMethod}.{action.execAction}: {type(doc)}")
|
||||
filename = f"{action.execMethod}_{action.execAction}_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}"
|
||||
mimeType = detectMimeTypeFromContent(doc, filename, self.service)
|
||||
return {
|
||||
'filename': filename,
|
||||
'fileSize': 0,
|
||||
'mimeType': mimeType,
|
||||
'content': str(doc),
|
||||
'document': doc
|
||||
}
|
||||
except Exception as e:
|
||||
logger.error(f"Error processing single document: {str(e)}")
|
||||
return None
|
||||
|
||||
def createDocumentsFromActionResult(self, action_result, action, workflow) -> List[Any]:
|
||||
"""
|
||||
Create actual document objects from action result and store them in the system.
|
||||
Returns a list of created document objects.
|
||||
"""
|
||||
try:
|
||||
processed_docs = self.processActionResultDocuments(action_result, action, workflow)
|
||||
created_documents = []
|
||||
for doc_data in processed_docs:
|
||||
try:
|
||||
document_name = doc_data['filename']
|
||||
document_data = doc_data['content']
|
||||
mime_type = doc_data['mimeType']
|
||||
# Convert document data to string content
|
||||
content = convertDocumentDataToString(document_data, getFileExtension(document_name))
|
||||
# Skip empty or minimal content
|
||||
minimal_content_patterns = ['{}', '[]', 'null', '""', "''"]
|
||||
if not content or content.strip() == "" or content.strip() in minimal_content_patterns:
|
||||
logger.warning(f"Empty or minimal content for document {document_name}, skipping")
|
||||
continue
|
||||
# Create file in system
|
||||
file_id = self.service.createFile(
|
||||
fileName=document_name,
|
||||
mimeType=mime_type,
|
||||
content=content,
|
||||
base64encoded=False
|
||||
)
|
||||
if not file_id:
|
||||
logger.error(f"Failed to create file for document {document_name}")
|
||||
continue
|
||||
# Create document object
|
||||
document = self.service.createDocument(
|
||||
fileName=document_name,
|
||||
mimeType=mime_type,
|
||||
content=content,
|
||||
base64encoded=False
|
||||
)
|
||||
if document:
|
||||
created_documents.append(document)
|
||||
logger.info(f"Created document: {document_name} with file ID: {file_id} and MIME type: {mime_type}")
|
||||
else:
|
||||
logger.error(f"Failed to create ChatDocument object for {document_name}")
|
||||
except Exception as e:
|
||||
logger.error(f"Error creating document {doc_data.get('filename', 'unknown')}: {str(e)}")
|
||||
continue
|
||||
return created_documents
|
||||
except Exception as e:
|
||||
logger.error(f"Error creating documents from action result: {str(e)}")
|
||||
return []
|
||||
|
||||
@staticmethod
|
||||
def get_delivered_files_and_formats(documents):
|
||||
delivered_files = []
|
||||
delivered_formats = []
|
||||
for doc in documents:
|
||||
if hasattr(doc, 'filename'):
|
||||
delivered_files.append(doc.filename)
|
||||
file_extension = getFileExtension(doc.filename)
|
||||
mime_type = getattr(doc, 'mimeType', 'application/octet-stream')
|
||||
delivered_formats.append({
|
||||
'filename': doc.filename,
|
||||
'extension': file_extension,
|
||||
'mimeType': mime_type
|
||||
})
|
||||
elif isinstance(doc, dict) and 'filename' in doc:
|
||||
delivered_files.append(doc['filename'])
|
||||
file_extension = getFileExtension(doc['filename'])
|
||||
mime_type = doc.get('mimeType', 'application/octet-stream')
|
||||
delivered_formats.append({
|
||||
'filename': doc['filename'],
|
||||
'extension': file_extension,
|
||||
'mimeType': mime_type
|
||||
})
|
||||
else:
|
||||
delivered_files.append(f"document_{len(delivered_files)}")
|
||||
delivered_formats.append({
|
||||
'filename': f"document_{len(delivered_files)}",
|
||||
'extension': 'unknown',
|
||||
'mimeType': 'application/octet-stream'
|
||||
})
|
||||
return delivered_files, delivered_formats
|
||||
132
modules/chat/documents/documentUtility.py
Normal file
132
modules/chat/documents/documentUtility.py
Normal file
|
|
@ -0,0 +1,132 @@
|
|||
import json
|
||||
import logging
|
||||
from typing import Any, Dict
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
def getFileExtension(filename: str) -> str:
    """Extract the lower-cased file extension from a filename.

    Only the last path component is inspected, so a dot inside a directory
    name is not mistaken for an extension. Returns an empty string when the
    name is empty/None or has no extension.
    """
    if not filename:
        return ''
    # Accept both POSIX and Windows separators.
    basename = filename.replace('\\', '/').rsplit('/', 1)[-1]
    if '.' not in basename:
        return ''
    return basename.rsplit('.', 1)[-1].lower()
|
||||
|
||||
# Built-in extension -> MIME type table, used when no service is supplied.
_FALLBACK_MIME_TYPES = {
    # text and source formats
    'txt': 'text/plain',
    'md': 'text/markdown',
    'html': 'text/html',
    'css': 'text/css',
    'csv': 'text/csv',
    'py': 'text/x-python',
    # structured data
    'js': 'application/javascript',
    'json': 'application/json',
    'xml': 'application/xml',
    # office documents
    'pdf': 'application/pdf',
    'docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
    'xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
    # images
    'png': 'image/png',
    'jpg': 'image/jpeg',
    'jpeg': 'image/jpeg',
    'gif': 'image/gif',
    'svg': 'image/svg+xml',
}


def getMimeTypeFromExtension(extension: str, service=None) -> str:
    """Map a file extension (without dot) to a MIME type.

    When a truthy ``service`` is given, its ``getMimeTypeFromExtension`` is
    called with the extension as-is and its answer is returned. Otherwise the
    built-in table is consulted case-insensitively, and unknown extensions
    yield 'application/octet-stream'.
    """
    if not service:
        return _FALLBACK_MIME_TYPES.get(extension.lower(), 'application/octet-stream')
    return service.getMimeTypeFromExtension(extension)
|
||||
|
||||
def detectMimeTypeFromData(file_bytes: bytes, filename: str, service=None) -> str:
    """Determine a MIME type for raw bytes, preferring the service's detection.

    A truthy ``service`` is asked first via ``detectContentTypeFromData``; its
    answer is used unless it is empty or 'application/octet-stream'. In every
    other case the type is derived from the filename's extension. Any error
    is logged as a warning and 'application/octet-stream' is returned.
    """
    try:
        if service:
            candidate = service.detectContentTypeFromData(file_bytes, filename)
            is_specific = bool(candidate) and candidate != 'application/octet-stream'
            if is_specific:
                return candidate
        # Nothing specific was detected: fall back to the extension lookup.
        return getMimeTypeFromExtension(getFileExtension(filename), service)
    except Exception as e:
        logger.warning(f"Error in MIME type detection for {filename}: {str(e)}")
        return 'application/octet-stream'
|
||||
|
||||
def detectMimeTypeFromContent(content: Any, filename: str, service=None) -> str:
    """Detect MIME type from content and filename using a service if provided.

    The content is first turned into bytes: binary data is used as-is,
    strings are UTF-8 encoded, dicts are serialized as JSON, and anything
    else goes through ``str()``. Detection itself is delegated to
    ``detectMimeTypeFromData``. Any error is logged as a warning and
    'application/octet-stream' is returned.
    """
    try:
        if isinstance(content, (bytes, bytearray)):
            # Pass binary payloads through unchanged; str(b"...") would yield
            # the Python repr ("b'...'") rather than the actual data.
            file_bytes = bytes(content)
        elif isinstance(content, str):
            file_bytes = content.encode('utf-8')
        elif isinstance(content, dict):
            file_bytes = json.dumps(content, ensure_ascii=False).encode('utf-8')
        else:
            file_bytes = str(content).encode('utf-8')
        return detectMimeTypeFromData(file_bytes, filename, service)
    except Exception as e:
        logger.warning(f"Error in MIME type detection for {filename}: {str(e)}")
        return 'application/octet-stream'
|
||||
|
||||
def _rowsToCsv(rows) -> str:
    """Serialize a list of rows (dicts or sequences) to CSV text."""
    import csv
    import io

    output = io.StringIO()
    if rows and isinstance(rows[0], dict):
        # Union of all keys in first-seen order, so rows carrying extra keys
        # no longer make DictWriter raise ValueError; missing keys are
        # written as empty cells.
        fieldnames = list(dict.fromkeys(key for row in rows for key in row))
        writer = csv.DictWriter(output, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rows)
    else:
        csv.writer(output).writerows(rows)
    return output.getvalue()


def convertDocumentDataToString(document_data: Any, file_extension: str) -> str:
    """Convert document data to string content based on file type.

    ``None`` becomes an empty string and strings are returned unchanged.
    For dicts, text-like extensions return the first well-known payload field
    (strings as-is, dict/list values as JSON), and 'csv' returns the first
    payload field holding a string or a non-empty list of rows. Everything
    else, including 'json', dumps the whole dict as indented JSON. Lists
    become CSV for 'csv' and indented JSON otherwise. Other values go through
    ``str()``. Any error is logged and ``str(document_data)`` is returned.
    """
    try:
        if document_data is None:
            return ""
        if isinstance(document_data, str):
            return document_data

        if isinstance(document_data, dict):
            if file_extension in ('txt', 'md', 'html', 'css', 'js', 'py'):
                text_fields = ('content', 'text', 'data', 'result', 'summary', 'extracted_content', 'table_data')
                for field in text_fields:
                    if field not in document_data:
                        continue
                    content = document_data[field]
                    if isinstance(content, str):
                        return content
                    if isinstance(content, (dict, list)):
                        return json.dumps(content, indent=2, ensure_ascii=False)
            elif file_extension == 'csv':
                csv_fields = ('table_data', 'csv_data', 'rows', 'data', 'content', 'text')
                for field in csv_fields:
                    if field not in document_data:
                        continue
                    content = document_data[field]
                    if isinstance(content, str):
                        return content
                    # Only tabular lists are exported; other values let the
                    # search continue with the next candidate field.
                    if isinstance(content, list) and content and isinstance(content[0], (list, dict)):
                        return _rowsToCsv(content)
            # 'json', unknown extensions, and dicts without a usable field.
            return json.dumps(document_data, indent=2, ensure_ascii=False)

        if isinstance(document_data, list):
            if file_extension == 'csv':
                return _rowsToCsv(document_data)
            return json.dumps(document_data, indent=2, ensure_ascii=False)

        return str(document_data)
    except Exception as e:
        logger.error(f"Error converting document data to string: {str(e)}")
        return str(document_data)
|
||||
|
|
@ -7,8 +7,8 @@ import time
|
|||
from typing import Dict, Any, Optional, List, Union
|
||||
from datetime import datetime, UTC
|
||||
from modules.interfaces.interfaceChatModel import ReviewResult, ActionResult
|
||||
from modules.chat.documents.documentCreation import DocumentCreator
|
||||
from .promptFactory import createResultReviewPrompt
|
||||
from modules.chat.documents.documentGeneration import DocumentGenerator
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -16,12 +16,72 @@ class HandlingActions:
|
|||
def __init__(self, service, chatInterface):
    """Store the collaborators used by the action handling methods.

    Args:
        service: object the handler calls to execute actions; it is also
            handed to the document helpers created below.
        chatInterface: object used to create workflow messages.
    """
    self.service = service
    self.chatInterface = chatInterface
    # NOTE(review): this view is a commit diff without +/- markers; this line
    # is presumably the removed pre-refactor helper — confirm before relying on it.
    self.documentCreator = DocumentCreator(self.service)
    # Helper that processes and stores the documents of action results.
    self.documentGenerator = DocumentGenerator(service)
|
||||
|
||||
async def executeSingleAction(self, action, workflow):
    """Run one action through the service and wrap the outcome in an ActionResult.

    The action's parameters are copied and, when the action declares expected
    document formats, extended with them. On success the action is marked
    successful and an action message is created in the workflow; on failure
    the action receives the error text. In both cases the result's documents
    are processed and returned. Any exception is logged, recorded on the
    action, and reported as a failed ActionResult with an empty document list.
    """
    try:
        # Work on a copy so the action's own parameter dict stays untouched.
        call_parameters = action.execParameters.copy()
        if action.expectedDocumentFormats:
            call_parameters['expectedDocumentFormats'] = action.expectedDocumentFormats
            logger.info(f"Action {action.execMethod}.{action.execAction} expects formats: {action.expectedDocumentFormats}")

        result = await self.service.executeAction(
            methodName=action.execMethod,
            actionName=action.execAction,
            parameters=call_parameters
        )
        result_label = action.execResultLabel

        if not result.success:
            action.setError(result.error or "Action execution failed")
        else:
            action.setSuccess()
            action.result = result.data.get("result", "")
            action.execResultLabel = result_label
            await self.createActionMessage(action, result, workflow, result_label)

        processed_documents = self.documentGenerator.processActionResultDocuments(result, action, workflow)

        # Identification fields shared by the data and metadata sections.
        identity = {
            "actionId": action.id,
            "actionMethod": action.execMethod,
            "actionName": action.execAction,
            "resultLabel": result_label
        }
        payload = {
            "result": result.data.get("result", ""),
            "documents": processed_documents
        }
        payload.update(identity)
        return ActionResult(
            success=result.success,
            data=payload,
            metadata=dict(identity),
            validation=[],
            error=result.error or ""
        )
    except Exception as e:
        logger.error(f"Error executing single action: {str(e)}")
        action.setError(str(e))
        identity = {
            "actionId": action.id,
            "actionMethod": action.execMethod,
            "actionName": action.execAction
        }
        return ActionResult(
            success=False,
            data={**identity, "documents": []},
            metadata=identity,
            validation=[],
            error=str(e)
        )
|
||||
|
||||
async def validateActionResult(self, action_result, action, context) -> dict:
|
||||
try:
|
||||
prompt = self._createGenericValidationPrompt(action_result, action, context)
|
||||
response = await self._callAIWithCircuitBreaker(prompt, "action_validation")
|
||||
response = await self.service.callAiTextAdvanced(prompt, "action_validation")
|
||||
validation = self._parseValidationResponse(response)
|
||||
validation['action_id'] = action.id
|
||||
validation['action_method'] = action.execMethod
|
||||
|
|
@ -41,6 +101,73 @@ class HandlingActions:
|
|||
'result_label': action.execResultLabel
|
||||
}
|
||||
|
||||
async def createActionMessage(self, action, result, workflow, result_label=None):
    """Create and store a workflow message describing an executed action.

    Documents are created from ``result`` through the document generator and
    attached to the message. The message is created via the chat interface
    and, when creation succeeds, appended to ``workflow.messages``.

    Args:
        action: the executed action; supplies id, method and action name.
        result: the action result the documents are created from.
        workflow: the workflow receiving the message.
        result_label: label stored as ``documentsLabel``; defaults to
            ``action.execResultLabel`` when ``None``.

    Errors are logged and not propagated.
    """
    try:
        if result_label is None:
            result_label = action.execResultLabel

        message_data = {
            "workflowId": workflow.id,
            "role": "assistant",
            "message": f"Executed action {action.execMethod}.{action.execAction}",
            "status": "step",
            "sequenceNr": len(workflow.messages) + 1,
            "publishedAt": datetime.now(UTC).isoformat(),
            "actionId": action.id,
            "actionMethod": action.execMethod,
            "actionName": action.execAction,
            "documentsLabel": result_label,
            "documents": []
        }

        # Document creation is delegated to the document generator.
        created_documents = self.documentGenerator.createDocumentsFromActionResult(result, action, workflow)
        message_data["documents"] = created_documents

        message = self.chatInterface.createWorkflowMessage(message_data)
        if not message:
            logger.error(f"Failed to create workflow message for action {action.execMethod}.{action.execAction}")
            return

        workflow.messages.append(message)
        logger.info(f"Created action message for {action.execMethod}.{action.execAction} with {len(created_documents)} documents")

        # The state dump walks every message of the workflow; skip that work
        # entirely unless debug output is actually enabled.
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug(f"WORKFLOW STATE after createActionMessage: id={id(workflow)}, message_count={len(workflow.messages)}")
            for idx, msg in enumerate(workflow.messages):
                label = getattr(msg, 'documentsLabel', None)
                docs = getattr(msg, 'documents', None)
                logger.debug(f"  Message {idx}: label='{label}', documents_count={len(docs) if docs else 0}")
    except Exception as e:
        logger.error(f"Error creating action message: {str(e)}")
|
||||
|
||||
def parseActionResponse(self, response: str) -> list:
    """Extract the 'actions' list from a response that embeds a JSON object.

    The text between the first '{' and the last '}' is parsed as JSON and
    its 'actions' entry is returned. When no braces are found, the JSON is
    invalid, or the 'actions' field is missing, the problem is logged and an
    empty list is returned.
    """
    try:
        first_brace = response.find('{')
        last_brace = response.rfind('}')
        if first_brace == -1 or last_brace == -1:
            raise ValueError("No JSON found in response")

        payload = json.loads(response[first_brace:last_brace + 1])
        if 'actions' in payload:
            return payload['actions']
        raise ValueError("Action response missing 'actions' field")
    except Exception as e:
        logger.error(f"Error parsing action response: {str(e)}")
        return []
|
||||
|
||||
def parseReviewResponse(self, response: str) -> dict:
    """Extract the review object from a response that embeds a JSON object.

    The text between the first '{' and the last '}' is parsed as JSON and
    returned when it contains a 'status' field. When no braces are found,
    the JSON is invalid, or 'status' is missing, the problem is logged and a
    dict with status 'failed' and the parse error as reason is returned.
    """
    try:
        first_brace = response.find('{')
        last_brace = response.rfind('}')
        if first_brace == -1 or last_brace == -1:
            raise ValueError("No JSON found in response")

        parsed = json.loads(response[first_brace:last_brace + 1])
        if 'status' in parsed:
            return parsed
        raise ValueError("Review response missing 'status' field")
    except Exception as e:
        logger.error(f"Error parsing review response: {str(e)}")
        return {'status': 'failed', 'reason': f'Parse error: {str(e)}'}
|
||||
|
||||
# Internal helper methods
|
||||
|
||||
def _createGenericValidationPrompt(self, action_result, action, context) -> str:
|
||||
success = action_result.success
|
||||
result_data = action_result.data
|
||||
|
|
@ -54,35 +181,9 @@ class HandlingActions:
|
|||
expected_document_formats = action.expectedDocumentFormats or []
|
||||
actual_result_label = result_data.get("resultLabel", "") if isinstance(result_data, dict) else ""
|
||||
result_label_match = actual_result_label == expected_result_label
|
||||
delivered_files = []
|
||||
delivered_formats = []
|
||||
# Use DocumentGenerator for file/format extraction
|
||||
delivered_files, delivered_formats = DocumentGenerator.get_delivered_files_and_formats(documents)
|
||||
content_items = []
|
||||
for doc in documents:
|
||||
if hasattr(doc, 'filename'):
|
||||
delivered_files.append(doc.filename)
|
||||
file_extension = self._getFileExtension(doc.filename)
|
||||
mime_type = getattr(doc, 'mimeType', 'application/octet-stream')
|
||||
delivered_formats.append({
|
||||
'filename': doc.filename,
|
||||
'extension': file_extension,
|
||||
'mimeType': mime_type
|
||||
})
|
||||
elif isinstance(doc, dict) and 'filename' in doc:
|
||||
delivered_files.append(doc['filename'])
|
||||
file_extension = self._getFileExtension(doc['filename'])
|
||||
mime_type = doc.get('mimeType', 'application/octet-stream')
|
||||
delivered_formats.append({
|
||||
'filename': doc['filename'],
|
||||
'extension': file_extension,
|
||||
'mimeType': mime_type
|
||||
})
|
||||
else:
|
||||
delivered_files.append(f"document_{len(delivered_files)}")
|
||||
delivered_formats.append({
|
||||
'filename': f"document_{len(delivered_files)}",
|
||||
'extension': 'unknown',
|
||||
'mimeType': 'application/octet-stream'
|
||||
})
|
||||
if isinstance(result_data, dict):
|
||||
if 'extractedContent' in result_data:
|
||||
extracted_content = result_data['extractedContent']
|
||||
|
|
@ -128,305 +229,4 @@ class HandlingActions:
|
|||
'quality_score': 5,
|
||||
'missing_elements': [],
|
||||
'suggested_retry_approach': ''
|
||||
}
|
||||
|
||||
async def executeSingleAction(self, action, workflow):
    """Execute a single action and return an ActionResult with normalized documents.

    The action is dispatched through ``self.service.executeAction``. On success the
    action is marked successful and a workflow message is created via
    ``createActionMessage``; on failure the reported error is recorded on the
    action. In both cases every returned document is normalized into a plain dict
    with the keys ``filename``, ``fileSize``, ``mimeType``, ``content`` and
    ``document``.

    Args:
        action: Action to run. Reads ``execMethod``, ``execAction``,
            ``execParameters``, ``execResultLabel``, ``expectedDocumentFormats``
            and ``id``.
        workflow: Workflow that receives the action message on success.

    Returns:
        ActionResult mirroring the service result. Any exception is caught,
        recorded on the action and reported as a failed ActionResult with an
        empty document list.
    """
    try:
        # Copy so the action's own parameter dict is never mutated.
        enhanced_parameters = action.execParameters.copy()
        if action.expectedDocumentFormats:
            enhanced_parameters['expectedDocumentFormats'] = action.expectedDocumentFormats
            logger.info(f"Action {action.execMethod}.{action.execAction} expects formats: {action.expectedDocumentFormats}")

        result = await self.service.executeAction(
            methodName=action.execMethod,
            actionName=action.execAction,
            parameters=enhanced_parameters
        )
        result_label = action.execResultLabel

        # A failed result may carry no data at all; treating that as an empty dict
        # keeps the real error message instead of replacing it with an
        # AttributeError raised by ``None.get``.
        raw_data = getattr(result, 'data', None)
        result_data = raw_data if isinstance(raw_data, dict) else {}

        if result.success:
            action.setSuccess()
            action.result = result_data.get("result", "")
            action.execResultLabel = result_label
            await self.createActionMessage(action, result, workflow, result_label)
        else:
            action.setError(result.error or "Action execution failed")

        # ``or []`` also covers an explicit ``"documents": None`` entry.
        processed_documents = [
            self._normalizeActionDocument(doc, action)
            for doc in (result_data.get("documents") or [])
        ]

        return ActionResult(
            success=result.success,
            data={
                "result": result_data.get("result", ""),
                "documents": processed_documents,
                "actionId": action.id,
                "actionMethod": action.execMethod,
                "actionName": action.execAction,
                "resultLabel": result_label
            },
            metadata={
                "actionId": action.id,
                "actionMethod": action.execMethod,
                "actionName": action.execAction,
                "resultLabel": result_label
            },
            validation=[],
            error=result.error or ""
        )
    except Exception as e:
        logger.error(f"Error executing single action: {str(e)}")
        action.setError(str(e))
        return ActionResult(
            success=False,
            data={
                "actionId": action.id,
                "actionMethod": action.execMethod,
                "actionName": action.execAction,
                "documents": []
            },
            metadata={
                "actionId": action.id,
                "actionMethod": action.execMethod,
                "actionName": action.execAction
            },
            validation=[],
            error=str(e)
        )


def _normalizeActionDocument(self, doc, action):
    """Convert one raw action document into the dict stored in the ActionResult.

    Three input shapes are handled: an object with a non-empty ``filename``
    attribute, a dict (``documentName``/``filename``, ``documentData``,
    ``fileSize``, ``mimeType``), and anything else, which is logged as unknown
    and stringified. A generic ``application/octet-stream`` type is refined
    through ``self.documentCreator``.
    """
    generic_mime = 'application/octet-stream'
    # Timestamped name used whenever the document does not supply one.
    fallback_name = f"{action.execMethod}_{action.execAction}_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}"

    if hasattr(doc, 'filename') and doc.filename:
        filename = doc.filename
        file_size = getattr(doc, 'fileSize', 0)
        mime_type = getattr(doc, 'mimeType', generic_mime)
        if mime_type == generic_mime:
            mime_type = self.documentCreator.detectMimeTypeFromDocument(doc, filename)
        content = getattr(doc, 'content', '')
    elif isinstance(doc, dict):
        document_data = doc.get('documentData', '')
        filename = doc.get('documentName', doc.get('filename', fallback_name))
        file_size = doc.get('fileSize', len(str(document_data)))
        mime_type = doc.get('mimeType', generic_mime)
        if mime_type == generic_mime:
            mime_type = self.documentCreator.detectMimeTypeFromContent(document_data, filename)
        content = document_data
    else:
        logger.warning(f"Unknown document type for action {action.execMethod}.{action.execAction}: {type(doc)}")
        filename = fallback_name
        file_size = 0
        mime_type = self.documentCreator.detectMimeTypeFromContent(doc, filename)
        content = str(doc)

    return {
        'filename': filename,
        'fileSize': file_size,
        'mimeType': mime_type,
        'content': content,
        'document': doc
    }
|
||||
|
||||
async def createActionMessage(self, action, result, workflow, result_label=None):
    """Create and store a workflow message describing the result of an action.

    Each document in ``result.data["documents"]`` is converted to a string,
    persisted through ``self.service.createFile`` / ``createDocument`` and
    attached to the message. Documents with empty or placeholder-only content
    are skipped, and a failure on one document does not stop the others.

    Args:
        action: Executed action; ``execMethod``, ``execAction``, ``id`` and
            ``execResultLabel`` are read.
        result: Action result; its ``data`` dict is read when present.
        workflow: Workflow whose ``messages`` list receives the new message.
        result_label: Label stored as ``documentsLabel``; defaults to
            ``action.execResultLabel``.

    Returns:
        None. All errors are logged instead of raised.
    """
    generic_mime = "application/octet-stream"
    # Serialized forms that carry no real content.
    minimal_content_patterns = ('{}', '[]', 'null', '""', "''")
    try:
        # Results without a dict payload are treated as having no documents
        # rather than aborting the whole message on ``None.get``.
        raw_data = getattr(result, 'data', None)
        result_data = raw_data if isinstance(raw_data, dict) else {}
        documents_data = result_data.get("documents") or []
        if result_label is None:
            result_label = action.execResultLabel

        message_data = {
            "workflowId": workflow.id,
            "role": "assistant",
            "message": f"Executed action {action.execMethod}.{action.execAction}",
            "status": "step",
            "sequenceNr": len(workflow.messages) + 1,
            "publishedAt": datetime.now(UTC).isoformat(),
            "actionId": action.id,
            "actionMethod": action.execMethod,
            "actionName": action.execAction,
            "documentsLabel": result_label,  # Use intent label from action definition
            "documents": []
        }

        if documents_data:
            processed_documents = []
            for doc_data in documents_data:
                # Tracked outside the try so the error log can name the document
                # whatever its type (the old getattr-on-dict lookup always
                # yielded 'unknown').
                document_name = 'unknown'
                try:
                    fallback_name = f"{action.execMethod}_{action.execAction}_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}"
                    if isinstance(doc_data, dict):
                        document_name = doc_data.get("documentName", doc_data.get("filename", fallback_name))
                        document_data = doc_data.get("documentData", {})
                        file_size = doc_data.get("fileSize", 0)
                        mime_type = doc_data.get("mimeType", generic_mime)
                    elif hasattr(doc_data, 'filename'):
                        document_name = doc_data.filename
                        document_data = getattr(doc_data, 'content', {})
                        file_size = getattr(doc_data, 'fileSize', 0)
                        mime_type = getattr(doc_data, 'mimeType', generic_mime)
                    else:
                        document_name = fallback_name
                        document_data = doc_data
                        file_size = len(str(doc_data))
                        mime_type = generic_mime

                    if mime_type == generic_mime:
                        mime_type = self.documentCreator.detectMimeTypeFromContent(document_data, document_name)

                    content = self.documentCreator.convertDocumentDataToString(
                        document_data,
                        self.documentCreator.getFileExtension(document_name)
                    )
                    if not content or not content.strip() or content.strip() in minimal_content_patterns:
                        logger.warning(f"Empty or minimal content for document {document_name}, skipping")
                        continue

                    file_id = self.service.createFile(
                        fileName=document_name,
                        mimeType=mime_type,
                        content=content,
                        base64encoded=False
                    )
                    if not file_id:
                        logger.error(f"Failed to create file for document {document_name}")
                        continue

                    document = self.service.createDocument(
                        fileName=document_name,
                        mimeType=mime_type,
                        content=content,
                        base64encoded=False
                    )
                    if document:
                        processed_documents.append(document)
                        logger.info(f"Created document: {document_name} with file ID: {file_id} and MIME type: {mime_type}")
                    else:
                        logger.error(f"Failed to create ChatDocument object for {document_name}")
                except Exception as e:
                    logger.error(f"Error processing document {document_name}: {str(e)}")
                    continue
            message_data["documents"] = processed_documents

        message = self.chatInterface.createWorkflowMessage(message_data)
        if message:
            workflow.messages.append(message)
            logger.info(f"Created action message for {action.execMethod}.{action.execAction} with {len(message_data.get('documents', []))} documents")
            logger.debug(f"WORKFLOW STATE after createActionMessage: id={id(workflow)}, message_count={len(workflow.messages)}")
            for idx, msg in enumerate(workflow.messages):
                label = getattr(msg, 'documentsLabel', None)
                docs = getattr(msg, 'documents', None)
                logger.debug(f"  Message {idx}: label='{label}', documents_count={len(docs) if docs else 0}")
        else:
            logger.error(f"Failed to create workflow message for action {action.execMethod}.{action.execAction}")
    except Exception as e:
        logger.error(f"Error creating action message: {str(e)}")
|
||||
|
||||
async def performTaskReview(self, review_context) -> 'ReviewResult':
    """Run the AI-based task review and enrich 'retry' verdicts with guidance.

    A review prompt is built with ``createResultReviewPrompt``, sent through
    ``self._callAIWithCircuitBreaker`` and parsed with
    ``self._parseReviewResponse``. When the verdict is ``'retry'``, successful
    action results are checked for two conditions (no result text and no
    documents; a document named ``'unknown'``); each one adds an improvement
    hint, and the quality score is lowered by 2 (never below 1).

    Args:
        review_context: Review context; ``action_results`` is read.

    Returns:
        ReviewResult built from the parsed review. If anything fails, a
        ReviewResult with status ``'success'`` is returned so the workflow is
        not blocked.
    """

    def payload(result):
        # Only dict payloads are inspected; anything else counts as empty.
        data = getattr(result, 'data', None)
        return data if isinstance(data, dict) else {}

    def document_name(doc):
        # Documents may be dicts or objects exposing a ``filename`` attribute.
        if isinstance(doc, dict):
            return doc.get('filename')
        return getattr(doc, 'filename', None)

    def add_improvement(review, hint):
        # Keep whatever type the AI used: extend a string in place of the old
        # concatenation, otherwise append to a list. Concatenating a string
        # onto a list used to raise and turned the retry into 'success'.
        existing = review.get('improvements')
        if isinstance(existing, str):
            review['improvements'] = existing + " " + hint
        elif isinstance(existing, (list, tuple)):
            review['improvements'] = list(existing) + [hint]
        else:
            review['improvements'] = [hint] if existing is None else [str(existing), hint]

    try:
        # Prepare prompt for result review
        prompt = await createResultReviewPrompt(self, review_context)

        # Call AI with circuit breaker
        response = await self._callAIWithCircuitBreaker(prompt, "result_review")

        # Parse review result
        # NOTE(review): a public parseReviewResponse exists alongside this
        # method; confirm that _parseReviewResponse is defined on the class.
        review_dict = self._parseReviewResponse(response)

        # Add default values for missing fields
        review_dict.setdefault('status', 'unknown')
        review_dict.setdefault('reason', 'No reason provided')
        review_dict.setdefault('quality_score', 5)

        # Enhanced retry logic based on result quality
        if review_dict.get('status') == 'retry':
            successful = [r for r in (review_context.action_results or []) if r.success]

            # Empty results are only a problem when no documents were produced.
            has_empty_results = any(
                not str(payload(r).get('result') or '').strip()
                and not payload(r).get('documents')
                for r in successful
            )
            has_incomplete_metadata = any(
                document_name(doc) == 'unknown'
                for r in successful
                for doc in (payload(r).get('documents') or [])
            )

            if has_empty_results:
                add_improvement(
                    review_dict,
                    "Ensure the document extraction returns actual content, not empty results. "
                    "Check if the AI prompt is specific enough to extract meaningful data."
                )
            if has_incomplete_metadata:
                add_improvement(
                    review_dict,
                    "Ensure proper document metadata is extracted including filename, size, and mime type. "
                    "The document processing should provide complete file information."
                )

            # If we have specific issues, adjust quality score
            if has_empty_results or has_incomplete_metadata:
                score = review_dict.get('quality_score', 5)
                if not isinstance(score, (int, float)):
                    # A non-numeric score from the AI falls back to the default.
                    score = 5
                review_dict['quality_score'] = max(1, score - 2)

        # Create ReviewResult model
        return ReviewResult(
            status=review_dict.get('status', 'unknown'),
            reason=review_dict.get('reason', 'No reason provided'),
            improvements=review_dict.get('improvements', []),
            quality_score=review_dict.get('quality_score', 5),
            missing_outputs=review_dict.get('missing_outputs', []),
            met_criteria=review_dict.get('met_criteria', []),
            unmet_criteria=review_dict.get('unmet_criteria', []),
            confidence=review_dict.get('confidence', 0.5)
        )

    except Exception as e:
        logger.error(f"Error performing task review: {str(e)}")
        return ReviewResult(
            status='success',  # Default to success to avoid blocking workflow
            reason=f'Review failed: {str(e)}',
            quality_score=5,
            confidence=0.5
        )
|
||||
|
||||
def parseActionResponse(self, response: str) -> list:
    """Parse an AI response into the list stored under its "actions" key.

    The first JSON object in ``response`` (starting at the first ``{``) is
    decoded; text before it and after it is ignored.

    Args:
        response: Raw AI response text.

    Returns:
        The ``actions`` list, or ``[]`` when no JSON object is found, the object
        has no ``actions`` key, or ``actions`` is not a list. Errors are logged.
    """
    try:
        if not isinstance(response, str):
            raise ValueError("No JSON found in response")
        json_start = response.find('{')
        if json_start == -1:
            raise ValueError("No JSON found in response")
        # raw_decode stops at the end of the first complete JSON value, so
        # trailing prose (even prose containing braces) no longer breaks parsing
        # the way slicing up to the last '}' did.
        action_data, _ = json.JSONDecoder().raw_decode(response[json_start:])
        if 'actions' not in action_data:
            raise ValueError("Action response missing 'actions' field")
        actions = action_data['actions']
        if not isinstance(actions, list):
            # Honour the declared return type; callers iterate the result.
            raise ValueError("Action response 'actions' field is not a list")
        return actions
    except Exception as e:
        logger.error(f"Error parsing action response: {str(e)}")
        return []
|
||||
|
||||
def parseReviewResponse(self, response: str) -> dict:
    """Parse an AI response into a review dict.

    The first JSON object in ``response`` (starting at the first ``{``) is
    decoded; text before it and after it is ignored.

    Args:
        response: Raw AI response text.

    Returns:
        The decoded review dict, which must contain a ``status`` key. On any
        parse problem the error is logged and
        ``{'status': 'failed', 'reason': 'Parse error: ...'}`` is returned.
    """
    try:
        if not isinstance(response, str):
            raise ValueError("No JSON found in response")
        json_start = response.find('{')
        if json_start == -1:
            raise ValueError("No JSON found in response")
        # raw_decode stops at the end of the first complete JSON value, so
        # trailing prose (even prose containing braces) no longer breaks parsing
        # the way slicing up to the last '}' did.
        review, _ = json.JSONDecoder().raw_decode(response[json_start:])
        if 'status' not in review:
            raise ValueError("Review response missing 'status' field")
        return review
    except Exception as e:
        logger.error(f"Error parsing review response: {str(e)}")
        return {'status': 'failed', 'reason': f'Parse error: {str(e)}'}
|
||||
|
||||
# Utility method for file extension
|
||||
def _getFileExtension(self, filename):
|
||||
if '.' in filename:
|
||||
return filename.rsplit('.', 1)[-1].lower()
|
||||
return ''
|
||||
|
||||
# Placeholder methods for AI and prompt logic (to be implemented or injected)
|
||||
async def _callAIWithCircuitBreaker(self, prompt, purpose):
|
||||
raise NotImplementedError("_callAIWithCircuitBreaker must be implemented in the subclass or injected.")
|
||||
}
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ class HandlingTasks:
|
|||
prompt = await self.service.callAiTextAdvanced(
|
||||
createTaskPlanningPrompt(self, {
|
||||
'user_request': userInput,
|
||||
'available_documents': self._getAvailableDocuments(workflow),
|
||||
'available_documents': self.service.getAvailableDocuments(workflow),
|
||||
'workflow_id': workflow.id
|
||||
})
|
||||
)
|
||||
|
|
@ -55,7 +55,7 @@ class HandlingTasks:
|
|||
task_step=task_step,
|
||||
workflow=workflow,
|
||||
workflow_id=workflow.id,
|
||||
available_documents=self._getAvailableDocuments(workflow),
|
||||
available_documents=self.service.getAvailableDocuments(workflow),
|
||||
previous_results=previous_results or [],
|
||||
improvements=[],
|
||||
retry_count=0,
|
||||
|
|
@ -205,13 +205,7 @@ class HandlingTasks:
|
|||
return {'error': str(e)}
|
||||
|
||||
# --- Helper and validation methods (unchanged, but can be inlined or made private) ---
|
||||
def _getAvailableDocuments(self, workflow):
|
||||
documents = []
|
||||
for message in workflow.messages:
|
||||
for doc in message.documents:
|
||||
documents.append(doc.filename)
|
||||
return documents
|
||||
|
||||
|
||||
def _parseTaskPlanResponse(self, response: str) -> dict:
|
||||
try:
|
||||
json_start = response.find('{')
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ from modules.interfaces.interfaceChatObjects import getInterface as getChatObjec
|
|||
from modules.interfaces.interfaceChatModel import ActionResult
|
||||
from modules.interfaces.interfaceComponentObjects import getInterface as getComponentObjects
|
||||
from modules.interfaces.interfaceAppObjects import getInterface as getAppObjects
|
||||
from gateway.modules.chat.documents.documentProcessing import DocumentProcessor
|
||||
from modules.chat.documents.documentExtraction import DocumentExtraction
|
||||
from modules.chat.methodBase import MethodBase
|
||||
import uuid
|
||||
|
||||
|
|
@ -37,7 +37,7 @@ class ServiceCenter:
|
|||
self.interfaceComponent = getComponentObjects(currentUser)
|
||||
self.interfaceApp = getAppObjects(currentUser)
|
||||
self.interfaceAiCalls = AiCalls()
|
||||
self.documentProcessor = DocumentProcessor(self)
|
||||
self.documentProcessor = DocumentExtraction(self)
|
||||
|
||||
# Initialize methods catalog
|
||||
self.methods = {}
|
||||
|
|
@ -259,6 +259,15 @@ class ServiceCenter:
|
|||
return filename.split('.')[-1].lower()
|
||||
return "txt" # Default to text
|
||||
|
||||
def getFileExtension(self, filename):
    """
    Extract file extension from filename (without dot, lowercased).

    Only the final path component is inspected, so a dot inside a directory
    name is not mistaken for an extension.
    Returns empty string if no extension is found or if filename is not a
    string (e.g. None).
    """
    if not isinstance(filename, str):
        return ''
    # Accept both separator styles; names may come from any platform.
    basename = filename.replace('\\', '/').rsplit('/', 1)[-1]
    _, dot, extension = basename.rpartition('.')
    return extension.lower() if dot else ''
|
||||
|
||||
# ===== Functions =====
|
||||
|
||||
def extractContent(self, prompt: str, document: ChatDocument) -> ExtractedContent:
|
||||
|
|
@ -859,6 +868,22 @@ Please provide a clear summary of this message."""
|
|||
logger.error(f"Error calculating user input size: {str(e)}")
|
||||
return 0
|
||||
|
||||
def getAvailableDocuments(self, workflow) -> List[str]:
    """
    Get list of available document filenames from workflow.

    Messages are visited in order. A missing or None ``messages`` or
    ``documents`` collection is treated as empty instead of raising.

    Args:
        workflow: ChatWorkflow object

    Returns:
        List[str]: List of document filenames
    """
    return [
        doc.filename
        for message in (getattr(workflow, 'messages', None) or [])
        for doc in (getattr(message, 'documents', None) or [])
    ]
|
||||
|
||||
async def executeAction(self, methodName: str, actionName: str, parameters: Dict[str, Any]) -> ActionResult:
|
||||
"""Execute a method action"""
|
||||
try:
|
||||
|
|
|
|||
|
|
@ -1,11 +1,5 @@
|
|||
|
||||
TODO
|
||||
- refactor the chat manager
|
||||
|
||||
|
||||
- move the document modules into documents (creation, extraction) -> adapt references via global search
|
||||
|
||||
|
||||
- neutralizer to activate AND put back placeholders to the returned data
|
||||
- referenceHandling and authentication for connections in the method actions
|
||||
- check methods
|
||||
|
|
|
|||
|
|
@ -177,7 +177,7 @@ class ServiceCenter:
|
|||
self.tasks: Dict[str, AgentTask] = {}
|
||||
self.promptManager = AIPromptManager()
|
||||
self.taskStateManager = TaskStateManager()
|
||||
self.documentProcessor = DocumentProcessor()
|
||||
self.documentProcessor = DocumentExtraction()
|
||||
|
||||
async def execute_task(self, task: AgentTask) -> None:
|
||||
"""Execute task with improved error handling and timeout"""
|
||||
|
|
@ -304,7 +304,7 @@ class DocumentContext(BaseModel):
|
|||
relevantSections: List[str]
|
||||
processingStatus: Dict[str, str]
|
||||
|
||||
class DocumentProcessor:
|
||||
class DocumentExtraction:
|
||||
"""Processes documents with context awareness"""
|
||||
|
||||
def process_with_context(self, doc: Dict, context: DocumentContext) -> Dict:
|
||||
|
|
|
|||
Loading…
Reference in a new issue