# gateway/modules/methods/methodOperator.py
# 2025-07-08 01:14:27 +02:00

# 339 lines
# 12 KiB
# Python

"""Operator method implementation for handling collections and AI operations"""
from typing import Dict, List, Any, Optional
from datetime import datetime, UTC
import logging
import uuid
from modules.workflow.methodBase import MethodBase, ActionResult, action
logger = logging.getLogger(__name__)
class MethodOperator(MethodBase):
    """Operator method implementation for data operations.

    Every action follows the same pipeline: validate the parameters, resolve
    the document-list reference into chat documents, read each document's file
    data, merge the contents into one prompt and hand that prompt to the AI
    text interface. The AI response is returned unmodified inside the result
    document; no filtering, sorting or transformation is done in Python.

    NOTE(review): the action docstrings keep their original summary line and
    "Parameters:" section (as the last section) in case the @action decorator
    parses them -- confirm against MethodBase before restructuring them.
    """

    # Separator inserted between documents when they are merged into one prompt.
    _DOCUMENT_SEPARATOR = "\n\n--- DOCUMENT SEPARATOR ---\n\n"

    def __init__(self, serviceContainer: Any):
        """
        Register the method under the name "operator"

        Parameters:
            serviceContainer (Any): Container passed on to MethodBase; the actions use it through self.serviceContainer (presumably set by MethodBase)
        """
        super().__init__(serviceContainer)
        self.name = "operator"
        self.description = "Handle data operations like filtering, sorting, and transformation"

    # ------------------------------------------------------------------
    # Private helpers shared by all actions
    # ------------------------------------------------------------------

    def _operatorFailure(self, message: str) -> ActionResult:
        """Build the failed result (empty data plus error text) used by every action."""
        return self._createResult(success=False, data={}, error=message)

    @staticmethod
    def _safeFileExtension(outputFormat: Any) -> str:
        """Reduce a requested output format to characters that are safe in a file name."""
        # The format comes straight from the action parameters; dropping
        # everything except alphanumerics, "_" and "-" keeps path separators
        # and dots out of the generated document name.
        cleaned = "".join(ch for ch in str(outputFormat) if ch.isalnum() or ch in "_-")
        return cleaned or "json"

    def _collectDocumentContent(self, chatDocuments: Any) -> List[Dict[str, Any]]:
        """Read file data and file name for every chat document that has data."""
        documents: List[Dict[str, Any]] = []
        for chatDocument in chatDocuments:
            fileId = chatDocument.fileId
            fileData = self.serviceContainer.getFileData(fileId)
            if not fileData:
                logger.warning("File data not found for fileId: %s", fileId)
                continue
            # NOTE(review): getFileData's return type is not visible here. If it
            # yields bytes, interpolating them would put a b'...' repr into the
            # prompt, so decode defensively; str content passes through as is.
            if isinstance(fileData, (bytes, bytearray)):
                fileData = bytes(fileData).decode("utf-8", errors="replace")
            # Only look up metadata for files that will be used, and tolerate a
            # missing info record instead of failing the whole action.
            fileInfo = self.serviceContainer.getFileInfo(fileId) or {}
            documents.append({
                "fileId": fileId,
                "fileName": fileInfo.get('name', 'unknown'),
                "content": fileData,
            })
        return documents

    def _loadOperatorDocuments(
        self, documentList: Any
    ) -> tuple[Any, List[Dict[str, Any]], Optional[str]]:
        """Resolve a document reference into (chatDocuments, documents, error).

        ``error`` is None on success; otherwise it holds the message the
        action should return and ``documents`` is empty.
        """
        chatDocuments = self.serviceContainer.getChatDocumentsFromDocumentReference(documentList)
        if not chatDocuments:
            return chatDocuments, [], "No documents found for the provided reference"
        documents = self._collectDocumentContent(chatDocuments)
        if not documents:
            return chatDocuments, [], "No content could be extracted from any documents"
        return chatDocuments, documents, None

    def _combineDocumentContent(self, documents: List[Dict[str, Any]]) -> str:
        """Merge all document contents into one text block for the prompt."""
        return self._DOCUMENT_SEPARATOR.join(
            f"File: {doc['fileName']}\nContent: {doc['content']}"
            for doc in documents
        )

    # ------------------------------------------------------------------
    # Actions
    # ------------------------------------------------------------------

    @action
    async def filter(self, parameters: Dict[str, Any]) -> ActionResult:
        """
        Filter data based on criteria
        Parameters:
            documentList (str): Reference to the document list to filter
            criteria (Dict[str, Any]): Filter criteria
            field (str, optional): Field to filter on
        """
        try:
            documentList = parameters.get("documentList")
            criteria = parameters.get("criteria")
            field = parameters.get("field")
            if not documentList or not criteria:
                return self._operatorFailure("Document list reference and criteria are required")

            chatDocuments, documents, error = self._loadOperatorDocuments(documentList)
            if error is not None:
                return self._operatorFailure(error)

            combinedContent = self._combineDocumentContent(documents)
            filterPrompt = (
                "\n"
                "Filter the following data based on the specified criteria.\n"
                "Data to filter:\n"
                f"{combinedContent}\n"
                "Filter criteria:\n"
                f"{criteria}\n"
                f"Field to filter on: {field or 'All fields'}\n"
                "Please provide:\n"
                "1. Filtered data that matches the criteria\n"
                "2. Summary of filtering results\n"
                "3. Number of items before and after filtering\n"
                "4. Any data quality insights\n"
            )
            filteredResult = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(filterPrompt)

            # One clock reading so the timestamp field and the file name agree.
            now = datetime.now(UTC)
            resultData = {
                "documentCount": len(chatDocuments),
                "criteria": criteria,
                "field": field,
                "filteredData": filteredResult,
                "originalCount": len(documents),
                "timestamp": now.isoformat(),
            }
            return self._createResult(
                success=True,
                data={
                    "documentName": f"filtered_data_{now.strftime('%Y%m%d_%H%M%S')}.json",
                    "documentData": resultData,
                },
            )
        except Exception as e:
            # Action boundary: report the failure as a result instead of raising,
            # but keep the traceback in the log.
            logger.exception("Error filtering data: %s", e)
            return self._operatorFailure(str(e))

    @action
    async def sort(self, parameters: Dict[str, Any]) -> ActionResult:
        """
        Sort data by specified field
        Parameters:
            documentList (str): Reference to the document list to sort
            field (str): Field to sort by
            order (str, optional): Sort order (asc/desc, default: "asc")
        """
        try:
            documentList = parameters.get("documentList")
            field = parameters.get("field")
            # `or` also applies the default when the key is present but None/empty.
            order = parameters.get("order") or "asc"
            if not documentList or not field:
                return self._operatorFailure("Document list reference and field are required")

            chatDocuments, documents, error = self._loadOperatorDocuments(documentList)
            if error is not None:
                return self._operatorFailure(error)

            combinedContent = self._combineDocumentContent(documents)
            sortPrompt = (
                "\n"
                "Sort the following data by the specified field.\n"
                "Data to sort:\n"
                f"{combinedContent}\n"
                f"Sort field: {field}\n"
                f"Sort order: {order}\n"
                "Please provide:\n"
                "1. Sorted data in the specified order\n"
                "2. Summary of sorting results\n"
                "3. Any data insights from the sorting\n"
                "4. Validation of sort field existence\n"
            )
            sortedResult = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(sortPrompt)

            # One clock reading so the timestamp field and the file name agree.
            now = datetime.now(UTC)
            resultData = {
                "documentCount": len(chatDocuments),
                "field": field,
                "order": order,
                "sortedData": sortedResult,
                "timestamp": now.isoformat(),
            }
            return self._createResult(
                success=True,
                data={
                    "documentName": f"sorted_data_{now.strftime('%Y%m%d_%H%M%S')}.json",
                    "documentData": resultData,
                },
            )
        except Exception as e:
            # Action boundary: report the failure as a result instead of raising,
            # but keep the traceback in the log.
            logger.exception("Error sorting data: %s", e)
            return self._operatorFailure(str(e))

    @action
    async def transform(self, parameters: Dict[str, Any]) -> ActionResult:
        """
        Transform data structure or format
        Parameters:
            documentList (str): Reference to the document list to transform
            transformation (Dict[str, Any]): Transformation rules
            outputFormat (str, optional): Desired output format
        """
        try:
            documentList = parameters.get("documentList")
            transformation = parameters.get("transformation")
            # `or` also applies the default when the key is present but None/empty,
            # which would otherwise produce a ".None" / "." file extension.
            outputFormat = parameters.get("outputFormat") or "json"
            if not documentList or not transformation:
                return self._operatorFailure("Document list reference and transformation rules are required")

            chatDocuments, documents, error = self._loadOperatorDocuments(documentList)
            if error is not None:
                return self._operatorFailure(error)

            combinedContent = self._combineDocumentContent(documents)
            transformPrompt = (
                "\n"
                "Transform the following data according to the specified rules.\n"
                "Data to transform:\n"
                f"{combinedContent}\n"
                "Transformation rules:\n"
                f"{transformation}\n"
                f"Output format: {outputFormat}\n"
                "Please provide:\n"
                "1. Transformed data in the specified format\n"
                "2. Summary of transformation results\n"
                "3. Validation of transformation rules\n"
                "4. Any data quality improvements\n"
            )
            transformedResult = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(transformPrompt)

            # One clock reading so the timestamp field and the file name agree.
            now = datetime.now(UTC)
            resultData = {
                "documentCount": len(chatDocuments),
                "transformation": transformation,
                "outputFormat": outputFormat,
                "transformedData": transformedResult,
                "timestamp": now.isoformat(),
            }
            # The prompt and result keep the format exactly as requested; only
            # the file name uses the sanitized form.
            extension = self._safeFileExtension(outputFormat)
            return self._createResult(
                success=True,
                data={
                    "documentName": f"transformed_data_{now.strftime('%Y%m%d_%H%M%S')}.{extension}",
                    "documentData": resultData,
                },
            )
        except Exception as e:
            # Action boundary: report the failure as a result instead of raising,
            # but keep the traceback in the log.
            logger.exception("Error transforming data: %s", e)
            return self._operatorFailure(str(e))