# gateway/modules/workflows/methods/methodOutlook/helpers/emailProcessing.py
# 2025-12-17 10:45:09 +01:00

# 184 lines
# 7.8 KiB
# Python

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Email Processing helper for Outlook operations.
Handles email search query sanitization, search parameter building, and filter construction.
"""
import logging
import re
from typing import Dict, Any
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
class EmailProcessingHelper:
    """Helper for email search and processing operations.

    Converts free-form search text into Microsoft Graph query parameters
    (``$top``, ``$filter``, ``$search``, ``$orderby``).
    """

    # Prefix that marks a query as a folder specification ("folder:Drafts").
    _FOLDER_PREFIX = 'folder:'

    # Operators that mark a query as an advanced search. Both the singular
    # and plural spellings of hasAttachment(s) are accepted.
    _SEARCH_OPERATORS = (
        'from:', 'to:', 'subject:', 'received:',
        'hasattachment:', 'hasattachments:',
    )

    # Same operators, but only matched at a word boundary so that a word
    # merely ending in an operator name (e.g. "photo:" contains "to:") is
    # not mistaken for an advanced search.
    _OPERATOR_PATTERN = re.compile(
        r'\b(?:from|to|subject|received|hasattachments?):',
        re.IGNORECASE,
    )

    # Sort order applied to every request that does not use $search.
    _DEFAULT_ORDER = "receivedDateTime desc"

    # Basic text searches are truncated to this length to keep the
    # resulting contains() filter simple.
    _MAX_BASIC_QUERY_LENGTH = 50

    def __init__(self, methodInstance):
        """
        Initialize email processing helper.

        Args:
            methodInstance: Instance of MethodOutlook (for access to services)
        """
        self.method = methodInstance
        self.services = methodInstance.services

    @staticmethod
    def _escapeODataString(value: str) -> str:
        """Escape ``value`` for use inside a single-quoted OData literal.

        OData represents a single quote inside a string literal as two
        consecutive single quotes.
        """
        return value.replace("'", "''")

    @classmethod
    def _folderFilter(cls, folder: str) -> str:
        """Return the ``parentFolderId eq '...'`` clause for ``folder``."""
        return f"parentFolderId eq '{cls._escapeODataString(folder)}'"

    @classmethod
    def _hasSearchOperator(cls, text: str) -> bool:
        """Return True if ``text`` contains a search operator as a whole word."""
        return cls._OPERATOR_PATTERN.search(text) is not None

    @classmethod
    def _folderNameFromSpec(cls, text: str) -> str:
        """Return the folder name of a ``folder:<name>`` spec, else ``""``."""
        if not text.lower().startswith(cls._FOLDER_PREFIX):
            return ""
        return text[len(cls._FOLDER_PREFIX):].strip()

    def sanitizeSearchQuery(self, query: str) -> str:
        """
        Sanitize and validate search query for Microsoft Graph API.

        Args:
            query: Raw search text; may be empty or None.

        Returns:
            - ``""`` for an empty query.
            - The stripped query unchanged when it is a non-empty
              ``folder:<name>`` specification.
            - The query with double quotes removed when it contains a
              search operator (from:, to:, subject:, ...).
            - Otherwise the query with backslashes and single/double quotes
              removed, so it is safe inside a contains() filter.
        """
        if not query:
            return ""

        clean_query = query.strip()

        # Folder specifications are passed through untouched; the caller
        # extracts (and escapes) the folder name.
        if self._folderNameFromSpec(clean_query):
            return clean_query

        # Double quotes would unbalance the quoted $search value.
        clean_query = clean_query.replace('"', '')

        if self._hasSearchOperator(clean_query):
            # Advanced search query: keep the operator syntax intact.
            return clean_query

        # Basic text search: drop characters that could break the OData
        # filter syntax.
        return re.sub(r'[\\\'"]', '', clean_query)

    def buildSearchParameters(self, query: str, folder: str, limit: int) -> Dict[str, Any]:
        """
        Build search parameters for Microsoft Graph API.

        Avoids mixing ``$search`` with ``$filter``/``$orderby``: advanced
        queries use ``$search`` alone, everything else uses ``$filter`` plus
        ``$orderby``.

        Args:
            query: Search text; empty means "list the folder".
            folder: Folder to restrict to; empty or "all" means no restriction.
            limit: Value for ``$top``.

        Returns:
            Dict of query parameters. For advanced queries the folder is NOT
            applied here; the caller must filter results afterwards.
        """
        params: Dict[str, Any] = {"$top": limit}

        # Pre-compute the folder clause once; None means "no restriction".
        folder_filter = None
        if folder and folder.lower() != "all":
            # Custom folder names are expected to be resolved to folder IDs
            # by the calling method; the clause has the same shape either way.
            folder_filter = self._folderFilter(folder)

        if not query or not query.strip():
            # No query specified, just get emails from the folder.
            if folder_filter:
                params["$filter"] = folder_filter
            params["$orderby"] = self._DEFAULT_ORDER
            return params

        clean_query = self.sanitizeSearchQuery(query)

        # "folder:<name>" is a folder specification, not a text search.
        folder_name = self._folderNameFromSpec(clean_query)
        if folder_name:
            params["$filter"] = self._folderFilter(folder_name)
            params["$orderby"] = self._DEFAULT_ORDER
            return params

        if self._hasSearchOperator(clean_query):
            # Advanced query: use $search only. $orderby and the folder
            # $filter are deliberately omitted; folder filtering is done
            # after the API call.
            params["$search"] = f'"{clean_query}"'
            return params

        # Basic text search: keep the filter simple to avoid the
        # "InefficientFilter" error by truncating and searching subject only.
        clean_query = clean_query[:self._MAX_BASIC_QUERY_LENGTH]

        if clean_query in ("*", ""):
            # Wildcard/empty query: no contains() clause, folder only.
            if folder_filter:
                params["$filter"] = folder_filter
        else:
            text_filter = f"contains(subject,'{self._escapeODataString(clean_query)}')"
            if folder_filter:
                text_filter = f"{text_filter} and {folder_filter}"
            params["$filter"] = text_filter

        params["$orderby"] = self._DEFAULT_ORDER
        return params

    def buildGraphFilter(self, filter_text: str) -> Dict[str, str]:
        """
        Build proper Microsoft Graph API filter parameters based on filter text.

        Args:
            filter_text (str): The filter text to process

        Returns:
            Dict[str, str]: Dictionary with either $filter or $search
            parameter; empty when the text is blank or is a folder
            specification (which the main method handles itself).
        """
        if not filter_text:
            return {}

        filter_text = filter_text.strip()
        if not filter_text:
            # Whitespace-only input carries no filter.
            return {}

        # Folder specifications (e.g. "folder:Drafts") are handled by the
        # main method, so no parameter is produced here.
        if self._folderNameFromSpec(filter_text):
            return {}

        lowered = filter_text.lower()

        # Search queries (from:, to:, subject:, ...) are checked first so
        # that e.g. "from:a@b.com" is not treated as a bare email address.
        if lowered.startswith(self._SEARCH_OPERATORS):
            # Embedded double quotes would unbalance the quoted value;
            # removed for consistency with sanitizeSearchQuery.
            search_text = filter_text.replace('"', '')
            return {"$search": f'"{search_text}"'}

        escaped = self._escapeODataString(filter_text)

        # A single token containing "@" and "." is treated as a sender address.
        if '@' in filter_text and '.' in filter_text and ' ' not in filter_text:
            return {"$filter": f"from/emailAddress/address eq '{escaped}'"}

        # Text that already looks like an OData condition is passed through
        # verbatim.
        odata_operators = (' eq ', ' ne ', ' gt ', ' lt ', ' ge ', ' le ', ' and ', ' or ')
        if any(op in lowered for op in odata_operators):
            return {"$filter": filter_text}

        # Plain text: search in subject.
        return {"$filter": f"contains(subject,'{escaped}')"}