"""
|
|
Centralized utility functions for the Agentservice (continued).
|
|
"""
|
|
|
|
import os
|
|
import logging
|
|
import json
|
|
import uuid
|
|
from datetime import datetime
|
|
from typing import List, Dict, Any, Optional, Tuple, Union, Callable
|
|
from io import BytesIO
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
class WorkflowUtils:
    """
    Utility class for workflow operations.
    Centralizes common workflow-related functions.
    """

    def __init__(self, workflow_id: str = None):
        """Initialize with optional workflow ID"""
        self.workflow_id = workflow_id

    def set_workflow_id(self, workflow_id: str):
        """Set or update the workflow ID"""
        self.workflow_id = workflow_id

    def get_documents(self, workflow: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        Get all documents from a workflow across all messages.

        Args:
            workflow: The workflow object

        Returns:
            List of document objects, deduplicated by document "id"
        """
        documents: List[Dict[str, Any]] = []
        seen_ids = set()

        # Process all messages
        for message in workflow.get("messages", []):
            for doc in message.get("documents", []):
                doc_id = doc.get("id")
                # O(1) membership test instead of rescanning the result list
                if doc_id not in seen_ids:
                    seen_ids.add(doc_id)
                    documents.append(doc)

        return documents

    def get_files(self, workflow: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        Get all file references from a workflow.

        Args:
            workflow: The workflow object

        Returns:
            List of file metadata objects, deduplicated by file "id"
        """
        files: List[Dict[str, Any]] = []
        seen_ids = set()

        # Process all messages
        for message in workflow.get("messages", []):
            for doc in message.get("documents", []):
                source = doc.get("source", {})

                # Only include documents backed by an uploaded file
                if source.get("type") != "file":
                    continue

                file_id = source.get("id", "")
                if file_id in seen_ids:
                    continue
                seen_ids.add(file_id)

                files.append({
                    "id": file_id,
                    "name": source.get("name", ""),
                    # "type" mirrors "content_type" for backward compatibility
                    "type": source.get("content_type", ""),
                    "content_type": source.get("content_type", ""),
                    "size": source.get("size", 0)
                })

        return files

    def extract_by_prompt(self, workflow: Dict[str, Any], prompt: str, ai_service) -> Dict[str, Any]:
        """
        Extract data from workflow documents based on an AI prompt.

        Note: although this method is synchronous, it returns a coroutine —
        the caller must ``await`` it (or run it with ``asyncio.run``).

        Args:
            workflow: The workflow object
            prompt: The extraction prompt
            ai_service: The AI service to use for extraction; must expose an
                async ``call_api(messages)`` method

        Returns:
            Coroutine resolving to a dict with the prompt, the extracted
            content and the number of files processed
        """
        async def _extract():
            # Describe the workflow's files for the extraction prompt
            files = self.get_files(workflow)
            file_descriptions = "\n".join([f"- {f.get('name', 'unnamed')} ({f.get('type', 'unknown')})" for f in files])

            extraction_prompt = f"""
            Extract relevant information from the following files based on this request:

            REQUEST: {prompt}

            FILES:
            {file_descriptions}

            Focus on the most relevant content and provide a structured output.
            """

            # Call AI
            response = await ai_service.call_api([{"role": "user", "content": extraction_prompt}])

            return {
                "prompt": prompt,
                "extracted_content": response,
                "files_processed": len(files)
            }

        # Return the coroutine for the caller to await
        return _extract()

    def merge_workflows(self, workflows: List[Dict[str, Any]]) -> Dict[str, Any]:
        """
        Merge multiple workflows into a single unified workflow.
        Useful for workflow templates or combining partial workflows.

        Args:
            workflows: List of workflow objects to merge

        Returns:
            Merged workflow (the input workflows are left unmodified)
        """
        if not workflows:
            return {}

        # Shallow-copy the first workflow, but give the result its OWN
        # message/log lists: the previous code shared the list objects with
        # workflows[0], so every append below mutated the input workflow.
        result = workflows[0].copy()
        result["messages"] = list(workflows[0].get("messages", []))
        result["logs"] = list(workflows[0].get("logs", []))

        # Merge additional workflows
        for workflow in workflows[1:]:
            # Append messages, skipping duplicates by id
            for message in workflow.get("messages", []):
                if not any(m.get("id") == message.get("id") for m in result["messages"]):
                    result["messages"].append(message)

            # Append logs, skipping duplicates by id
            for log in workflow.get("logs", []):
                if not any(entry.get("id") == log.get("id") for entry in result["logs"]):
                    result["logs"].append(log)

            # A single failed workflow marks the merged result as failed
            if workflow.get("status") == "failed":
                result["status"] = "failed"

            # Keep the most recent last_activity timestamp
            if (workflow.get("last_activity") and
                    (not result.get("last_activity") or
                     workflow["last_activity"] > result["last_activity"])):
                result["last_activity"] = workflow["last_activity"]

        return result

    def get_message(self, workflow: Dict[str, Any], message_id: str) -> Optional[Dict[str, Any]]:
        """
        Find a message by ID in the workflow.

        Args:
            workflow: The workflow object
            message_id: The message ID to find

        Returns:
            Message object or None if not found
        """
        for message in workflow.get("messages", []):
            if message.get("id") == message_id:
                return message
        return None

    def to_str(self, workflow: Dict[str, Any]) -> str:
        """
        Convert workflow to a formatted string representation.

        Args:
            workflow: The workflow object

        Returns:
            String representation of the workflow
        """
        # Create a summary string
        result = f"Workflow: {workflow.get('id')}\n"
        result += f"Status: {workflow.get('status', 'unknown')}\n"
        result += f"Started: {workflow.get('started_at', 'unknown')}\n"
        result += f"Last Activity: {workflow.get('last_activity', 'unknown')}\n"

        # Add message count
        message_count = len(workflow.get("messages", []))
        result += f"Messages: {message_count}\n"

        # Add log count
        log_count = len(workflow.get("logs", []))
        result += f"Logs: {log_count}\n"

        return result
|
|
|
|
|
|
class MessageUtils:
    """
    Utility class for message operations.
    Centralizes common message-related functions.
    """

    def create_message(self, workflow_id: str, role: str = "system") -> Dict[str, Any]:
        """
        Create a new message object.

        Args:
            workflow_id: ID of the workflow
            role: Role of the message ('system', 'user', 'assistant')

        Returns:
            New message object
        """
        now = datetime.now().isoformat()

        # Assemble the full message record in one literal
        return {
            "id": f"msg_{uuid.uuid4()}",
            "workflow_id": workflow_id,
            "parent_message_id": None,
            "started_at": now,
            "finished_at": None,
            "sequence_no": 0,

            "status": "pending",
            "role": role,

            "data_stats": {
                "processing_time": 0.0,
                "token_count": 0,
                "bytes_sent": 0,
                "bytes_received": 0
            },

            "documents": [],
            "content": None,
            "agent_type": None
        }

    def finalize_message(self, message: Dict[str, Any]) -> Dict[str, Any]:
        """
        Finalize a message by setting completion timestamp.

        Args:
            message: The message object

        Returns:
            Updated message object (mutated in place and returned)
        """
        message["finished_at"] = datetime.now().isoformat()
        message["status"] = "completed"
        return message

    def get_documents(self, message: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        Get all documents from a message.

        Args:
            message: The message object

        Returns:
            List of document objects
        """
        return message.get("documents", [])

    def get_files(self, message: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        Get all file references from a message.

        Args:
            message: The message object

        Returns:
            List of file metadata objects
        """
        sources = (doc.get("source", {}) for doc in message.get("documents", []))

        # Keep only documents backed by an uploaded file
        return [
            {
                "id": src.get("id", ""),
                "name": src.get("name", ""),
                "type": src.get("content_type", ""),
                "content_type": src.get("content_type", ""),
                "size": src.get("size", 0)
            }
            for src in sources
            if src.get("type") == "file"
        ]

    def extract_text_content(self, message: Dict[str, Any]) -> str:
        """
        Extract text content from a message including document content.

        Args:
            message: The message object

        Returns:
            String with all text content from the message
        """
        content = message.get("content", "")

        # Collect every text fragment carried by the message's documents
        extras = [
            "\n\n" + part.get("text", "")
            for doc in message.get("documents", [])
            for part in doc.get("contents", [])
            if part.get("type") == "text"
        ]

        if extras:
            content = content + "".join(extras)

        return content

    def to_str(self, message: Dict[str, Any]) -> str:
        """
        Convert message to a formatted string representation.

        Args:
            message: The message object

        Returns:
            String representation of the message
        """
        parts = [
            f"Message: {message.get('id')}\n",
            f"Role: {message.get('role', 'unknown')}\n",
        ]

        # Agent info, when the message was produced by an agent
        if message.get("agent_type"):
            parts.append(f"Agent: {message.get('agent_name', message.get('agent_type', 'unknown'))}\n")

        # Content preview, truncated to 100 characters
        content = message.get("content", "")
        if content:
            preview = content[:100] + "..." if len(content) > 100 else content
            parts.append(f"Content: {preview}\n")

        parts.append(f"Documents: {len(message.get('documents', []))}\n")

        return "".join(parts)
|
|
|
|
|
|
class FileUtils:
    """
    Utility class for file operations.
    Centralizes common file-related functions.
    """

    def is_text_extractable(self, file_name: str, content_type: str = None) -> bool:
        """
        Check if text can be extracted from a file.

        Args:
            file_name: Name of the file
            content_type: MIME type (optional), consulted only when the
                extension gives no answer

        Returns:
            True if text can be extracted, False otherwise
        """
        import importlib

        # Plain-text formats are always extractable
        if file_name.endswith(('.txt', '.md', '.json', '.xml', '.html', '.htm', '.css', '.js', '.py', '.csv')):
            return True

        # Excel files require pandas
        if file_name.endswith(('.xlsx', '.xls')):
            try:
                importlib.import_module('pandas')
                return True
            except ImportError:
                return False

        # PDF files require PyPDF2 or PyMuPDF (imported as 'fitz')
        if file_name.endswith('.pdf'):
            for pdf_module in ('PyPDF2', 'fitz'):
                try:
                    importlib.import_module(pdf_module)
                    return True
                except ImportError:
                    continue
                except Exception:
                    # Broken installation: treat the library as unavailable.
                    # (Replaces the original bare `except:` which also
                    # swallowed KeyboardInterrupt/SystemExit.)
                    return False
            return False

        # Known binary media formats
        if file_name.endswith(('.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.svg',
                               '.mp4', '.avi', '.mov', '.mkv', '.flv', '.wmv',
                               '.mp3', '.wav', '.ogg', '.flac', '.aac')):
            return False

        # Fall back to the MIME type when the extension is inconclusive
        if content_type:
            if content_type.startswith(('text/', 'application/json', 'application/xml')):
                return True
            if content_type == 'application/pdf':
                return True
            if content_type.startswith(('image/', 'video/', 'audio/')):
                return False

        # Default to allowing an extraction attempt
        return True

    def get_mime_type(self, file_name: str) -> str:
        """
        Get MIME type based on file name.

        Args:
            file_name: Name of the file

        Returns:
            MIME type string; 'application/octet-stream' when unknown
        """
        import mimetypes

        # Initialize mimetypes
        mimetypes.init()

        # Get MIME type from the platform tables
        mime_type, _ = mimetypes.guess_type(file_name)

        if not mime_type:
            # Fallback mappings for common extensions the platform may miss
            extension_map = {
                'txt': 'text/plain',
                'md': 'text/markdown',
                'json': 'application/json',
                'csv': 'text/csv',
                'html': 'text/html',
                'htm': 'text/html',
                'pdf': 'application/pdf',
                'docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
                'xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
                'pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
                'jpg': 'image/jpeg',
                'jpeg': 'image/jpeg',
                'png': 'image/png',
                'gif': 'image/gif',
                'svg': 'image/svg+xml',
                'webp': 'image/webp',
                'mp4': 'video/mp4',
                'mp3': 'audio/mpeg'
            }

            # Get extension (lower-cased, without the leading dot)
            ext = os.path.splitext(file_name)[1].lower().lstrip('.')

            # Return mapped MIME type or generic binary default
            mime_type = extension_map.get(ext, 'application/octet-stream')

        return mime_type
|
|
|
|
|
|
class LoggingUtils:
    """
    Enhanced logging utilities for better workflow tracking.
    Provides structured and categorized logging for workflows.
    """

    def __init__(self, workflow_id: str = None, log_func: Callable = None):
        """
        Initialize logging utilities.

        Args:
            workflow_id: ID of the workflow for context
            log_func: Function to call for adding workflow logs; invoked as
                ``log_func(workflow_id, message, level, category, category_name)``
        """
        self.workflow_id = workflow_id
        self.log_func = log_func
        self.logger = logging.getLogger(__name__)

        # Define log categories (code -> human-readable name)
        self.categories = {
            "workflow": "Workflow Management",
            "planning": "Activity Planning",
            "execution": "Activity Execution",
            "agents": "Agent Selection & Execution",
            "files": "File Processing",
            "summary": "Results Summary",
            "error": "Error Handling",
            "code": "Code Execution",
        }

    def set_workflow_id(self, workflow_id: str):
        """Update the workflow ID"""
        self.workflow_id = workflow_id

    def set_log_func(self, log_func: Callable):
        """Update the log function"""
        self.log_func = log_func

    def _log(self, level: str, message: str, category: str, details: str, forward: bool = True):
        """
        Shared implementation for the level-specific helpers.

        Args:
            level: Logger method name ('info', 'warning', 'error', 'debug')
            message: The log message
            category: Log category code
            details: Optional detail text, appended to the standard log line
            forward: Whether to forward the entry to the workflow log function
        """
        category_name = self.categories.get(category, category)
        log_message = f"[{category_name}] {message}"

        # Previously `details` was accepted by every level method but silently
        # discarded; surface it in the standard log line so it is not lost.
        if details:
            log_message = f"{log_message} | {details}"

        # Log to standard logger at the requested level
        getattr(self.logger, level)(log_message)

        # Log to workflow if a sink function and workflow context are available.
        # The raw message (without the category prefix) is forwarded, matching
        # the original call signature.
        if forward and self.log_func and self.workflow_id:
            self.log_func(self.workflow_id, message, level, category, category_name)

    def info(self, message: str, category: str = "workflow", details: str = None):
        """
        Log an informational message.

        Args:
            message: The log message
            category: Log category
            details: Optional detailed information
        """
        self._log("info", message, category, details)

    def warning(self, message: str, category: str = "workflow", details: str = None):
        """
        Log a warning message.

        Args:
            message: The log message
            category: Log category
            details: Optional detailed information
        """
        self._log("warning", message, category, details)

    def error(self, message: str, category: str = "error", details: str = None):
        """
        Log an error message.

        Args:
            message: The log message
            category: Log category
            details: Optional detailed information
        """
        self._log("error", message, category, details)

    def debug(self, message: str, category: str = "workflow", details: str = None):
        """
        Log a debug message.

        Note: debug entries go to the standard logger only; like the original
        implementation, they are never forwarded to the workflow log.

        Args:
            message: The log message
            category: Log category
            details: Optional detailed information
        """
        self._log("debug", message, category, details, forward=False)

    def get_category_name(self, category: str) -> str:
        """
        Get human-readable category name.

        Args:
            category: Category code

        Returns:
            Human-readable category name (the code itself when unknown)
        """
        return self.categories.get(category, category)
|
|
|
|
|
|
def _decode_text_bytes(file_content: bytes) -> str:
    """Decode raw bytes as text: utf-8 first, then latin1, then cp1252 with replacement."""
    try:
        return file_content.decode('utf-8')
    except UnicodeDecodeError:
        try:
            # latin1 maps every byte value, so this effectively always succeeds
            return file_content.decode('latin1')
        except UnicodeDecodeError:
            # Defensive final fallback (was guarded by a bare `except:` before)
            return file_content.decode('cp1252', errors='replace')


def _render_dataframe(df, kind: str) -> str:
    """Render a pandas DataFrame as a summary header plus the full table."""
    result = f"{kind} file with {len(df)} rows and {len(df.columns)} columns.\n"
    result += f"Columns: {', '.join(df.columns.tolist())}\n\n"
    result += df.to_string(index=False)
    return result


def _extract_excel(file_content: bytes, file_name: str) -> Tuple[str, bool]:
    """Extract a tabular text rendering from .xlsx/.xls bytes via pandas."""
    try:
        import pandas as pd
        df = pd.read_excel(BytesIO(file_content))
        return _render_dataframe(df, "Excel"), True
    except ImportError:
        return f"[Excel file: {file_name} - pandas not installed]", False
    except Exception as e:
        return f"[Error extracting Excel content: {str(e)}]", False


def _extract_csv(file_content: bytes, file_name: str) -> Tuple[str, bool]:
    """Extract a tabular text rendering from .csv bytes via pandas."""
    try:
        import pandas as pd
    except ImportError:
        # CSV is still plain text — fall back to direct decoding rather
        # than failing outright.
        return _decode_text_bytes(file_content), True

    try:
        try:
            df = pd.read_csv(BytesIO(file_content), encoding='utf-8')
        except UnicodeDecodeError:
            try:
                df = pd.read_csv(BytesIO(file_content), encoding='latin1')
            except Exception:
                df = pd.read_csv(BytesIO(file_content), encoding='cp1252')
        return _render_dataframe(df, "CSV"), True
    except Exception as e:
        return f"[Error extracting CSV content: {str(e)}]", False


def _extract_pdf(file_content: bytes, file_name: str) -> Tuple[str, bool]:
    """Extract text from PDF bytes via PyPDF2, falling back to PyMuPDF."""
    try:
        from PyPDF2 import PdfReader
        reader = PdfReader(BytesIO(file_content))
        return "".join(page.extract_text() + "\n\n" for page in reader.pages), True
    except ImportError:
        pass
    except Exception as e:
        return f"[Error reading PDF file {file_name}: {str(e)}]", False

    try:
        import fitz  # PyMuPDF
        doc = fitz.open(stream=file_content, filetype="pdf")
        return "".join(page.get_text() + "\n\n" for page in doc), True
    except ImportError:
        return f"[PDF: {file_name} - No PDF library installed]", False
    except Exception as e:
        return f"[Error reading PDF file {file_name}: {str(e)}]", False


def extract_text_from_file_content(file_content: bytes, file_name: str, content_type: str = None) -> Tuple[str, bool]:
    """
    Extract text from various file formats based on binary content.

    Args:
        file_content: Binary content of the file
        file_name: Name of the file for format detection
        content_type: Optional MIME type of the file

    Returns:
        Tuple with (extracted text, is_extracted flag)
    """
    # Check if file is likely text-extractable at all
    if not is_text_extractable(file_name, content_type):
        return f"[File: {file_name} - Text extraction not supported]", False

    try:
        # CSV must be dispatched BEFORE the generic text branch: previously
        # '.csv' was matched by the generic extension tuple, which made the
        # pandas CSV path unreachable dead code.
        if file_name.endswith('.csv'):
            return _extract_csv(file_content, file_name)

        # Excel files
        if file_name.endswith(('.xlsx', '.xls')):
            return _extract_excel(file_content, file_name)

        # PDF files
        if file_name.endswith('.pdf'):
            return _extract_pdf(file_content, file_name)

        # Simple text files (by extension or declared MIME type)
        if file_name.endswith(('.txt', '.md', '.json', '.xml', '.html', '.htm', '.css', '.js', '.py', '.log', '.ini', '.cfg', '.conf')) or (content_type and (content_type.startswith('text/') or content_type in ['application/json', 'application/xml', 'text/csv'])):
            return _decode_text_bytes(file_content), True

        # Default case - try basic, lossy text extraction
        return file_content.decode('utf-8', errors='replace'), True

    except Exception as e:
        logger.error(f"Error extracting text from {file_name}: {str(e)}")
        return f"[Text extraction error: {str(e)}]", False
|
|
|
|
|
|
def is_text_extractable(file_name: str, content_type: str = None) -> bool:
    """
    Check if text can be extracted from a file.

    Args:
        file_name: Name of the file
        content_type: Optional MIME type, consulted only when the extension
            gives no answer

    Returns:
        True if text extraction should be attempted, False otherwise
    """
    import importlib

    # Plain-text formats are always extractable
    if file_name.endswith(('.txt', '.md', '.json', '.xml', '.html', '.htm', '.css', '.js', '.py', '.csv')):
        return True

    # Excel files require pandas
    if file_name.endswith(('.xlsx', '.xls')):
        try:
            importlib.import_module('pandas')
            return True
        except ImportError:
            return False

    # PDF files require PyPDF2 or PyMuPDF (imported as 'fitz')
    if file_name.endswith('.pdf'):
        for pdf_module in ('PyPDF2', 'fitz'):
            try:
                importlib.import_module(pdf_module)
                return True
            except ImportError:
                continue
            except Exception:
                # Broken installation: treat the library as unavailable.
                # (Replaces the original bare `except:` which also swallowed
                # KeyboardInterrupt/SystemExit.)
                return False
        return False

    # Known binary media formats
    if file_name.endswith(('.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.svg',
                           '.mp4', '.avi', '.mov', '.mkv', '.flv', '.wmv',
                           '.mp3', '.wav', '.ogg', '.flac', '.aac')):
        return False

    # Fall back to the MIME type when the extension is inconclusive
    if content_type:
        if content_type.startswith(('text/', 'application/json', 'application/xml')):
            return True
        if content_type == 'application/pdf':
            return True
        if content_type.startswith(('image/', 'video/', 'audio/')):
            return False

    # Default to allowing an extraction attempt
    return True