Merge pull request #21 from valueonag/dev-patrick-20250515

Dev patrick 20250515

Commit 2e1850af8b: 38 changed files with 7156 additions and 4344 deletions
114 GOOGLE_OAUTH_SETUP.md (normal file)

@@ -0,0 +1,114 @@
# Google OAuth 2.0 Setup Guide for PowerOn

## Overview

This guide explains how to set up Google OAuth 2.0 authentication for the PowerOn application.

## Prerequisites

- A Google account
- Access to the [Google Cloud Console](https://console.cloud.google.com/)

## Step 1: Create a Google Cloud Project

1. Go to the [Google Cloud Console](https://console.cloud.google.com/)
2. Click on the project dropdown at the top of the page
3. Click "New Project"
4. Enter a project name (e.g., "PowerOn OAuth")
5. Click "Create"

## Step 2: Enable the Google Identity API

1. In your new project, go to "APIs & Services" > "Library"
2. Search for "Google Identity" (the legacy "Google+ API" has been shut down and should no longer be used)
3. Click the matching API entry and click "Enable"

## Step 3: Create OAuth 2.0 Credentials

1. Go to "APIs & Services" > "Credentials"
2. Click "Create Credentials" > "OAuth client ID"
3. If prompted, configure the OAuth consent screen first:
   - Choose "External" user type
   - Fill in the required fields (app name, user support email, developer contact information)
   - Add scopes: `https://www.googleapis.com/auth/userinfo.profile`, `https://www.googleapis.com/auth/userinfo.email`
   - Add test users if needed
   - Click "Save and Continue" through all sections
4. Back in the OAuth client ID form:
   - Application type: "Web application"
   - Name: "PowerOn Web Client"
   - Authorized redirect URIs: add your redirect URI
     - For development: `http://localhost:8000/api/google/auth/callback`
     - For production: `https://yourdomain.com/api/google/auth/callback`
5. Click "Create"
6. **Important**: Copy the Client ID and Client Secret; you will need them in the next step. The sketch below shows where these values land in a standard authorization URL.
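A minimal, hypothetical sketch of how the credentials and scopes above fit into an authorization URL. The endpoint and parameter names are standard Google OAuth 2.0; the helper function itself is illustrative, not PowerOn's actual code:

```python
from urllib.parse import urlencode

GOOGLE_AUTH_ENDPOINT = "https://accounts.google.com/o/oauth2/v2/auth"

def build_auth_url(client_id: str, redirect_uri: str) -> str:
    """Assemble the consent-screen URL a user is sent to (illustrative helper)."""
    params = {
        "client_id": client_id,
        "redirect_uri": redirect_uri,  # must exactly match a URI registered above
        "response_type": "code",       # authorization-code flow
        "scope": "https://www.googleapis.com/auth/userinfo.profile "
                 "https://www.googleapis.com/auth/userinfo.email",
        "access_type": "offline",      # ask Google for a refresh token
        "prompt": "consent",
    }
    return f"{GOOGLE_AUTH_ENDPOINT}?{urlencode(params)}"
```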
## Step 4: Configure the PowerOn Application

1. Open your environment file (`gateway/env_dev.env` for development)
2. Replace the placeholder values with your actual Google OAuth credentials:

```env
# Google OAuth Configuration
Service_GOOGLE_CLIENT_ID = your-actual-client-id-from-google-console
Service_GOOGLE_CLIENT_SECRET = your-actual-client-secret-from-google-console
Service_GOOGLE_REDIRECT_URI = http://localhost:8000/api/google/auth/callback
```

3. Save the file
4. Restart your PowerOn gateway server
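Conceptually, the gateway then picks these values up from its environment. A hedged sketch of that lookup; the key names come from the file above, but the loading mechanism shown is an assumption about PowerOn's internals:

```python
import os

# Assumption: the gateway loads its env file into process environment variables.
GOOGLE_CLIENT_ID = os.environ.get("Service_GOOGLE_CLIENT_ID", "")
GOOGLE_CLIENT_SECRET = os.environ.get("Service_GOOGLE_CLIENT_SECRET", "")
GOOGLE_REDIRECT_URI = os.environ.get(
    "Service_GOOGLE_REDIRECT_URI",
    "http://localhost:8000/api/google/auth/callback",
)

# Fail fast if the placeholders from this guide were never replaced
if not GOOGLE_CLIENT_ID or GOOGLE_CLIENT_ID.startswith("your-"):
    raise RuntimeError("Service_GOOGLE_CLIENT_ID is not configured")
```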
## Step 5: Test the Configuration

1. Start your PowerOn application
2. Go to the Connections module
3. Click "Connect Google"
4. You should be redirected to Google's OAuth consent screen
5. After authorization, you should be redirected back to PowerOn (the sketch after this list shows what a callback endpoint typically does with the returned code)
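Behind the redirect in step 5, the callback endpoint exchanges the one-time `code` query parameter for tokens. A hedged sketch of that exchange, using Google's standard token endpoint; the function and the `requests` usage are illustrative, not PowerOn's actual gateway code:

```python
import requests

GOOGLE_TOKEN_ENDPOINT = "https://oauth2.googleapis.com/token"

def exchange_code_for_tokens(code: str, client_id: str, client_secret: str,
                             redirect_uri: str) -> dict:
    """Swap the one-time authorization code for tokens (illustrative helper)."""
    response = requests.post(
        GOOGLE_TOKEN_ENDPOINT,
        data={
            "code": code,
            "client_id": client_id,
            "client_secret": client_secret,
            "redirect_uri": redirect_uri,  # must match the URI used in the auth request
            "grant_type": "authorization_code",
        },
        timeout=30,
    )
    response.raise_for_status()
    # Typical payload: access_token, expires_in, refresh_token (if granted), id_token
    return response.json()
```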
## Troubleshooting

### Common Issues

#### 1. "Missing required parameter: redirect_uri"

- **Cause**: The Google OAuth client is not properly configured with the redirect URI
- **Solution**: Ensure the redirect URI in Google Cloud Console exactly matches your application's callback URL

#### 2. "Invalid client" error

- **Cause**: The Client ID or Client Secret is incorrect
- **Solution**: Double-check the credentials in your environment file

#### 3. "Redirect URI mismatch" error

- **Cause**: The redirect URI in your OAuth request doesn't match what's configured in Google Cloud Console
- **Solution**: Ensure both URIs are identical, including protocol, domain, port, and path

### Debug Steps

1. Check the PowerOn gateway logs for OAuth configuration details
2. Verify that environment variables are loaded correctly
3. Ensure the Google OAuth client is configured as the "Web application" type
4. Check that the redirect URI includes the full path: `/api/google/auth/callback`
## Security Notes

- **Never commit** your Google OAuth credentials to version control
- Use environment variables or secure configuration management
- Regularly rotate your client secrets
- Monitor OAuth usage in Google Cloud Console

## Production Considerations

For production deployment:

1. Use HTTPS for all OAuth redirects
2. Configure proper domain verification in Google Cloud Console
3. Set up monitoring and alerting for OAuth usage
4. Consider implementing additional security measures like PKCE (Proof Key for Code Exchange); a minimal sketch follows below
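A minimal PKCE sketch (RFC 7636, S256 method), added to illustrate item 4 above; this is generic OAuth hardening, not something PowerOn is known to implement:

```python
import base64
import hashlib
import secrets

def make_pkce_pair() -> tuple[str, str]:
    """Generate a PKCE code_verifier/code_challenge pair (RFC 7636, S256 method)."""
    # verifier: 43-128 URL-safe characters; token_urlsafe(64) yields ~86
    code_verifier = secrets.token_urlsafe(64)
    digest = hashlib.sha256(code_verifier.encode("ascii")).digest()
    # challenge: base64url(SHA-256(verifier)) with '=' padding stripped
    code_challenge = base64.urlsafe_b64encode(digest).rstrip(b"=").decode("ascii")
    return code_verifier, code_challenge

# Send code_challenge (with code_challenge_method=S256) in the authorization
# request, then prove possession by sending code_verifier during token exchange.
```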
## Support

If you continue to experience issues:

1. Check the PowerOn gateway logs for detailed error messages
2. Verify your Google OAuth configuration in Google Cloud Console
3. Test with a simple OAuth flow to isolate the issue
4. Ensure your Google Cloud project has billing enabled (required for some APIs)
3 app.py

@@ -53,7 +53,8 @@ def initLogging():
             'response_closed.started',
             '_send_single_request',
             'httpcore.http11',
-            'httpx._client'
+            'httpx._client',
+            'HTTP Request'
         ]
         return not any(pattern in record.msg for pattern in http_debug_patterns)
     return True
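For context, the pattern list above feeds a log filter inside `initLogging()`. A hedged reconstruction of the kind of `logging.Filter` involved; everything except the pattern list and the two `return` lines is an assumption, not PowerOn's exact code:

```python
import logging

class HttpDebugFilter(logging.Filter):
    """Drops noisy httpx/httpcore debug records (assumed wrapper for the diffed logic)."""

    def filter(self, record: logging.LogRecord) -> bool:
        http_debug_patterns = [
            'response_closed.started',
            '_send_single_request',
            'httpcore.http11',
            'httpx._client',
            'HTTP Request',
        ]
        if isinstance(record.msg, str):
            # Keep the record only if no noisy pattern appears in its message
            return not any(pattern in record.msg for pattern in http_debug_patterns)
        return True
```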
@@ -55,6 +55,5 @@ Service_MSFT_CLIENT_SECRET = Kxf8Q~2lJIteZ~JaI32kMf1lfaWKATqxXiNiFbzV
 Service_MSFT_TENANT_ID = common
 
 # Google Service configuration
-Service_GOOGLE_CLIENT_ID = your-google-client-id
-Service_GOOGLE_CLIENT_SECRET = your-google-client-secret
-Service_GOOGLE_REDIRECT_URI = http://localhost:8000/api/google/auth/callback
+Service_GOOGLE_CLIENT_ID = 354925410565-aqs2b2qaiqmm73qpjnel6al8eid78uvg.apps.googleusercontent.com
+Service_GOOGLE_CLIENT_SECRET = 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
45 env_dev.env

@@ -1,45 +0,0 @@
-# Development Environment Configuration
-
-# System Configuration
-APP_ENV_TYPE = dev
-APP_ENV_LABEL = Development Instance Patrick
-APP_API_URL = http://localhost:8000
-
-# Database Configuration for Application
-DB_APP_HOST=D:/Temp/_powerondb
-DB_APP_DATABASE=app
-DB_APP_USER=dev_user
-DB_APP_PASSWORD_SECRET=dev_password
-
-# Database Configuration Chat
-DB_CHAT_HOST=D:/Temp/_powerondb
-DB_CHAT_DATABASE=chat
-DB_CHAT_USER=dev_user
-DB_CHAT_PASSWORD_SECRET=dev_password
-
-# Database Configuration Management
-DB_MANAGEMENT_HOST=D:/Temp/_powerondb
-DB_MANAGEMENT_DATABASE=management
-DB_MANAGEMENT_USER=dev_user
-DB_MANAGEMENT_PASSWORD_SECRET=dev_password
-
-# Security Configuration
-APP_JWT_SECRET_SECRET=dev_jwt_secret_token
-APP_TOKEN_EXPIRY=300
-
-# CORS Configuration
-APP_ALLOWED_ORIGINS=http://localhost:8080,https://playground.poweron-center.net,http://localhost:5176,https://nyla.poweron-center.net
-
-# Logging configuration
-APP_LOGGING_LOG_LEVEL = DEBUG
-APP_LOGGING_LOG_FILE = poweron.log
-APP_LOGGING_FORMAT = %(asctime)s - %(levelname)s - %(name)s - %(message)s
-APP_LOGGING_DATE_FORMAT = %Y-%m-%d %H:%M:%S
-APP_LOGGING_CONSOLE_ENABLED = True
-APP_LOGGING_FILE_ENABLED = True
-APP_LOGGING_ROTATION_SIZE = 10485760
-APP_LOGGING_BACKUP_COUNT = 5
-
-# Service Redirects
-Service_MSFT_REDIRECT_URI = http://localhost:8000/api/msft/auth/callback
-Service_GOOGLE_REDIRECT_URI = http://localhost:8000/api/google/auth/callback
@@ -42,4 +42,4 @@ APP_LOGGING_BACKUP_COUNT = 5
 
 # Service Redirects
 Service_MSFT_REDIRECT_URI = https://gateway.poweron-center.net/api/msft/auth/callback
-Service_GOOGLE_REDIRECT_URI = http://gateway.poweron-center.net/api/google/auth/callback
+Service_GOOGLE_REDIRECT_URI = https://gateway.poweron-center.net/api/google/auth/callback
File diff suppressed because it is too large
@@ -41,33 +41,105 @@ class DocumentGenerator:
             if mime_type == "application/octet-stream":
                 content = getattr(doc, 'content', '')
                 mime_type = detectMimeTypeFromContent(content, doc.filename, self.service)
 
+            # Add result label to filename for document objects too
+            base_filename = doc.filename
+            if hasattr(action, 'execResultLabel') and action.execResultLabel:
+                result_label = action.execResultLabel.strip()
+                if result_label:
+                    # Check if filename already starts with resultLabel to avoid duplication
+                    if not base_filename.startswith(f"{result_label}-"):
+                        base_filename = f"{result_label}-{base_filename}"
+                        logger.info(f"Added resultLabel '{result_label}' as prefix to document object filename: {base_filename}")
+                    else:
+                        logger.info(f"Document object filename already has resultLabel prefix: {base_filename}")
+
             return {
-                'filename': doc.filename,
+                'filename': base_filename,
                 'fileSize': getattr(doc, 'fileSize', 0),
                 'mimeType': mime_type,
                 'content': getattr(doc, 'content', ''),
                 'document': doc
             }
         elif isinstance(doc, dict):
-            # Dictionary format document
-            filename = doc.get('documentName', doc.get('filename', \
-                f"{action.execMethod}_{action.execAction}_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}"))
-            fileSize = doc.get('fileSize', len(str(doc.get('documentData', ''))))
+            # Dictionary format document - handle both 'documentName' and 'filename' keys
+            base_filename = doc.get('documentName', doc.get('filename', ''))
+
+            # Debug logging for resultLabel
+            if hasattr(action, 'execResultLabel'):
+                logger.info(f"Action {action.execMethod}.{action.execAction} has execResultLabel: '{action.execResultLabel}' (type: {type(action.execResultLabel)})")
+            else:
+                logger.info(f"Action {action.execMethod}.{action.execAction} has NO execResultLabel attribute")
+
+            # If no filename provided, generate one with action info
+            if not base_filename:
+                base_filename = f"{action.execMethod}_{action.execAction}_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}"
+
+            # ALWAYS add result label to filename for better document selection
+            # This ensures consistent naming regardless of whether filename was provided or generated
+            if hasattr(action, 'execResultLabel') and action.execResultLabel:
+                result_label = action.execResultLabel.strip()
+                if result_label:
+                    # Check if filename already starts with resultLabel to avoid duplication
+                    if not base_filename.startswith(f"{result_label}-"):
+                        base_filename = f"{result_label}-{base_filename}"
+                        logger.info(f"Added resultLabel '{result_label}' as prefix to filename: {base_filename}")
+                    else:
+                        logger.info(f"Filename already has resultLabel prefix: {base_filename}")
+            else:
+                logger.info(f"No resultLabel available for action {action.execMethod}.{action.execAction}")
+
+            filename = base_filename
             mimeType = doc.get('mimeType', 'application/octet-stream')
+
+            # Handle documentData structure - it might be a dict with 'content' key or direct content
+            document_data = doc.get('documentData', '')
+            if isinstance(document_data, dict) and 'content' in document_data:
+                # This is the structure returned by extract action: documentData.content
+                content = document_data['content']
+                # Also check for other potential content fields
+                if not content and 'data' in document_data:
+                    content = document_data['data']
+            else:
+                # Direct content (fallback)
+                content = document_data
+
+            # Calculate file size from actual content
+            fileSize = len(str(content)) if content else 0
+
             # Detect mime type if not specified
             if mimeType == "application/octet-stream":
-                document_data = doc.get('documentData', '')
-                mimeType = detectMimeTypeFromContent(document_data, filename, self.service)
+                mimeType = detectMimeTypeFromContent(content, filename, self.service)
+
+            logger.info(f"Processed document: {filename}, content length: {len(str(content))}, mimeType: {mimeType}")
 
             return {
                 'filename': filename,
                 'fileSize': fileSize,
                 'mimeType': mimeType,
-                'content': doc.get('documentData', ''),
+                'content': content,
                 'document': doc
             }
         else:
             # Unknown document type
             logger.warning(f"Unknown document type for action {action.execMethod}.{action.execAction}: {type(doc)}")
-            filename = f"{action.execMethod}_{action.execAction}_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}"
+            base_filename = f"{action.execMethod}_{action.execAction}_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}"
+
+            # ALWAYS add result label to filename for better document selection
+            # This ensures consistent naming regardless of document type
+            if hasattr(action, 'execResultLabel') and action.execResultLabel:
+                result_label = action.execResultLabel.strip()
+                if result_label:
+                    # Check if filename already starts with resultLabel to avoid duplication
+                    if not base_filename.startswith(f"{result_label}-"):
+                        base_filename = f"{result_label}-{base_filename}"
+                        logger.info(f"Added resultLabel '{result_label}' as prefix to fallback filename: {base_filename}")
+                    else:
+                        logger.info(f"Fallback filename already has resultLabel prefix: {base_filename}")
+            else:
+                logger.info(f"No resultLabel available for action {action.execMethod}.{action.execAction}")
+
+            filename = base_filename
             mimeType = detectMimeTypeFromContent(doc, filename, self.service)
             return {
                 'filename': filename,
@@ -86,20 +158,32 @@ class DocumentGenerator:
         Returns a list of created document objects.
         """
         try:
             logger.info(f"Creating documents from action result for {action.execMethod}.{action.execAction}")
             logger.info(f"Action result data keys: {list(action_result.data.keys())}")
 
             processed_docs = self.processActionResultDocuments(action_result, action, workflow)
             logger.info(f"Processed {len(processed_docs)} documents")
 
             created_documents = []
-            for doc_data in processed_docs:
+            for i, doc_data in enumerate(processed_docs):
                 try:
                     document_name = doc_data['filename']
                     document_data = doc_data['content']
                     mime_type = doc_data['mimeType']
 
+                    logger.info(f"Creating document {i+1}: {document_name} (mime: {mime_type}, content length: {len(str(document_data))})")
+
                     # Convert document data to string content
                     content = convertDocumentDataToString(document_data, getFileExtension(document_name))
 
+                    # Skip empty or minimal content
+                    minimal_content_patterns = ['{}', '[]', 'null', '""', "''"]
+                    if not content or content.strip() == "" or content.strip() in minimal_content_patterns:
+                        logger.warning(f"Empty or minimal content for document {document_name}, skipping")
+                        continue
+
+                    logger.info(f"Document {document_name} has content: {len(content)} characters")
+
                     # Create file in system
                     file_id = self.service.createFile(
                         fileName=document_name,
@@ -110,54 +194,28 @@
                     if not file_id:
                         logger.error(f"Failed to create file for document {document_name}")
                         continue
-                    # Create document object
 
+                    logger.info(f"Created file with ID: {file_id}")
+
+                    # Create document object using existing file ID
                     document = self.service.createDocument(
                         fileName=document_name,
                         mimeType=mime_type,
                         content=content,
-                        base64encoded=False
+                        base64encoded=False,
+                        existing_file_id=file_id
                     )
                     if document:
                         created_documents.append(document)
-                        logger.info(f"Created document: {document_name} with file ID: {file_id} and MIME type: {mime_type}")
+                        logger.info(f"Successfully created ChatDocument: {document_name} (ID: {getattr(document, 'id', 'N/A')}, fileId: {getattr(document, 'fileId', 'N/A')})")
+                    else:
+                        logger.error(f"Failed to create ChatDocument object for {document_name}")
                 except Exception as e:
                     logger.error(f"Error creating document {doc_data.get('filename', 'unknown')}: {str(e)}")
                     continue
 
             logger.info(f"Successfully created {len(created_documents)} documents")
             return created_documents
         except Exception as e:
             logger.error(f"Error creating documents from action result: {str(e)}")
             return []
-
-    @staticmethod
-    def get_delivered_files_and_formats(documents):
-        delivered_files = []
-        delivered_formats = []
-        for doc in documents:
-            if hasattr(doc, 'filename'):
-                delivered_files.append(doc.filename)
-                file_extension = getFileExtension(doc.filename)
-                mime_type = getattr(doc, 'mimeType', 'application/octet-stream')
-                delivered_formats.append({
-                    'filename': doc.filename,
-                    'extension': file_extension,
-                    'mimeType': mime_type
-                })
-            elif isinstance(doc, dict) and 'filename' in doc:
-                delivered_files.append(doc['filename'])
-                file_extension = getFileExtension(doc['filename'])
-                mime_type = doc.get('mimeType', 'application/octet-stream')
-                delivered_formats.append({
-                    'filename': doc['filename'],
-                    'extension': file_extension,
-                    'mimeType': mime_type
-                })
-            else:
-                delivered_files.append(f"document_{len(delivered_files)}")
-                delivered_formats.append({
-                    'filename': f"document_{len(delivered_files)}",
-                    'extension': 'unknown',
-                    'mimeType': 'application/octet-stream'
-                })
-        return delivered_files, delivered_formats
@@ -1,42 +1,56 @@
 # executionState.py
 # Contains all execution state management logic extracted from managerChat.py
 
 import logging
 from typing import List
-from modules.interfaces.interfaceChatModel import TaskStep, ActionExecutionResult
+from datetime import datetime, UTC
+from modules.interfaces.interfaceChatModel import TaskStep, ActionResult
 
 logger = logging.getLogger(__name__)
 
 class TaskExecutionState:
-    """Manages state during task execution with retry logic"""
+    """Manages execution state for a task with retry logic"""
 
     def __init__(self, task_step: TaskStep):
         self.task_step = task_step
-        self.successful_actions: List[ActionExecutionResult] = []  # Preserved across retries
-        self.failed_actions: List[ActionExecutionResult] = []  # For analysis
+        self.successful_actions: List[ActionResult] = []  # Preserved across retries
+        self.failed_actions: List[ActionResult] = []  # For analysis
         self.current_action_index = 0
         self.retry_count = 0
         self.improvements = []
        self.partial_results = {}  # Store intermediate results
         self.max_retries = 3
 
-    def addSuccessfulAction(self, action_result: ActionExecutionResult):
+    def addSuccessfulAction(self, action_result: ActionResult):
         """Add a successful action to the state"""
         self.successful_actions.append(action_result)
         if action_result.data.get('resultLabel'):
             self.partial_results[action_result.data['resultLabel']] = action_result
+        self.current_action_index += 1
 
-    def addFailedAction(self, action_result: ActionExecutionResult):
+    def addFailedAction(self, action_result: ActionResult):
         """Add a failed action to the state for analysis"""
         self.failed_actions.append(action_result)
         self.current_action_index += 1
 
     def getAvailableResults(self) -> list:
-        return [result.data.get('resultLabel', '') for result in self.successful_actions if result.data.get('resultLabel')]
+        """Get available results from successful actions"""
+        results = []
+        for action in self.successful_actions:
+            if action.data and action.data.get('result'):
+                results.append(action.data['result'])
+        return results
 
     def shouldRetryTask(self) -> bool:
-        return len(self.successful_actions) > 0 and len(self.failed_actions) > 0
+        """Determine if task should be retried based on failure patterns"""
+        return len(self.failed_actions) > 0 and self.canRetry()
 
     def canRetry(self) -> bool:
         """Check if task can be retried"""
         return self.retry_count < self.max_retries
 
     def incrementRetryCount(self):
         """Increment retry count"""
         self.retry_count += 1
 
     def getFailurePatterns(self) -> list:
         """Analyze failure patterns from failed actions"""
         patterns = []
         for action in self.failed_actions:
             error = action.error.lower() if action.error else ''
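A minimal, hypothetical driver for the retry bookkeeping diffed above (not repository code; `execute_all_actions` is an assumed helper, and the method names follow the hunk):

```python
# Sketch: drive TaskExecutionState through attempts until success or retries run out.
state = TaskExecutionState(task_step)
while True:
    for result in execute_all_actions(task_step):  # assumed helper yielding ActionResult objects
        if result.success:
            state.addSuccessfulAction(result)
        else:
            state.addFailedAction(result)
    # shouldRetryTask() is true only when something failed and retries remain
    if not state.shouldRetryTask():
        break
    state.incrementRetryCount()
```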
@@ -1,232 +0,0 @@
-# handlingActions.py
-# Contains all action handling functions extracted from managerChat.py
-
-import logging
-import json
-import time
-from typing import Dict, Any, Optional, List, Union
-from datetime import datetime, UTC
-from modules.interfaces.interfaceChatModel import ReviewResult, ActionResult
-from .promptFactory import createResultReviewPrompt
-from modules.chat.documents.documentGeneration import DocumentGenerator
-
-logger = logging.getLogger(__name__)
-
-class HandlingActions:
-    def __init__(self, service, chatInterface):
-        self.service = service
-        self.chatInterface = chatInterface
-        self.documentGenerator = DocumentGenerator(service)
-
-    async def executeSingleAction(self, action, workflow):
-        """Execute a single action and return ActionResult with enhanced document processing"""
-        try:
-            enhanced_parameters = action.execParameters.copy()
-            if action.expectedDocumentFormats:
-                enhanced_parameters['expectedDocumentFormats'] = action.expectedDocumentFormats
-                logger.info(f"Action {action.execMethod}.{action.execAction} expects formats: {action.expectedDocumentFormats}")
-            result = await self.service.executeAction(
-                methodName=action.execMethod,
-                actionName=action.execAction,
-                parameters=enhanced_parameters
-            )
-            result_label = action.execResultLabel
-            if result.success:
-                action.setSuccess()
-                action.result = result.data.get("result", "")
-                action.execResultLabel = result_label
-                await self.createActionMessage(action, result, workflow, result_label)
-            else:
-                action.setError(result.error or "Action execution failed")
-            processed_documents = self.documentGenerator.processActionResultDocuments(result, action, workflow)
-            return ActionResult(
-                success=result.success,
-                data={
-                    "result": result.data.get("result", ""),
-                    "documents": processed_documents,
-                    "actionId": action.id,
-                    "actionMethod": action.execMethod,
-                    "actionName": action.execAction,
-                    "resultLabel": result_label
-                },
-                metadata={
-                    "actionId": action.id,
-                    "actionMethod": action.execMethod,
-                    "actionName": action.execAction,
-                    "resultLabel": result_label
-                },
-                validation=[],
-                error=result.error or ""
-            )
-        except Exception as e:
-            logger.error(f"Error executing single action: {str(e)}")
-            action.setError(str(e))
-            return ActionResult(
-                success=False,
-                data={
-                    "actionId": action.id,
-                    "actionMethod": action.execMethod,
-                    "actionName": action.execAction,
-                    "documents": []
-                },
-                metadata={
-                    "actionId": action.id,
-                    "actionMethod": action.execMethod,
-                    "actionName": action.execAction
-                },
-                validation=[],
-                error=str(e)
-            )
-
-    async def validateActionResult(self, action_result, action, context) -> dict:
-        try:
-            prompt = self._createGenericValidationPrompt(action_result, action, context)
-            response = await self.service.callAiTextAdvanced(prompt, "action_validation")
-            validation = self._parseValidationResponse(response)
-            validation['action_id'] = action.id
-            validation['action_method'] = action.execMethod
-            validation['action_name'] = action.execAction
-            validation['result_label'] = action.execResultLabel
-            return validation
-        except Exception as e:
-            logger.error(f"Error validating action result: {str(e)}")
-            return {
-                'status': 'success',
-                'reason': f'Validation failed: {str(e)}',
-                'confidence': 0.5,
-                'improvements': [],
-                'action_id': action.id,
-                'action_method': action.execMethod,
-                'action_name': action.execAction,
-                'result_label': action.execResultLabel
-            }
-
-    async def createActionMessage(self, action, result, workflow, result_label=None):
-        """Create and store a message for the action result in the workflow with enhanced document processing"""
-        try:
-            if result_label is None:
-                result_label = action.execResultLabel
-            message_data = {
-                "workflowId": workflow.id,
-                "role": "assistant",
-                "message": f"Executed action {action.execMethod}.{action.execAction}",
-                "status": "step",
-                "sequenceNr": len(workflow.messages) + 1,
-                "publishedAt": datetime.now(UTC).isoformat(),
-                "actionId": action.id,
-                "actionMethod": action.execMethod,
-                "actionName": action.execAction,
-                "documentsLabel": result_label,
-                "documents": []
-            }
-            # Use the local createDocumentsFromActionResult method
-            created_documents = self.documentGenerator.createDocumentsFromActionResult(result, action, workflow)
-            message_data["documents"] = created_documents
-            message = self.chatInterface.createWorkflowMessage(message_data)
-            if message:
-                workflow.messages.append(message)
-                logger.info(f"Created action message for {action.execMethod}.{action.execAction} with {len(created_documents)} documents")
-                logger.debug(f"WORKFLOW STATE after createActionMessage: id={id(workflow)}, message_count={len(workflow.messages)}")
-                for idx, msg in enumerate(workflow.messages):
-                    label = getattr(msg, 'documentsLabel', None)
-                    docs = getattr(msg, 'documents', None)
-                    logger.debug(f"  Message {idx}: label='{label}', documents_count={len(docs) if docs else 0}")
-            else:
-                logger.error(f"Failed to create workflow message for action {action.execMethod}.{action.execAction}")
-        except Exception as e:
-            logger.error(f"Error creating action message: {str(e)}")
-
-    def parseActionResponse(self, response: str) -> list:
-        try:
-            json_start = response.find('{')
-            json_end = response.rfind('}') + 1
-            if json_start == -1 or json_end == 0:
-                raise ValueError("No JSON found in response")
-            json_str = response[json_start:json_end]
-            action_data = json.loads(json_str)
-            if 'actions' not in action_data:
-                raise ValueError("Action response missing 'actions' field")
-            return action_data['actions']
-        except Exception as e:
-            logger.error(f"Error parsing action response: {str(e)}")
-            return []
-
-    def parseReviewResponse(self, response: str) -> dict:
-        try:
-            json_start = response.find('{')
-            json_end = response.rfind('}') + 1
-            if json_start == -1 or json_end == 0:
-                raise ValueError("No JSON found in response")
-            json_str = response[json_start:json_end]
-            review = json.loads(json_str)
-            if 'status' not in review:
-                raise ValueError("Review response missing 'status' field")
-            return review
-        except Exception as e:
-            logger.error(f"Error parsing review response: {str(e)}")
-            return {'status': 'failed', 'reason': f'Parse error: {str(e)}'}
-
-    # Internal helper methods
-
-    def _createGenericValidationPrompt(self, action_result, action, context) -> str:
-        success = action_result.success
-        result_data = action_result.data
-        error = action_result.error
-        validation_messages = action_result.validation
-        result_text = result_data.get("result", "") if isinstance(result_data, dict) else str(result_data)
-        documents = result_data.get("documents", []) if isinstance(result_data, dict) else []
-        doc_count = len(documents)
-        expected_result_label = action.execResultLabel
-        expected_format = action.execParameters.get('outputFormat', 'unknown')
-        expected_document_formats = action.expectedDocumentFormats or []
-        actual_result_label = result_data.get("resultLabel", "") if isinstance(result_data, dict) else ""
-        result_label_match = actual_result_label == expected_result_label
-        # Use DocumentGenerator for file/format extraction
-        delivered_files, delivered_formats = DocumentGenerator.get_delivered_files_and_formats(documents)
-        content_items = []
-        if isinstance(result_data, dict):
-            if 'extractedContent' in result_data:
-                extracted_content = result_data['extractedContent']
-                if hasattr(extracted_content, 'contents'):
-                    content_items = extracted_content.contents
-            elif 'contents' in result_data:
-                content_items = result_data['contents']
-        if delivered_files and not content_items:
-            content_items = [f"File content available in: {', '.join(delivered_files)}"]
-        content_summary = []
-        for item in content_items:
-            if hasattr(item, 'label') and hasattr(item, 'metadata'):
-                content_summary.append(f"{item.label}: {item.metadata.mimeType if hasattr(item.metadata, 'mimeType') else 'unknown'}")
-            elif isinstance(item, str):
-                content_summary.append(item)
-            else:
-                content_summary.append(str(item))
-        return f"""You are an action result validator. Your primary focus is to validate that the action delivered the promised result files in the promised format.\n\nACTION DETAILS:\n- Method: {action.execMethod}\n- Action: {action.execAction}\n- Expected Result Label: {expected_result_label}\n- Actual Result Label: {actual_result_label}\n- Result Label Match: {result_label_match}\n- Expected Format: {expected_format}\n- Expected Document Formats: {json.dumps(expected_document_formats, indent=2) if expected_document_formats else 'None specified'}\n- Parameters: {json.dumps(action.execParameters, indent=2)}\n\nRESULT TO VALIDATE:\n- Success: {success}\n- Result Data: {result_text[:500]}{'...' if len(result_text) > 500 else ''}\n- Error: {error}\n- Validation Messages: {', '.join(validation_messages) if validation_messages else 'None'}\n- Documents Produced: {doc_count}\n- Delivered Files: {', '.join(delivered_files) if delivered_files else 'None'}\n- Delivered Formats: {json.dumps(delivered_formats, indent=2) if delivered_formats else 'None'}\n- Content Items: {', '.join(content_summary) if content_summary else 'None'}\n\nCRITICAL VALIDATION CRITERIA:\n1. **Result Label Match**: Does the action result contain the expected result label?\n2. **File Delivery**: Did the action deliver the promised result file(s)?\n3. **Format Compliance**: If expected document formats were specified, do the delivered files match the expected formats?\n4. **Content Quality**: Is the content of the delivered files usable and complete?\n5. **Content Processing**: If content extraction was expected, was it performed correctly?\n\nCONTEXT:\n- Task Description: {context.task_step.description if context.task_step else 'Unknown'}\n- Previous Results: {', '.join(context.previous_results) if context.previous_results else 'None'}\n\nVALIDATION INSTRUCTIONS:\n1. **Result Label Check**: Verify that the expected result label \"{expected_result_label}\" is present in the action result data. This is the primary success criterion.\n2. **File Delivery**: Check if files were delivered when expected. The individual filenames don't need to match the result label - focus on whether content was actually produced.\n3. **Format Compliance**: If expected document formats were specified, check if delivered files match the expected extensions and MIME types. If no formats were specified, this criterion is satisfied.\n4. **Content Quality**: If files were delivered, consider the action successful. The presence of delivered files indicates content was processed and stored.\n5. **Content Processing**: If files were delivered, assume content extraction was performed correctly. The file delivery is evidence of successful processing.\n6. **Success Criteria**: The action is successful if the result label matches AND files were delivered.
-If expected formats were specified, they should also match.\n\nIMPORTANT NOTES:\n- The result label must be present in the action result data for success\n- Individual filenames can be different from the result label\n- If files were delivered, consider the action successful even if content details are not provided\n- Focus on whether the action accomplished its intended purpose (file delivery)\n- Empty files should be considered failures, but delivered files indicate success\n\nREQUIRED JSON RESPONSE:\n{{\n  \"status\": \"success|retry|fail\",\n  \"reason\": \"Detailed explanation focusing on result label match and content quality\",\n  \"confidence\": 0.0-1.0,\n  \"improvements\": [\"specific improvements if needed\"],\n  \"quality_score\": 1-10,\n  \"missing_elements\": [\"missing result label\", \"missing files\", \"content issues\"],\n  \"suggested_retry_approach\": \"Specific approach for retry if status is retry\"\n}}\n\nNOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
-
-    def _parseValidationResponse(self, response: str) -> dict:
-        try:
-            json_start = response.find('{')
-            json_end = response.rfind('}') + 1
-            if json_start == -1 or json_end == 0:
-                raise ValueError("No JSON found in validation response")
-            json_str = response[json_start:json_end]
-            validation = json.loads(json_str)
-            if 'status' not in validation:
-                raise ValueError("Validation response missing 'status' field")
-            validation.setdefault('confidence', 0.5)
-            validation.setdefault('improvements', [])
-            validation.setdefault('quality_score', 5)
-            validation.setdefault('missing_elements', [])
-            validation.setdefault('suggested_retry_approach', '')
-            return validation
-        except Exception as e:
-            logger.error(f"Error parsing validation response: {str(e)}")
-            return {
-                'status': 'success',
-                'reason': f'Parse error: {str(e)}',
-                'confidence': 0.5,
-                'improvements': [],
-                'quality_score': 5,
-                'missing_elements': [],
-                'suggested_retry_approach': ''
-            }
@@ -8,41 +8,116 @@ import time
 from typing import Dict, Any, Optional, List, Union
 from datetime import datetime, UTC
 from modules.interfaces.interfaceChatModel import (
-    TaskStatus, TaskStep, TaskContext, TaskAction, ActionExecutionResult, ReviewResult, TaskPlan, WorkflowResult, TaskResult, ReviewContext
+    TaskStatus, TaskStep, TaskContext, TaskAction, ReviewResult, TaskPlan, WorkflowResult, TaskResult, ReviewContext, ActionResult
 )
 from .executionState import TaskExecutionState
 from .handlingActions import HandlingActions
 from .promptFactory import createTaskPlanningPrompt, createActionDefinitionPrompt, createResultReviewPrompt
 from modules.chat.documents.documentGeneration import DocumentGenerator
 
 logger = logging.getLogger(__name__)
 
+class WorkflowStoppedException(Exception):
+    """Exception raised when a workflow is stopped by the user."""
+    pass
+
 class HandlingTasks:
     def __init__(self, chatInterface, service, workflow=None):
         self.chatInterface = chatInterface
         self.service = service
         self.workflow = workflow
         self.handlingActions = HandlingActions(service, chatInterface)
         self.documentGenerator = DocumentGenerator(service)
 
+    def _checkWorkflowStopped(self):
+        """
+        Check if workflow has been stopped by user and raise exception if so.
+        This function centralizes all workflow stop checking logic to avoid code duplication.
+        """
+        try:
+            # Get the current workflow status from the database to avoid stale data
+            current_workflow = self.chatInterface.getWorkflow(self.service.workflow.id)
+            if current_workflow and current_workflow.status == "stopped":
+                logger.info("Workflow stopped by user, aborting execution")
+                raise WorkflowStoppedException("Workflow was stopped by user")
+        except WorkflowStoppedException:
+            # Re-raise the WorkflowStoppedException immediately
+            raise
+        except Exception as e:
+            # If we can't get the current status due to other database issues, fall back to the in-memory object
+            logger.warning(f"Could not check current workflow status from database: {str(e)}")
+            if self.service.workflow.status == "stopped":
+                logger.info("Workflow stopped by user (from in-memory object), aborting execution")
+                raise WorkflowStoppedException("Workflow was stopped by user")
+
     async def generateTaskPlan(self, userInput: str, workflow) -> TaskPlan:
         """Generate a high-level task plan for the workflow."""
         try:
+            # Check workflow status before generating task plan
+            self._checkWorkflowStopped()
+
             logger.info(f"Generating task plan for workflow {workflow.id}")
+            available_docs = self.service.getAvailableDocuments(workflow)
+
+            # Check workflow status before calling AI service
+            self._checkWorkflowStopped()
+
             prompt = await self.service.callAiTextAdvanced(
-                createTaskPlanningPrompt(self, {
+                createTaskPlanningPrompt({
                     'user_request': userInput,
-                    'available_documents': self.service.getAvailableDocuments(workflow),
+                    'available_documents': available_docs,
                     'workflow_id': workflow.id
                 })
             )
-            task_plan_dict = self._parseTaskPlanResponse(prompt)
+            # Inline _parseTaskPlanResponse logic
+            try:
+                json_start = prompt.find('{')
+                json_end = prompt.rfind('}') + 1
+                if json_start == -1 or json_end == 0:
+                    raise ValueError("No JSON found in response")
+                json_str = prompt[json_start:json_end]
+                task_plan_dict = json.loads(json_str)
+                if 'tasks' not in task_plan_dict:
+                    raise ValueError("Task plan missing 'tasks' field")
+            except Exception as e:
+                logger.error(f"Error parsing task plan response: {str(e)}")
+                task_plan_dict = {'tasks': []}
+
             if not self._validateTaskPlan(task_plan_dict):
                 logger.error("Generated task plan failed validation")
                 logger.error(f"AI Response: {prompt}")
                 logger.error(f"Parsed Task Plan: {json.dumps(task_plan_dict, indent=2)}")
                 raise Exception("AI-generated task plan failed validation - AI is required for task planning")
-            tasks = [TaskStep(**task_dict) for task_dict in task_plan_dict.get('tasks', [])]
-            return TaskPlan(
+
+            tasks = []
+            for task_dict in task_plan_dict.get('tasks', []):
+                # Map old 'description' field to new 'objective' field
+                if 'description' in task_dict and 'objective' not in task_dict:
+                    task_dict['objective'] = task_dict.pop('description')
+                tasks.append(TaskStep(**task_dict))
+            task_plan = TaskPlan(
                 overview=task_plan_dict.get('overview', ''),
                 tasks=tasks
             )
+
+            logger.info(f"Task plan generated successfully with {len(tasks)} tasks")
+
+            # Log the generated tasks
+            for i, task in enumerate(tasks):
+                logger.info(f"  Task {i+1}: {task.objective}")
+                if hasattr(task, 'success_criteria') and task.success_criteria:
+                    logger.info(f"    Success criteria: {task.success_criteria}")
+
+            # Log the complete task plan
+            logger.info("=== GENERATED TASK PLAN ===")
+            logger.info(f"Overview: {task_plan.overview}")
+            logger.info(f"Total tasks: {len(tasks)}")
+
+            # Log the RAW AI-generated task plan JSON for debugging
+            logger.info("=== RAW AI TASK PLAN JSON ===")
+            logger.info(f"AI Response with task plan: {prompt}")
+            logger.info("=== END RAW AI TASK PLAN JSON ===")
+
+            return task_plan
         except Exception as e:
             logger.error(f"Error in generateTaskPlan: {str(e)}")
             raise
@@ -50,12 +125,20 @@ class HandlingTasks:
     async def generateTaskActions(self, task_step, workflow, previous_results=None, enhanced_context=None) -> List[TaskAction]:
         """Generate actions for a given task step."""
         try:
-            logger.info(f"Generating actions for task: {task_step.description}")
+            # Check workflow status before generating actions
+            self._checkWorkflowStopped()
+
+            logger.info(f"Generating actions for task: {task_step.objective}")
+
+            available_docs = self.service.getAvailableDocuments(workflow)
+            available_connections = self.service.getConnectionReferenceList()
 
             context = enhanced_context or TaskContext(
                 task_step=task_step,
                 workflow=workflow,
                 workflow_id=workflow.id,
-                available_documents=self.service.getAvailableDocuments(workflow),
+                available_documents=available_docs,
                 previous_results=previous_results or [],
                 improvements=[],
                 retry_count=0,
@@ -66,13 +149,30 @@ class HandlingTasks:
                 failed_actions=[],
                 successful_actions=[]
             )
+            # Check workflow status before calling AI service
+            self._checkWorkflowStopped()
+
             prompt = await self.service.callAiTextAdvanced(
-                createActionDefinitionPrompt(self, context)
+                await createActionDefinitionPrompt(context, self.service)
             )
-            actions = self.handlingActions.parseActionResponse(prompt)
+            # Inline parseActionResponse logic here
+            json_start = prompt.find('{')
+            json_end = prompt.rfind('}') + 1
+            if json_start == -1 or json_end == 0:
+                raise ValueError("No JSON found in response")
+            json_str = prompt[json_start:json_end]
+            try:
+                action_data = json.loads(json_str)
+            except Exception as e:
+                logger.error(f"Error parsing action response JSON: {str(e)}")
+                action_data = {}
+            if 'actions' not in action_data:
+                raise ValueError("Action response missing 'actions' field")
+            actions = action_data['actions']
             if not self._validateActions(actions, context):
                 logger.error("Generated actions failed validation")
                 raise Exception("AI-generated actions failed validation - AI is required for action generation")
 
             # Convert to TaskAction objects
             task_actions = [self.chatInterface.createTaskAction({
                 "execMethod": a.get('method', 'unknown'),
@@ -82,37 +182,171 @@ class HandlingTasks:
                 "expectedDocumentFormats": a.get('expectedDocumentFormats', None),
                 "status": TaskStatus.PENDING
             }) for a in actions]
-            return [ta for ta in task_actions if ta]
+
+            valid_actions = [ta for ta in task_actions if ta]
+            logger.info(f"Generated {len(valid_actions)} actions for task: {task_step.objective}")
+
+            # Log the generated actions
+            for i, action in enumerate(valid_actions):
+                logger.info(f"  Action {i+1}: {action.execMethod}.{action.execAction}")
+                if action.expectedDocumentFormats:
+                    logger.info(f"    Expected formats: {action.expectedDocumentFormats}")
+                if action.execParameters.get('documentList'):
+                    logger.info(f"    Input documents: {action.execParameters['documentList']}")
+
+            # Log the complete action plan
+            logger.info("=== GENERATED ACTION PLAN ===")
+            logger.info(f"Task: {task_step.objective}")
+            logger.info(f"Total actions: {len(valid_actions)}")
+
+            # Log the RAW AI-generated action plan JSON for debugging
+            logger.info("=== RAW AI ACTION PLAN JSON ===")
+            logger.info(f"AI Response with parsed actions: {prompt}")
+            logger.info("=== END RAW AI ACTION PLAN JSON ===")
+
+            return valid_actions
         except Exception as e:
             logger.error(f"Error in generateTaskActions: {str(e)}")
             return []
 
-    async def executeTask(self, task_step, workflow, context) -> TaskResult:
+    async def executeTask(self, task_step, workflow, context, task_index=None, total_tasks=None) -> TaskResult:
         """Execute all actions for a task step, with state management and retries."""
-        logger.info(f"Executing task: {task_step.description}")
+        logger.info(f"=== STARTING TASK {task_index or '?'}: {task_step.objective} ===")
+
+        # Create database log entry for task start in format expected by frontend
+        if task_index is not None:
+            if total_tasks is not None:
+                self.chatInterface.createWorkflowLog({
+                    "workflowId": workflow.id,
+                    "message": f"Executing task {task_index}/{total_tasks}",
+                    "type": "info"
+                })
+            else:
+                self.chatInterface.createWorkflowLog({
+                    "workflowId": workflow.id,
+                    "message": f"Executing task {task_index}/?",
+                    "type": "info"
+                })
+
+            # Create a task start message for the user
+            task_progress = f"{task_index}/{total_tasks}" if total_tasks is not None else str(task_index)
+            task_start_message = {
+                "workflowId": workflow.id,
+                "role": "assistant",
+                "message": f"🚀 Starting Task {task_progress}\n\nObjective: {task_step.objective}",
+                "status": "step",
+                "sequenceNr": len(workflow.messages) + 1,
+                "publishedAt": datetime.now(UTC).isoformat(),
+                "documentsLabel": f"task_{task_index}_start",
+                "documents": []
+            }
+
+            message = self.chatInterface.createWorkflowMessage(task_start_message)
+            if message:
+                workflow.messages.append(message)
+                logger.info(f"Task start message created for task {task_index}")
+
         state = TaskExecutionState(task_step)
         retry_context = context
         max_retries = state.max_retries
         for attempt in range(max_retries):
             logger.info(f"Task execution attempt {attempt+1}/{max_retries}")
+
+            # Check workflow status before starting task execution
+            self._checkWorkflowStopped()
+
             actions = await self.generateTaskActions(task_step, workflow, previous_results=retry_context.previous_results, enhanced_context=retry_context)
             if not actions:
                 logger.error("No actions defined for task step, aborting task execution")
                 break
+
+            # Log total actions count for this task
+            total_actions = len(actions)
+            logger.info(f"Task {task_index or '?'} has {total_actions} actions")
+
             action_results = []
-            for action in actions:
-                result = await self.handlingActions.executeSingleAction(action, workflow)
+            for action_idx, action in enumerate(actions):
+                # Check workflow status before each action execution
+                self._checkWorkflowStopped()
+
+                # Log action start in format expected by frontend
+                action_number = action_idx + 1
+                logger.info(f"Task {task_index} - Starting action {action_number}/{total_actions}")
+
+                # Create database log entry for action start
+                self.chatInterface.createWorkflowLog({
+                    "workflowId": workflow.id,
+                    "message": f"Task {task_index} - Starting action {action_number}/{total_actions}",
+                    "type": "info"
+                })
+
+                # Create an action start message for the user
+                action_start_message = {
+                    "workflowId": workflow.id,
+                    "role": "assistant",
+                    "message": f"⚡ Task {task_index} - Action {action_number}/{total_actions}\n\nMethod: {action.execMethod}.{action.execAction}",
+                    "status": "step",
+                    "sequenceNr": len(workflow.messages) + 1,
+                    "publishedAt": datetime.now(UTC).isoformat(),
+                    "documentsLabel": f"action_{action_number}_start",
+                    "documents": []
+                }
+
+                message = self.chatInterface.createWorkflowMessage(action_start_message)
+                if message:
+                    workflow.messages.append(message)
+                    logger.info(f"Action start message created for action {action_number}")
+
+                # Pass action index to executeSingleAction with task context
+                result = await self.executeSingleAction(action, workflow, task_step, task_index, action_number, total_actions)
                 action_results.append(result)
                 if result.success:
                     state.addSuccessfulAction(result)
                 else:
                     state.addFailedAction(result)
+
+            # Check workflow status before review
+            self._checkWorkflowStopped()
+
             review_result = await self.reviewTaskCompletion(task_step, actions, action_results, workflow)
             success = review_result.status == 'success'
             feedback = review_result.reason
             error = None if success else review_result.reason
             if success:
-                logger.info(f"Task step '{task_step.description}' completed successfully")
+                logger.info(f"=== TASK {task_index or '?'} COMPLETED SUCCESSFULLY: {task_step.objective} ===")
+
+                # Create database log entry for task completion
+                if total_tasks is not None:
+                    self.chatInterface.createWorkflowLog({
+                        "workflowId": workflow.id,
+                        "message": f"🎯 Task {task_index}/{total_tasks} completed",
+                        "type": "success"
+                    })
+                else:
+                    self.chatInterface.createWorkflowLog({
+                        "workflowId": workflow.id,
+                        "message": f"🎯 Task {task_index}/? completed",
+                        "type": "success"
+                    })
+
+                # Create a task completion message for the user
+                task_progress = f"{task_index}/{total_tasks}" if total_tasks is not None else str(task_index)
+                task_completion_message = {
+                    "workflowId": workflow.id,
+                    "role": "assistant",
+                    "message": f"🎯 Task {task_progress} Completed Successfully!\n\nObjective: {task_step.objective}\n\nFeedback: {feedback or 'Task completed successfully'}",
+                    "status": "step",
+                    "sequenceNr": len(workflow.messages) + 1,
+                    "publishedAt": datetime.now(UTC).isoformat(),
+                    "documentsLabel": f"task_{task_index}_completion",
+                    "documents": []
+                }
+
+                message = self.chatInterface.createWorkflowMessage(task_completion_message)
+                if message:
+                    workflow.messages.append(message)
+                    logger.info(f"Task completion message created for task {task_index}")
+
                 return TaskResult(
                     taskId=task_step.id,
                     status=TaskStatus.COMPLETED,
@@ -121,7 +355,7 @@ class HandlingTasks:
                     error=None
                 )
             elif review_result.status == 'retry' and state.canRetry():
-                logger.warning(f"Task step '{task_step.description}' requires retry: {review_result.improvements}")
+                logger.warning(f"Task step '{task_step.objective}' requires retry: {review_result.improvements}")
                 state.incrementRetryCount()
                 retry_context.retry_count = state.retry_count
                 retry_context.improvements = review_result.improvements
@@ -133,7 +367,46 @@ class HandlingTasks:
                 retry_context.successful_actions = state.successful_actions
                 continue
             else:
-                logger.error(f"Task step '{task_step.description}' failed after {attempt+1} attempts")
+                logger.error(f"=== TASK {task_index or '?'} FAILED: {task_step.objective} after {attempt+1} attempts ===")
+
+                # Create user-facing error message for task failure
+                error_message = f"❌ Task {task_index or '?'} - '{task_step.objective}' failed after {attempt+1} attempts\n\n"
+                error_message += f"Objective: {task_step.objective}\n\n"
+
+                # Add specific error details if available
+                if error:
+                    error_message += f"Error: {error}\n\n"
+
+                # Add retry information
+                error_message += f"Attempts: {attempt+1}\n"
+                error_message += f"Status: Will retry automatically\n\n"
+                error_message += "The system will attempt to retry this task. Please wait..."
+
+                # Create workflow message for user
+                message_data = {
+                    "workflowId": workflow.id,
+                    "role": "assistant",
+                    "message": error_message,
+                    "status": "step",
+                    "sequenceNr": len(workflow.messages) + 1,
+                    "publishedAt": datetime.now(UTC).isoformat(),
+                    "actionId": None,
+                    "actionMethod": "task",
+                    "actionName": "task_retry",
+                    "documentsLabel": None,
+                    "documents": []
+                }
+
+                try:
+                    message = self.chatInterface.createWorkflowMessage(message_data)
+                    if message:
+                        workflow.messages.append(message)
+                        logger.info(f"Created user-facing retry message for failed task: {task_step.objective}")
+                    else:
+                        logger.error(f"Failed to create user-facing retry message for failed task: {task_step.objective}")
+                except Exception as e:
+                    logger.error(f"Error creating user-facing retry message: {str(e)}")
+
                 return TaskResult(
                     taskId=task_step.id,
                     status=TaskStatus.FAILED,
@@ -141,7 +414,46 @@ class HandlingTasks:
                     feedback=feedback,
                     error=error
                 )
-        logger.error(f"Task step '{task_step.description}' failed after all retries")
+        logger.error(f"=== TASK {task_index or '?'} FAILED AFTER ALL RETRIES: {task_step.objective} ===")
+
+        # Create user-facing error message for task failure
+        error_message = f"❌ Task {task_index or '?'} - '{task_step.objective}' failed after all retries\n\n"
+        error_message += f"Objective: {task_step.objective}\n\n"
+
+        # Add specific error details if available
+        if error and error != "Task failed after all retries.":
+            error_message += f"Error: {error}\n\n"
+
+        # Add retry information
+        error_message += f"Retries attempted: {retry_context.retry_count if retry_context else 'Unknown'}\n"
+        error_message += f"Status: Task failed permanently\n\n"
+        error_message += "Please check the connection and try again, or contact support if the issue persists."
+
+        # Create workflow message for user
+        message_data = {
+            "workflowId": workflow.id,
+            "role": "assistant",
+            "message": error_message,
+            "status": "step",
+            "sequenceNr": len(workflow.messages) + 1,
+            "publishedAt": datetime.now(UTC).isoformat(),
+            "actionId": None,
+            "actionMethod": "task",
+            "actionName": "task_failure",
+            "documentsLabel": None,
+            "documents": []
+        }
+
+        try:
+            message = self.chatInterface.createWorkflowMessage(message_data)
+            if message:
+                workflow.messages.append(message)
+                logger.info(f"Created user-facing error message for failed task: {task_step.objective}")
+            else:
+                logger.error(f"Failed to create user-facing error message for failed task: {task_step.objective}")
+        except Exception as e:
+            logger.error(f"Error creating user-facing error message: {str(e)}")
+
         return TaskResult(
             taskId=task_step.id,
             status=TaskStatus.FAILED,
@@ -152,6 +464,9 @@ class HandlingTasks:
 
     async def reviewTaskCompletion(self, task_step, task_actions, action_results, workflow):
         try:
+            # Check workflow status before reviewing task completion
+            self._checkWorkflowStopped()
+
             review_context = ReviewContext(
                 task_step=task_step,
                 action_results=action_results,
@@ -163,23 +478,71 @@ class HandlingTasks:
                    'errors': [result.error for result in action_results if not result.success]
                }
            )
            # Check workflow status before calling AI service
            self._checkWorkflowStopped()

            # Use promptFactory for review prompt
            prompt = await createResultReviewPrompt(self, review_context)
            prompt = await createResultReviewPrompt(review_context)
            response = await self.service.callAiTextAdvanced(prompt)
            review_dict = self.handlingActions.parseReviewResponse(response)
            review_dict.setdefault('status', 'unknown')
            review_dict.setdefault('reason', 'No reason provided')
            review_dict.setdefault('quality_score', 5)
            return ReviewResult(
                status=review_dict.get('status', 'unknown'),
                reason=review_dict.get('reason', 'No reason provided'),
                improvements=review_dict.get('improvements', []),
                quality_score=review_dict.get('quality_score', 5),
                missing_outputs=review_dict.get('missing_outputs', []),
                met_criteria=review_dict.get('met_criteria', []),
                unmet_criteria=review_dict.get('unmet_criteria', []),
                confidence=review_dict.get('confidence', 0.5)
            # Inline parseReviewResponse logic here
            json_start = response.find('{')
            json_end = response.rfind('}') + 1
            if json_start == -1 or json_end == 0:
                raise ValueError("No JSON found in review response")
            json_str = response[json_start:json_end]
            try:
                review = json.loads(json_str)
            except Exception as e:
                logger.error(f"Error parsing review response JSON: {str(e)}")
                review = {}
            if 'status' not in review:
                raise ValueError("Review response missing 'status' field")
            review.setdefault('status', 'unknown')
            review.setdefault('reason', 'No reason provided')
            review.setdefault('quality_score', 5)

            # Ensure improvements is a list
            improvements = review.get('improvements', [])
            if isinstance(improvements, str):
                # Split string into list if it's a single improvement
                improvements = [improvements.strip()] if improvements.strip() else []
            elif not isinstance(improvements, list):
                improvements = []

            # Ensure all list fields are properly typed
            met_criteria = review.get('met_criteria', [])
            if not isinstance(met_criteria, list):
                met_criteria = []

            unmet_criteria = review.get('unmet_criteria', [])
            if not isinstance(unmet_criteria, list):
                unmet_criteria = []

            review_result = ReviewResult(
                status=review.get('status', 'unknown'),
                reason=review.get('reason', 'No reason provided'),
                improvements=improvements,
                quality_score=review.get('quality_score', 5),
                missing_outputs=[],
                met_criteria=met_criteria,
                unmet_criteria=unmet_criteria,
                confidence=review.get('confidence', 0.5)
            )

            # Enhanced validation logging
            logger.info(f"VALIDATION RESULT - Task: '{task_step.objective}' - Status: {review_result.status.upper()}, Quality: {review_result.quality_score}/10")
            if review_result.status == 'success':
                logger.info(f"VALIDATION SUCCESS - Task completed successfully")
                if review_result.met_criteria:
                    logger.info(f"Met criteria: {', '.join(review_result.met_criteria)}")
            elif review_result.status == 'retry':
                logger.warning(f"VALIDATION RETRY - Task requires retry: {review_result.improvements}")
                if review_result.unmet_criteria:
                    logger.warning(f"Unmet criteria: {', '.join(review_result.unmet_criteria)}")
            else:
                logger.error(f"VALIDATION FAILED - Task failed: {review_result.reason}")

            return review_result
        except Exception as e:
            logger.error(f"Error in reviewTaskCompletion: {str(e)}")
            return ReviewResult(
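
The inlined parsing above uses a find-first-brace / find-last-brace heuristic to pull JSON out of a free-form model reply. A minimal standalone sketch of that pattern (the helper name is illustrative, not part of the commit; note that, as in the commit, a parse failure empties the dict and then trips the `'status'` check):

```python
import json
import logging

logger = logging.getLogger(__name__)

def extract_review_json(response: str) -> dict:
    """Pull the first '{' ... last '}' span out of a model reply and parse it."""
    json_start = response.find('{')
    json_end = response.rfind('}') + 1
    if json_start == -1 or json_end == 0:
        raise ValueError("No JSON found in review response")
    try:
        review = json.loads(response[json_start:json_end])
    except json.JSONDecodeError as e:
        logger.error(f"Error parsing review response JSON: {e}")
        review = {}
    if 'status' not in review:
        # An unparseable reply ends up here too, since review == {}
        raise ValueError("Review response missing 'status' field")
    review.setdefault('reason', 'No reason provided')
    review.setdefault('quality_score', 5)
    return review
```
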
@@ -190,9 +553,22 @@ class HandlingTasks:

    async def prepareTaskHandover(self, task_step, task_actions, review_result, workflow):
        try:
            # Check workflow status before preparing task handover
            self._checkWorkflowStopped()

            # Log handover status summary
            if hasattr(review_result, 'status'):
                status = review_result.status
            if hasattr(review_result, 'met_criteria'):
                met = review_result.met_criteria
            else:
                met = []

            handover_data = {
                'task_id': task_step.id,
                'task_description': task_step.description,
                'task_description': task_step.objective,
                'actions': [action.to_dict() for action in task_actions],
                'review_result': review_result.to_dict() if hasattr(review_result, 'to_dict') else review_result,
                'workflow_id': workflow.id,
@@ -204,48 +580,291 @@ class HandlingTasks:
            logger.error(f"Error in prepareTaskHandover: {str(e)}")
            return {'error': str(e)}

    # --- Helper and validation methods (unchanged, but can be inlined or made private) ---

    def _parseTaskPlanResponse(self, response: str) -> dict:
    # --- Helper action handling methods ---

    async def executeSingleAction(self, action, workflow, task_step, task_index=None, action_index=None, total_actions=None):
        """Execute a single action and return ActionResult with enhanced document processing"""
        try:
            json_start = response.find('{')
            json_end = response.rfind('}') + 1
            if json_start == -1 or json_end == 0:
                raise ValueError("No JSON found in response")
            json_str = response[json_start:json_end]
            task_plan = json.loads(json_str)
            if 'tasks' not in task_plan:
                raise ValueError("Task plan missing 'tasks' field")
            return task_plan
            # Check workflow status before executing action
            self._checkWorkflowStopped()

            # Use passed indices or fallback to '?'
            task_num = task_index if task_index is not None else '?'
            action_num = action_index if action_index is not None else '?'

            logger.info(f"=== TASK {task_num} ACTION {action_num}: {action.execMethod}.{action.execAction} ===")

            # Log input parameters
            input_docs = action.execParameters.get('documentList', [])
            input_connections = action.execParameters.get('connections', [])
            logger.info(f"Input documents: {input_docs} (type: {type(input_docs)})")
            if input_connections:
                logger.info(f"Input connections: {input_connections}")

            # Log all action parameters for debugging
            logger.info(f"All action parameters: {action.execParameters}")

            enhanced_parameters = action.execParameters.copy()
            if action.expectedDocumentFormats:
                enhanced_parameters['expectedDocumentFormats'] = action.expectedDocumentFormats
                logger.info(f"Expected formats: {action.expectedDocumentFormats}")

            # Check workflow status before executing the action
            self._checkWorkflowStopped()

            result = await self.service.executeAction(
                methodName=action.execMethod,
                actionName=action.execAction,
                parameters=enhanced_parameters
            )
            result_label = action.execResultLabel

            # Process documents from the action result
            created_documents = []
            if result.success:
                created_documents = self.documentGenerator.createDocumentsFromActionResult(result, action, workflow)
                action.setSuccess()
                action.result = result.data.get("result", "")
                action.execResultLabel = result_label
                await self.createActionMessage(action, result, workflow, result_label, created_documents, task_step, task_index)

                # Log action results
                logger.info(f"✓ Action completed successfully")

                # Create database log entry for action completion
                if total_actions is not None:
                    self.chatInterface.createWorkflowLog({
                        "workflowId": workflow.id,
                        "message": f"✅ Task {task_num} - Action {action_num}/{total_actions} completed",
                        "type": "success"
                    })
                else:
                    self.chatInterface.createWorkflowLog({
                        "workflowId": workflow.id,
                        "message": f"✅ Task {task_num} - Action {action_num}/? completed",
                        "type": "success"
                    })

                if created_documents:
                    logger.info(f"Output documents ({len(created_documents)}):")
                    for i, doc in enumerate(created_documents):
                        if hasattr(doc, 'filename'):
                            logger.info(f"  {i+1}. {doc.filename}")
                        elif isinstance(doc, dict) and 'filename' in doc:
                            logger.info(f"  {i+1}. {doc['filename']}")
                        else:
                            logger.info(f"  {i+1}. {type(doc).__name__}")

                    # Log document details for debugging
                    logger.info("Document details:")
                    for i, doc in enumerate(created_documents):
                        if hasattr(doc, 'filename'):
                            logger.info(f"  Doc {i+1}: filename={doc.filename}, type={type(doc)}")
                            if hasattr(doc, 'id'):
                                logger.info(f"    ID: {doc.id}")
                            if hasattr(doc, 'fileId'):
                                logger.info(f"    File ID: {doc.fileId}")
                        elif isinstance(doc, dict):
                            logger.info(f"  Doc {i+1}: dict with keys: {list(doc.keys())}")
                else:
                    logger.info("Output: No documents created")
            else:
                action.setError(result.error or "Action execution failed")
                logger.error(f"✗ Action failed: {result.error}")

                # ⚠️ IMPORTANT: Create error message for failed actions so user can see what went wrong
                await self.createActionMessage(action, result, workflow, result_label, [], task_step, task_index)

                # Create database log entry for action failure
                if total_actions is not None:
                    self.chatInterface.createWorkflowLog({
                        "workflowId": workflow.id,
                        "message": f"❌ Task {task_num} - Action {action_num}/{total_actions} failed: {result.error}",
                        "type": "error"
                    })
                else:
                    self.chatInterface.createWorkflowLog({
                        "workflowId": workflow.id,
                        "message": f"❌ Task {task_num} - Action {action_num}/? failed: {result.error}",
                        "type": "error"
                    })

            # Extract document filenames for the ActionResult
            document_filenames = []
            for doc in created_documents:
                if hasattr(doc, 'filename'):
                    document_filenames.append(doc.filename)
                elif isinstance(doc, dict) and 'filename' in doc:
                    document_filenames.append(doc['filename'])

            # Also include the original documents from the service result for validation
            original_documents = result.data.get("documents", [])

            # Log action summary
            logger.info(f"=== TASK {task_num} ACTION {action_num} COMPLETED ===")

            return ActionResult(
                success=result.success,
                data={
                    "result": result.data.get("result", ""),
                    "documents": created_documents,  # Include actual document objects in data
                    "actionId": action.id,
                    "actionMethod": action.execMethod,
                    "actionName": action.execAction,
                    "resultLabel": result_label
                },
                documents=document_filenames,  # Keep as filenames for the documents field
                metadata={
                    "actionId": action.id,
                    "actionMethod": action.execMethod,
                    "actionName": action.execAction,
                    "resultLabel": result_label
                },
                validation={},
                error=result.error or ""
            )
        except Exception as e:
            logger.error(f"Error parsing task plan response: {str(e)}")
            return {'tasks': []}
            logger.error(f"Error executing single action: {str(e)}")
            action.setError(str(e))
            return ActionResult(
                success=False,
                data={
                    "actionId": action.id,
                    "actionMethod": action.execMethod,
                    "actionName": action.execAction,
                    "documents": []
                },
                metadata={
                    "actionId": action.id,
                    "actionMethod": action.execMethod,
                    "actionName": action.execAction
                },
                validation={},
                error=str(e)
            )

    async def createActionMessage(self, action, result, workflow, result_label=None, created_documents=None, task_step=None, task_index=None):
        """Create and store a message for the action result in the workflow with enhanced document processing"""
        try:
            # Check workflow status before creating action message
            self._checkWorkflowStopped()

            if result_label is None:
                result_label = action.execResultLabel

            # Use provided documents or process them if not provided
            if created_documents is None:
                created_documents = self.documentGenerator.createDocumentsFromActionResult(result, action, workflow)

            # Log delivered documents
            if created_documents:
                logger.info(f"Result label: {result_label} - {len(created_documents)} documents")
            else:
                logger.info(f"Result label: {result_label} - No documents")

            # Create a more meaningful message that includes task context
            task_objective = task_step.objective if task_step else 'Unknown task'

            # Build a user-friendly message based on success/failure
            if result.success:
                if created_documents and len(created_documents) > 0:
                    doc_names = [doc.filename if hasattr(doc, 'filename') else str(doc) for doc in created_documents[:3]]
                    if len(created_documents) > 3:
                        doc_names.append(f"... and {len(created_documents) - 3} more")

                    message_text = f"✅ Task {task_index or '?'} - Action {action.execMethod}.{action.execAction} completed\n\nObjective: {task_objective}\n\nGenerated {len(created_documents)} document(s): {', '.join(doc_names)}"
                else:
                    message_text = f"✅ Task {task_index or '?'} - Action {action.execMethod}.{action.execAction} completed\n\nObjective: {task_objective}\n\nAction executed successfully"
            else:
                # ⚠️ FAILURE MESSAGE - Show error details to user
                error_details = result.error if result.error else "Unknown error occurred"
                message_text = f"❌ Task {task_index or '?'} - Action {action.execMethod}.{action.execAction} failed\n\nObjective: {task_objective}\n\nError: {error_details}\n\nPlease check the connection and try again."

            message_data = {
                "workflowId": workflow.id,
                "role": "assistant",
                "message": message_text,
                "status": "step",
                "sequenceNr": len(workflow.messages) + 1,
                "publishedAt": datetime.now(UTC).isoformat(),
                "actionId": action.id,
                "actionMethod": action.execMethod,
                "actionName": action.execAction,
                "documentsLabel": result_label,
                "documents": created_documents
            }

            # Add debugging for error messages
            if not result.success:
                logger.info(f"Creating ERROR message: {message_text}")
                logger.info(f"Message data: {message_data}")

            message = self.chatInterface.createWorkflowMessage(message_data)
            if message:
                workflow.messages.append(message)
                logger.info(f"Message created: {action.execMethod}.{action.execAction}")
            else:
                logger.error(f"Failed to create workflow message for action {action.execMethod}.{action.execAction}")
        except Exception as e:
            logger.error(f"Error creating action message: {str(e)}")

    # --- Helper validation methods ---

    def _validateTaskPlan(self, task_plan: Dict[str, Any]) -> bool:
        try:
            if not isinstance(task_plan, dict):
                logger.error("Task plan is not a dictionary")
                return False

            if 'tasks' not in task_plan or not isinstance(task_plan['tasks'], list):
                logger.error(f"Task plan missing 'tasks' field or not a list. Found: {type(task_plan.get('tasks', 'MISSING'))}")
                return False

            # First pass: collect all task IDs to validate dependencies
            task_ids = set()
            for task in task_plan['tasks']:
                if not isinstance(task, dict):
                    logger.error(f"Task is not a dictionary: {type(task)}")
                    return False
                required_fields = ['id', 'description', 'expected_outputs', 'success_criteria']
                if not all(field in task for field in required_fields):
                    return False
                if task['id'] in task_ids:
                if 'id' not in task:
                    logger.error(f"Task missing 'id' field: {task}")
                    return False
                task_ids.add(task['id'])

            # Second pass: validate each task
            for i, task in enumerate(task_plan['tasks']):
                if not isinstance(task, dict):
                    logger.error(f"Task {i} is not a dictionary: {type(task)}")
                    return False

                required_fields = ['id', 'objective', 'success_criteria']
                missing_fields = [field for field in required_fields if field not in task]
                if missing_fields:
                    logger.error(f"Task {i} missing required fields: {missing_fields}")
                    return False

                # Check for duplicate IDs (shouldn't happen after first pass, but safety check)
                if task['id'] in task_ids and list(task_plan['tasks']).count(task['id']) > 1:
                    logger.error(f"Task {i} has duplicate ID: {task['id']}")
                    return False

                dependencies = task.get('dependencies', [])
                if not isinstance(dependencies, list):
                    logger.error(f"Task {i} dependencies is not a list: {type(dependencies)}")
                    return False

                for dep in dependencies:
                    if dep not in task_ids and dep != 'task_0':
                        logger.error(f"Task {i} has invalid dependency: {dep} (available: {list(task_ids) + ['task_0']})")
                        return False
                if 'ai_prompt' in task and not isinstance(task['ai_prompt'], str):
                    return False

            logger.info(f"Task plan validation successful with {len(task_ids)} tasks")
            return True

        except Exception as e:
            logger.error(f"Error validating task plan: {str(e)}")
            return False
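
A standalone sketch of the two-pass dependency check used in `_validateTaskPlan` above: first collect every task id, then verify each task's dependencies only reference known ids (or the implicit `task_0` root). The helper name and sample data are illustrative, not from the commit:

```python
def validate_dependencies(tasks: list[dict]) -> bool:
    # Pass 1: collect all known task ids.
    task_ids = {task['id'] for task in tasks if 'id' in task}
    # Pass 2: every dependency must be a known id or the implicit root.
    for task in tasks:
        for dep in task.get('dependencies', []):
            if dep not in task_ids and dep != 'task_0':
                return False
    return True

# task_2 depends on task_1, which exists, so this passes:
assert validate_dependencies([
    {'id': 'task_1', 'dependencies': ['task_0']},
    {'id': 'task_2', 'dependencies': ['task_1']},
])
```
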
@@ -7,7 +7,7 @@ from typing import Any, Dict

# Prompt creation helpers extracted from managerChat.py

def createTaskPlanningPrompt(self, context: Dict[str, Any]) -> str:
def createTaskPlanningPrompt(context: Dict[str, Any]) -> str:
    """Create prompt for task planning"""
    return f"""You are a task planning AI that analyzes user requests and creates structured task plans.
@@ -19,17 +19,16 @@ INSTRUCTIONS:
1. Analyze the user request and available documents
2. Break down the request into 2-4 meaningful high-level task steps
3. Focus on business outcomes, not technical operations
4. For document processing, create ONE task with a comprehensive AI prompt rather than multiple granular tasks
5. Each task should produce meaningful, usable outputs
6. Ensure proper handover between tasks using result labels
7. Return a JSON object with the exact structure shown below
4. Each task should produce meaningful, usable outputs
5. Ensure proper handover between tasks using result labels
6. Return a JSON object with the exact structure shown below

TASK PLANNING PRINCIPLES:
- Combine related operations into single tasks (e.g., \"Extract and analyze all candidate profiles\" instead of separate \"read file\" and \"analyze content\" tasks)
- Use comprehensive AI prompts for document processing rather than multiple small tasks
- Break down complex requests into logical, sequential steps
- Focus on business value and outcomes
- Keep tasks at a meaningful level of abstraction
- Each task should produce results that can be used by subsequent tasks
- Ensure clear dependencies and handovers between tasks

REQUIRED JSON STRUCTURE:
{{
@@ -37,31 +36,34 @@ REQUIRED JSON STRUCTURE:
  \"tasks\": [
    {{
      \"id\": \"task_1\",
      \"description\": \"Clear description of what this task accomplishes (business outcome)\",
      \"objective\": \"Clear business objective this task accomplishes\",
      \"dependencies\": [\"task_0\"],  // IDs of tasks that must complete first
      \"expected_outputs\": [\"output1\", \"output2\"],
      \"success_criteria\": [\"criteria1\", \"criteria2\"],
      \"required_documents\": [\"doc1\", \"doc2\"],
      \"estimated_complexity\": \"low|medium|high\",
      \"ai_prompt\": \"Comprehensive AI prompt for document processing tasks (if applicable)\"
      \"estimated_complexity\": \"low|medium|high\"
    }}
  ]
}}

EXAMPLES OF GOOD TASK DESCRIPTIONS:
- \"Extract and analyze all candidate profiles to identify key qualifications and experience\"
- \"Create evaluation matrix and rate candidates against product designer criteria\"
- \"Generate comprehensive PowerPoint presentation for management decision\"
- \"Store final presentation in SharePoint for specified account\"
EXAMPLES OF GOOD TASK OBJECTIVES:
- \"Extract key information from documents for email preparation\"
- \"Draft professional email incorporating analyzed information\"
- \"Send email using specified email account\"
- \"Store email draft and confirmation in system\"

EXAMPLES OF BAD TASK DESCRIPTIONS:
EXAMPLES OF GOOD SUCCESS CRITERIA:
- \"Document analysis completed with key points identified\"
- \"Email draft created with professional tone and clear structure\"
- \"Email successfully sent with delivery confirmation\"
- \"All outputs properly stored and accessible for future use\"

EXAMPLES OF BAD TASK OBJECTIVES:
- \"Open and read the PDF file\" (too granular)
- \"Identify table structure\" (technical detail)
- \"Convert data to CSV format\" (implementation detail)

NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""

async def createActionDefinitionPrompt(self, context) -> str:
async def createActionDefinitionPrompt(context, service) -> str:
    """Create prompt for action generation with enhanced document extraction guidance and retry context"""
    task_step = context.task_step
    workflow = context.workflow
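
For illustration, a model reply satisfying the task-plan structure in the hunk above might look like the following (the task content itself is invented for the example):

```python
# A hypothetical response matching the REQUIRED JSON STRUCTURE of the planning prompt.
example_plan = {
    "tasks": [
        {
            "id": "task_1",
            "objective": "Extract key information from documents for email preparation",
            "dependencies": ["task_0"],
            "expected_outputs": ["document_summary"],
            "success_criteria": ["Document analysis completed with key points identified"],
            "required_documents": ["candidate_profiles.pdf"],
            "estimated_complexity": "medium",
        }
    ]
}
```
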
@@ -71,23 +73,32 @@ async def createActionDefinitionPrompt(self, context) -> str:
    retry_count = context.retry_count or 0
    previous_action_results = context.previous_action_results or []
    previous_review_result = context.previous_review_result
    methodList = self.service.getMethodsList()
    methodList = service.getMethodsList()
    method_actions = {}
    for sig in methodList:
        if '.' in sig:
            method, rest = sig.split('.', 1)
            action = rest.split('(')[0]
            method_actions.setdefault(method, []).append((action, sig))
    messageSummary = await self.service.summarizeChat(workflow.messages)
    docRefs = self.service.getDocumentReferenceList()
    connRefs = self.service.getConnectionReferenceList()
    all_doc_refs = docRefs.get('chat', []) + docRefs.get('history', [])
    messageSummary = await service.summarizeChat(workflow.messages)
    # Get ALL documents from the entire workflow, not just current round
    docRefs = service.getDocumentReferenceList()
    connRefs = service.getConnectionReferenceList()

    # Get documents from current round (chat) and entire workflow history
    current_round_docs = docRefs.get('chat', [])
    workflow_history_docs = docRefs.get('history', [])

    # Combine all documents, prioritizing current round first, then workflow history
    all_doc_refs = current_round_docs + workflow_history_docs

    # Log document availability for debugging
    logging.debug(f"Document references - Current round: {len(current_round_docs)}, Workflow history: {len(workflow_history_docs)}, Total: {len(all_doc_refs)}")
    available_methods_str = ''
    for method, actions in method_actions.items():
        available_methods_str += f"- {method}:\n"
        for action, sig in actions:
            available_methods_str += f"  - {action}: {sig}\n"
    task_ai_prompt = task_step.ai_prompt or ''
    retry_context = ""
    if retry_count > 0:
        retry_context = f"""
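
The signature-grouping loop in the hunk above takes flat `"method.action(signature)"` entries and buckets them by method. A self-contained sketch of that pattern (the sample signatures are invented):

```python
method_list = [
    "mail.sendMessage(recipient, subject, body)",  # invented sample entries
    "mail.listFolders()",
    "document.generateReport(title)",
]
method_actions: dict[str, list[tuple[str, str]]] = {}
for sig in method_list:
    if '.' in sig:
        method, rest = sig.split('.', 1)      # split only on the first dot
        action = rest.split('(')[0]           # drop the parameter list
        method_actions.setdefault(method, []).append((action, sig))

assert sorted(method_actions) == ['document', 'mail']
assert method_actions['mail'][0][0] == 'sendMessage'
```
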
@@ -105,17 +116,36 @@ Previous review feedback:
- Status: {previous_review_result.status or 'unknown'}
- Reason: {previous_review_result.reason or 'No reason provided'}
- Quality Score: {previous_review_result.quality_score or 0}/10
- Missing Outputs: {', '.join(previous_review_result.missing_outputs or [])}
- Unmet Criteria: {', '.join(previous_review_result.unmet_criteria or [])}
"""
    expected_outputs_str = ', '.join(task_step.expected_outputs or [])
    success_criteria_str = ', '.join(task_step.success_criteria or [])
    previous_results_str = ', '.join(previous_results) if previous_results else 'None'
    improvements_str = str(improvements) if improvements else 'None'
    available_connections_str = '\n'.join(f"- {conn}" for conn in connRefs)
    available_documents_str = '\n'.join(
        f"- {doc.documentsLabel} contains {', '.join(doc.documents)}" for doc in all_doc_refs
    )
    # Build comprehensive document list showing both current round and workflow history
    if all_doc_refs:
        available_documents_str = "CURRENT ROUND DOCUMENTS:\n"
        if current_round_docs:
            for doc in current_round_docs:
                available_documents_str += f"- {doc.documentsLabel} contains {', '.join(doc.documents)}\n"
        else:
            available_documents_str += "- No documents in current round\n"

        available_documents_str += "\nWORKFLOW HISTORY DOCUMENTS:\n"
        if workflow_history_docs:
            for doc in workflow_history_docs:
                available_documents_str += f"- {doc.documentsLabel} contains {', '.join(doc.documents)}\n"
        else:
            available_documents_str += "- No documents in workflow history\n"
    else:
        available_documents_str = "NO DOCUMENTS AVAILABLE - This workflow has no documents to process."

    # Debug logging for document availability
    logging.debug(f"Available documents string length: {len(available_documents_str)}")
    logging.debug(f"Current round docs count: {len(current_round_docs)}")
    logging.debug(f"Workflow history docs count: {len(workflow_history_docs)}")
    logging.debug(f"Total doc refs: {len(all_doc_refs)}")

    prompt = f"""
You are an action generation AI that creates specific actions to accomplish a task step.
@@ -130,12 +160,11 @@ CRITICAL DOCUMENT REFERENCE RULES:
- NEVER invent new labels or use message IDs
- NEVER use formats like "msg_xxx:documents" or "task_X_results" (these will fail)
- ONLY use the exact labels shown in AVAILABLE DOCUMENTS
- **When generating multiple actions, you may only use as input documents those that are already present in AVAILABLE DOCUMENTS or produced by actions that come earlier in the list. Do NOT use as input any document label that will be produced by a later action.**
- When generating multiple actions, you may only use as input documents those that are already present in AVAILABLE DOCUMENTS or produced by actions that come earlier in the list. Do NOT use as input any document label that will be produced by a later action.
- If AVAILABLE DOCUMENTS shows "NO DOCUMENTS AVAILABLE", you CANNOT create document extraction actions. Instead, create actions that generate new content or inform the user that documents are needed.

TASK STEP: {task_step.description} (ID: {task_step.id})
EXPECTED OUTPUTS: {expected_outputs_str}
TASK STEP: {task_step.objective} (ID: {task_step.id})
SUCCESS CRITERIA: {success_criteria_str}
TASK AI PROMPT: {task_ai_prompt if task_ai_prompt else 'None provided'}

CONTEXT - Chat History:
{messageSummary}
@@ -180,7 +209,8 @@ ACTION GENERATION PRINCIPLES:
INSTRUCTIONS:
- Generate actions to accomplish this task step using available documents, connections, and previous results
- Use docItem for single documents and docList labels for groups of documents as shown in AVAILABLE DOCUMENTS
- Always pass documentList as a LIST of references (docItem and/or docList)
- If AVAILABLE DOCUMENTS shows "NO DOCUMENTS AVAILABLE", you cannot create document extraction actions. Instead, create actions that generate new content or inform the user that documents are needed.
- Always pass documentList as a LIST of references (docItem and/or docList) - this list CANNOT be empty for document extraction actions
- For resultLabel, use the format: "task{{task_id}}_action{{action_number}}_{{short_label}}" where:
  - {{task_id}} = the current task's id (e.g., 1)
  - {{action_number}} = the sequence number of the action within the task (e.g., 2)
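
Purely for illustration, composing a `resultLabel` in the documented `task{task_id}_action{action_number}_{short_label}` format:

```python
task_id = 1
action_number = 2
short_label = "analysis_results"
result_label = f"task{task_id}_action{action_number}_{short_label}"
assert result_label == "task1_action2_analysis_results"
```
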
@@ -202,8 +232,8 @@ REQUIRED JSON STRUCTURE:
      "resultLabel": "task1_action3_analysis_results",
      "expectedDocumentFormats": [  // OPTIONAL: Specify expected document formats when needed
        {{
          "extension": ".csv",
          "mimeType": "text/csv",
          "extension": ".txt",
          "mimeType": "text/plain",
          "description": "Structured data output"
        }}
      ],
@@ -314,19 +344,33 @@ EXAMPLES OF GOOD ACTIONS:
  ]
}}

NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
    logging.debug(f"[ACTION PLAN PROMPT] Available Documents Section:\n{available_documents_str}\nUser Connections Section:\n{available_connections_str}\nAvailable Methods (summarized):\n{', '.join(method_actions.keys())}")
6. When no documents are available (NO DOCUMENTS AVAILABLE scenario):
{{
  "method": "document",
  "action": "generateReport",
  "parameters": {{
    "documentList": [],
    "title": "Workflow Status Report"
  }},
  "resultLabel": "task1_action1_status_report",
  "description": "Generate a status report informing the user that no documents are available for processing and requesting document upload or alternative input."
}}

IMPORTANT NOTES:
- Respond with ONLY the JSON object. Do not include any explanatory text.
- Before creating any document extraction action, verify that AVAILABLE DOCUMENTS contains actual document references.
- If AVAILABLE DOCUMENTS shows "NO DOCUMENTS AVAILABLE", use example 6 above to create a status report action instead of document extraction."""
    logging.debug(f"[ACTION PLAN PROMPT] Available Documents Section:\n{available_documents_str}\nUser Connections Section:\n{available_connections_str}\nAvailable Methods (detailed):\n{available_methods_str}")
    return prompt

async def createResultReviewPrompt(self, review_context) -> str:
async def createResultReviewPrompt(review_context) -> str:
    """Create prompt for result review"""
    task_step = review_context.task_step
    step_result = review_context.step_result or {}
    step_result_serializable = {
        'task_step': {
            'id': task_step.id,
            'description': task_step.description,
            'expected_outputs': task_step.expected_outputs or [],
            'objective': task_step.objective,
            'success_criteria': task_step.success_criteria or []
        },
        'action_results': [],
@@ -337,7 +381,12 @@ async def createResultReviewPrompt(self, review_context) -> str:
    }
    for action_result in (review_context.action_results or []):
        documents_metadata = []
        for doc in (action_result.documents or []):

        # FIX: Look for documents in the correct place - action_result.data.documents contains actual document objects
        # action_result.documents only contains document references (strings)
        documents_to_check = action_result.data.get("documents", [])

        for doc in documents_to_check:
            if hasattr(doc, 'filename'):
                documents_metadata.append({
                    'filename': doc.filename,
@@ -350,6 +399,14 @@ async def createResultReviewPrompt(self, review_context) -> str:
                    'fileSize': doc.get('fileSize', 0),
                    'mimeType': doc.get('mimeType', 'unknown')
                })
            elif isinstance(doc, str):
                # Handle case where documents are just filenames
                documents_metadata.append({
                    'filename': doc,
                    'fileSize': 0,
                    'mimeType': 'unknown'
                })

        serializable_action_result = {
            'status': 'completed' if action_result.success else 'failed',
            'result_summary': action_result.data.get('result', '')[:200] + '...' if len(action_result.data.get('result', '')) > 200 else action_result.data.get('result', ''),
@@ -367,39 +424,60 @@ async def createResultReviewPrompt(self, review_context) -> str:
        }
        step_result_serializable['action_results'].append(serializable_action_result)
    step_result_json = json.dumps(step_result_serializable, indent=2, ensure_ascii=False)
    expected_outputs_str = ', '.join(task_step.expected_outputs or [])
    success_criteria_str = ', '.join(task_step.success_criteria or [])
    return f"""You are a result review AI that evaluates task step completion and decides on next actions.
    return f"""You are a result review AI that evaluates task step completion with BASIC validation.

TASK STEP: {task_step.description}
EXPECTED OUTPUTS: {expected_outputs_str}
TASK STEP: {task_step.objective}
SUCCESS CRITERIA: {success_criteria_str}

STEP RESULT: {step_result_json}

INSTRUCTIONS:
1. Evaluate if the task step was completed successfully
2. Check if all expected outputs were produced
3. Verify if success criteria were met
4. Decide on next action: continue, retry, or fail
5. If retry, provide specific improvements needed
BASIC VALIDATION RULES:
1. SUCCESS if: Action completed AND (documents were produced OR meaningful text output exists)
2. RETRY if: Action failed due to technical issues that can be fixed
3. FAILED if: Action completely failed with no recoverable output

IMPORTANT NOTES:
- Actions can produce either text results OR documents (or both)
- Empty result_summary is acceptable if documents were produced (documents_count > 0)
- Focus on whether the action achieved its intended purpose, not just text output
- Document-based actions (like file extractions) often have empty text results but successful document outputs
- Check the 'success_indicator' field: 'documents' means success via document output, 'text_result' means success via text, 'none' means no output
VALIDATION PRINCIPLES:
- Be GENEROUS with success - if the action achieved its basic purpose, mark as success
- Focus on FUNCTIONALITY, not perfection
- Document outputs are PRIMARY indicators of success
- Text outputs are SECONDARY indicators
- Only retry for CLEAR technical issues, not minor imperfections
- Don't be picky about formatting or minor details
- Check if ANY documents were produced (documents_count > 0)
- If documents were produced, consider it a SUCCESS

EXAMPLES OF SUCCESS:
- Document extraction produced a file (even if imperfect)
- Text analysis provided meaningful insights
- Data processing completed with results
- Any action that produced documents (documents_count > 0)

EXAMPLES OF RETRY:
- Technical errors (API failures, timeouts)
- Missing required inputs
- Clear implementation bugs

EXAMPLES OF FAILED:
- Complete system failures
- No output whatsoever
- Unrecoverable errors
- Actions with documents_count = 0 AND no meaningful text output

REQUIRED JSON STRUCTURE:
{{
  "status": "success|retry|failed",
  "reason": "Explanation of the decision",
  "improvements": "Specific improvements for retry (if status is retry)",
  "reason": "Brief explanation",
  "improvements": ["specific technical fixes only"],
  "quality_score": 1-10,
  "missing_outputs": ["output1", "output2"],
  "met_criteria": ["criteria1", "criteria2"],
  "unmet_criteria": ["criteria3", "criteria4"]
  "met_criteria": ["basic functionality achieved"],
  "unmet_criteria": []
}}

NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
VALIDATION LOGIC:
- If ANY action has documents_count > 0, mark as SUCCESS
- If ALL actions have documents_count = 0 AND no meaningful text output, mark as FAILED
- Only mark as RETRY for clear technical issues that can be fixed
- Focus on actual document production and functionality, not specific output names

NOTE: Respond with ONLY the JSON object. Be GENEROUS with success ratings."""
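
For illustration, a reply that satisfies the review structure above might look like this (values invented; since documents were produced, the generous rules call it a success):

```python
example_review = {
    "status": "success",
    "reason": "Action produced one output document (documents_count > 0)",
    "improvements": [],
    "quality_score": 8,
    "missing_outputs": [],
    "met_criteria": ["basic functionality achieved"],
    "unmet_criteria": [],
}
```
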
@@ -1,10 +1,10 @@
import logging
from typing import Dict, Any, List
from modules.interfaces.interfaceAppModel import User
from modules.interfaces.interfaceChatModel import ChatWorkflow, UserInputRequest, TaskStep, TaskAction, ActionExecutionResult, ReviewResult, TaskPlan, WorkflowResult, TaskContext
from modules.interfaces.interfaceChatModel import ChatWorkflow, UserInputRequest, TaskStep, TaskAction, ActionResult, ReviewResult, TaskPlan, WorkflowResult, TaskContext
from modules.chat.serviceCenter import ServiceCenter
from modules.interfaces.interfaceChatObjects import ChatObjects
from .handling.handlingTasks import HandlingTasks
from .handling.handlingTasks import HandlingTasks, WorkflowStoppedException

logger = logging.getLogger(__name__)
@@ -30,44 +30,72 @@ class ChatManager:
        """Unified Workflow Execution"""
        try:
            logger.info(f"Starting unified workflow execution for workflow {workflow.id}")
            logger.debug(f"User request: {userInput.prompt}")

            # Phase 1: High-Level Task Planning
            task_plan = await self.handlingTasks.planHighLevelTasks(userInput.userRequest, workflow)
            logger.info("Phase 1: Generating task plan")
            task_plan = await self.handlingTasks.generateTaskPlan(userInput.prompt, workflow)
            if not task_plan or not task_plan.tasks:
                raise Exception("No tasks generated in task plan.")
            workflow.taskPlan = task_plan
            # Phase 2-5: For each task, define actions, execute, review, and handover

            # Phase 2-5: For each task, execute and get results
            total_tasks = len(task_plan.tasks)
            logger.info(f"Phase 2: Executing {total_tasks} tasks")
            all_task_results = []
            previous_results = []
            for idx, task_step in enumerate(task_plan.tasks):
                logger.info(f"Processing task {idx+1}/{len(task_plan.tasks)}: {task_step.description}")
                # Define actions
                previous_results = self.handlingTasks.getPreviousResults(task_step) if hasattr(self.handlingTasks, 'getPreviousResults') else []
                actions = await self.handlingTasks.defineTaskActions(task_step, workflow, previous_results=previous_results)
                if not actions:
                    logger.warning(f"No actions defined for task {task_step.id}, skipping.")
                    continue
                # Execute actions
                action_results = await self.handlingTasks.executeTaskActions(actions, workflow)
                # Review completion
                review_result = await self.handlingTasks.reviewTaskCompletion(task_step, actions, action_results, workflow)
                # Pass task index to executeTask method
                current_task_index = idx + 1

                logger.info(f"Task {idx+1}/{total_tasks}: {task_step.objective}")

                # Create task context for this task
                task_context = TaskContext(
                    task_step=task_step,
                    workflow=workflow,
                    workflow_id=workflow.id,
                    available_documents=self.service.getAvailableDocuments(workflow),
                    previous_results=previous_results
                )
                # Execute task (this handles action generation, execution, and review internally)
                task_result = await self.handlingTasks.executeTask(task_step, workflow, task_context, current_task_index, total_tasks)
                # Handover
                handover_data = await self.handlingTasks.prepareTaskHandover(task_step, actions, review_result, workflow)
                handover_data = await self.handlingTasks.prepareTaskHandover(task_step, [], task_result, workflow)
                # Collect results
                all_task_results.append({
                    'task_step': task_step,
                    'actions': actions,
                    'action_results': action_results,
                    'review_result': review_result,
                    'task_result': task_result,
                    'handover_data': handover_data
                })
                # Update previous results for next task
                if task_result.success and task_result.feedback:
                    previous_results.append(task_result.feedback)

            # Final workflow result
            workflow_result = WorkflowResult(
                status="completed",
                task_results=all_task_results,
                workflow=workflow
                completed_tasks=len(all_task_results),
                total_tasks=len(task_plan.tasks),
                execution_time=0.0,  # TODO: Calculate actual execution time
                final_results_count=len(all_task_results)
            )
            logger.info(f"Unified workflow execution completed for workflow {workflow.id}")
            logger.info(f"Unified workflow execution completed successfully for workflow {workflow.id}")
            return workflow_result
        except WorkflowStoppedException:
            logger.info(f"Workflow {workflow.id} was stopped by user")
            return WorkflowResult(
                status="stopped",
                completed_tasks=0,
                total_tasks=0,
                execution_time=0.0,
                final_results_count=0
            )
        except Exception as e:
            logger.error(f"Error in executeUnifiedWorkflow: {str(e)}")
            from modules.interfaces.interfaceChatModel import WorkflowResult
            return WorkflowResult(status="failed", task_results=[], workflow=workflow)
            return WorkflowResult(
                status="failed",
                completed_tasks=0,
                total_tasks=0,
                execution_time=0.0,
                final_results_count=0
            )
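
The `TODO` about execution time could be addressed with a monotonic timer around the task loop. A minimal sketch, assuming `WorkflowResult.execution_time` expects seconds as a float:

```python
import time

start = time.monotonic()
# ... run the per-task loop here ...
elapsed = time.monotonic() - start

# elapsed could then replace the hard-coded 0.0:
# WorkflowResult(..., execution_time=elapsed, ...)
```
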
@@ -264,10 +264,12 @@ class MethodBase:
            success=success,
            data=data,
            metadata=metadata or {},
            validation=[],
            validation={},
            error=error
        )

    def _addValidationMessage(self, result: ActionResult, message: str) -> None:
        """Add a validation message to the result"""
        result.validation.append(message)
        if 'messages' not in result.validation:
            result.validation['messages'] = []
        result.validation['messages'].append(message)
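
The switch from a list to a dict means validation messages now live under a `'messages'` key, leaving room for other validation fields alongside them. A minimal illustration of the same accumulation pattern (the helper name is invented; `setdefault` collapses the guarded-assignment idiom into one call):

```python
def add_validation_message(validation: dict, message: str) -> None:
    # Equivalent to the if-not-in / append sequence in _addValidationMessage above.
    validation.setdefault('messages', []).append(message)

validation: dict = {}
add_validation_message(validation, "documentList is empty")
assert validation == {'messages': ['documentList is empty']}
```
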
@@ -310,54 +310,43 @@ class ServiceCenter:
        chat_exchanges = []
        history_exchanges = []

        # Process messages in reverse order to find current chat round
        # Process messages in reverse order; "first" marks boundary: include up to and including
        # the first "first" message in the chat container, older messages in the history container
        in_current_round = True
        for message in reversed(self.workflow.messages):
            # Get document references from message
            is_first = getattr(message, "status", None) == "first"

            # Build a DocumentExchange if message has documents
            doc_exchange = None
            if message.documents:
                # For messages with action context, create DocumentExchange with docList reference
                if message.actionId and message.documentsLabel:
                    doc_ref = self.getDocumentReferenceFromMessage(message)
                    if doc_ref:
                        # Create DocumentExchange with single docList reference
                        doc_exchange = DocumentExchange(
                            documentsLabel=message.documentsLabel,
                            documents=[doc_ref]
                        )

                        # Add to appropriate list based on message status
                        if message.status == "first":
                            chat_exchanges.append(doc_exchange)
                            break  # Stop after finding first message
                        elif message.status == "step":
                            chat_exchanges.append(doc_exchange)
                        else:
                            history_exchanges.append(doc_exchange)
                # For regular messages, create DocumentExchange with individual docItem references
                else:
                    doc_refs = []
                    for doc in message.documents:
                        doc_ref = self.getDocumentReferenceFromChatDocument(doc)
                        doc_refs.append(doc_ref)

                    if doc_refs:
                        # Create DocumentExchange with individual document references
                        doc_exchange = DocumentExchange(
                            documentsLabel=f"{message.id}:documents",
                            documents=doc_refs
                        )

                        # Add to appropriate list based on message status
                        if message.status == "first":
                            chat_exchanges.append(doc_exchange)
                            break  # Stop after finding first message
                        elif message.status == "step":
                            chat_exchanges.append(doc_exchange)
                        else:
                            history_exchanges.append(doc_exchange)

            # Stop processing if we hit a first message
            if message.status == "first":
                break
            # Append to appropriate container based on boundary
            if doc_exchange:
                if in_current_round:
                    chat_exchanges.append(doc_exchange)
                else:
                    history_exchanges.append(doc_exchange)

            # Flip boundary after including the "first" message in chat
            if in_current_round and is_first:
                in_current_round = False

        # Sort both lists by datetime in descending order
        chat_exchanges.sort(key=lambda x: x.documentsLabel, reverse=True)
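
A standalone sketch of the round-boundary pattern introduced above: walking messages newest-first, everything up to and including the first `status == "first"` message belongs to the current round, and older messages go to history. The sample messages are invented:

```python
messages = [  # oldest to newest
    {"id": 1, "status": "first"},
    {"id": 2, "status": "step"},
    {"id": 3, "status": "first"},   # start of the current round
    {"id": 4, "status": "step"},
]
current_round, history = [], []
in_current_round = True
for msg in reversed(messages):
    (current_round if in_current_round else history).append(msg["id"])
    if in_current_round and msg["status"] == "first":
        in_current_round = False   # boundary flips after including the "first" message

assert current_round == [4, 3] and history == [2, 1]
```
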
@@ -393,31 +382,7 @@ class ServiceCenter:
        try:
            # ADDED LOGGING: Print workflow id, message count, and all message labels and document counts
            import logging
            logger = logging.getLogger(__name__)
            logger.debug(f"WORKFLOW STATE at getChatDocumentsFromDocumentList: id={id(self.workflow)}, message_count={len(self.workflow.messages) if hasattr(self.workflow, 'messages') else 'N/A'}")
            for idx, message in enumerate(getattr(self.workflow, 'messages', [])):
                label = getattr(message, 'documentsLabel', None)
                docs = getattr(message, 'documents', None)
                logger.debug(f"  Message {idx}: label='{label}', documents_count={len(docs) if docs else 0}")
            # DEBUG LOGGING: Print all document labels and their documents before extraction
            import logging
            logger = logging.getLogger(__name__)
            logger.info("==== DEBUG: Listing all workflow message document labels and contained documents ====")
            for message in self.workflow.messages:
                label = getattr(message, 'documentsLabel', None)
                docs = getattr(message, 'documents', None)
                if label is not None:
                    doc_names = []
                    if docs:
                        for doc in docs:
                            if hasattr(doc, 'filename'):
                                doc_names.append(doc.filename)
                            elif isinstance(doc, dict) and 'filename' in doc:
                                doc_names.append(doc['filename'])
                            else:
                                doc_names.append(str(doc))
                    logger.info(f"Message label: '{label}' | Documents: {doc_names if doc_names else 'None'}")
            logger.info("==== END DEBUG LIST ====")

            all_documents = []
            for doc_ref in documentList:
                # Parse reference format
@@ -434,12 +399,12 @@ class ServiceCenter:
                        found = True
                        break
                if not found:
                    logger.warning(f"No documents found for label: {label}")
                    logger.debug(f"No documents found for label: {label}")
                    continue

                # Handle structured reference format
                if len(parts) < 3:
                    logger.warning(f"Invalid document reference format: {doc_ref}")
                    logger.debug(f"Invalid document reference format: {doc_ref}")
                    continue

                ref_type = parts[0]
@@ -476,24 +441,53 @@ class ServiceCenter:
            return []

    def getConnectionReferenceList(self) -> List[str]:
        """Get list of all UserConnection objects as references"""
        """Get list of all UserConnection objects as references with enhanced state information"""
        connections = []
        # Get user connections through AppObjects interface
        user_connections = self.interfaceApp.getUserConnections(self.user.id)
        for conn in user_connections:
            connections.append(self.getConnectionReferenceFromUserConnection(conn))
            # Get enhanced connection reference with state information
            enhanced_ref = self.getConnectionReferenceFromUserConnection(conn)
            connections.append(enhanced_ref)
        # Sort by connection reference
        return sorted(connections)

    def getConnectionReferenceFromUserConnection(self, connection: UserConnection) -> str:
        """Get connection reference from UserConnection"""
        return f"connection:{connection.authority}:{connection.externalUsername}:{connection.id}"
        """Get connection reference from UserConnection with enhanced state information"""
        # Get token information to check if it's expired
        token = None
        token_status = "unknown"
        try:
            token = self.interfaceApp.getToken(connection.authority.value)
            if token:
                if hasattr(token, 'expiresAt') and token.expiresAt:
                    import time
                    current_time = time.time()
                    if current_time > token.expiresAt:
                        token_status = "expired"
                    else:
                        token_status = "valid"
                else:
                    token_status = "no_expiration"
            else:
                token_status = "no_token"
        except Exception as e:
            token_status = f"error: {str(e)}"

        # Build enhanced reference with state information
        base_ref = f"connection:{connection.authority.value}:{connection.externalUsername}:{connection.id}"
        state_info = f" [status:{connection.status.value}, token:{token_status}]"

        return base_ref + state_info

    def getUserConnectionFromConnectionReference(self, connectionReference: str) -> Optional[UserConnection]:
        """Get UserConnection from reference string"""
        """Get UserConnection from reference string (handles both old and enhanced formats)"""
        try:
            # Parse reference format: connection:{authority}:{username}:{id}
            parts = connectionReference.split(':')
            # Parse reference format: connection:{authority}:{username}:{id} [status:..., token:...]
            # Remove state information if present
            base_reference = connectionReference.split(' [')[0]

            parts = base_reference.split(':')
            if len(parts) != 4 or parts[0] != "connection":
                return None
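
A round-trip sketch of the enhanced reference format above: the optional state suffix is stripped before the colon-delimited fields are parsed. The sample values are invented:

```python
ref = "connection:google:alice@example.com:42 [status:active, token:valid]"

base_reference = ref.split(' [')[0]          # strip optional state suffix
parts = base_reference.split(':')
assert parts[0] == "connection" and len(parts) == 4
authority, username, conn_id = parts[1], parts[2], parts[3]
assert (authority, username, conn_id) == ("google", "alice@example.com", "42")
```
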
@@ -700,16 +694,16 @@ Please provide a clear summary of this message."""
    async def extractContentFromDocument(self, prompt: str, document: ChatDocument) -> ExtractedContent:
        """Extract content from ChatDocument using prompt"""
        try:
            # Extract file data from ChatDocument
            if document.data:
                fileData = document.data.encode('utf-8') if isinstance(document.data, str) else document.data
            else:
                # Try to get file data from service center if document has fileId
                if hasattr(document, 'fileId') and document.fileId:
                    fileData = self.getFileData(document.fileId)
                else:
                    logger.error(f"No file data available in document: {document}")
                    raise ValueError("No file data available in document")
            # ChatDocument is just a reference, so we need to get file data using fileId
            if not hasattr(document, 'fileId') or not document.fileId:
                logger.error(f"Document {document.id} has no fileId")
                raise ValueError("Document has no fileId")

            # Get file data from service center using document's fileId
            fileData = self.getFileData(document.fileId)
            if not fileData:
                logger.error(f"No file data found for fileId: {document.fileId}")
                raise ValueError("No file data found for document")

            # Get filename and mime type from document
            filename = document.filename if hasattr(document, 'filename') else "document"
@@ -739,11 +733,11 @@ Please provide a clear summary of this message."""
        """Extract content from file data directly using prompt"""
        try:
            return await self.documentProcessor.processFileData(
                prompt=prompt,
                fileData=fileData,
                filename=filename,
                mimeType=mimeType,
                base64Encoded=base64Encoded,
                prompt=prompt,
                documentId=documentId
            )
        except Exception as e:
@@ -771,15 +765,19 @@ Please provide a clear summary of this message."""

        return file_item.id

    def createDocument(self, fileName: str, mimeType: str, content: str, base64encoded: bool = True) -> ChatDocument:
    def createDocument(self, fileName: str, mimeType: str, content: str, base64encoded: bool = True, existing_file_id: str = None) -> ChatDocument:
        """Create document from file data object created by AI call"""
        # First create the file and get its ID
        file_id = self.createFile(fileName, mimeType, content, base64encoded)
        # Use existing file ID if provided, otherwise create new file
        if existing_file_id:
            file_id = existing_file_id
        else:
            # First create the file and get its ID
            file_id = self.createFile(fileName, mimeType, content, base64encoded)

        # Get file info for metadata
        file_info = self.interfaceComponent.getFile(file_id)

        # Create document with file reference
        # Create document with file reference (ChatDocument is just a reference, not a data container)
        return ChatDocument(
            id=str(uuid.uuid4()),
            fileId=file_id,
@@ -807,8 +805,7 @@ Please provide a clear summary of this message."""
                bytesReceived=bytesReceived
            )

            # Log the stats event
            logger.debug(f"Workflow stats updated - Event: {eventLabel}, Sent: {bytesSent}, Received: {bytesReceived}, Tokens: {tokenCount}")

        except Exception as e:
            logger.error(f"Error updating workflow stats: {str(e)}")
@@ -383,6 +383,22 @@ class DatabaseConnector:
        self._tablesCache = {}
        self._tableMetadataCache = {}

    def clearTableCache(self, table: str) -> None:
        """Clears cache for a specific table to ensure fresh data."""
        if table in self._tablesCache:
            del self._tablesCache[table]
            logger.debug(f"Cleared cache for table: {table}")

        if table in self._tableMetadataCache:
            del self._tableMetadataCache[table]
            logger.debug(f"Cleared metadata cache for table: {table}")

    def clearAllCache(self) -> None:
        """Clears all cache to ensure completely fresh data."""
        self._tablesCache.clear()
        self._tableMetadataCache.clear()
        logger.debug("Cleared all database cache")

    # Public API

    def getTables(self) -> List[str]:
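
A self-contained sketch of the write-then-invalidate pattern these helpers enable (the class below is a stand-in, not the real DatabaseConnector; `dict.pop(key, None)` is an idiomatic alternative to the guarded `del` used in the commit):

```python
class TinyCache:
    def __init__(self) -> None:
        self._tablesCache: dict[str, list] = {}

    def clearTableCache(self, table: str) -> None:
        # Same effect as the "if table in cache: del cache[table]" guard above.
        self._tablesCache.pop(table, None)

cache = TinyCache()
cache._tablesCache["connections"] = [{"id": "c1"}]
cache.clearTableCache("connections")   # invalidate right after a write
assert "connections" not in cache._tablesCache
```
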
@@ -62,33 +62,42 @@ class AiCalls:
        Advanced text processing using Anthropic.
        Fallback to OpenAI if Anthropic is overloaded or rate-limited.
        """
        messages = []
        if context:
            messages.append({
                "role": "system",
                "content": context
            })
        messages.append({
            "role": "user",
            "content": prompt
        })
        if hasattr(self, 'userLanguage') and self.userLanguage:
            ltext = f"Please respond in '{self.userLanguage}' language."
            if messages and messages[0]["role"] == "system":
                if "language" not in messages[0]["content"].lower():
                    messages[0]["content"] = f"{ltext} {messages[0]['content']}"
            else:
                messages.insert(0, {
                    "role": "system",
                    "content": ltext
                })
        # For Anthropic, we need to handle system content differently
        # Anthropic expects system content in a top-level parameter, not as a message role
        try:
            response = await self.anthropicService.callAiBasic(messages)
            # Create messages without system role for Anthropic
            anthropic_messages = []
            if hasattr(self, 'userLanguage') and self.userLanguage:
                ltext = f"Please respond in '{self.userLanguage}' language."
                if context:
                    # Combine context and language instruction
                    full_context = f"{ltext}\n\n{context}"
                else:
                    full_context = ltext
            else:
                full_context = context

            # Add user message
            anthropic_messages.append({
                "role": "user",
                "content": prompt
            })

            # Call Anthropic - let the connector handle system content conversion
            if full_context:
                # Send context as part of the user message for Anthropic
                enhanced_prompt = f"Context:\n{full_context}\n\nUser Request:\n{prompt}"
                response = await self.anthropicService.callAiBasic([
                    {"role": "user", "content": enhanced_prompt}
                ])
            else:
                response = await self.anthropicService.callAiBasic(anthropic_messages)

            return response["choices"][0]["message"]["content"]
        except Exception as e:
            err_str = str(e)
            logger.warning(f"[UI NOTICE] Advanced AI failed, falling back to Basic AI (OpenAI). Reason: {err_str}")
            # Optionally, you could surface this message to the UI via a return value or error object
            # Fallback to OpenAI basic
            return await self.callAiTextBasic(prompt, context)

    async def callAiImageBasic(self, prompt: str, imageData: Union[str, bytes], mimeType: str = None) -> str:
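Note: the comment above refers to Anthropic's top-level system parameter. A minimal sketch with the public anthropic SDK (model id and prompt text are illustrative):

    import anthropic

    client = anthropic.Anthropic()  # reads ANTHROPIC_API_KEY from the environment
    response = client.messages.create(
        model="claude-3-5-sonnet-20241022",               # illustrative model id
        max_tokens=1024,
        system="You are a concise technical assistant.",  # system text is a top-level parameter
        messages=[{"role": "user", "content": "Summarize the attached notes."}],
    )
    print(response.content[0].text)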
@@ -235,6 +235,9 @@ class AppAccess:
                "lastActivity": datetime.now()
            })

            # Clear cache to ensure fresh data
            self.db.clearTableCache("sessions")

            return True

        except Exception as e:

@@ -194,7 +194,11 @@ class AppObjects:
            Boolean indicating permission
        """
        return self.access.canModify(table, recordId)

    def _clearTableCache(self, table: str) -> None:
        """Clears the cache for a specific table to ensure fresh data."""
        self.db.clearTableCache(table)

    def getInitialId(self, table: str) -> Optional[str]:
        """Returns the initial ID for a table."""
        return self.db.getInitialId(table)

@@ -352,6 +356,9 @@ class AppObjects:
            # Save to connections table
            self.db.recordCreate("connections", connection.to_dict())

            # Clear cache to ensure fresh data
            self._clearTableCache("connections")

            return connection

        except Exception as e:
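Note: this write-then-invalidate pairing repeats across the CRUD methods below; a hypothetical helper (a sketch, not part of this commit) could keep the two steps together:

    def _createAndInvalidate(self, table: str, record: Dict[str, Any]) -> Dict[str, Any]:
        """Hypothetical: persist a record, then drop the table's cache in one place."""
        created = self.db.recordCreate(table, record)
        self._clearTableCache(table)  # stale cached reads would otherwise miss the new row
        return created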
@@ -372,6 +379,9 @@ class AppObjects:
            # Delete connection
            self.db.recordDelete("connections", connectionId)

            # Clear cache to ensure fresh data
            self._clearTableCache("connections")

        except Exception as e:
            logger.error(f"Error removing user connection: {str(e)}")
            raise ValueError(f"Failed to remove user connection: {str(e)}")

@@ -379,8 +389,7 @@ class AppObjects:
    def authenticateLocalUser(self, username: str, password: str) -> Optional[User]:
        """Authenticates a user by username and password using local authentication."""
        # Clear the users table from cache and reload it
        if "users" in self.db._tablesCache:
            del self.db._tablesCache["users"]
        self._clearTableCache("users")

        # Get user by username
        user = self.getUserByUsername(username)

@@ -445,6 +454,9 @@ class AppObjects:
            if not createdRecord or not createdRecord.get("id"):
                raise ValueError("Failed to create user record")

            # Clear cache to ensure fresh data
            self._clearTableCache("users")

            # Add external connection if provided
            if externalId and externalUsername:
                self.addUserConnection(

@@ -460,11 +472,8 @@ class AppObjects:
            if not createdUser or len(createdUser) == 0:
                raise ValueError("Failed to retrieve created user")

            # Clear both table and metadata caches
            if hasattr(self.db, '_tablesCache') and "users" in self.db._tablesCache:
                del self.db._tablesCache["users"]
            if hasattr(self.db, '_tableMetadataCache') and "users" in self.db._tableMetadataCache:
                del self.db._tableMetadataCache["users"]
            # Clear cache to ensure fresh data (already done above)
            # No need for additional cache clearing since _clearTableCache("users") was called

            return User.from_dict(createdUser[0])

@@ -491,6 +500,9 @@ class AppObjects:
            # Update user record
            self.db.recordModify("users", userId, updatedUser.to_dict())

            # Clear cache to ensure fresh data
            self._clearTableCache("users")

            # Get updated user
            updatedUser = self.getUser(userId)
            if not updatedUser:

@@ -562,11 +574,8 @@ class AppObjects:
            if not success:
                raise ValueError(f"Failed to delete user {userId}")

            # Clear both table and metadata caches
            if hasattr(self.db, '_tablesCache') and "users" in self.db._tablesCache:
                del self.db._tablesCache["users"]
            if hasattr(self.db, '_tableMetadataCache') and "users" in self.db._tableMetadataCache:
                del self.db._tableMetadataCache["users"]
            # Clear cache to ensure fresh data
            self._clearTableCache("users")

            logger.info(f"User {userId} successfully deleted")
            return True

@@ -611,6 +620,9 @@ class AppObjects:
            if not createdRecord or not createdRecord.get("id"):
                raise ValueError("Failed to create mandate record")

            # Clear cache to ensure fresh data
            self._clearTableCache("mandates")

            return Mandate.from_dict(createdRecord)

    def updateMandate(self, mandateId: str, updateData: Dict[str, Any]) -> Mandate:

@@ -637,6 +649,9 @@ class AppObjects:
            # Update mandate record
            self.db.recordModify("mandates", mandateId, updatedMandate.to_dict())

            # Clear cache to ensure fresh data
            self._clearTableCache("mandates")

            # Get updated mandate
            updatedMandate = self.getMandate(mandateId)
            if not updatedMandate:

@@ -665,7 +680,12 @@ class AppObjects:
                raise ValueError(f"Cannot delete mandate {mandateId} with existing users")

            # Delete mandate
            return self.db.recordDelete("mandates", mandateId)
            success = self.db.recordDelete("mandates", mandateId)

            # Clear cache to ensure fresh data
            self._clearTableCache("mandates")

            return success

        except Exception as e:
            logger.error(f"Error deleting mandate: {str(e)}")

@@ -747,14 +767,17 @@ class AppObjects:
            # Save to database
            self.db.recordCreate("tokens", token_dict)

            # Clear cache to ensure fresh data
            self._clearTableCache("tokens")

            logger.debug(f"Token saved for user {self.currentUser.id} with authority {token.authority}")

        except Exception as e:
            logger.error(f"Error saving token: {str(e)}")
            raise

    def getToken(self, authority: AuthAuthority) -> Optional[Token]:
        """Get the latest token for the current user and authority"""
    def getToken(self, authority: str) -> Optional[Token]:
        """Get the latest valid token for the current user and authority"""
        try:
            # Get tokens for this user and authority
            tokens = self.db.getRecordset("tokens", recordFilter={

@@ -767,13 +790,20 @@ class AppObjects:

            # Sort by creation date and get the latest
            tokens.sort(key=lambda x: x.get("createdAt", ""), reverse=True)
            return Token(**tokens[0])
            latest_token = Token(**tokens[0])

            # Check if token is expired
            if latest_token.expiresAt and latest_token.expiresAt < datetime.now().timestamp():
                logger.warning(f"Token for {authority} is expired (expiresAt: {latest_token.expiresAt})")
                return None  # Don't return expired tokens

            return latest_token

        except Exception as e:
            logger.error(f"Error getting token: {str(e)}")
            return None

    def deleteToken(self, authority: AuthAuthority) -> None:
    def deleteToken(self, authority: str) -> None:
        """Delete all tokens for the current user and authority"""
        try:
            # Get tokens to delete
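Note on the getToken change above: with the expiry check, a None return now means "missing or expired". A hedged caller-side sketch (the refresh hook and attribute names are illustrative, not from this commit):

    token = appObjects.getToken("google")
    if token is None:
        token = refreshGoogleToken(appObjects)  # illustrative re-auth/refresh hook
    headers = {"Authorization": f"Bearer {token.accessToken}"}  # attribute name illustrative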
@@ -785,7 +815,10 @@ class AppObjects:
            # Delete each token
            for token in tokens:
                self.db.recordDelete("tokens", token["id"])

            # Clear cache to ensure fresh data
            self._clearTableCache("tokens")

        except Exception as e:
            logger.error(f"Error deleting token: {str(e)}")
            raise

@@ -13,12 +13,73 @@ from modules.shared.attributeUtils import register_model_labels, ModelMixin
# ===== Method Models =====

class ActionResult(BaseModel, ModelMixin):
    """Model for action results from a methods action"""
    """Unified model for action results with workflow state management"""
    # Core result fields
    success: bool = Field(description="Whether the method execution was successful")
    data: Dict[str, Any] = Field(description="Result data")
    metadata: Dict[str, Any] = Field(default_factory=dict, description="Additional metadata")
    validation: List[str] = Field(default_factory=list, description="Validation messages")
    error: Optional[str] = Field(None, description="Error message if any")

    # Action identification
    actionId: Optional[str] = Field(None, description="ID of the action that produced this result")
    actionMethod: Optional[str] = Field(None, description="Method of the action that produced this result")
    actionName: Optional[str] = Field(None, description="Name of the action that produced this result")

    # Document handling
    documents: List[str] = Field(default_factory=list, description="List of document references")
    resultLabel: Optional[str] = Field(None, description="Label for the result")

    # Validation and workflow state
    validation: Dict[str, Any] = Field(default_factory=dict, description="Validation information")
    is_retry: bool = Field(default=False, description="Whether this is a retry attempt")
    previous_error: Optional[str] = Field(None, description="Previous error message for retries")
    applied_improvements: List[str] = Field(default_factory=list, description="Improvements applied for retry")

    @classmethod
    def success(cls, documents: List[str] = None, resultLabel: str = None, data: Dict[str, Any] = None,
                actionId: str = None, actionMethod: str = None, actionName: str = None) -> 'ActionResult':
        """Create a successful action result"""
        return cls(
            success=True,
            data=data or {},
            documents=documents or [],
            resultLabel=resultLabel,
            actionId=actionId,
            actionMethod=actionMethod,
            actionName=actionName
        )

    @classmethod
    def failure(cls, error: str, data: Dict[str, Any] = None,
                actionId: str = None, actionMethod: str = None, actionName: str = None) -> 'ActionResult':
        """Create a failed action result"""
        return cls(
            success=False,
            data=data or {},
            error=error,
            actionId=actionId,
            actionMethod=actionMethod,
            actionName=actionName
        )

    @classmethod
    def retry(cls, previous_result: 'ActionResult', improvements: List[str] = None) -> 'ActionResult':
        """Create a retry action result based on a previous result"""
        return cls(
            success=previous_result.success,
            data=previous_result.data,
            metadata=previous_result.metadata,
            validation=previous_result.validation,
            error=previous_result.error,
            documents=previous_result.documents,
            resultLabel=previous_result.resultLabel,
            actionId=previous_result.actionId,
            actionMethod=previous_result.actionMethod,
            actionName=previous_result.actionName,
            is_retry=True,
            previous_error=previous_result.error,
            applied_improvements=improvements or []
        )

# Register labels for ActionResult
register_model_labels(

@@ -29,7 +90,15 @@ register_model_labels(
        "data": {"en": "Data", "fr": "Données"},
        "metadata": {"en": "Metadata", "fr": "Métadonnées"},
        "validation": {"en": "Validation", "fr": "Validation"},
        "error": {"en": "Error", "fr": "Erreur"}
        "error": {"en": "Error", "fr": "Erreur"},
        "documents": {"en": "Documents", "fr": "Documents"},
        "resultLabel": {"en": "Result Label", "fr": "Étiquette du résultat"},
        "actionId": {"en": "Action ID", "fr": "ID de l'action"},
        "actionMethod": {"en": "Action Method", "fr": "Méthode de l'action"},
        "actionName": {"en": "Action Name", "fr": "Nom de l'action"},
        "is_retry": {"en": "Is Retry", "fr": "Est une nouvelle tentative"},
        "previous_error": {"en": "Previous Error", "fr": "Erreur précédente"},
        "applied_improvements": {"en": "Applied Improvements", "fr": "Améliorations appliquées"}
    }
)
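Note: a short sketch of the factory methods defined above (field values illustrative):

    result = ActionResult.success(
        documents=["doc-123"],
        resultLabel="Extracted tables",
        data={"rows": 42},
        actionId="a-1",
        actionMethod="document.extract",
        actionName="Extract tables",
    )

    failed = ActionResult.failure(error="Timeout calling AI service", actionId="a-1")

    second_try = ActionResult.retry(failed, improvements=["Reduced prompt length"])
    assert second_try.is_retry and second_try.previous_error == "Timeout calling AI service"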
@@ -187,6 +256,7 @@ class TaskAction(BaseModel, ModelMixin):
    execResultLabel: Optional[str] = Field(None, description="Label for the set of result documents")
    # NEW: Optional document format specification
    expectedDocumentFormats: Optional[List[Dict[str, str]]] = Field(None, description="Expected document formats (optional)")

    status: TaskStatus = Field(default=TaskStatus.PENDING, description="Action status")
    error: Optional[str] = Field(None, description="Error message if action failed")
    retryCount: int = Field(default=0, description="Number of retries attempted")
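Note: the expected shape of an expectedDocumentFormats entry, inferred from how the method modules read it later in this commit (description text illustrative):

    expectedDocumentFormats = [
        {"extension": ".csv", "mimeType": "text/csv", "description": "Flat table of extracted line items"},
        {"extension": ".json", "mimeType": "application/json", "description": "Structured summary keyed by section"},
    ]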
@@ -461,13 +531,11 @@ register_model_labels(

class TaskStep(BaseModel, ModelMixin):
    id: str
    description: str
    objective: str
    dependencies: Optional[list[str]] = []
    expected_outputs: Optional[list[str]] = []
    success_criteria: Optional[list[str]] = []
    required_documents: Optional[list[str]] = []
    estimated_complexity: Optional[str] = None
    ai_prompt: Optional[str] = None


class TaskContext(BaseModel, ModelMixin):
    task_step: TaskStep

@@ -484,20 +552,6 @@ class TaskContext(BaseModel, ModelMixin):
    failed_actions: Optional[list] = []
    successful_actions: Optional[list] = []

class ActionExecutionResult(BaseModel, ModelMixin):
    success: bool
    data: dict
    metadata: dict = {}
    error: Optional[str] = None
    actionId: Optional[str] = None
    actionMethod: Optional[str] = None
    actionName: Optional[str] = None
    documents: Optional[list] = []
    validation: Optional[dict] = {}
    is_retry: Optional[bool] = False
    previous_error: Optional[str] = None
    applied_improvements: Optional[list[str]] = []

class ReviewContext(BaseModel, ModelMixin):
    task_step: TaskStep
    task_actions: Optional[list] = []

@@ -121,6 +121,10 @@ class ChatObjects:
        """Delegate to access control module."""
        return self.access.canModify(table, recordId)

    def _clearTableCache(self, table: str) -> None:
        """Clears the cache for a specific table to ensure fresh data."""
        self.db.clearTableCache(table)

    # Utilities

    def getInitialId(self, table: str) -> Optional[str]:

@@ -196,6 +200,9 @@ class ChatObjects:
            # Create workflow in database
            created = self.db.recordCreate("workflows", workflowData)

            # Clear cache to ensure fresh data
            self._clearTableCache("workflows")

            # Convert to ChatWorkflow model
            return ChatWorkflow(
                id=created["id"],

@@ -226,6 +233,9 @@ class ChatObjects:
            # Update workflow in database
            updated = self.db.recordModify("workflows", workflowId, workflowData)

            # Clear cache to ensure fresh data
            self._clearTableCache("workflows")

            # Convert to ChatWorkflow model
            return ChatWorkflow(
                id=updated["id"],

@@ -256,7 +266,12 @@ class ChatObjects:
                raise PermissionError(f"No permission to delete workflow {workflowId}")

            # Delete workflow
            return self.db.recordDelete("workflows", workflowId)
            success = self.db.recordDelete("workflows", workflowId)

            # Clear cache to ensure fresh data
            self._clearTableCache("workflows")

            return success

    # Workflow Messages

@@ -328,6 +343,9 @@ class ChatObjects:
            # Create message in database
            createdMessage = self.db.recordCreate("workflowMessages", messageData)

            # Clear cache to ensure fresh data
            self._clearTableCache("workflowMessages")

            # Convert to ChatMessage model
            return ChatMessage(
                id=createdMessage["id"],

@@ -411,6 +429,9 @@ class ChatObjects:
            updatedMessage = self.db.recordModify("workflowMessages", messageId, messageData)
            if updatedMessage:
                logger.debug(f"Message {messageId} updated successfully")

                # Clear cache to ensure fresh data
                self._clearTableCache("workflowMessages")
            else:
                logger.warning(f"Failed to update message {messageId}")

@@ -440,7 +461,12 @@ class ChatObjects:
                return False

            # Delete the message from the database
            return self.db.recordDelete("workflowMessages", messageId)
            success = self.db.recordDelete("workflowMessages", messageId)

            # Clear cache to ensure fresh data
            self._clearTableCache("workflowMessages")

            return success
        except Exception as e:
            logger.error(f"Error deleting message {messageId}: {str(e)}")
            return False

@@ -653,8 +679,8 @@ class ChatObjects:
            # Create stats record in database
            self.db.recordCreate("stats", stats_record)

            logger.debug(f"Updated workflow {workflowId} stats: {currentStats}")
            logger.debug(f"Logged stats record: {stats_record}")
            # logger.debug(f"Updated workflow {workflowId} stats: {currentStats}")
            # logger.debug(f"Logged stats record: {stats_record}")
            return True

        except Exception as e:

@@ -709,6 +735,9 @@ class ChatObjects:
            # Create log in database
            createdLog = self.db.recordCreate("workflowLogs", log_model.to_dict())

            # Clear cache to ensure fresh data
            self._clearTableCache("workflowLogs")

            # Return validated ChatLog instance
            return ChatLog(**createdLog)

@@ -826,29 +855,34 @@ class ChatObjects:

            # Load messages
            messages = self.getWorkflowMessages(workflowId)
            # Sort by sequence number
            messages.sort(key=lambda x: x.get("sequenceNo", 0))
            # Messages are already sorted by publishedAt in getWorkflowMessages

            messageCount = len(messages)
            logger.debug(f"Loaded {messageCount} messages for workflow {workflowId}")

            # Log document counts for each message
            for msg in messages:
                docCount = len(msg.get("documents", []))
                docCount = len(msg.documents) if hasattr(msg, 'documents') else 0
                if docCount > 0:
                    logger.debug(f"Message {msg.get('id')} has {docCount} documents loaded from database")
                    logger.debug(f"Message {msg.id} has {docCount} documents loaded from database")

            # Load logs
            logs = self.getWorkflowLogs(workflowId)
            # Sort by timestamp (Unix timestamps)
            logs.sort(key=lambda x: float(x.get("timestamp", 0)))
            # Logs are already sorted by timestamp in getWorkflowLogs

            # Assemble complete workflow object
            completeWorkflow = workflow.copy()
            completeWorkflow["messages"] = messages
            completeWorkflow["logs"] = logs

            return completeWorkflow
            # Create a new ChatWorkflow object with loaded messages and logs
            return ChatWorkflow(
                id=workflow.id,
                status=workflow.status,
                name=workflow.name,
                currentRound=workflow.currentRound,
                lastActivity=workflow.lastActivity,
                startedAt=workflow.startedAt,
                logs=logs,
                messages=messages,
                stats=workflow.stats,
                mandateId=workflow.mandateId
            )
        except Exception as e:
            logger.error(f"Error loading workflow state: {str(e)}")
            return None
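Note: a hedged sketch of consuming the model-based return value introduced above (the workflow id is illustrative):

    workflow = chatObjects.loadWorkflowState("wf-42")
    if workflow is not None:
        print(f"{workflow.name}: {len(workflow.messages)} messages, {len(workflow.logs)} log entries")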
@@ -871,11 +905,34 @@ class ChatObjects:
        currentTime = self._getCurrentTimestamp()

        if workflowId:
            # Continue existing workflow
            workflow = self.getWorkflow(workflowId)
            # Continue existing workflow - load complete state including messages
            workflow = self.loadWorkflowState(workflowId)
            if not workflow:
                raise ValueError(f"Workflow {workflowId} not found")

            # Check if workflow is currently running and stop it first
            if workflow.status == "running":
                logger.info(f"Stopping running workflow {workflowId} before processing new prompt")

                # Stop the running workflow
                workflow.status = "stopped"
                workflow.lastActivity = currentTime
                self.updateWorkflow(workflowId, {
                    "status": "stopped",
                    "lastActivity": currentTime
                })

                # Add log entry for workflow stop
                self.createWorkflowLog({
                    "workflowId": workflowId,
                    "message": "Workflow stopped for new prompt",
                    "type": "info",
                    "status": "stopped",
                    "progress": 100
                })

                # Wait a moment for any running processes to detect the stop
                await asyncio.sleep(0.1)

            # Update workflow - set status back to running for resumed workflows
            self.updateWorkflow(workflowId, {

@@ -1050,6 +1107,9 @@ class ChatObjects:
            # Create task in database
            createdTask = self.db.recordCreate("tasks", taskData)

            # Clear cache to ensure fresh data
            self._clearTableCache("tasks")

            # Convert to TaskItem model
            task = TaskItem(
                id=createdTask["id"],

@@ -1102,6 +1162,9 @@ class ChatObjects:
            # Update task in database
            updatedTask = self.db.recordModify("tasks", taskId, taskData)

            # Clear cache to ensure fresh data
            self._clearTableCache("tasks")

            # Convert to TaskItem model
            return TaskItem(
                id=updatedTask["id"],

@@ -1150,6 +1213,9 @@ class ChatObjects:
            if taskId in workflowTasks:
                workflowTasks.remove(taskId)
                self.updateWorkflow(task.workflowId, {"tasks": workflowTasks})

            # Clear cache to ensure fresh data
            self._clearTableCache("tasks")
            return True
        return False

@@ -1225,7 +1291,7 @@ class ChatObjects:
            success=createdResult.get("success", False),
            data=createdResult.get("data", {}),
            metadata=createdResult.get("metadata", {}),
            validation=createdResult.get("validation", []),
            validation=createdResult.get("validation", {}),
            error=createdResult.get("error")
        )

@@ -235,6 +235,10 @@ class ComponentObjects:
        """Delegate to access control module."""
        return self.access.canModify(table, recordId)

    def _clearTableCache(self, table: str) -> None:
        """Clears the cache for a specific table to ensure fresh data."""
        self.db.clearTableCache(table)

    # Utilities

    def getInitialId(self, table: str) -> Optional[str]:

@@ -279,6 +283,9 @@ class ComponentObjects:
            if not createdRecord or not createdRecord.get("id"):
                raise ValueError("Failed to create prompt record")

            # Clear cache to ensure fresh data
            self._clearTableCache("prompts")

            return createdRecord

    def updatePrompt(self, promptId: str, updateData: Dict[str, Any]) -> Dict[str, Any]:

@@ -292,6 +299,9 @@ class ComponentObjects:
            # Update prompt record directly with the update data
            self.db.recordModify("prompts", promptId, updateData)

            # Clear cache to ensure fresh data
            self._clearTableCache("prompts")

            # Get updated prompt
            updatedPrompt = self.getPrompt(promptId)
            if not updatedPrompt:

@@ -313,7 +323,13 @@ class ComponentObjects:
        if not self._canModify("prompts", promptId):
            raise PermissionError(f"No permission to delete prompt {promptId}")

        return self.db.recordDelete("prompts", promptId)
        # Delete prompt
        success = self.db.recordDelete("prompts", promptId)

        # Clear cache to ensure fresh data
        self._clearTableCache("prompts")

        return success

    # File Utilities

@@ -528,6 +544,10 @@ class ComponentObjects:

        # Store in database
        self.db.recordCreate("files", fileItem.to_dict())

        # Clear cache to ensure fresh data
        self._clearTableCache("files")

        return fileItem

    def updateFile(self, fileId: str, updateData: Dict[str, Any]) -> Dict[str, Any]:

@@ -545,7 +565,12 @@ class ComponentObjects:
            updateData["filename"] = self._generateUniqueFilename(updateData["filename"], fileId)

        # Update file
        return self.db.recordModify("files", fileId, updateData)
        success = self.db.recordModify("files", fileId, updateData)

        # Clear cache to ensure fresh data
        self._clearTableCache("files")

        return success

    def deleteFile(self, fileId: str) -> bool:
        """Deletes a file if user has access."""

@@ -576,7 +601,12 @@ class ComponentObjects:
                logger.warning(f"Error deleting FileData for file {fileId}: {str(e)}")

            # Delete the FileItem entry
            return self.db.recordDelete("files", fileId)
            success = self.db.recordDelete("files", fileId)

            # Clear cache to ensure fresh data
            self._clearTableCache("files")

            return success

        except FileNotFoundError as e:
            raise

@@ -634,6 +664,10 @@ class ComponentObjects:
            }

            self.db.recordCreate("fileData", fileDataObj)

            # Clear cache to ensure fresh data
            self._clearTableCache("fileData")

            logger.debug(f"Successfully stored data for file {fileId} (base64Encoded: {base64Encoded})")
            return True
        except Exception as e:

@@ -668,8 +702,25 @@ class ComponentObjects:
                # Decode base64 to bytes
                return base64.b64decode(data)
            else:
                # Convert text to bytes
                return data.encode('utf-8')
                # Check if this is supposed to be a binary file based on mime type
                mimeType = file.mimeType
                isTextFormat = self.isTextMimeType(mimeType)

                if isTextFormat:
                    # This is a text file, encode to bytes as expected
                    return data.encode('utf-8')
                else:
                    # This is a binary file that was incorrectly stored as text
                    # Try to decode it as if it was base64 (common fallback scenario)
                    try:
                        logger.warning(f"Binary file {fileId} ({mimeType}) was stored as text, attempting base64 decode")
                        return base64.b64decode(data)
                    except Exception as base64_error:
                        logger.error(f"Failed to decode binary file {fileId} as base64: {str(base64_error)}")
                        # Last resort: return the data as-is (might be corrupted)
                        logger.warning(f"Returning raw data for file {fileId} - file may be corrupted")
                        return data.encode('utf-8') if isinstance(data, str) else data

        except Exception as e:
            logger.error(f"Error processing file data for {fileId}: {str(e)}")
            return None
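Note: isTextMimeType is referenced above but not shown in this diff; a plausible minimal form (an assumption, not the commit's actual helper):

    def isTextMimeType(self, mimeType: str) -> bool:
        """Hypothetical sketch: treat text/* plus a few text-based application types as text."""
        if not mimeType:
            return False
        if mimeType.startswith("text/"):
            return True
        return mimeType in {"application/json", "application/xml", "application/javascript", "application/x-yaml"}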
@@ -810,7 +861,11 @@ class ComponentObjects:
            self.db.recordCreate("fileData", dataUpdate)
            logger.debug(f"Created new file data for file ID {fileId} (base64Encoded: {base64Encoded})")

            # Clear cache to ensure fresh data
            self._clearTableCache("fileData")

            return True

        except Exception as e:
            logger.error(f"Error updating data for file {fileId}: {str(e)}")
            return False
164
modules/methods/methodAi.py
Normal file
@@ -0,0 +1,164 @@
"""
AI processing method module.
Handles direct AI calls for any type of task.
"""

import logging
from typing import Dict, Any, List, Optional
import uuid
from datetime import datetime, UTC

from modules.chat.methodBase import MethodBase, ActionResult, action

logger = logging.getLogger(__name__)

class MethodAi(MethodBase):
    """AI method implementation for direct AI processing"""

    def __init__(self, serviceCenter: Any):
        """Initialize the AI method"""
        super().__init__(serviceCenter)
        self.name = "ai"
        self.description = "Handle direct AI processing for any type of task"

    @action
    async def process(self, parameters: Dict[str, Any]) -> ActionResult:
        """
        Perform an AI call for any type of task with optional document references

        Parameters:
            aiPrompt (str): The AI prompt for processing
            documentList (list, optional): List of document references to include in context
            expectedDocumentFormats (list, optional): Expected output formats with extension, mimeType, description
            processingMode (str, optional): Processing mode ('basic', 'advanced', 'detailed') - defaults to 'basic'
            includeMetadata (bool, optional): Whether to include metadata (default: True)
            customInstructions (str, optional): Additional custom instructions for the AI
        """
        try:
            aiPrompt = parameters.get("aiPrompt")
            documentList = parameters.get("documentList", [])
            expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
            processingMode = parameters.get("processingMode", "basic")
            includeMetadata = parameters.get("includeMetadata", True)
            customInstructions = parameters.get("customInstructions", "")

            if not aiPrompt:
                return self._createResult(
                    success=False,
                    data={},
                    error="AI prompt is required"
                )

            # Build context from documents if provided
            context = ""
            if documentList:
                chatDocuments = self.service.getChatDocumentsFromDocumentList(documentList)
                if chatDocuments:
                    context_parts = []
                    for doc in chatDocuments:
                        fileId = doc.fileId
                        file_data = self.service.getFileData(fileId)
                        file_info = self.service.getFileInfo(fileId)

                        if file_data:
                            try:
                                # Try to decode as text for context
                                content = file_data.decode('utf-8')
                                metadata_info = ""
                                if file_info and includeMetadata:
                                    metadata_info = f" (Size: {file_info.get('fileSize', 'unknown')}, Type: {file_info.get('mimeType', 'unknown')})"

                                # Adjust context length based on processing mode
                                max_length = 5000 if processingMode == "detailed" else 3000 if processingMode == "advanced" else 2000
                                context_parts.append(f"Document: {doc.filename}{metadata_info}\nContent:\n{content[:max_length]}...")
                            except UnicodeDecodeError:
                                context_parts.append(f"Document: {doc.filename} [Binary content]")

                    if context_parts:
                        context = "\n\n".join(context_parts)
                        logger.info(f"Included {len(chatDocuments)} documents in AI context")

            # Determine output format
            output_extension = ".txt"  # Default
            output_mime_type = "text/plain"  # Default

            if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
                expected_format = expectedDocumentFormats[0]
                output_extension = expected_format.get("extension", ".txt")
                output_mime_type = expected_format.get("mimeType", "text/plain")
                logger.info(f"Using expected format: {output_extension} ({output_mime_type})")

            # Build enhanced prompt
            enhanced_prompt = aiPrompt

            # Add processing mode instructions if specified (generic, not analysis-specific)
            if processingMode == "detailed":
                enhanced_prompt += "\n\nPlease provide a detailed response with comprehensive information."
            elif processingMode == "advanced":
                enhanced_prompt += "\n\nPlease provide an advanced response with deep insights."

            # Add custom instructions if provided
            if customInstructions:
                enhanced_prompt += f"\n\nAdditional Instructions: {customInstructions}"

            # Add format-specific instructions only if non-text format is requested
            if output_extension != ".txt":
                if output_extension == ".csv":
                    enhanced_prompt += "\n\nCRITICAL: Deliver the result as pure CSV data without any markdown formatting, code blocks, or additional text. Output only the CSV content with proper headers and data rows."
                elif output_extension == ".json":
                    enhanced_prompt += "\n\nCRITICAL: Deliver the result as pure JSON data without any markdown formatting, code blocks, or additional text. Output only the JSON content."
                elif output_extension == ".xml":
                    enhanced_prompt += "\n\nCRITICAL: Deliver the result as pure XML data without any markdown formatting, code blocks, or additional text. Output only the XML content."
                else:
                    enhanced_prompt += f"\n\nCRITICAL: Deliver the result as pure {output_extension.upper()} data without any markdown formatting, code blocks, or additional text."

            # Call appropriate AI service based on processing mode
            logger.info(f"Executing AI call with mode: {processingMode}, prompt length: {len(enhanced_prompt)}")
            if context:
                logger.info(f"Including context from {len(documentList)} documents")

            if processingMode in ["advanced", "detailed"]:
                result = await self.service.callAiTextAdvanced(enhanced_prompt, context)
            else:
                result = await self.service.callAiTextBasic(enhanced_prompt, context)

            # Create result document
            timestamp = datetime.now(UTC).strftime('%Y%m%d_%H%M%S')
            filename = f"ai_{processingMode}_{timestamp}{output_extension}"

            # Create document through service (but don't add to workflow - let calling layer handle that)
            document = self.service.createDocument(
                fileName=filename,
                mimeType=output_mime_type,
                content=result,
                base64encoded=False
            )

            return self._createResult(
                success=True,
                data={
                    "result": result,
                    "filename": filename,
                    "documentId": document.id if hasattr(document, 'id') else None,
                    "processedDocuments": len(documentList) if documentList else 0,
                    "processingMode": processingMode,
                    "document": document  # Include the created document in the result data
                },
                metadata={
                    "method": "ai.process",
                    "promptLength": len(aiPrompt),
                    "contextLength": len(context),
                    "outputFormat": output_extension,
                    "includeMetadata": includeMetadata,
                    "processingMode": processingMode,
                    "hasCustomInstructions": bool(customInstructions)
                }
            )

        except Exception as e:
            logger.error(f"Error in ai.process: {str(e)}")
            return self._createResult(
                success=False,
                data={},
                error=f"AI processing failed: {str(e)}"
            )
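Note: a hedged sketch of invoking the new action (the serviceCenter wiring and document id are illustrative):

    method = MethodAi(serviceCenter)
    result = await method.process({
        "aiPrompt": "Summarize the quarterly figures.",
        "documentList": ["doc-123"],
        "expectedDocumentFormats": [{"extension": ".csv", "mimeType": "text/csv"}],
        "processingMode": "advanced",   # routes to callAiTextAdvanced above
    })
    if result.success:
        print(result.data["filename"], result.data["documentId"])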
@@ -5,7 +5,6 @@ Handles document operations using the document service.

import logging
from typing import Dict, Any, List, Optional
import uuid
from datetime import datetime, UTC

from modules.chat.methodBase import MethodBase, ActionResult, action

@@ -24,19 +23,19 @@ class MethodDocument(MethodBase):
    @action
    async def extract(self, parameters: Dict[str, Any]) -> ActionResult:
        """
        Extract specific content from document with ai prompt and return it in the specified format
        Extract specific content from document with AI prompt and return it in the specified format.

        Parameters:
            documentList (str): Reference to the document list to extract content from
            aiPrompt (str): AI prompt for content extraction
            includeMetadata (bool, optional): Whether to include metadata (default: True)
            expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description
            includeMetadata (bool, optional): Whether to include metadata (default: True)
        """
        try:
            documentList = parameters.get("documentList")
            aiPrompt = parameters.get("aiPrompt")
            includeMetadata = parameters.get("includeMetadata", True)
            expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
            includeMetadata = parameters.get("includeMetadata", True)

            if not documentList:
                return self._createResult(

@@ -60,32 +59,7 @@ class MethodDocument(MethodBase):
                    error="No documents found for the provided reference"
                )

            # Determine output format based on expected formats
            output_extension = ".txt"  # Default
            output_mime_type = "text/plain"  # Default

            if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
                # Use the first expected format
                expected_format = expectedDocumentFormats[0]
                output_extension = expected_format.get("extension", ".txt")
                output_mime_type = expected_format.get("mimeType", "text/plain")
                logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
                logger.info(f"Expected document formats: {expectedDocumentFormats}")
            else:
                logger.info("No expected format specified, using default .txt format")

            # Enhance AI prompt to specify output format
            enhanced_prompt = aiPrompt
            if output_extension == ".csv":
                enhanced_prompt += "\n\nCRITICAL: Deliver the result as pure CSV data without any markdown formatting, code blocks, or additional text. Output only the CSV content with proper headers and data rows. Do not include ```csv or ``` markers."
            elif output_extension == ".json":
                enhanced_prompt += "\n\nCRITICAL: Deliver the result as pure JSON data without any markdown formatting, code blocks, or additional text. Output only the JSON content. Do not include ```json or ``` markers."
            elif output_extension == ".xml":
                enhanced_prompt += "\n\nCRITICAL: Deliver the result as pure XML data without any markdown formatting, code blocks, or additional text. Output only the XML content. Do not include ```xml or ``` markers."
            elif output_extension != ".txt":
                enhanced_prompt += f"\n\nCRITICAL: Deliver the result as pure {output_extension.upper()} data without any markdown formatting, code blocks, or additional text. Output only the {output_extension.upper()} content. Do not include any markdown markers."

            # Extract content from all documents
            # Extract content from all documents using AI
            all_extracted_content = []
            file_infos = []

@@ -99,7 +73,7 @@ class MethodDocument(MethodBase):
                    continue

                extracted_content = await self.service.extractContentFromFileData(
                    prompt=enhanced_prompt,  # Use enhanced prompt instead of original
                    prompt=aiPrompt,
                    fileData=file_data,
                    filename=file_info.get('name', 'document'),
                    mimeType=file_info.get('mimeType', 'application/octet-stream'),

@@ -118,21 +92,7 @@ class MethodDocument(MethodBase):
                    error="No content could be extracted from any documents"
                )

            # Extract text content from ExtractedContent objects
            text_contents = []
            for content_obj in all_extracted_content:
                if hasattr(content_obj, 'contents') and content_obj.contents:
                    # Extract text from ContentItem objects
                    for content_item in content_obj.contents:
                        if hasattr(content_item, 'data') and content_item.data:
                            text_contents.append(content_item.data)
                elif isinstance(content_obj, str):
                    text_contents.append(content_obj)
                else:
                    # Fallback: convert to string representation
                    text_contents.append(str(content_obj))

            # Process each document individually and create separate output files
            # Process each document individually with its own format conversion
            output_documents = []

            for i, (chatDocument, extracted_content) in enumerate(zip(chatDocuments, all_extracted_content)):

@@ -140,36 +100,68 @@ class MethodDocument(MethodBase):
                text_content = ""
                if hasattr(extracted_content, 'contents') and extracted_content.contents:
                    # Extract text from ContentItem objects
                    text_parts = []
                    for content_item in extracted_content.contents:
                        if hasattr(content_item, 'data') and content_item.data:
                            text_content += content_item.data + "\n"
                            text_parts.append(content_item.data)
                    text_content = "\n".join(text_parts)
                elif isinstance(extracted_content, str):
                    text_content = extracted_content
                else:
                    # Fallback: convert to string representation
                    text_content = str(extracted_content)

                # Create output filename based on original filename
                # Get the expected format for this document (or use default)
                target_format = None
                if expectedDocumentFormats and i < len(expectedDocumentFormats):
                    target_format = expectedDocumentFormats[i]
                elif expectedDocumentFormats and len(expectedDocumentFormats) > 0:
                    # If fewer formats than documents, use the last format for remaining documents
                    target_format = expectedDocumentFormats[-1]

                # Determine output format and filename
                if target_format:
                    target_extension = target_format.get("extension", ".txt")
                    target_mime_type = target_format.get("mimeType", "text/plain")

                    # Check if format conversion is needed
                    if target_extension not in [".txt", ".text"] or target_mime_type != "text/plain":
                        logger.info(f"Converting document {i+1} to format: {target_extension} ({target_mime_type})")
                        # Use AI to convert format
                        formatted_content = await self._convertContentToFormat(text_content, target_format)
                        final_content = formatted_content
                        final_mime_type = target_mime_type
                        final_extension = target_extension
                    else:
                        logger.info(f"Document {i+1}: No format conversion needed, using plain text")
                        final_content = text_content
                        final_mime_type = "text/plain"
                        final_extension = ".txt"
                else:
                    logger.info(f"Document {i+1}: No expected format specified, using plain text")
                    final_content = text_content
                    final_mime_type = "text/plain"
                    final_extension = ".txt"

                # Create output filename based on original filename and target format
                original_filename = chatDocument.filename
                base_name = original_filename.rsplit('.', 1)[0] if '.' in original_filename else original_filename
                output_filename = f"{base_name}_extracted_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}{output_extension}"
                output_filename = f"{base_name}_extracted_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}{final_extension}"

                # Create result data for this document
                result_data = {
                    "documentCount": 1,
                    "content": text_content,
                    "content": final_content,
                    "originalFilename": original_filename,
                    "fileInfos": [file_infos[i]] if includeMetadata and i < len(file_infos) else None,
                    "timestamp": datetime.now(UTC).isoformat()
                }

                logger.info(f"Created output document: {output_filename} with {len(text_content)} characters")
                logger.info(f"Content preview: {text_content[:200]}...")
                logger.info(f"Created output document: {output_filename} with {len(final_content)} characters")

                output_documents.append({
                    "documentName": output_filename,
                    "documentData": result_data,
                    "mimeType": output_mime_type
                    "mimeType": final_mime_type
                })

            return self._createResult(
@@ -186,6 +178,327 @@ class MethodDocument(MethodBase):
                error=str(e)
            )

    @action
    async def generate(self, parameters: Dict[str, Any]) -> ActionResult:
        """
        Generate documents in specific formats from document references.
        This action automatically extracts content from documents and converts it to the specified format.

        Parameters:
            documentList (list): List of document references to extract content from
            expectedDocumentFormats (list): Expected document formats with extension, mimeType, description
            originalDocuments (list, optional): List of original document names
            includeMetadata (bool, optional): Whether to include metadata (default: True)
        """
        try:
            document_list = parameters.get("documentList", [])
            expected_document_formats = parameters.get("expectedDocumentFormats", [])
            original_documents = parameters.get("originalDocuments", [])
            include_metadata = parameters.get("includeMetadata", True)

            if not document_list:
                return self._createResult(
                    success=False,
                    data={},
                    error="Document list is required for generation"
                )

            if not expected_document_formats or len(expected_document_formats) == 0:
                return self._createResult(
                    success=False,
                    data={},
                    error="Expected document formats specification is required"
                )

            # Get chat documents for original documents list
            chat_documents = self.service.getChatDocumentsFromDocumentList(document_list)
            logger.info(f"Found {len(chat_documents)} chat documents")

            if not chat_documents:
                return self._createResult(
                    success=False,
                    data={},
                    error="No documents found for the provided documentList reference"
                )

            # Update original documents list if not provided
            if not original_documents:
                original_documents = [doc.filename if hasattr(doc, 'filename') else str(doc.id) for doc in chat_documents]

            # Process each document individually with its own format conversion
            output_documents = []

            for i, chat_document in enumerate(chat_documents):
                # Extract content from this document
                # ChatDocument is just a reference, so we need to get file data using fileId
                content = ""
                if hasattr(chat_document, 'fileId') and chat_document.fileId:
                    # Need to get file data
                    file_data = self.service.getFileData(chat_document.fileId)
                    if file_data:
                        if isinstance(file_data, bytes):
                            content = file_data.decode('utf-8', errors='ignore')
                        else:
                            content = str(file_data)
                    else:
                        logger.warning(f"Could not get file data for document {i+1}, skipping")
                        continue
                else:
                    logger.warning(f"Document {i+1} has no fileId, skipping")
                    continue

                if not content:
                    logger.warning(f"Could not extract content from document {i+1}, skipping")
                    continue

                logger.info(f"Extracted content from document {i+1}: {len(content)} characters")

                # Get the expected format for this document (or use default)
                target_format = None
                if i < len(expected_document_formats):
                    target_format = expected_document_formats[i]
                elif len(expected_document_formats) > 0:
                    # If fewer formats than documents, use the last format for remaining documents
                    target_format = expected_document_formats[-1]

                if not target_format:
                    logger.warning(f"No expected format for document {i+1}, skipping")
                    continue

                # Use AI to convert format
                formatted_content = await self._convertContentToFormat(content, target_format)
                if not formatted_content:
                    logger.warning(f"Failed to format document {i+1}, skipping")
                    continue

                target_extension = target_format.get("extension", ".txt")
                target_mime_type = target_format.get("mimeType", "text/plain")

                # Create output filename
                timestamp = datetime.now(UTC).strftime('%Y%m%d_%H%M%S')
                if i < len(original_documents):
                    base_name = original_documents[i].rsplit('.', 1)[0] if '.' in original_documents[i] else original_documents[i]
                else:
                    base_name = f"document_{i+1}"
                output_filename = f"{base_name}_generated_{timestamp}{target_extension}"

                # Create result data
                result_data = {
                    "documentCount": 1,
                    "content": formatted_content,
                    "outputFormat": target_format,
                    "originalDocument": original_documents[i] if i < len(original_documents) else f"document_{i+1}",
                    "timestamp": datetime.now(UTC).isoformat()
                }

                logger.info(f"Generated document: {output_filename} with {len(formatted_content)} characters")

                output_documents.append({
                    "documentName": output_filename,
                    "documentData": result_data,
                    "mimeType": target_mime_type
                })

            if not output_documents:
                return self._createResult(
                    success=False,
                    data={},
                    error="No documents could be generated"
                )

            return self._createResult(
                success=True,
                data={
                    "documents": output_documents
                }
            )
        except Exception as e:
            logger.error(f"Error generating document: {str(e)}")
            return self._createResult(
                success=False,
                data={},
                error=str(e)
            )

    async def _convertContentToFormat(self, content: str, target_format: Dict[str, Any]) -> str:
        """
        Helper function to convert content to the specified format using AI.
        """
        try:
            extension = target_format.get("extension", ".txt")
            mime_type = target_format.get("mimeType", "text/plain")

            logger.info(f"Converting content to format: {extension} ({mime_type})")

            # Create AI prompt for format conversion
            format_prompts = {
                ".csv": f"""
Convert the following content into a proper CSV format.

Requirements:
1. Output ONLY the CSV data without any markdown, code blocks, or additional text
2. Use appropriate headers based on the content
3. Ensure proper CSV formatting with commas and quotes where needed
4. Make the data easily readable and importable into spreadsheet applications

Content to convert:
{content}

Generate ONLY the CSV data:
""",

                ".json": f"""
Convert the following content into a proper JSON format.

Requirements:
1. Output ONLY the JSON data without any markdown, code blocks, or additional text
2. Structure the data logically with appropriate keys and values
3. Ensure valid JSON syntax
4. Make the data easily parseable and readable

Content to convert:
{content}

Generate ONLY the JSON data:
""",

                ".xml": f"""
Convert the following content into a proper XML format.

Requirements:
1. Output ONLY the XML data without any markdown, code blocks, or additional text
2. Use appropriate XML tags and structure
3. Ensure valid XML syntax
4. Make the data easily parseable and readable

Content to convert:
{content}

Generate ONLY the XML data:
""",

                ".html": f"""
Convert the following content into a proper HTML format.

Requirements:
1. Output ONLY the HTML data without any markdown, code blocks, or additional text
2. Use appropriate HTML tags and structure
3. Ensure valid HTML syntax
4. Make the data easily readable in web browsers

Content to convert:
{content}

Generate ONLY the HTML data:
""",

                ".md": f"""
Convert the following content into a proper Markdown format.

Requirements:
1. Output ONLY the Markdown data without any code blocks or additional text
2. Use appropriate Markdown syntax for headers, lists, emphasis, etc.
3. Structure the content logically
4. Make the data easily readable and convertible to other formats

Content to convert:
{content}

Generate ONLY the Markdown data:
"""
            }

            # Get the appropriate prompt for the target format
            if extension in format_prompts:
                ai_prompt = format_prompts[extension]
            else:
                # Generic format conversion
                ai_prompt = f"""
Convert the following content into {extension.upper()} format.

Requirements:
1. Output ONLY the {extension.upper()} data without any markdown, code blocks, or additional text
2. Use appropriate formatting for {extension.upper()} files
3. Ensure the output is valid and usable
4. Make the data easily readable and importable

Content to convert:
{content}

Generate ONLY the {extension.upper()} data:
"""

            # Call AI to generate the formatted content
            logger.info(f"Calling AI for {extension} format conversion")
            formatted_content = await self.service.callAiTextBasic(ai_prompt, content)

            if not formatted_content or formatted_content.strip() == "":
                logger.warning("AI format conversion failed, using fallback")
                return self._generateFallbackFormattedContent(content, extension, mime_type)

            # Clean up the AI response
            formatted_content = formatted_content.strip()

            # Remove markdown code blocks if present
            if formatted_content.startswith("```") and formatted_content.endswith("```"):
                lines = formatted_content.split('\n')
                if len(lines) > 2:
                    formatted_content = '\n'.join(lines[1:-1])

            return formatted_content

        except Exception as e:
            logger.error(f"Error in AI format conversion: {str(e)}")
            return self._generateFallbackFormattedContent(content, extension, mime_type)
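Note: a quick illustration of calling the converter directly (input text and expected output are illustrative):

    formatted = await self._convertContentToFormat(
        "name: Alice\nrole: admin",
        {"extension": ".json", "mimeType": "application/json"},
    )
    # Plausible AI output: {"name": "Alice", "role": "admin"}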
    def _generateFallbackFormattedContent(self, content: str, extension: str, mime_type: str) -> str:
        """
        Generate fallback formatted content when AI conversion fails.
        """
        try:
            if extension == ".csv":
                # Simple CSV fallback - split by lines and create basic CSV
                lines = content.strip().split('\n')
                if lines:
                    # Create a simple CSV with line numbers and content
                    csv_lines = ["Line,Content"]
                    for i, line in enumerate(lines, 1):
                        # Escape quotes and wrap in quotes if comma present
                        if ',' in line:
                            line = f'"{line.replace(chr(34), chr(34) + chr(34))}"'
                        csv_lines.append(f"{i},{line}")
                    return '\n'.join(csv_lines)
                return "Line,Content\n1,No content available"

            elif extension == ".json":
                # Simple JSON fallback
                content_escaped = content.replace('"', '\\"')
                timestamp = datetime.now(UTC).isoformat()
                return f'{{"content": "{content_escaped}", "format": "json", "timestamp": "{timestamp}"}}'

            elif extension == ".xml":
                # Simple XML fallback
                timestamp = datetime.now(UTC).isoformat()
                return f'<?xml version="1.0" encoding="UTF-8"?>\n<document>\n<content>{content}</content>\n<format>xml</format>\n<timestamp>{timestamp}</timestamp>\n</document>'

            elif extension == ".html":
                # Simple HTML fallback
                timestamp = datetime.now(UTC).strftime('%Y-%m-%d %H:%M:%S UTC')
                return f'<!DOCTYPE html>\n<html>\n<head><meta charset="UTF-8"><title>Generated Document</title></head>\n<body>\n<pre>{content}</pre>\n<p><em>Generated on {timestamp}</em></p>\n</body>\n</html>'

            elif extension == ".md":
                # Simple Markdown fallback
                timestamp = datetime.now(UTC).strftime('%Y-%m-%d %H:%M:%S UTC')
                return f"# Generated Document\n\n{content}\n\n---\n*Generated on {timestamp}*"

            else:
                # Generic fallback - return content as-is
                return content

        except Exception as e:
            logger.error(f"Error in fallback format conversion: {str(e)}")
            return content
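Note: the CSV branch above doubles embedded quotes and wraps comma-bearing lines; traced by hand on a two-line input:

    # Input:   'widget, "large"\nplain line'
    # Output:  Line,Content
    #          1,"widget, ""large"""
    #          2,plain line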
    @action
    async def generateReport(self, parameters: Dict[str, Any]) -> ActionResult:
        """

@@ -209,6 +522,8 @@ class MethodDocument(MethodBase):
            )

            chatDocuments = self.service.getChatDocumentsFromDocumentList(documentList)
            logger.info(f"Retrieved {len(chatDocuments)} chat documents for report generation")

            if not chatDocuments:
                return self._createResult(
                    success=False,

@@ -217,7 +532,7 @@ class MethodDocument(MethodBase):
            )

            # Generate HTML report
-           html_content = self._generateHtmlReport(chatDocuments, title, includeMetadata)
+           html_content = await self._generateHtmlReport(chatDocuments, title, includeMetadata)

            # Create output filename
            timestamp = datetime.now(UTC).strftime('%Y%m%d_%H%M%S')

@@ -250,7 +565,7 @@ class MethodDocument(MethodBase):
                error=str(e)
            )

-   def _generateHtmlReport(self, chatDocuments: List[Any], title: str, includeMetadata: bool) -> str:
+   async def _generateHtmlReport(self, chatDocuments: List[Any], title: str, includeMetadata: bool) -> str:
        """
        Generate a comprehensive HTML report using AI from all input documents.
        """

@@ -261,15 +576,30 @@ class MethodDocument(MethodBase):

        for doc in chatDocuments:
            content = ""
-           if hasattr(doc, 'content') and doc.content:
-               content = doc.content.strip()
-           elif hasattr(doc, 'data') and doc.data:
-               content = doc.data.strip()
+           logger.info(f"Processing document: type={type(doc)}")
+
+           # Get actual file content using the fileId reference
+           try:
+               file_data = self.service.getFileData(doc.fileId)
+               if file_data:
+                   # Convert bytes to string
+                   if isinstance(file_data, bytes):
+                       content = file_data.decode('utf-8')
+                   else:
+                       content = str(file_data)
+                   logger.info(f"  Retrieved content from file: {len(content)} characters")
+               else:
+                   logger.warning(f"  No file data found for fileId: {doc.fileId}")
+           except Exception as e:
+               logger.error(f"  Error retrieving file data: {str(e)}")

            # Skip empty documents
            if content:
                validDocuments.append(doc)
                allContent.append(f"Document: {doc.filename}\n{content}\n")
+               logger.info(f"  Added document to valid documents list")
            else:
                logger.warning(f"  Skipping document with no content")

        if not validDocuments:
            # If no valid documents, create a simple report

@@ -304,7 +634,7 @@ class MethodDocument(MethodBase):

        # Call AI to generate the report
        logger.info(f"Generating AI report for {len(validDocuments)} documents")
-       aiReport = self.service.callAiTextBasic(aiPrompt, combinedContent)
+       aiReport = await self.service.callAiTextBasic(aiPrompt, combinedContent)

        # If AI call fails, fall back to basic HTML
        if not aiReport or aiReport.strip() == "":

@@ -313,9 +643,16 @@ class MethodDocument(MethodBase):

        # Clean up the AI response and ensure it's valid HTML
        if not aiReport.strip().startswith('<html'):
+           # Check if AI response already contains a title/header
+           has_title = any(title.lower() in aiReport.lower() for title in [title, "outlook", "report", "status"])
+
            # Wrap the AI content in proper HTML structure
            html = ["<html><head><meta charset='utf-8'><title>" + title + "</title></head><body>"]
-           html.append(f"<h1>{title}</h1>")
+
+           # Only add the title if the AI response doesn't already have one
+           if not has_title:
+               html.append(f"<h1>{title}</h1>")

            html.append(f"<p><b>Generated:</b> {datetime.now(UTC).strftime('%Y-%m-%d %H:%M:%S UTC')}</p>")
            html.append(f"<p><b>Total Documents Analyzed:</b> {len(validDocuments)}</p>")
            html.append("<hr>")

@@ -336,7 +673,25 @@ class MethodDocument(MethodBase):
        Generate a basic HTML report as fallback when AI generation fails.
        """
        html = ["<html><head><meta charset='utf-8'><title>" + title + "</title></head><body>"]
-       html.append(f"<h1>{title}</h1>")
+
+       # Check if any document content already contains a title/header
+       has_title = False
+       for doc in chatDocuments:
+           if hasattr(doc, 'fileId') and doc.fileId:
+               try:
+                   file_data = self.service.getFileData(doc.fileId)
+                   if file_data:
+                       content = file_data.decode('utf-8') if isinstance(file_data, bytes) else str(file_data)
+                       if any(title.lower() in content.lower() for title in [title, "outlook", "report", "status"]):
+                           has_title = True
+                           break
+               except Exception:
+                   continue
+
+       # Only add the title if no document content already has one
+       if not has_title:
+           html.append(f"<h1>{title}</h1>")

        html.append(f"<p><b>Generated:</b> {datetime.now(UTC).strftime('%Y-%m-%d %H:%M:%S UTC')}</p>")
        html.append(f"<p><b>Total Documents:</b> {len(chatDocuments)}</p>")

@@ -354,10 +709,17 @@ class MethodDocument(MethodBase):

            # Add document content if available
            content = ""
-           if hasattr(doc, 'content') and doc.content:
-               content = doc.content
-           elif hasattr(doc, 'data') and doc.data:
-               content = doc.data
+           if hasattr(doc, 'fileId') and doc.fileId:
+               # ChatDocument is just a reference, so we need to get file data using fileId
+               try:
+                   file_data = self.service.getFileData(doc.fileId)
+                   if file_data:
+                       if isinstance(file_data, bytes):
+                           content = file_data.decode('utf-8')
+                       else:
+                           content = str(file_data)
+               except Exception as e:
+                   logger.warning(f"Could not retrieve content for document {doc.filename}: {str(e)}")

            if content:
                html.append(f"<div style='white-space:pre-wrap; border:1px solid #ccc; padding:0.5em; margin-bottom:1em; background-color:#f9f9f9;'>{content}</div>")
File diff suppressed because it is too large
@@ -8,6 +8,9 @@ from typing import Dict, Any, List, Optional
from datetime import datetime, UTC
import json
import uuid
+import aiohttp
+import asyncio
+from urllib.parse import urlparse

from modules.chat.methodBase import MethodBase, ActionResult, action

@@ -25,25 +28,136 @@ class MethodSharepoint(MethodBase):
        """Get Microsoft connection from connection reference"""
        try:
            userConnection = self.service.getUserConnectionFromConnectionReference(connectionReference)
-           if not userConnection or userConnection.authority != "msft" or userConnection.status != "active":
+           if not userConnection:
+               logger.warning(f"No user connection found for reference: {connectionReference}")
                return None
+
+           if userConnection.authority.value != "msft":
+               logger.warning(f"Connection {userConnection.id} is not Microsoft (authority: {userConnection.authority.value})")
+               return None
+
+           # Check if connection is active or pending (pending means OAuth in progress)
+           if userConnection.status.value not in ["active", "pending"]:
+               logger.warning(f"Connection {userConnection.id} status is not active/pending: {userConnection.status.value}")
+               return None

            # Get the corresponding token for this user and authority
-           token = self.service.interfaceApp.getToken(userConnection.authority)
+           token = self.service.interfaceApp.getToken(userConnection.authority.value)
            if not token:
-               logger.warning(f"No token found for user {userConnection.userId} and authority {userConnection.authority}")
+               logger.warning(f"No token found for user {userConnection.userId} and authority {userConnection.authority.value}")
                return None
+
+           # Check if token is expired
+           if hasattr(token, 'expiresAt') and token.expiresAt:
+               import time
+               current_time = time.time()
+               if current_time > token.expiresAt:
+                   logger.warning(f"Token for connection {userConnection.id} is expired (expiresAt: {token.expiresAt}, current: {current_time})")
+                   return None
+
+           logger.info(f"Successfully retrieved Microsoft connection: {userConnection.id}, status: {userConnection.status.value}, externalId: {userConnection.externalId}")

            return {
                "id": userConnection.id,
                "accessToken": token.tokenAccess,
                "refreshToken": token.tokenRefresh,
-               "scopes": ["Sites.ReadWrite.All", "User.Read"]  # Default Microsoft scopes
+               "scopes": ["Sites.ReadWrite.All", "Files.ReadWrite.All", "User.Read"]  # SharePoint scopes
            }
        except Exception as e:
            logger.error(f"Error getting Microsoft connection: {str(e)}")
            return None
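The expiry check added above compares raw epoch seconds at the moment of use. A sketch of the same check with a small clock-skew buffer, so tokens are treated as expired slightly early (the 60-second default is an assumption, not a value from this codebase):

```python
import time
from typing import Optional

def token_is_expired(expires_at: Optional[float], skew_seconds: int = 60) -> bool:
    """Epoch-seconds expiry check, as in the connection helper above,
    with a hypothetical skew buffer so in-flight requests don't race
    the real expiry."""
    if not expires_at:
        return False  # no expiry recorded; caller decides what that means
    return time.time() > (expires_at - skew_seconds)
```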
    def _parseSiteUrl(self, siteUrl: str) -> Dict[str, str]:
        """Parse SharePoint site URL to extract hostname and site path"""
        try:
            parsed = urlparse(siteUrl)
            hostname = parsed.hostname
            path = parsed.path.strip('/')

            return {
                "hostname": hostname,
                "sitePath": path
            }
        except Exception as e:
            logger.error(f"Error parsing site URL {siteUrl}: {str(e)}")
            return {"hostname": "", "sitePath": ""}
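For a typical site URL this splits out exactly the two pieces the Graph lookups below need; the tenant and site here are made up:

```python
from urllib.parse import urlparse

# Hypothetical site URL, to show what _parseSiteUrl returns.
parsed = urlparse("https://contoso.sharepoint.com/sites/Engineering")
print(parsed.hostname)         # contoso.sharepoint.com
print(parsed.path.strip('/'))  # sites/Engineering
```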
    async def _makeGraphApiCall(self, access_token: str, endpoint: str, method: str = "GET", data: bytes = None) -> Dict[str, Any]:
        """Make a Microsoft Graph API call with timeout and detailed logging"""
        try:
            headers = {
                "Authorization": f"Bearer {access_token}",
                "Content-Type": "application/json" if data and method != "PUT" else "application/octet-stream" if data else "application/json"
            }

            url = f"https://graph.microsoft.com/v1.0/{endpoint}"
            logger.info(f"Making Graph API call: {method} {url}")

            # Set timeout to 30 seconds
            timeout = aiohttp.ClientTimeout(total=30)

            async with aiohttp.ClientSession(timeout=timeout) as session:
                if method == "GET":
                    logger.debug(f"Starting GET request to {url}")
                    async with session.get(url, headers=headers) as response:
                        logger.info(f"Graph API response: {response.status}")
                        if response.status == 200:
                            result = await response.json()
                            logger.debug(f"Graph API success: {len(str(result))} characters response")
                            return result
                        else:
                            error_text = await response.text()
                            logger.error(f"Graph API call failed: {response.status} - {error_text}")
                            return {"error": f"API call failed: {response.status} - {error_text}"}

                elif method == "PUT":
                    logger.debug(f"Starting PUT request to {url}")
                    async with session.put(url, headers=headers, data=data) as response:
                        logger.info(f"Graph API response: {response.status}")
                        if response.status in [200, 201]:
                            result = await response.json()
                            logger.debug(f"Graph API success: {len(str(result))} characters response")
                            return result
                        else:
                            error_text = await response.text()
                            logger.error(f"Graph API call failed: {response.status} - {error_text}")
                            return {"error": f"API call failed: {response.status} - {error_text}"}

                elif method == "POST":
                    logger.debug(f"Starting POST request to {url}")
                    async with session.post(url, headers=headers, data=data) as response:
                        logger.info(f"Graph API response: {response.status}")
                        if response.status in [200, 201]:
                            result = await response.json()
                            logger.debug(f"Graph API success: {len(str(result))} characters response")
                            return result
                        else:
                            error_text = await response.text()
                            logger.error(f"Graph API call failed: {response.status} - {error_text}")
                            return {"error": f"API call failed: {response.status} - {error_text}"}

        except asyncio.TimeoutError:
            logger.error(f"Graph API call timed out after 30 seconds: {endpoint}")
            return {"error": f"API call timed out after 30 seconds: {endpoint}"}
        except Exception as e:
            logger.error(f"Error making Graph API call: {str(e)}")
            return {"error": f"Error making Graph API call: {str(e)}"}

    async def _getSiteId(self, access_token: str, hostname: str, site_path: str) -> str:
        """Get SharePoint site ID from hostname and site path"""
        try:
            endpoint = f"sites/{hostname}:/{site_path}"
            result = await self._makeGraphApiCall(access_token, endpoint)

            if "error" in result:
                logger.error(f"Error getting site ID: {result['error']}")
                return ""

            return result.get("id", "")
        except Exception as e:
            logger.error(f"Error getting site ID: {str(e)}")
            return ""
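Taken together, these two helpers resolve a human-readable site URL into the site ID that every later `/sites/{site-id}/drive/...` call expects. A minimal standalone sketch of the same lookup, with placeholder tenant and token:

```python
import asyncio
import aiohttp

async def lookup_site_id(access_token: str, hostname: str, site_path: str) -> str:
    # GET /sites/{hostname}:/{server-relative-path} returns the site object;
    # its "id" field is what the drive endpoints take.
    url = f"https://graph.microsoft.com/v1.0/sites/{hostname}:/{site_path}"
    async with aiohttp.ClientSession() as session:
        async with session.get(url, headers={"Authorization": f"Bearer {access_token}"}) as resp:
            body = await resp.json()
            return body.get("id", "")

# asyncio.run(lookup_site_id(token, "contoso.sharepoint.com", "sites/Engineering"))
```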
    @action
    async def findDocumentPath(self, parameters: Dict[str, Any]) -> ActionResult:
        """

@@ -78,37 +192,98 @@ class MethodSharepoint(MethodBase):
                error="No valid Microsoft connection found for the provided connection reference"
            )

-       find_prompt = f"""
-       Simulate finding document paths in Microsoft SharePoint based on a query.
+       # Parse site URL to get hostname and site path
+       site_info = self._parseSiteUrl(siteUrl)
+       if not site_info["hostname"] or not site_info["sitePath"]:
+           return self._createResult(
+               success=False,
+               data={},
+               error=f"Invalid SharePoint site URL: {siteUrl}"
+           )

-       Connection: {connection['id']}
-       Site URL: {siteUrl}
-       Query: {query}
-       Search Scope: {searchScope}
+       # Get site ID
+       site_id = await self._getSiteId(connection["accessToken"], site_info["hostname"], site_info["sitePath"])
+       if not site_id:
+           return self._createResult(
+               success=False,
+               data={},
+               error="Failed to get SharePoint site ID"
+           )

-       Please provide:
-       1. Matching document paths and locations
-       2. Relevance scores for each match
-       3. Document metadata and properties
-       4. Alternative search suggestions
-       5. Search statistics and coverage
-       """
-
-       find_result = await self.service.interfaceAiCalls.callAiTextAdvanced(find_prompt)
-
-       result_data = {
-           "connectionReference": connectionReference,
-           "siteUrl": siteUrl,
-           "query": query,
-           "searchScope": searchScope,
-           "findResult": find_result,
-           "connection": {
-               "id": connection["id"],
-               "authority": "microsoft",
-               "reference": connectionReference
-           },
-           "timestamp": datetime.now(UTC).isoformat()
-       }
+       try:
+           # Use Microsoft Graph search API
+           search_query = query.replace("'", "''")  # Escape single quotes for OData
+           endpoint = f"sites/{site_id}/drive/root/search(q='{search_query}')"
+
+           # Make the search API call
+           search_result = await self._makeGraphApiCall(connection["accessToken"], endpoint)
+
+           if "error" in search_result:
+               return self._createResult(
+                   success=False,
+                   data={},
+                   error=f"Search failed: {search_result['error']}"
+               )
+
+           # Process search results
+           items = search_result.get("value", [])
+           found_documents = []
+
+           for item in items:
+               # Filter by search scope if specified
+               if searchScope == "documents" and "folder" in item:
+                   continue
+               elif searchScope == "pages" and "file" in item and not item["file"].get("mimeType", "").startswith("text/html"):
+                   continue
+
+               doc_info = {
+                   "id": item.get("id"),
+                   "name": item.get("name"),
+                   "path": item.get("parentReference", {}).get("path", "") + "/" + item.get("name", ""),
+                   "size": item.get("size", 0),
+                   "createdDateTime": item.get("createdDateTime"),
+                   "lastModifiedDateTime": item.get("lastModifiedDateTime"),
+                   "webUrl": item.get("webUrl"),
+                   "type": "folder" if "folder" in item else "file"
+               }
+
+               # Add file-specific information
+               if "file" in item:
+                   doc_info.update({
+                       "mimeType": item["file"].get("mimeType"),
+                       "downloadUrl": item.get("@microsoft.graph.downloadUrl")
+                   })
+
+               # Add folder-specific information
+               if "folder" in item:
+                   doc_info.update({
+                       "childCount": item["folder"].get("childCount", 0)
+                   })
+
+               found_documents.append(doc_info)
+
+           result_data = {
+               "connectionReference": connectionReference,
+               "siteUrl": siteUrl,
+               "query": query,
+               "searchScope": searchScope,
+               "totalResults": len(found_documents),
+               "foundDocuments": found_documents,
+               "connection": {
+                   "id": connection["id"],
+                   "authority": "microsoft",
+                   "reference": connectionReference
+               },
+               "timestamp": datetime.now(UTC).isoformat()
+           }
+
+       except Exception as e:
+           logger.error(f"Error searching SharePoint: {str(e)}")
+           return self._createResult(
+               success=False,
+               data={},
+               error=str(e)
+           )

        # Determine output format based on expected formats
        output_extension = ".json"  # Default
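The endpoint used above is the drive-level search. As a standalone sketch (site ID and token are placeholders), the same request and OData quote escaping look like:

```python
import asyncio
import aiohttp

async def search_drive(access_token: str, site_id: str, text: str) -> list:
    # Drive search: GET /sites/{site-id}/drive/root/search(q='{text}').
    # Single quotes inside the query are doubled, matching the escaping
    # applied in findDocumentPath above.
    q = text.replace("'", "''")
    url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/drive/root/search(q='{q}')"
    async with aiohttp.ClientSession() as session:
        async with session.get(url, headers={"Authorization": f"Bearer {access_token}"}) as resp:
            body = await resp.json()
            return body.get("value", [])
```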
@@ -172,8 +347,23 @@ class MethodSharepoint(MethodBase):
                error="Document list reference, connection reference, site URL, and document paths are required"
            )

-       # Get documents from reference
+       # Get documents from reference - ensure documentList is a list, not a string
+       if isinstance(documentList, str):
+           documentList = [documentList]  # Convert string to list
        chatDocuments = self.service.getChatDocumentsFromDocumentList(documentList)

+       # For testing: if no chat documents found, create mock documents based on document paths
+       if not chatDocuments and documentPaths:
+           logger.info("No chat documents found, creating mock documents for testing based on document paths")
+           chatDocuments = []
+           for i, path in enumerate(documentPaths):
+               mock_doc = type('MockChatDocument', (), {
+                   'fileId': f'mock_file_id_{i}',
+                   'filename': path.split('/')[-1] if '/' in path else path
+               })()
+               chatDocuments.append(mock_doc)
+           logger.info(f"Created {len(chatDocuments)} mock documents for testing")
+
        if not chatDocuments:
            return self._createResult(
                success=False,

@@ -189,37 +379,112 @@ class MethodSharepoint(MethodBase):
                error="No valid Microsoft connection found for the provided connection reference"
            )

+       # Parse site URL to get hostname and site path
+       site_info = self._parseSiteUrl(siteUrl)
+       if not site_info["hostname"] or not site_info["sitePath"]:
+           return self._createResult(
+               success=False,
+               data={},
+               error=f"Invalid SharePoint site URL: {siteUrl}"
+           )
+
+       # Get site ID
+       site_id = await self._getSiteId(connection["accessToken"], site_info["hostname"], site_info["sitePath"])
+       if not site_id:
+           return self._createResult(
+               success=False,
+               data={},
+               error="Failed to get SharePoint site ID"
+           )
+
+       # Process each document path
+       read_results = []
+
        for i, documentPath in enumerate(documentPaths):
            if i < len(chatDocuments):
                chatDocument = chatDocuments[i]
                fileId = chatDocument.fileId
+           try:
+               # Check if documentPath is actually a file ID (starts with 016GRP6V)
+               if documentPath.startswith('016GRP6V'):
+                   # Use file ID directly
+                   file_endpoint = f"sites/{site_id}/drive/items/{documentPath}"
+                   logger.info(f"Reading file by ID: {documentPath}")
+               else:
+                   # First, find the file by its path
+                   path_clean = documentPath.lstrip('/')
+                   file_endpoint = f"sites/{site_id}/drive/root:/{path_clean}"
+                   logger.info(f"Reading file by path: {path_clean}")

-               sharepoint_prompt = f"""
-               Simulate reading a document from Microsoft SharePoint.
+               # Get file metadata
+               file_info_result = await self._makeGraphApiCall(connection["accessToken"], file_endpoint)

-               Connection: {connection['id']}
-               Site URL: {siteUrl}
-               Document Path: {documentPath}
-               Include Metadata: {includeMetadata}
-               File ID: {fileId}
+               if "error" in file_info_result:
+                   read_results.append({
+                       "documentPath": documentPath,
+                       "error": f"File not found: {file_info_result['error']}",
+                       "content": None
+                   })
+                   continue

-               Please provide:
-               1. Document content and structure
-               2. File metadata and properties
-               3. SharePoint site information
-               4. Document permissions and sharing
-               5. Version history if available
-               """
+               file_id = file_info_result.get("id")
+               if not file_id:
+                   read_results.append({
+                       "documentPath": documentPath,
+                       "error": "Could not get file ID",
+                       "content": None
+                   })
+                   continue

-               document_data = await self.service.interfaceAiCalls.callAiTextAdvanced(sharepoint_prompt)
+               # Build result with metadata
+               result_item = {
+                   "documentPath": documentPath,
+                   "fileId": file_id,
+                   "fileName": file_info_result.get("name"),
+                   "size": file_info_result.get("size", 0),
+                   "createdDateTime": file_info_result.get("createdDateTime"),
+                   "lastModifiedDateTime": file_info_result.get("lastModifiedDateTime"),
+                   "webUrl": file_info_result.get("webUrl")
+               }
+
+               # Add metadata if requested
+               if includeMetadata:
+                   result_item["metadata"] = {
+                       "mimeType": file_info_result.get("file", {}).get("mimeType"),
+                       "downloadUrl": file_info_result.get("@microsoft.graph.downloadUrl"),
+                       "createdBy": file_info_result.get("createdBy", {}),
+                       "lastModifiedBy": file_info_result.get("lastModifiedBy", {}),
+                       "parentReference": file_info_result.get("parentReference", {})
+                   }
+
+               # Get file content if it's a readable format
+               mime_type = file_info_result.get("file", {}).get("mimeType", "")
+               if mime_type.startswith("text/") or mime_type in [
+                   "application/json", "application/xml", "application/javascript"
+               ]:
+                   # Download the file content
+                   content_endpoint = f"sites/{site_id}/drive/items/{file_id}/content"
+
+                   # For content download, we need to handle binary data
+                   try:
+                       async with aiohttp.ClientSession() as session:
+                           headers = {"Authorization": f"Bearer {connection['accessToken']}"}
+                           async with session.get(f"https://graph.microsoft.com/v1.0/{content_endpoint}", headers=headers) as response:
+                               if response.status == 200:
+                                   content = await response.text()
+                                   result_item["content"] = content
+                               else:
+                                   result_item["content"] = f"Could not download content: HTTP {response.status}"
+                   except Exception as e:
+                       result_item["content"] = f"Error downloading content: {str(e)}"
+               else:
+                   result_item["content"] = f"Binary file type ({mime_type}) - content not retrieved"
+
+               read_results.append(result_item)
+
+           except Exception as e:
+               logger.error(f"Error reading document {documentPath}: {str(e)}")
                read_results.append({
                    "documentPath": documentPath,
-                   "fileId": fileId,
-                   "documentContent": document_data
+                   "error": str(e),
+                   "content": None
                })

        result_data = {
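One detail worth noting in the content download above: Graph's `/content` endpoint answers with a redirect to a short-lived, pre-authenticated download URL, which aiohttp follows automatically. A pared-down sketch of that download path (placeholders throughout):

```python
import aiohttp

async def download_drive_item(access_token: str, site_id: str, item_id: str) -> bytes:
    # GET /sites/{site-id}/drive/items/{item-id}/content redirects to a
    # pre-authenticated URL; aiohttp follows redirects by default for GET.
    url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/drive/items/{item_id}/content"
    async with aiohttp.ClientSession() as session:
        async with session.get(url, headers={"Authorization": f"Bearer {access_token}"}) as resp:
            resp.raise_for_status()
            return await resp.read()
```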
@@ -306,7 +571,9 @@ class MethodSharepoint(MethodBase):
                error="No valid Microsoft connection found for the provided connection reference"
            )

-       # Get documents from reference
+       # Get documents from reference - ensure documentList is a list, not a string
+       if isinstance(documentList, str):
+           documentList = [documentList]  # Convert string to list
        chatDocuments = self.service.getChatDocumentsFromDocumentList(documentList)
        if not chatDocuments:
            return self._createResult(

@@ -315,46 +582,107 @@ class MethodSharepoint(MethodBase):
                error="No documents found for the provided reference"
            )

+       # Parse site URL to get hostname and site path
+       site_info = self._parseSiteUrl(siteUrl)
+       if not site_info["hostname"] or not site_info["sitePath"]:
+           return self._createResult(
+               success=False,
+               data={},
+               error=f"Invalid SharePoint site URL: {siteUrl}"
+           )
+
+       # Get site ID
+       site_id = await self._getSiteId(connection["accessToken"], site_info["hostname"], site_info["sitePath"])
+       if not site_id:
+           return self._createResult(
+               success=False,
+               data={},
+               error="Failed to get SharePoint site ID"
+           )
+
        # Process each document upload
        upload_results = []

        for i, (documentPath, fileName) in enumerate(zip(documentPaths, fileNames)):
-           if i < len(chatDocuments):
-               chatDocument = chatDocuments[i]
-               fileId = chatDocument.fileId
-               file_data = self.service.getFileData(fileId)
-
-               if not file_data:
-                   logger.warning(f"File data not found for fileId: {fileId}")
-                   continue
-
-               # Create SharePoint upload prompt
-               upload_prompt = f"""
-               Simulate uploading a document to Microsoft SharePoint.
-
-               Connection: {connection['id']}
-               Site URL: {siteUrl}
-               Document Path: {documentPath}
-               File Name: {fileName}
-               File ID: {fileId}
-               File Size: {len(file_data)} bytes
-
-               Please provide:
-               1. Upload confirmation and status
-               2. File metadata and properties
-               3. SharePoint site integration details
-               4. Permission and sharing settings
-               5. Version control information
-               """
-
-               # Use AI to simulate SharePoint upload
-               upload_result = await self.service.interfaceAiCalls.callAiTextAdvanced(upload_prompt)
-
+           try:
+               if i < len(chatDocuments):
+                   chatDocument = chatDocuments[i]
+                   fileId = chatDocument.fileId
+                   file_data = self.service.getFileData(fileId)
+
+                   if not file_data:
+                       logger.warning(f"File data not found for fileId: {fileId}")
+                       upload_results.append({
+                           "documentPath": documentPath,
+                           "fileName": fileName,
+                           "fileId": fileId,
+                           "error": "File data not found",
+                           "uploadStatus": "failed"
+                       })
+                       continue
+
+                   # Prepare upload path
+                   upload_path = documentPath.rstrip('/') + '/' + fileName
+                   upload_path_clean = upload_path.lstrip('/')
+
+                   # Upload endpoint for small files (< 4MB)
+                   if len(file_data) < 4 * 1024 * 1024:  # 4MB
+                       upload_endpoint = f"sites/{site_id}/drive/root:/{upload_path_clean}:/content"
+
+                       # Upload the file
+                       upload_result = await self._makeGraphApiCall(
+                           connection["accessToken"],
+                           upload_endpoint,
+                           method="PUT",
+                           data=file_data
+                       )
+
+                       if "error" in upload_result:
+                           upload_results.append({
+                               "documentPath": documentPath,
+                               "fileName": fileName,
+                               "fileId": fileId,
+                               "error": upload_result["error"],
+                               "uploadStatus": "failed"
+                           })
+                       else:
+                           upload_results.append({
+                               "documentPath": documentPath,
+                               "fileName": fileName,
+                               "fileId": fileId,
+                               "uploadStatus": "success",
+                               "sharepointFileId": upload_result.get("id"),
+                               "webUrl": upload_result.get("webUrl"),
+                               "size": upload_result.get("size"),
+                               "createdDateTime": upload_result.get("createdDateTime")
+                           })
+                   else:
+                       # For large files, we would need to implement resumable upload
+                       # For now, return an error for large files
+                       upload_results.append({
+                           "documentPath": documentPath,
+                           "fileName": fileName,
+                           "fileId": fileId,
+                           "error": f"File too large ({len(file_data)} bytes). Files larger than 4MB require resumable upload (not implemented).",
+                           "uploadStatus": "failed"
+                       })
+               else:
+                   upload_results.append({
+                       "documentPath": documentPath,
+                       "fileName": fileName,
+                       "fileId": None,
+                       "error": "No corresponding chat document found",
+                       "uploadStatus": "failed"
+                   })
+
+           except Exception as e:
+               logger.error(f"Error uploading document {fileName}: {str(e)}")
                upload_results.append({
                    "documentPath": documentPath,
                    "fileName": fileName,
-                   "fileId": fileId,
-                   "uploadResult": upload_result
+                   "fileId": fileId if i < len(chatDocuments) else None,
+                   "error": str(e),
+                   "uploadStatus": "failed"
                })

        # Create result data
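The large-file branch above stops at an error. For reference, Graph's documented path for files over 4 MB is an upload session: create it with `createUploadSession`, then PUT the bytes in chunks with `Content-Range` headers. A rough sketch under those assumptions, not wired into this class:

```python
import aiohttp

async def upload_large_file(access_token: str, site_id: str, path: str, data: bytes) -> dict:
    # Resumable upload sketch: POST .../createUploadSession returns an
    # "uploadUrl" (pre-authenticated, no bearer header needed on the PUTs),
    # then sequential chunks are PUT with Content-Range. Chunk sizes must be
    # multiples of 320 KiB; 5 MiB satisfies that.
    chunk_size = 5 * 1024 * 1024
    create_url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/drive/root:/{path}:/createUploadSession"
    headers = {"Authorization": f"Bearer {access_token}"}
    async with aiohttp.ClientSession() as session:
        async with session.post(create_url, headers=headers, json={}) as resp:
            upload_url = (await resp.json())["uploadUrl"]
        result = {}
        for start in range(0, len(data), chunk_size):
            chunk = data[start:start + chunk_size]
            end = start + len(chunk) - 1
            content_range = {"Content-Range": f"bytes {start}-{end}/{len(data)}"}
            async with session.put(upload_url, headers=content_range, data=chunk) as resp:
                result = await resp.json()  # final chunk returns the driveItem
        return result
```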
@@ -423,7 +751,7 @@ class MethodSharepoint(MethodBase):
        connectionReference = parameters.get("connectionReference")
        siteUrl = parameters.get("siteUrl")
        folderPaths = parameters.get("folderPaths")
-       includeSubfolders = parameters.get("includeSubfolders", False)
+       includeSubfolders = parameters.get("includeSubfolders", False)  # Default to False for better UX
        expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])

        if not connectionReference or not siteUrl or not folderPaths:

@@ -442,34 +770,148 @@ class MethodSharepoint(MethodBase):
                error="No valid Microsoft connection found for the provided connection reference"
            )

+       logger.info(f"Starting SharePoint listDocuments for site: {siteUrl}")
+       logger.debug(f"Connection ID: {connection['id']}")
+       logger.debug(f"Folder paths: {folderPaths}")
+
        # Parse site URL to get hostname and site path
        site_info = self._parseSiteUrl(siteUrl)
+       logger.info(f"Parsed site info - hostname: {site_info['hostname']}, sitePath: {site_info['sitePath']}")

        if not site_info["hostname"] or not site_info["sitePath"]:
+           logger.error(f"Failed to parse site URL: {siteUrl}")
            return self._createResult(
                success=False,
                data={},
                error=f"Invalid SharePoint site URL: {siteUrl}"
            )

        # Get site ID
+       logger.info(f"Getting site ID for hostname: {site_info['hostname']}, path: {site_info['sitePath']}")
        site_id = await self._getSiteId(connection["accessToken"], site_info["hostname"], site_info["sitePath"])
+       logger.info(f"Site ID result: {site_id}")

        if not site_id:
            return self._createResult(
                success=False,
                data={},
                error="Failed to get SharePoint site ID"
            )

        # Process each folder path
        list_results = []

        for folderPath in folderPaths:
-           # Create SharePoint listing prompt
-           list_prompt = f"""
-           Simulate listing documents in Microsoft SharePoint folder.
-
-           Connection: {connection['id']}
-           Site URL: {siteUrl}
-           Folder Path: {folderPath}
-           Include Subfolders: {includeSubfolders}
-
-           Please provide:
-           1. List of documents and folders
-           2. File metadata and properties
-           3. Folder structure and hierarchy
-           4. Permission and sharing information
-           5. Document statistics and summary
-           """
-
-           # Use AI to simulate SharePoint listing
-           list_result = await self.service.interfaceAiCalls.callAiTextAdvanced(list_prompt)
-
-           list_results.append({
-               "folderPath": folderPath,
-               "listResult": list_result
-           })
+           try:
+               # Determine the endpoint based on folder path
+               if folderPath in ["/", ""]:
+                   # Root folder
+                   endpoint = f"sites/{site_id}/drive/root/children"
+               else:
+                   # Specific folder - remove leading slash if present
+                   folder_path_clean = folderPath.lstrip('/')
+                   endpoint = f"sites/{site_id}/drive/root:/{folder_path_clean}:/children"
+
+               # Make the API call to list folder contents
+               api_result = await self._makeGraphApiCall(connection["accessToken"], endpoint)
+
+               if "error" in api_result:
+                   list_results.append({
+                       "folderPath": folderPath,
+                       "error": api_result["error"],
+                       "items": []
+                   })
+                   continue
+
+               # Process the results
+               items = api_result.get("value", [])
+               processed_items = []
+
+               for item in items:
+                   item_info = {
+                       "id": item.get("id"),
+                       "name": item.get("name"),
+                       "size": item.get("size", 0),
+                       "createdDateTime": item.get("createdDateTime"),
+                       "lastModifiedDateTime": item.get("lastModifiedDateTime"),
+                       "webUrl": item.get("webUrl"),
+                       "type": "folder" if "folder" in item else "file"
+                   }
+
+                   # Add file-specific information
+                   if "file" in item:
+                       item_info.update({
+                           "mimeType": item["file"].get("mimeType"),
+                           "downloadUrl": item.get("@microsoft.graph.downloadUrl")
+                       })
+
+                   # Add folder-specific information
+                   if "folder" in item:
+                       item_info.update({
+                           "childCount": item["folder"].get("childCount", 0)
+                       })
+
+                   processed_items.append(item_info)
+
+               # If include subfolders is enabled, get ONLY direct subfolder contents (1 level deep only)
+               if includeSubfolders:
+                   logger.info(f"Including subfolders - processing {len([item for item in processed_items if item['type'] == 'folder'])} folders")
+                   subfolder_count = 0
+                   max_subfolders = 10  # Limit to prevent infinite loops
+
+                   for item in processed_items[:]:  # Use slice to avoid modifying list during iteration
+                       if item["type"] == "folder" and subfolder_count < max_subfolders:
+                           subfolder_count += 1
+                           subfolder_path = f"{folderPath.rstrip('/')}/{item['name']}"
+                           subfolder_endpoint = f"sites/{site_id}/drive/items/{item['id']}/children"
+
+                           logger.debug(f"Getting contents of subfolder: {item['name']}")
+                           subfolder_result = await self._makeGraphApiCall(connection["accessToken"], subfolder_endpoint)
+                           if "error" not in subfolder_result:
+                               subfolder_items = subfolder_result.get("value", [])
+                               logger.debug(f"Found {len(subfolder_items)} items in subfolder {item['name']}")
+
+                               for subfolder_item in subfolder_items:
+                                   # Only add files and direct subfolders, NO RECURSION
+                                   subfolder_item_info = {
+                                       "id": subfolder_item.get("id"),
+                                       "name": subfolder_item.get("name"),
+                                       "size": subfolder_item.get("size", 0),
+                                       "createdDateTime": subfolder_item.get("createdDateTime"),
+                                       "lastModifiedDateTime": subfolder_item.get("lastModifiedDateTime"),
+                                       "webUrl": subfolder_item.get("webUrl"),
+                                       "type": "folder" if "folder" in subfolder_item else "file",
+                                       "parentPath": subfolder_path
+                                   }
+
+                                   if "file" in subfolder_item:
+                                       subfolder_item_info.update({
+                                           "mimeType": subfolder_item["file"].get("mimeType"),
+                                           "downloadUrl": subfolder_item.get("@microsoft.graph.downloadUrl")
+                                       })
+
+                                   processed_items.append(subfolder_item_info)
+                           else:
+                               logger.warning(f"Failed to get contents of subfolder {item['name']}: {subfolder_result.get('error')}")
+                       elif subfolder_count >= max_subfolders:
+                           logger.warning(f"Reached maximum subfolder limit ({max_subfolders}), skipping remaining folders")
+                           break
+
+                   logger.info(f"Processed {subfolder_count} subfolders, total items: {len(processed_items)}")
+
+               list_results.append({
+                   "folderPath": folderPath,
+                   "itemCount": len(processed_items),
+                   "items": processed_items
+               })
+
+           except Exception as e:
+               logger.error(f"Error listing folder {folderPath}: {str(e)}")
+               list_results.append({
+                   "folderPath": folderPath,
+                   "error": str(e),
+                   "items": []
+               })

        # Create result data
        result_data = {
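Stripped of the scope and subfolder-limit bookkeeping, the listing boils down to one Graph call per folder. A minimal sketch, with placeholders for the site ID and token:

```python
import aiohttp

async def list_folder(access_token: str, site_id: str, folder_path: str = "") -> list:
    # Root listing uses /drive/root/children; a named folder uses the
    # path-addressed form /drive/root:/{path}:/children, as above.
    if folder_path.strip('/'):
        endpoint = f"drive/root:/{folder_path.strip('/')}:/children"
    else:
        endpoint = "drive/root/children"
    url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/{endpoint}"
    async with aiohttp.ClientSession() as session:
        async with session.get(url, headers={"Authorization": f"Bearer {access_token}"}) as resp:
            return (await resp.json()).get("value", [])
```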
@@ -474,46 +474,107 @@ class MethodWeb(MethodBase):
        return approaches

    @action
-   def search(self, parameters: Dict[str, Any]) -> ActionResult:
+   async def search(self, parameters: Dict[str, Any]) -> ActionResult:
        """
        Perform a web search and output a .txt file with a plain list of URLs (one per line).

        Parameters:
            query (str): Search query to perform
            maxResults (int, optional): Maximum number of results (default: 10)
            filter (str, optional): Filter criteria for search results
+           expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description
        """
-       query = parameters.get("query")
-       max_results = parameters.get("maxResults", 10)
-       filter_param = parameters.get("filter")
-       if not query:
-           return ActionResult.failure("Search query is required")
-       if not self.srcApikey:
-           return ActionResult.failure("SerpAPI key not configured")
-       userLanguage = "en"
-       if hasattr(self.service, 'user') and hasattr(self.service.user, 'language'):
-           userLanguage = self.service.user.language
-       params = {
-           "engine": self.srcEngine,
-           "q": query,
-           "api_key": self.srcApikey,
-           "num": min(max_results, self.maxResults),
-           "hl": userLanguage
-       }
-       if filter_param:
-           params["filter"] = filter_param
+       try:
+           query = parameters.get("query")
+           max_results = parameters.get("maxResults", 10)
+           filter_param = parameters.get("filter")
+           expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
+
+           if not query:
+               return self._createResult(
+                   success=False,
+                   data={},
+                   error="Search query is required"
+               )
+
+           if not self.srcApikey:
+               return self._createResult(
+                   success=False,
+                   data={},
+                   error="SerpAPI key not configured"
+               )
+
+           userLanguage = "en"
+           if hasattr(self.service, 'user') and hasattr(self.service.user, 'language'):
+               userLanguage = self.service.user.language
+
+           params = {
+               "engine": self.srcEngine,
+               "q": query,
+               "api_key": self.srcApikey,
+               "num": min(max_results, self.maxResults),
+               "hl": userLanguage
+           }
+
+           if filter_param:
+               params["filter"] = filter_param

            response = requests.get("https://serpapi.com/search", params=params, timeout=self.timeout)
            response.raise_for_status()
            search_results = response.json()
            results = []

            if "organic_results" in search_results:
                results = search_results["organic_results"][:max_results]

            # Assume 'results' is a list of dicts with 'url' keys
            urls = [item['url'] for item in results if 'url' in item and isinstance(item['url'], str)]
            url_list_str = "\n".join(urls)
-           filename = f"web_search_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.txt"
-           with open(filename, "w", encoding="utf-8") as f:
-               f.write(url_list_str)
-           return ActionResult.success(documents=[filename], resultLabel=parameters.get("resultLabel"))

+           # Determine output format based on expected formats
+           output_extension = ".txt"  # Default
+           output_mime_type = "text/plain"  # Default
+
+           if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
+               # Use the first expected format
+               expected_format = expectedDocumentFormats[0]
+               output_extension = expected_format.get("extension", ".txt")
+               output_mime_type = expected_format.get("mimeType", "text/plain")
+               logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
+           else:
+               logger.info("No expected format specified, using default .txt format")
+
+           # Create result data
+           result_data = {
+               "query": query,
+               "maxResults": max_results,
+               "filter": filter_param,
+               "totalResults": len(urls),
+               "urls": urls,
+               "urlList": url_list_str,
+               "timestamp": datetime.now(UTC).isoformat()
+           }
+
+           return self._createResult(
+               success=True,
+               data={
+                   "documents": [
+                       {
+                           "documentName": f"web_search_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}{output_extension}",
+                           "documentData": result_data,
+                           "mimeType": output_mime_type
+                       }
+                   ]
+               }
+           )

        except Exception as e:
            logger.error(f"Error searching web: {str(e)}")
-           return ActionResult.failure(error=str(e))
+           return self._createResult(
+               success=False,
+               data={},
+               error=str(e)
+           )
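A note on the result keys: SerpAPI's `organic_results` entries typically carry the page address under `link` rather than `url`, so the comprehension above may come back empty. A tolerant extraction could try both keys; a sketch with placeholder query and API key, not taken from this codebase:

```python
import requests

params = {
    "engine": "google",             # placeholder; the class uses self.srcEngine
    "q": "example query",
    "api_key": "YOUR_SERPAPI_KEY",  # placeholder
    "num": 10,
    "hl": "en",
}
results = requests.get("https://serpapi.com/search", params=params, timeout=30).json()
urls = [
    item.get("link") or item.get("url")
    for item in results.get("organic_results", [])
    if isinstance(item.get("link") or item.get("url"), str)
]
```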
    def _selenium_extract_content(self, url: str) -> Optional[str]:
        """Use Selenium to fetch and extract main content from a JS-heavy page."""

@@ -540,70 +601,126 @@ class MethodWeb(MethodBase):
        return None

    @action
-   def crawl(self, parameters: Dict[str, Any]) -> ActionResult:
+   async def crawl(self, parameters: Dict[str, Any]) -> ActionResult:
        """
        Crawl a list of URLs provided in a document (.txt) with URLs separated by newline, comma, or semicolon.

        Parameters:
            document (str): Document containing URL list
            expectedDocumentFormats (list, optional): Expected document formats with extension, mimeType, description
        """
-       document = parameters.get("document")
-       if not document:
-           return ActionResult.failure("No document with URL list provided.")
-       # Read the document content
-       with open(document, "r", encoding="utf-8") as f:
-           content = f.read()
-       # Split URLs by newline, comma, or semicolon
-       import re
-       urls = re.split(r'[\n,;]+', content)
-       urls = [u.strip() for u in urls if u.strip()]
-       if not urls:
-           return ActionResult.failure("No valid URLs provided in the document.")
-       crawl_results = []
-       for url in urls:
-           try:
-               logger.info(f"Crawling URL: {url}")
-               # Try Selenium first
-               content = self._selenium_extract_content(url)
-               if not content:
-                   # Fallback to requests/BeautifulSoup
-                   soup = self._readUrl(url)
-                   content = self._extractMainContent(soup)
-               title = self._extractTitle(BeautifulSoup(content, 'html.parser'), url) if content else "No title"
-               meta_info = {"url": url, "title": title}
-               content_length = len(content) if content else 0
-               crawl_results.append({
-                   "url": url,
-                   "title": title,
-                   "content": content,
-                   "content_length": content_length,
-                   "meta_info": meta_info,
-                   "timestamp": datetime.now(UTC).isoformat()
-               })
-               logger.info(f"Successfully crawled {url} - extracted {content_length} characters")
-           except Exception as e:
-               logger.error(f"Error crawling web page {url}: {str(e)}")
-               crawl_results.append({
-                   "error": str(e),
-                   "url": url,
-                   "suggestions": [
-                       "Check if the URL is accessible",
-                       "Try with a different user agent",
-                       "Verify the site doesn't block automated access"
+       try:
+           document = parameters.get("document")
+           expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
+
+           if not document:
+               return self._createResult(
+                   success=False,
+                   data={},
+                   error="No document with URL list provided."
+               )
+
+           # Read the document content
+           with open(document, "r", encoding="utf-8") as f:
+               content = f.read()
+
+           # Split URLs by newline, comma, or semicolon
+           import re
+           urls = re.split(r'[\n,;]+', content)
+           urls = [u.strip() for u in urls if u.strip()]
+
+           if not urls:
+               return self._createResult(
+                   success=False,
+                   data={},
+                   error="No valid URLs provided in the document."
+               )
+
+           crawl_results = []
+           for url in urls:
+               try:
+                   logger.info(f"Crawling URL: {url}")
+                   # Try Selenium first
+                   content = self._selenium_extract_content(url)
+                   if not content:
+                       # Fallback to requests/BeautifulSoup
+                       soup = self._readUrl(url)
+                       content = self._extractMainContent(soup)
+
+                   title = self._extractTitle(BeautifulSoup(content, 'html.parser'), url) if content else "No title"
+                   meta_info = {"url": url, "title": title}
+                   content_length = len(content) if content else 0
+
+                   crawl_results.append({
+                       "url": url,
+                       "title": title,
+                       "content": content,
+                       "content_length": content_length,
+                       "meta_info": meta_info,
+                       "timestamp": datetime.now(UTC).isoformat()
+                   })
+                   logger.info(f"Successfully crawled {url} - extracted {content_length} characters")
+
+               except Exception as e:
+                   logger.error(f"Error crawling web page {url}: {str(e)}")
+                   crawl_results.append({
+                       "error": str(e),
+                       "url": url,
+                       "suggestions": [
+                           "Check if the URL is accessible",
+                           "Try with a different user agent",
+                           "Verify the site doesn't block automated access"
                        ]
                    })

+           # Determine output format based on expected formats
+           output_extension = ".json"  # Default
+           output_mime_type = "application/json"  # Default
+
+           if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
+               # Use the first expected format
+               expected_format = expectedDocumentFormats[0]
+               output_extension = expected_format.get("extension", ".json")
+               output_mime_type = expected_format.get("mimeType", "application/json")
+               logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
+           else:
+               logger.info("No expected format specified, using default .json format")
+
+           result_data = {
+               "urls": urls,
+               "maxDepth": 1,  # Simplified crawl
+               "includeImages": False,
+               "followLinks": True,
+               "crawlResults": crawl_results,
+               "summary": {
+                   "total_urls": len(urls),
+                   "successful_crawls": len([r for r in crawl_results if "error" not in r]),
+                   "failed_crawls": len([r for r in crawl_results if "error" in r]),
+                   "total_content_chars": sum([r.get("content_length", 0) for r in crawl_results if "content_length" in r])
+               },
+               "timestamp": datetime.now(UTC).isoformat()
+           }
+
+           return self._createResult(
+               success=True,
+               data={
+                   "documents": [
+                       {
+                           "documentName": f"web_crawl_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}{output_extension}",
+                           "documentData": result_data,
+                           "mimeType": output_mime_type
+                       }
+                   ]
+               }
+           )
-           result_data = {
-               "urls": urls,
-               "maxDepth": 1,  # Simplified crawl
-               "includeImages": False,
-               "followLinks": True,
-               "crawlResults": crawl_results,
-               "summary": {
-                   "total_urls": len(urls),
-                   "successful_crawls": len([r for r in crawl_results if "error" not in r]),
-                   "failed_crawls": len([r for r in crawl_results if "error" in r]),
-                   "total_content_chars": sum([r.get("content_length", 0) for r in crawl_results if "content_length" in r])
-               },
-               "timestamp": datetime.now(UTC).isoformat()
-           }
-           return ActionResult.success(result=result_data, resultLabel=parameters.get("resultLabel"))

        except Exception as e:
            logger.error(f"Error crawling web pages: {str(e)}")
            return self._createResult(
                success=False,
                data={},
                error=str(e)
            )
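The splitter used in `crawl` accepts any mix of newlines, commas, and semicolons as delimiters. A quick illustration of the regex on a made-up input:

```python
import re

content = "https://a.example\nhttps://b.example, https://c.example;https://d.example"
urls = [u.strip() for u in re.split(r'[\n,;]+', content) if u.strip()]
print(urls)
# ['https://a.example', 'https://b.example', 'https://c.example', 'https://d.example']
```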
    @action
    async def scrape(self, parameters: Dict[str, Any]) -> ActionResult:

@@ -34,8 +34,7 @@ async def get_connections(
    interface = getInterface(currentUser)

    # Clear connections cache to ensure fresh data
-   if "connections" in interface.db._tablesCache:
-       del interface.db._tablesCache["connections"]
+   interface.db.clearTableCache("connections")

    if currentUser.privilege in ['admin', 'sysadmin']:
        # Admins can see all connections

@@ -61,7 +60,7 @@ async def create_connection(
    connection_data: Dict[str, Any] = Body(...),
    currentUser: User = Depends(getCurrentUser)
) -> UserConnection:
-   """Create a new connection for the current user or update existing one"""
+   """Create a new connection for the current user"""
    try:
        interface = getInterface(currentUser)

@@ -86,57 +85,31 @@ async def create_connection(
                detail="User not found"
            )

-       # Check for existing connection of the same authority
-       existing_connection = None
-       connections = interface.getUserConnections(currentUser.id)
-       for conn in connections:
-           if conn.authority == authority:
-               existing_connection = conn
-               break
+       # Always create a new connection with PENDING status
+       connection = interface.addUserConnection(
+           userId=currentUser.id,
+           authority=authority,
+           externalId="",  # Will be set after OAuth
+           externalUsername="",  # Will be set after OAuth
+           status=ConnectionStatus.PENDING  # Start with PENDING status
+       )

-       if existing_connection:
-           # Update existing connection
-           existing_connection.status = ConnectionStatus.PENDING
-           existing_connection.lastChecked = datetime.now()
-           existing_connection.externalId = ""  # Reset for new OAuth flow
-           existing_connection.externalUsername = ""  # Reset for new OAuth flow
-
-           # Convert connection to dict and ensure datetime fields are serialized
-           connection_dict = existing_connection.to_dict()
-           for field in ['connectedAt', 'lastChecked', 'expiresAt']:
-               if field in connection_dict and connection_dict[field] is not None:
-                   if isinstance(connection_dict[field], datetime):
-                       connection_dict[field] = connection_dict[field].isoformat()
-                   elif isinstance(connection_dict[field], (int, float)):
-                       connection_dict[field] = datetime.fromtimestamp(connection_dict[field]).isoformat()
-
-           # Update connection record directly
-           interface.db.recordModify("connections", existing_connection.id, connection_dict)
-
-           return existing_connection
-       else:
-           # Create new connection with PENDING status
-           connection = interface.addUserConnection(
-               userId=currentUser.id,
-               authority=authority,
-               externalId="",  # Will be set after OAuth
-               externalUsername="",  # Will be set after OAuth
-               status=ConnectionStatus.PENDING  # Start with PENDING status
-           )
-
-           # Convert connection to dict and ensure datetime fields are serialized
-           connection_dict = connection.to_dict()
-           for field in ['connectedAt', 'lastChecked', 'expiresAt']:
-               if field in connection_dict and connection_dict[field] is not None:
-                   if isinstance(connection_dict[field], datetime):
-                       connection_dict[field] = connection_dict[field].isoformat()
-                   elif isinstance(connection_dict[field], (int, float)):
-                       connection_dict[field] = datetime.fromtimestamp(connection_dict[field]).isoformat()
-
-           # Save connection record
-           interface.db.recordModify("connections", connection.id, connection_dict)
-
-           return connection
+       # Convert connection to dict and ensure datetime fields are serialized
+       connection_dict = connection.to_dict()
+       for field in ['connectedAt', 'lastChecked', 'expiresAt']:
+           if field in connection_dict and connection_dict[field] is not None:
+               if isinstance(connection_dict[field], datetime):
+                   connection_dict[field] = connection_dict[field].isoformat()
+               elif isinstance(connection_dict[field], (int, float)):
+                   connection_dict[field] = datetime.fromtimestamp(connection_dict[field]).isoformat()

+       # Save connection record
+       interface.db.recordModify("connections", connection.id, connection_dict)
+
+       # Clear cache to ensure fresh data
+       interface.db.clearTableCache("connections")
+
+       return connection

    except HTTPException:
        raise
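The datetime-serialization loop above reappears verbatim in the update handler that follows; if a third copy is ever needed, a small helper would keep the handlers in sync. A sketch, not part of this changeset:

```python
from datetime import datetime
from typing import Any, Dict

def serialize_datetimes(record: Dict[str, Any], fields=('connectedAt', 'lastChecked', 'expiresAt')) -> Dict[str, Any]:
    # Mirrors the inline loops in create_connection/update_connection:
    # datetime objects become ISO strings; epoch numbers are converted first.
    for field in fields:
        value = record.get(field)
        if value is None:
            continue
        if isinstance(value, datetime):
            record[field] = value.isoformat()
        elif isinstance(value, (int, float)):
            record[field] = datetime.fromtimestamp(value).isoformat()
    return record
```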
|
@ -147,6 +120,80 @@ async def create_connection(
|
|||
detail=f"Failed to create connection: {str(e)}"
|
||||
)
|
||||
|
||||
@router.put("/{connectionId}", response_model=UserConnection)
|
||||
@limiter.limit("10/minute")
|
||||
async def update_connection(
|
||||
request: Request,
|
||||
connectionId: str = Path(..., description="The ID of the connection to update"),
|
||||
connection_data: Dict[str, Any] = Body(...),
|
||||
currentUser: User = Depends(getCurrentUser)
|
||||
) -> UserConnection:
|
||||
"""Update an existing connection"""
|
||||
try:
|
||||
interface = getInterface(currentUser)
|
||||
|
||||
# Find the connection
|
||||
connection = None
|
||||
if currentUser.privilege in ['admin', 'sysadmin']:
|
||||
# Admins can update any connection
|
||||
users = interface.getAllUsers()
|
||||
for user in users:
|
||||
connections = interface.getUserConnections(user.id)
|
||||
for conn in connections:
|
||||
if conn.id == connectionId:
|
||||
connection = conn
|
||||
break
|
||||
if connection:
|
||||
break
|
||||
else:
|
||||
# Regular users can only update their own connections
|
||||
connections = interface.getUserConnections(currentUser.id)
|
||||
for conn in connections:
|
||||
if conn.id == connectionId:
|
||||
connection = conn
|
||||
break
|
||||
|
||||
if not connection:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail="Connection not found"
|
||||
)
|
||||
|
||||
# Update connection fields
|
||||
for field, value in connection_data.items():
|
||||
if hasattr(connection, field):
|
||||
setattr(connection, field, value)
|
||||
|
||||
# Update lastChecked timestamp
|
||||
connection.lastChecked = datetime.now()
|
||||
|
||||
# Convert connection to dict and ensure datetime fields are serialized
|
||||
connection_dict = connection.to_dict()
|
||||
for field in ['connectedAt', 'lastChecked', 'expiresAt']:
|
||||
if field in connection_dict and connection_dict[field] is not None:
|
||||
if isinstance(connection_dict[field], datetime):
|
||||
connection_dict[field] = connection_dict[field].isoformat()
|
||||
elif isinstance(connection_dict[field], (int, float)):
|
||||
connection_dict[field] = datetime.fromtimestamp(connection_dict[field]).isoformat()
|
||||
|
||||
# Update connection
|
||||
interface.db.recordModify("connections", connectionId, connection_dict)
|
||||
|
||||
# Clear cache to ensure fresh data
|
||||
interface.db.clearTableCache("connections")
|
||||
|
||||
# Get updated connection
|
||||
return connection
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Error updating connection: {str(e)}")
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail=f"Failed to update connection: {str(e)}"
|
||||
)
|
||||
|
||||
@router.post("/{connectionId}/connect")
|
||||
@limiter.limit("10/minute")
|
||||
async def connect_service(
|
||||
|
|
@ -265,6 +312,9 @@ async def disconnect_service(
|
|||
# Update connection record
|
||||
interface.db.recordModify("connections", connectionId, connection.to_dict())
|
||||
|
||||
# Clear cache to ensure fresh data
|
||||
interface.db.clearTableCache("connections")
|
||||
|
||||
return {"message": "Service disconnected successfully"}
|
||||
|
||||
except HTTPException:
|
||||
|
|
|
|||
|
|
@ -8,10 +8,8 @@ import logging
|
|||
import json
|
||||
from typing import Dict, Any, Optional
|
||||
from datetime import datetime, timedelta
|
||||
from google.oauth2.credentials import Credentials
|
||||
from google_auth_oauthlib.flow import Flow
|
||||
from google.auth.transport.requests import Request as GoogleRequest
|
||||
from googleapiclient.discovery import build
|
||||
from requests_oauthlib import OAuth2Session
|
||||
import httpx
|
||||
|
||||
from modules.shared.configuration import APP_CONFIG
|
||||
from modules.interfaces.interfaceAppObjects import getInterface, getRootInterface
|
||||
|
|
@ -42,9 +40,25 @@ REDIRECT_URI = APP_CONFIG.get("Service_GOOGLE_REDIRECT_URI")
|
|||
SCOPES = [
|
||||
"https://www.googleapis.com/auth/gmail.readonly",
|
||||
"https://www.googleapis.com/auth/userinfo.profile",
|
||||
"https://www.googleapis.com/auth/userinfo.email"
|
||||
"https://www.googleapis.com/auth/userinfo.email",
|
||||
"openid"
|
||||
]
|
||||
|
||||
@router.get("/config")
|
||||
async def get_config():
|
||||
"""Debug endpoint to check Google OAuth configuration"""
|
||||
return {
|
||||
"client_id": CLIENT_ID,
|
||||
"client_secret": "***" if CLIENT_SECRET else None,
|
||||
"redirect_uri": REDIRECT_URI,
|
||||
"scopes": SCOPES,
|
||||
"config_loaded": bool(CLIENT_ID and CLIENT_SECRET and REDIRECT_URI),
|
||||
"config_source": {
|
||||
"client_id_from": "config.ini" if CLIENT_ID and "354925410565" in CLIENT_ID else "env file",
|
||||
"redirect_uri_from": "config.ini" if REDIRECT_URI and "gateway-int.poweron-center.net" in REDIRECT_URI else "env file"
|
||||
}
|
||||
}
|
||||
|
||||
@router.get("/login")
|
||||
@limiter.limit("5/minute")
|
||||
async def login(
|
||||
|
|
@@ -54,19 +68,30 @@ async def login(
) -> RedirectResponse:
    """Initiate Google login"""
    try:
-       # Create OAuth flow
-       flow = Flow.from_client_config(
-           {
-               "web": {
-                   "client_id": CLIENT_ID,
-                   "client_secret": CLIENT_SECRET,
-                   "auth_uri": "https://accounts.google.com/o/oauth2/auth",
-                   "token_uri": "https://oauth2.googleapis.com/token",
-                   "redirect_uris": [REDIRECT_URI]
-               }
-           },
-           scopes=SCOPES
-       )
+       # Debug: Log configuration values
+       logger.info(f"Google OAuth Configuration - CLIENT_ID: {CLIENT_ID}, REDIRECT_URI: {REDIRECT_URI}")
+
+       # Validate required configuration
+       if not CLIENT_ID:
+           logger.error("Google OAuth CLIENT_ID is not configured")
+           raise HTTPException(
+               status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+               detail="Google OAuth CLIENT_ID is not configured"
+           )
+
+       if not CLIENT_SECRET:
+           logger.error("Google OAuth CLIENT_SECRET is not configured")
+           raise HTTPException(
+               status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+               detail="Google OAuth CLIENT_SECRET is not configured"
+           )
+
+       if not REDIRECT_URI:
+           logger.error("Google OAuth REDIRECT_URI is not configured")
+           raise HTTPException(
+               status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+               detail="Google OAuth REDIRECT_URI is not configured"
+           )

        # Generate auth URL with state - use state as is if it's already JSON, otherwise create new state
        try:

@@ -80,14 +105,25 @@ async def login(
                "connectionId": connectionId
            })

-           # Generate auth URL with state
-           auth_url, _ = flow.authorization_url(
+           logger.info(f"Using state parameter: {state_param}")
+
+           # Use OAuth2Session directly - it works reliably
+           oauth = OAuth2Session(
+               client_id=CLIENT_ID,
+               redirect_uri=REDIRECT_URI,
+               scope=SCOPES
+           )
+
+           auth_url, state = oauth.authorization_url(
+               "https://accounts.google.com/o/oauth2/auth",
                access_type="offline",
                include_granted_scopes="true",
                state=state_param,
-               prompt="select_account"  # Force account selection screen
+               prompt="select_account"
            )

+           logger.info(f"Generated Google OAuth URL using OAuth2Session: {auth_url}")
+
            return RedirectResponse(auth_url)

        except Exception as e:

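The switch from `google_auth_oauthlib.flow.Flow` to a plain `requests_oauthlib.OAuth2Session` keeps the whole round trip down to two calls: one to build the consent URL, one to trade the returned code for tokens. A minimal sketch of that round trip outside FastAPI (the client credentials, redirect URI, and code value are placeholders):

```python
# Hedged sketch of the OAuth2Session round trip; all values are placeholders.
from requests_oauthlib import OAuth2Session

oauth = OAuth2Session(
    client_id="your-client-id",
    redirect_uri="http://localhost:8000/api/google/auth/callback",
    scope=["openid", "https://www.googleapis.com/auth/userinfo.email"],
)

# Step 1: send the user here; Google redirects back with ?code=...&state=...
auth_url, state = oauth.authorization_url(
    "https://accounts.google.com/o/oauth2/auth",
    access_type="offline",           # ask Google for a refresh token
    include_granted_scopes="true",
)

# Step 2: in the callback, trade the returned code for tokens.
token = oauth.fetch_token(
    "https://oauth2.googleapis.com/token",
    client_secret="your-client-secret",
    code="code-from-callback",
    include_client_id=True,
)
print(token["access_token"], token.get("refresh_token"))
```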
@@ -109,27 +145,54 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse

        logger.info(f"Processing Google auth callback: state_type={state_type}, connection_id={connection_id}, user_id={user_id}")

-       # Create OAuth flow
-       flow = Flow.from_client_config(
-           {
-               "web": {
-                   "client_id": CLIENT_ID,
-                   "client_secret": CLIENT_SECRET,
-                   "auth_uri": "https://accounts.google.com/o/oauth2/auth",
-                   "token_uri": "https://oauth2.googleapis.com/token",
-                   "redirect_uris": [REDIRECT_URI]
-               }
-           },
-           scopes=SCOPES
+       # Use OAuth2Session directly for token exchange
+       oauth = OAuth2Session(
+           client_id=CLIENT_ID,
+           redirect_uri=REDIRECT_URI
        )

-       # Exchange code for credentials
-       flow.fetch_token(code=code)
-       credentials = flow.credentials
+       # Get token using OAuth2Session
+       token_data = oauth.fetch_token(
+           "https://oauth2.googleapis.com/token",
+           client_secret=CLIENT_SECRET,
+           code=code,
+           include_client_id=True
+       )

-       # Get user info
-       user_info_response = flow.oauth2session.get("https://www.googleapis.com/oauth2/v2/userinfo")
-       user_info = user_info_response.json()
+       token_response = {
+           "access_token": token_data.get("access_token"),
+           "refresh_token": token_data.get("refresh_token", ""),
+           "token_type": token_data.get("token_type", "bearer"),
+           "expires_in": token_data.get("expires_in", 0)
+       }
+
+       logger.info("Successfully got token using OAuth2Session")
+
+       if not token_response.get("access_token"):
+           logger.error("Token acquisition failed: No access token received")
+           return HTMLResponse(
+               content="<html><body><h1>Authentication Failed</h1><p>Could not acquire token.</p></body></html>",
+               status_code=400
+           )
+
+       # Get user info using the access token
+       headers = {
+           'Authorization': f"Bearer {token_response['access_token']}",
+           'Content-Type': 'application/json'
+       }
+       async with httpx.AsyncClient() as client:
+           user_info_response = await client.get(
+               "https://www.googleapis.com/oauth2/v2/userinfo",
+               headers=headers
+           )
+       if user_info_response.status_code != 200:
+           logger.error(f"Failed to get user info: {user_info_response.text}")
+           raise HTTPException(
+               status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+               detail="Failed to get user info from Google"
+           )
+       user_info = user_info_response.json()
+       logger.info(f"Got user info from Google: {user_info.get('email')}")

        if state_type == "login":
            # Handle login flow

@@ -152,10 +215,10 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse
            token = Token(
                userId=user.id,  # Use local user's ID
                authority=AuthAuthority.GOOGLE,
-               tokenAccess=credentials.token,
-               tokenRefresh=credentials.refresh_token,
-               tokenType=credentials.token_type,
-               expiresAt=credentials.expiry.timestamp() if credentials.expiry else None,
+               tokenAccess=token_response["access_token"],
+               tokenRefresh=token_response.get("refresh_token", ""),
+               tokenType=token_response.get("token_type", "bearer"),
+               expiresAt=datetime.now().timestamp() + token_response.get("expires_in", 0),
                createdAt=datetime.now()
            )

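Because the raw token response only carries a relative `expires_in`, the absolute expiry is computed once at exchange time. The arithmetic in isolation, with an illustrative value:

```python
from datetime import datetime, timedelta

expires_in = 3599  # seconds, as typically returned by Google's token endpoint
expiresAt = datetime.now().timestamp() + expires_in          # Unix-timestamp form, stored on the Token
expires_dt = datetime.now() + timedelta(seconds=expires_in)  # datetime form, stored on the connection
```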
@@ -173,7 +236,7 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse
                    if (window.opener) {{
                        window.opener.postMessage({{
                            type: 'google_auth_success',
-                           access_token: {json.dumps(credentials.token)},
+                           access_token: {json.dumps(token_response["access_token"])},
                            token_data: {json.dumps(token.to_dict())}
                        }}, '*');
                    }}

@@ -261,7 +324,7 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse
            # Update connection with external service details
            connection.status = ConnectionStatus.ACTIVE
            connection.lastChecked = datetime.now()
-           connection.expiresAt = credentials.expiry if credentials.expiry else None
+           connection.expiresAt = datetime.now() + timedelta(seconds=token_response.get("expires_in", 0))
            connection.externalId = user_info.get("id")
            connection.externalUsername = user_info.get("email")
            connection.externalEmail = user_info.get("email")

@@ -269,14 +332,17 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse
            # Update connection record directly
            rootInterface.db.recordModify("connections", connection_id, connection.to_dict())

+           # Clear cache to ensure fresh data
+           rootInterface.db.clearTableCache("connections")
+
            # Save token
            token = Token(
                userId=user.id,  # Use local user's ID
                authority=AuthAuthority.GOOGLE,
-               tokenAccess=credentials.token,
-               tokenRefresh=credentials.refresh_token,
-               tokenType=credentials.token_type,
-               expiresAt=credentials.expiry.timestamp() if credentials.expiry else None,
+               tokenAccess=token_response["access_token"],
+               tokenRefresh=token_response.get("refresh_token", ""),
+               tokenType=token_response.get("token_type", "bearer"),
+               expiresAt=datetime.now().timestamp() + token_response.get("expires_in", 0),
                createdAt=datetime.now()
            )
            interface.saveToken(token)

@@ -296,7 +362,7 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse
                            status: 'connected',
                            type: 'google',
                            lastChecked: '{datetime.now().isoformat()}',
-                           expiresAt: '{credentials.expiry.isoformat() if credentials.expiry else None}'
+                           expiresAt: '{(datetime.now() + timedelta(seconds=token_response.get("expires_in", 0))).isoformat()}'
                        }}
                    }}, '*');
                    // Wait for message to be sent before closing

@@ -39,7 +39,12 @@ CLIENT_SECRET = APP_CONFIG.get("Service_MSFT_CLIENT_SECRET")
TENANT_ID = APP_CONFIG.get("Service_MSFT_TENANT_ID", "common")
REDIRECT_URI = APP_CONFIG.get("Service_MSFT_REDIRECT_URI")
AUTHORITY = f"https://login.microsoftonline.com/{TENANT_ID}"
-SCOPES = ["Mail.ReadWrite", "User.Read"]
+SCOPES = [
+    "Mail.ReadWrite",          # Read and write mail
+    "Mail.Send",               # Send mail
+    "Mail.ReadWrite.Shared",   # Access shared mailboxes
+    "User.Read"                # Read user profile
+]

@router.get("/login")
@limiter.limit("5/minute")

@@ -69,8 +74,9 @@ async def login(
                "connectionId": connectionId
            })

+           # MSAL automatically adds openid, profile, offline_access - we just need to provide our business scopes
            auth_url = msal_app.get_authorization_request_url(
-               scopes=SCOPES,
+               scopes=SCOPES,  # Only our business scopes - MSAL adds the required ones automatically
                redirect_uri=REDIRECT_URI,
                state=state_param,
                prompt="select_account"  # Force account selection screen

@@ -104,10 +110,10 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse
            client_credential=CLIENT_SECRET
        )

-       # Get token from code
+       # Get token from code - MSAL automatically handles the required scopes
        token_response = msal_app.acquire_token_by_authorization_code(
            code,
-           scopes=SCOPES,
+           scopes=SCOPES,  # Only our business scopes - MSAL adds the required ones automatically
            redirect_uri=REDIRECT_URI
        )

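MSAL rejects the reserved scopes (`openid`, `profile`, `offline_access`) if they are passed explicitly, which is why only the Graph business scopes appear in `SCOPES` above. A minimal sketch of the same auth-code exchange in isolation (client id/secret, tenant, redirect URI, and the code value are placeholders):

```python
# Hedged sketch; credentials, tenant, and redirect URI are placeholders.
import msal

app = msal.ConfidentialClientApplication(
    "your-client-id",
    authority="https://login.microsoftonline.com/common",
    client_credential="your-client-secret",
)

# Build the consent URL; MSAL appends openid/profile/offline_access itself.
url = app.get_authorization_request_url(
    scopes=["Mail.ReadWrite", "User.Read"],
    redirect_uri="http://localhost:8000/api/msft/auth/callback",  # placeholder path
)

# Later, in the callback, trade the code for tokens.
result = app.acquire_token_by_authorization_code(
    "code-from-callback",
    scopes=["Mail.ReadWrite", "User.Read"],
    redirect_uri="http://localhost:8000/api/msft/auth/callback",
)
print("access_token" in result, result.get("error_description"))
```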
@@ -308,6 +314,9 @@ async def auth_callback(code: str, state: str, request: Request) -> HTMLResponse
            # Update connection record directly
            rootInterface.db.recordModify("connections", connection_id, connection.to_dict())

+           # Clear cache to ensure fresh data
+           rootInterface.db.clearTableCache("connections")
+
            # Save token
            token = Token(
                userId=user.id,  # Use local user's ID

@@ -163,6 +163,9 @@ def createUserSession(userId: str, tokenId: str, request: Request) -> Session:
    # Save session to database
    appInterface.db.recordCreate("sessions", session.to_dict())

+   # Clear cache to ensure fresh data
+   appInterface.db.clearTableCache("sessions")
+
    # Log auth event
    event = AuthEvent(
        userId=userId,

@@ -173,6 +176,9 @@ def createUserSession(userId: str, tokenId: str, request: Request) -> Session:
    )
    appInterface.db.recordCreate("auth_events", event.to_dict())

+   # Clear cache to ensure fresh data
+   appInterface.db.clearTableCache("auth_events")
+
    return session

def logAuthEvent(userId: str, eventType: str, details: Dict[str, Any], request: Request) -> None:

@@ -189,6 +195,9 @@ def logAuthEvent(userId: str, eventType: str, details: Dict[str, Any], request:

    # Save event to database
    appInterface.db.recordCreate("auth_events", event.to_dict())

+   # Clear cache to ensure fresh data
+   appInterface.db.clearTableCache("auth_events")
+
def validateSession(sessionId: str) -> bool:
    """Validate a user session."""

@@ -207,6 +216,9 @@ def validateSession(sessionId: str) -> bool:
        "lastActivity": datetime.now(timezone.utc)
    })

+   # Clear cache to ensure fresh data
+   appInterface.db.clearTableCache("sessions")
+
    return True

def revokeSession(sessionId: str) -> None:

@@ -215,6 +227,9 @@ def revokeSession(sessionId: str) -> None:

    # Delete session
    appInterface.db.recordDelete("sessions", sessionId)

+   # Clear cache to ensure fresh data
+   appInterface.db.clearTableCache("sessions")
+
def revokeAllUserSessions(userId: str) -> None:
    """Revoke all sessions for a user."""

@@ -226,3 +241,6 @@ def revokeAllUserSessions(userId: str) -> None:
    # Delete each session
    for session in sessions:
        appInterface.db.recordDelete("sessions", session["id"])
+
+   # Clear cache to ensure fresh data
+   appInterface.db.clearTableCache("sessions")

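Every write in this module is now followed by a `clearTableCache` call on the same table. A hedged sketch of that pattern factored into one place (the helper name is invented for illustration; `recordCreate` and `clearTableCache` are the project's own db methods as used above):

```python
# Hypothetical helper, not part of the codebase; shown only to name the pattern.
def record_create_fresh(db, table: str, record: dict) -> None:
    db.recordCreate(table, record)  # 1. persist the new row
    db.clearTableCache(table)       # 2. invalidate so the next read does not hit a stale cache
```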
@@ -80,7 +80,7 @@ class Configuration:
    def _loadEnv(self):
        """Load environment variables from .env file"""
        # Find .env file in the gateway directory
-       envPath = Path(__file__).parent.parent.parent / 'env_dev.env'
+       envPath = Path(__file__).parent.parent.parent / '.env'
        if not envPath.exists():
            logger.warning(f"Environment file not found at {envPath.absolute()}")
            return

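With `Path(__file__).parent.parent.parent`, the lookup climbs three directories above the module before appending `.env`. A small illustration (the module's location under `gateway/` is an assumption):

```python
from pathlib import Path

# Assuming the module sits at gateway/modules/shared/configuration.py,
# three .parent hops climb back up to gateway/:
module = Path("gateway/modules/shared/configuration.py")
envPath = module.parent.parent.parent / '.env'
print(envPath)  # gateway/.env
```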
@@ -9,14 +9,11 @@ from modules.interfaces.interfaceAppObjects import User
from modules.interfaces.interfaceChatModel import (UserInputRequest, ChatMessage, ChatWorkflow, TaskItem, TaskStatus)
from modules.interfaces.interfaceChatObjects import ChatObjects
from modules.chat.managerChat import ChatManager
+from modules.chat.handling.handlingTasks import WorkflowStoppedException
from modules.interfaces.interfaceChatModel import WorkflowResult

logger = logging.getLogger(__name__)

-class WorkflowStoppedException(Exception):
-    """Exception raised when workflow is stopped by user"""
-    pass
-
class WorkflowManager:
    """Manager for workflow processing and coordination"""

@@ -25,11 +22,6 @@ class WorkflowManager:
        self.chatManager = ChatManager(currentUser, chatInterface)
        self.currentUser = currentUser

-   def _checkWorkflowStopped(self, workflow: ChatWorkflow) -> None:
-       """Check if workflow has been stopped"""
-       if workflow.status == "stopped":
-           raise WorkflowStoppedException("Workflow was stopped by user")
-
    async def workflowProcess(self, userInput: UserInputRequest, workflow: ChatWorkflow) -> None:
        """Process a workflow with user input using unified workflow phases"""
        try:

@@ -37,7 +29,7 @@ class WorkflowManager:
            await self.chatManager.initialize(workflow)

            # Set user language
-           self.chatManager.setUserLanguage(userInput.userLanguage)
+           self.chatManager.service.setUserLanguage(userInput.userLanguage)

            # Send first message
            message = await self._sendFirstMessage(userInput, workflow)

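The per-phase guard calls introduced in the hunks below rely on `handlingTasks` exposing the check that used to live on `WorkflowManager` itself. A hedged sketch of what the relocated method plausibly looks like (the attribute names are inferred from the call sites; the real definition lives in `modules.chat.handling.handlingTasks`):

```python
# Hedged sketch; not the actual implementation.
class WorkflowStoppedException(Exception):
    """Stand-in for modules.chat.handling.handlingTasks.WorkflowStoppedException."""

class HandlingTasks:
    def __init__(self, workflow):
        self.workflow = workflow  # assumed: the handler keeps a reference to its workflow

    def _checkWorkflowStopped(self) -> None:
        # Raise as soon as the user has flipped the workflow to "stopped".
        if self.workflow.status == "stopped":
            raise WorkflowStoppedException("Workflow was stopped by user")
```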
@@ -61,6 +53,21 @@ class WorkflowManager:
                "lastActivity": workflow.lastActivity
            })

+           # Create final stopped message
+           stopped_message = {
+               "workflowId": workflow.id,
+               "role": "assistant",
+               "message": "🛑 Workflow stopped by user",
+               "status": "last",
+               "sequenceNr": len(workflow.messages) + 1,
+               "publishedAt": datetime.now(UTC).isoformat(),
+               "documentsLabel": "workflow_stopped",
+               "documents": []
+           }
+           message = self.chatInterface.createWorkflowMessage(stopped_message)
+           if message:
+               workflow.messages.append(message)
+
            # Add log entry
            self.chatInterface.createWorkflowLog({
                "workflowId": workflow.id,

@@ -108,6 +115,8 @@ class WorkflowManager:
    async def _sendFirstMessage(self, userInput: UserInputRequest, workflow: ChatWorkflow) -> ChatMessage:
        """Send first message to start workflow"""
        try:
+           self.chatManager.handlingTasks._checkWorkflowStopped()
+
            # Create initial message using interface
            messageData = {
                "workflowId": workflow.id,

@@ -121,7 +130,7 @@ class WorkflowManager:
            # Add documents if any
            if userInput.listFileId:
                # Process file IDs and add to message data
-               documents = await self.chatManager.processFileIds(userInput.listFileId)
+               documents = await self.chatManager.service.processFileIds(userInput.listFileId)
                messageData["documents"] = documents

            # Create message using interface

@@ -139,6 +148,8 @@ class WorkflowManager:
    async def _generateWorkflowFeedback(self, workflow: ChatWorkflow) -> str:
        """Generate feedback message for workflow completion"""
        try:
+           self.chatManager.handlingTasks._checkWorkflowStopped()
+
            # Count messages by role
            user_messages = [msg for msg in workflow.messages if msg.role == 'user']
            assistant_messages = [msg for msg in workflow.messages if msg.role == 'assistant']

@@ -164,9 +175,13 @@ class WorkflowManager:
    async def _sendLastMessage(self, workflow: ChatWorkflow) -> None:
        """Send last message to complete workflow"""
        try:
+           self.chatManager.handlingTasks._checkWorkflowStopped()
+
            # Generate feedback
            feedback = await self._generateWorkflowFeedback(workflow)

+           self.chatManager.handlingTasks._checkWorkflowStopped()
+
            # Create last message using interface
            messageData = {
                "workflowId": workflow.id,

@@ -208,7 +223,60 @@ class WorkflowManager:
    async def _processWorkflowResults(self, workflow: ChatWorkflow, workflow_result: WorkflowResult, initial_message: ChatMessage) -> None:
        """Process workflow results and create appropriate messages"""
        try:
-           if workflow_result.status == 'failed':
+           try:
+               self.chatManager.handlingTasks._checkWorkflowStopped()
+           except WorkflowStoppedException:
+               logger.info(f"Workflow {workflow.id} was stopped during result processing")
+
+               # Create final stopped message
+               stopped_message = {
+                   "workflowId": workflow.id,
+                   "role": "assistant",
+                   "message": "🛑 Workflow stopped by user",
+                   "status": "last",
+                   "sequenceNr": len(workflow.messages) + 1,
+                   "publishedAt": datetime.now(UTC).isoformat(),
+                   "documentsLabel": "workflow_stopped",
+                   "documents": []
+               }
+               message = self.chatInterface.createWorkflowMessage(stopped_message)
+               if message:
+                   workflow.messages.append(message)
+
+               # Update workflow status to stopped
+               workflow.status = "stopped"
+               workflow.lastActivity = datetime.now(UTC).isoformat()
+               self.chatInterface.updateWorkflow(workflow.id, {
+                   "status": "stopped",
+                   "lastActivity": workflow.lastActivity
+               })
+               return
+
+           if workflow_result.status == 'stopped':
+               # Create stopped message
+               stopped_message = {
+                   "workflowId": workflow.id,
+                   "role": "assistant",
+                   "message": "🛑 Workflow stopped by user",
+                   "status": "last",
+                   "sequenceNr": len(workflow.messages) + 1,
+                   "publishedAt": datetime.now(UTC).isoformat(),
+                   "documentsLabel": "workflow_stopped",
+                   "documents": []
+               }
+               message = self.chatInterface.createWorkflowMessage(stopped_message)
+               if message:
+                   workflow.messages.append(message)
+
+               # Update workflow status to stopped
+               workflow.status = "stopped"
+               workflow.lastActivity = datetime.now(UTC).isoformat()
+               self.chatInterface.updateWorkflow(workflow.id, {
+                   "status": "stopped",
+                   "lastActivity": workflow.lastActivity
+               })
+               return
+           elif workflow_result.status == 'failed':
                # Create error message
                error_message = {
                    "workflowId": workflow.id,

@@ -25,6 +25,7 @@ PyMuPDF>=1.23.7  # instead of the imprecise 'fitz'
PyPDF2==3.0.1
python-docx>=0.8.11  # for Word documents
openpyxl>=3.1.2  # for Excel files
+python-pptx>=0.6.21  # for PowerPoint files

## Data Processing & Analysis
numpy==1.26.3  # version compatible with pandas and matplotlib

@@ -52,3 +53,14 @@ sortedcontainers>=2.4.0  # Required by trio

## MSFT Integration
msal==1.24.1
+
+# Enhanced Office document processing
+python-docx>=0.8.11
+openpyxl>=3.0.9
+python-pptx>=0.6.21
+xlrd>=2.0.1  # For legacy .xls files
+Pillow>=9.0.0  # For image processing
+PyPDF2>=3.0.0
+PyMuPDF>=1.20.0
+beautifulsoup4>=4.11.0
+chardet>=4.0.0  # For encoding detection

test_documentExtraction.py (new file, 855 lines)

@@ -0,0 +1,855 @@
#!/usr/bin/env python3
"""
Test script for DocumentExtraction class.
Processes all files in d:/temp folder and stores extracted content in d:/temp/extracted.

Features:
- Option to extract content WITH AI processing (default)
- Option to extract content WITHOUT AI processing (content-only mode)
- Supports all document types: text, images, PDFs, Office documents, etc.
- Detailed logging and progress tracking
- Separate output directories for AI vs content-only modes

Usage:
- Interactive mode: python test_documentExtraction.py
- Content-only mode: python test_documentExtraction.py --no-ai
- Content-only mode: python test_documentExtraction.py --content-only
- Specify custom input/output: python test_documentExtraction.py --input-dir /path/to/input --output-dir /path/to/output --no-ai
"""

import os
import asyncio
import logging
import sys
import argparse
from pathlib import Path
from typing import List, Optional
from datetime import datetime, UTC

# Configure logging
logging.basicConfig(
    level=logging.DEBUG,  # Changed from INFO to DEBUG
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Filter out specific unwanted log messages
class LogFilter(logging.Filter):
    """Filter to hide specific unwanted log messages."""

    def filter(self, record):
        # Hide workflow stats update errors
        if "Workflow" in record.getMessage() and "not found for stats update" in record.getMessage():
            return False

        # Hide HTTP request info messages
        if "HTTP Request:" in record.getMessage() and "POST https://api.openai.com" in record.getMessage():
            return False

        # Hide HTTP response info messages
        if "HTTP/1.1 200 OK" in record.getMessage():
            return False

        return True

# Apply the filter to the root logger
root_logger = logging.getLogger()
root_logger.addFilter(LogFilter())

def check_dependencies():
    """Check if required dependencies are available and provide installation instructions."""
    missing_deps = []

    # Check for required dependencies
    try:
        import bs4
        logger.info("✓ beautifulsoup4 is available")
    except ImportError:
        missing_deps.append("beautifulsoup4")
        logger.error("✗ beautifulsoup4 is missing")

    try:
        import PyPDF2
        logger.info("✓ PyPDF2 is available")
    except ImportError:
        missing_deps.append("PyPDF2")
        logger.error("✗ PyPDF2 is missing")

    try:
        import fitz
        logger.info("✓ PyMuPDF (fitz) is available")
    except ImportError:
        missing_deps.append("PyMuPDF")
        logger.error("✗ PyMuPDF (fitz) is missing")

    try:
        import docx
        logger.info("✓ python-docx is available")
    except ImportError:
        missing_deps.append("python-docx")
        logger.error("✗ python-docx is missing")

    try:
        import openpyxl
        logger.info("✓ openpyxl is available")
    except ImportError:
        missing_deps.append("openpyxl")
        logger.error("✗ openpyxl is missing")

    try:
        import pptx
        logger.info("✓ python-pptx is available")
    except ImportError:
        missing_deps.append("python-pptx")
        logger.error("✗ python-pptx is missing")

    try:
        from PIL import Image
        logger.info("✓ Pillow (PIL) is available")
    except ImportError:
        missing_deps.append("Pillow")
        logger.error("✗ Pillow (PIL) is missing")

    if missing_deps:
        logger.error("\n" + "="*60)
        logger.error("MISSING DEPENDENCIES DETECTED!")
        logger.error("="*60)
        logger.error("The following packages are required but not installed:")
        for dep in missing_deps:
            logger.error(f"  - {dep}")
        logger.error("\nTo install all dependencies, run:")
        logger.error("pip install -r requirements.txt")
        logger.error("\nOr install individual packages:")
        for dep in missing_deps:
            if dep == "beautifulsoup4":
                logger.error(f"  pip install {dep}")
            elif dep == "PyMuPDF":
                logger.error(f"  pip install {dep}")
            elif dep == "Pillow":
                logger.error(f"  pip install {dep}")
            else:
                logger.error(f"  pip install {dep}")
        logger.error("="*60)
        return False

    logger.info("✓ All required dependencies are available!")
    return True

def check_module_imports():
    """Check if we can import the required modules."""
    try:
        # Add the gateway directory to the path so we can import our modules
        sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', '..'))

        from modules.chat.documents.documentExtraction import DocumentExtraction
        from modules.chat.serviceCenter import ServiceCenter
        from modules.interfaces.interfaceAppModel import User, UserConnection
        from modules.interfaces.interfaceChatModel import ChatWorkflow, TaskItem

        logger.info("✓ All required modules imported successfully")
        return True
    except ImportError as e:
        logger.error(f"✗ Failed to import required modules: {e}")
        logger.error("Make sure you're running this script from the gateway directory")
        return False
    except Exception as e:
        logger.error(f"✗ Unexpected error importing modules: {e}")
        return False

def create_mock_service_center():
    """Create a proper ServiceCenter for testing purposes with all required fields."""
    try:
        from modules.chat.serviceCenter import ServiceCenter
        from modules.interfaces.interfaceAppModel import User, UserPrivilege, AuthAuthority
        from modules.interfaces.interfaceChatModel import ChatWorkflow, TaskItem, TaskStatus
        from modules.interfaces.interfaceChatModel import ChatLog, ChatMessage, ChatStat

        # Create proper user with all required fields
        mock_user = User(
            id="test_user_001",
            username="testuser",
            email="test@example.com",
            fullName="Test User",
            language="en",
            enabled=True,
            privilege=UserPrivilege.USER,
            authenticationAuthority=AuthAuthority.LOCAL,
            mandateId="test_mandate_001"
        )

        # Create proper workflow with all required fields
        current_time = datetime.now(UTC).isoformat()
        mock_workflow = ChatWorkflow(
            id="test_workflow_001",
            mandateId="test_mandate_001",
            status="active",
            name="Test Document Extraction Workflow",
            currentRound=1,
            lastActivity=current_time,
            startedAt=current_time,
            logs=[],
            messages=[],
            stats=None,
            tasks=[]
        )

        # Create service center
        service_center = ServiceCenter(mock_user, mock_workflow)
        logger.info("✓ ServiceCenter created successfully with proper objects")
        return service_center

    except Exception as e:
        logger.error(f"✗ Failed to create ServiceCenter: {e}")
        return None

class DocumentExtractionTester:
    """Test class for DocumentExtraction functionality."""

    def __init__(self, input_dir: str = "d:/temp/test-extraction", output_dir: str = None, enable_ai: bool = True):
        """
        Initialize the tester.

        Args:
            input_dir: Directory containing files to process
            output_dir: Directory to store extracted content (auto-generated if None)
            enable_ai: Whether to enable AI processing (default: True)
        """
        self.input_dir = Path(input_dir)

        # Auto-generate output directory if not specified
        if output_dir is None:
            if enable_ai:
                self.output_dir = Path(input_dir) / "extracted"
            else:
                self.output_dir = Path(input_dir) / "extracted-raw"
        else:
            self.output_dir = Path(output_dir)

        self.extractor = None
        self.service_center = None
        self.enable_ai = enable_ai

        if enable_ai:
            self.prompt = "Make a summary of each sentence for each page or chapter of the document"
        else:
            self.prompt = None  # No prompt needed for content-only extraction

        # Track processing results for summary
        self.processing_results = []

        # Ensure output directory exists
        logger.info(f"Creating output directory: {self.output_dir}")
        self.output_dir.mkdir(parents=True, exist_ok=True)

        # Verify directory was created
        if self.output_dir.exists():
            logger.info(f"✓ Output directory created/verified: {self.output_dir}")
            logger.info(f"Output directory absolute path: {self.output_dir.absolute()}")
        else:
            logger.error(f"✗ Failed to create output directory: {self.output_dir}")

        # Log configuration
        logger.info(f"Configuration: AI processing = {'ENABLED' if self.enable_ai else 'DISABLED'}")
        logger.info(f"Input directory: {self.input_dir}")
        logger.info(f"Output directory: {self.output_dir}")

        # Test basic file writing capability
        test_file = self.output_dir / "test_write_capability.txt"
        try:
            logger.info(f"Testing file write capability to: {test_file}")
            logger.info(f"Absolute path: {test_file.absolute()}")

            with open(test_file, 'w', encoding='utf-8') as f:
                f.write("Test file to verify write capability")

            if test_file.exists():
                actual_size = test_file.stat().st_size
                logger.info(f"✓ Basic file writing test passed: {test_file} (size: {actual_size} bytes)")

                # Test reading the file back
                with open(test_file, 'r', encoding='utf-8') as f:
                    content = f.read()
                logger.info(f"✓ File read test passed: content length = {len(content)}")

                # Clean up test file
                test_file.unlink()
                logger.info("✓ Test file cleaned up")
            else:
                logger.error(f"✗ Basic file writing test failed: {test_file}")
        except Exception as e:
            logger.error(f"✗ Basic file writing test failed with error: {e}")
            import traceback
            traceback.print_exc()

        # Supported file extensions for content extraction
        self.supported_extensions = {
            # Text and data files
            '.txt', '.csv', '.json', '.xml', '.html', '.htm', '.svg',
            '.md', '.markdown', '.rst', '.log', '.ini', '.cfg', '.conf',

            # Programming languages
            '.js', '.ts', '.jsx', '.tsx', '.py', '.java', '.c', '.cpp', '.cc', '.cxx',
            '.h', '.hpp', '.cs', '.php', '.rb', '.go', '.rs', '.swift', '.kt', '.scala',
            '.r', '.m', '.pl', '.sh', '.bash', '.zsh', '.fish', '.ps1', '.bat', '.cmd',
            '.vbs', '.lua', '.sql', '.r', '.dart', '.elm', '.clj', '.hs', '.fs', '.ml',

            # Web technologies
            '.css', '.scss', '.sass', '.less', '.vue', '.svelte', '.astro',

            # Configuration and build files
            '.yaml', '.yml', '.toml', '.env', '.gitignore', '.dockerfile', '.dockerignore',
            '.makefile', '.cmake', '.gradle', '.maven', '.pom', '.sln', '.vcxproj',
            '.csproj', '.fsproj', '.vbproj', '.xcodeproj', '.pbxproj',

            # Documentation and markup
            '.tex', '.bib', '.adoc', '.asciidoc', '.wiki', '.creole',

            # Images
            '.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp', '.tiff', '.ico',

            # Documents
            '.pdf', '.docx', '.xlsx', '.pptx', '.odt', '.ods', '.odp',

            # Legacy Office formats
            '.doc', '.xls', '.ppt',

            # Archives and binaries
            '.zip', '.tar', '.gz', '.7z', '.rar', '.exe', '.dll', '.so', '.dylib'
        }

    def initialize_extractor(self):
        """Initialize the DocumentExtraction instance with a proper ServiceCenter."""
        try:
            # First create the service center
            self.service_center = create_mock_service_center()
            if not self.service_center:
                logger.error("Failed to create ServiceCenter!")
                return False

            # Now create DocumentExtraction with the service center
            from modules.chat.documents.documentExtraction import DocumentExtraction
            self.extractor = DocumentExtraction(self.service_center)
            logger.info("✓ DocumentExtraction initialized successfully with ServiceCenter")
            return True
        except Exception as e:
            logger.error(f"✗ Failed to initialize DocumentExtraction: {e}")
            return False

    def get_files_to_process(self) -> List[Path]:
        """Get list of files to process from input directory."""
        if not self.input_dir.exists():
            logger.error(f"Input directory {self.input_dir} does not exist!")
            logger.info("Creating input directory and adding a test file...")
            self.input_dir.mkdir(parents=True, exist_ok=True)

            # Create a test file if none exist
            test_file = self.input_dir / "test.txt"
            with open(test_file, 'w') as f:
                f.write("This is a test file for document extraction.\nIt contains multiple lines.\nAnd some special characters: äöüß")
            logger.info(f"Created test file: {test_file}")

        files = []
        all_files = list(self.input_dir.iterdir())
        logger.info(f"All files in directory: {[f.name for f in all_files]}")

        for file_path in all_files:
            if file_path.is_file():
                logger.debug(f"Checking file: {file_path.name} (extension: {file_path.suffix})")
                if file_path.suffix.lower() in self.supported_extensions:
                    files.append(file_path)
                    logger.debug(f"Added file: {file_path.name}")
                else:
                    logger.debug(f"Skipped file: {file_path.name} (unsupported extension)")

        logger.info(f"Found {len(files)} supported files to process")
        if files:
            logger.info(f"Files to process: {[f.name for f in files]}")
        return files

    async def process_single_file(self, file_path: Path) -> bool:
        """
        Process a single file and extract its content.

        Args:
            file_path: Path to the file to process

        Returns:
            True if successful, False otherwise
        """
        if not self.extractor:
            logger.error("DocumentExtraction not initialized!")
            return False

        try:
            logger.info(f"Processing file: {file_path.name}")

            # Read file data
            with open(file_path, 'rb') as f:
                file_data = f.read()

            logger.debug(f"File size: {len(file_data)} bytes")

            # Determine MIME type based on extension
            mime_type = self._get_mime_type(file_path.suffix)
            logger.debug(f"MIME type: {mime_type}")

            # Process the file with or without AI based on configuration
            extracted_content = await self.extractor.processFileData(
                fileData=file_data,
                filename=file_path.name,
                mimeType=mime_type,
                base64Encoded=False,
                prompt=self.prompt,
                enableAI=self.enable_ai
            )

            logger.debug(f"Extracted {len(extracted_content.contents)} content items")

            # Debug: Show content details
            for i, content_item in enumerate(extracted_content.contents):
                logger.debug(f"Content item {i+1}: label='{content_item.label}', has_data={content_item.data is not None}, data_length={len(content_item.data) if content_item.data else 0}")

            # Special logging for JavaScript files
            if mime_type == "application/javascript":
                logger.debug(f"JavaScript file detected: {file_path.name}")
                logger.debug(f"Original file size: {len(file_data)} bytes")
                for i, content_item in enumerate(extracted_content.contents):
                    if content_item.data:
                        content_size = len(content_item.data.encode('utf-8'))
                        logger.debug(f"JavaScript content item {i+1}: {content_size} bytes")
                        # Check if content was truncated
                        if content_size < len(file_data) * 0.9:  # If less than 90% of original
                            logger.warning(f"JavaScript content may be truncated: {content_size} bytes vs {len(file_data)} bytes original")

            # Track processing result
            result = {
                'filename': file_path.name,
                'status': 'OK',
                'content_items': 0,
                'output_files': [],
                'total_content_size': 0
            }

            # Save each content item as a separate file
            if extracted_content.contents:
                for i, content_item in enumerate(extracted_content.contents):
                    if content_item.data:
                        content_size = len(content_item.data.encode('utf-8'))
                        result['total_content_size'] += content_size
                        logger.debug(f"Content item {i+1}: {content_item.label}, size: {content_size} bytes")

                        # Generate filename with new naming convention
                        if len(extracted_content.contents) == 1:
                            # Single content item
                            output_filename = f"{file_path.stem} - {content_item.label} 1.txt"
                        else:
                            # Multiple content items - add sequence number
                            output_filename = f"{file_path.stem} - {content_item.label} {i+1}.txt"

                        output_file = self.output_dir / output_filename

                        # Write only the raw extracted content
                        logger.debug(f"Attempting to write to: {output_file}")
                        try:
                            with open(output_file, 'w', encoding='utf-8') as f:
                                f.write(content_item.data)

                            # Verify file was created
                            if output_file.exists():
                                actual_size = output_file.stat().st_size
                                logger.info(f"✓ File created successfully: {output_filename} (expected: {content_size} bytes, actual: {actual_size} bytes)")
                            else:
                                logger.error(f"✗ File was not created: {output_file}")

                            result['output_files'].append(output_filename)
                            result['content_items'] += 1
                        except Exception as write_error:
                            logger.error(f"✗ Error writing file {output_filename}: {write_error}")
                            import traceback
                            traceback.print_exc()
                    else:
                        logger.warning(f"Content item {i+1} has no data, skipping")
            else:
                logger.warning(f"No content extracted from {file_path.name}")
                result['status'] = 'FAIL'
                result['error'] = 'No content extracted'

            # Add result to tracking list
            self.processing_results.append(result)

            logger.info(f"Successfully processed {file_path.name} - Total content: {result['total_content_size']} bytes")
            return True

        except Exception as e:
            error_msg = str(e)
            logger.error(f"Error processing {file_path.name}: {error_msg}")

            # Track failed result
            result = {
                'filename': file_path.name,
                'status': 'FAIL',
                'content_items': 0,
                'output_files': [],
                'error': error_msg,
                'total_content_size': 0
            }
            self.processing_results.append(result)

            return False

    def _get_mime_type(self, extension: str) -> str:
        """Get MIME type based on file extension."""
        mime_types = {
            # Text and data files
            '.txt': 'text/plain',
            '.csv': 'text/csv',
            '.json': 'application/json',
            '.xml': 'application/xml',
            '.html': 'text/html',
            '.htm': 'text/html',
            '.svg': 'image/svg+xml',
            '.md': 'text/markdown',
            '.markdown': 'text/markdown',
            '.rst': 'text/x-rst',
            '.log': 'text/plain',
            '.ini': 'text/plain',
            '.cfg': 'text/plain',
            '.conf': 'text/plain',

            # Programming languages
            '.js': 'application/javascript',
            '.ts': 'application/typescript',
            '.jsx': 'text/jsx',
            '.tsx': 'text/tsx',
            '.py': 'text/x-python',
            '.java': 'text/x-java-source',
            '.c': 'text/x-c',
            '.cpp': 'text/x-c++src',
            '.cc': 'text/x-c++src',
            '.cxx': 'text/x-c++src',
            '.h': 'text/x-c',
            '.hpp': 'text/x-c++hdr',
            '.cs': 'text/x-csharp',
            '.php': 'application/x-httpd-php',
            '.rb': 'text/x-ruby',
            '.go': 'text/x-go',
            '.rs': 'text/x-rust',
            '.swift': 'text/x-swift',
            '.kt': 'text/x-kotlin',
            '.scala': 'text/x-scala',
            '.r': 'text/x-r',
            '.m': 'text/x-matlab',
            '.pl': 'text/x-perl',
            '.sh': 'application/x-sh',
            '.bash': 'application/x-sh',
            '.zsh': 'application/x-sh',
            '.fish': 'application/x-sh',
            '.ps1': 'application/x-powershell',
            '.bat': 'application/x-msdos-program',
            '.cmd': 'application/x-msdos-program',
            '.vbs': 'text/vbscript',
            '.lua': 'text/x-lua',
            '.sql': 'application/sql',
            '.dart': 'application/dart',
            '.elm': 'text/x-elm',
            '.clj': 'text/x-clojure',
            '.hs': 'text/x-haskell',
            '.fs': 'text/x-fsharp',
            '.ml': 'text/x-ocaml',

            # Web technologies
            '.css': 'text/css',
            '.scss': 'text/x-scss',
            '.sass': 'text/x-sass',
            '.less': 'text/x-less',
            '.vue': 'text/x-vue',
            '.svelte': 'text/x-svelte',
            '.astro': 'text/x-astro',

            # Configuration and build files
            '.yaml': 'application/x-yaml',
            '.yml': 'application/x-yaml',
            '.toml': 'application/toml',
            '.env': 'text/plain',
            '.gitignore': 'text/plain',
            '.dockerfile': 'text/x-dockerfile',
            '.dockerignore': 'text/plain',
            '.makefile': 'text/x-makefile',
            '.cmake': 'text/x-cmake',
            '.gradle': 'text/x-gradle',
            '.maven': 'text/x-maven',
            '.pom': 'application/xml',
            '.sln': 'text/plain',
            '.vcxproj': 'application/xml',
            '.csproj': 'application/xml',
            '.fsproj': 'application/xml',
            '.vbproj': 'application/xml',
            '.xcodeproj': 'text/plain',
            '.pbxproj': 'text/plain',

            # Documentation and markup
            '.tex': 'application/x-tex',
            '.bib': 'text/x-bibtex',
            '.adoc': 'text/asciidoc',
            '.asciidoc': 'text/asciidoc',
            '.wiki': 'text/x-wiki',
            '.creole': 'text/x-wiki',

            # Images
            '.jpg': 'image/jpeg',
            '.jpeg': 'image/jpeg',
            '.png': 'image/png',
            '.gif': 'image/gif',
            '.webp': 'image/webp',
            '.bmp': 'image/bmp',
            '.tiff': 'image/tiff',
            '.ico': 'image/x-icon',

            # Documents
            '.pdf': 'application/pdf',
            '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
            '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
            '.pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
            '.odt': 'application/vnd.oasis.opendocument.text',
            '.ods': 'application/vnd.oasis.opendocument.spreadsheet',
            '.odp': 'application/vnd.oasis.opendocument.presentation',

            # Legacy Office formats
            '.doc': 'application/msword',
            '.xls': 'application/vnd.ms-excel',
            '.ppt': 'application/vnd.ms-powerpoint',

            # Archives and binaries (will be processed as binary)
            '.zip': 'application/zip',
            '.tar': 'application/x-tar',
            '.gz': 'application/gzip',
            '.7z': 'application/x-7z-compressed',
            '.rar': 'application/vnd.rar',
            '.exe': 'application/x-msdownload',
            '.dll': 'application/x-msdownload',
            '.so': 'application/x-sharedlib',
            '.dylib': 'application/x-mach-binary'
        }
        return mime_types.get(extension.lower(), 'application/octet-stream')

    async def run_tests(self) -> None:
        """Run the document extraction tests on all files."""
        mode = "WITH AI" if self.enable_ai else "CONTENT ONLY (No AI)"
        logger.info(f"Starting document extraction tests - {mode}")
        logger.info(f"Input directory: {self.input_dir}")
        logger.info(f"Output directory: {self.output_dir}")
        if self.enable_ai:
            logger.info(f"Processing prompt: {self.prompt}")
        else:
            logger.info("AI processing: DISABLED - Raw content extraction only")

        # Initialize the extractor
        if not self.initialize_extractor():
            logger.error("Cannot proceed without DocumentExtraction!")
            return

        # Get files to process
        files = self.get_files_to_process()

        if not files:
            logger.warning("No files found to process!")
            return

        # Process each file
        successful = 0
        failed = 0

        logger.info(f"Starting to process {len(files)} files...")
        for i, file_path in enumerate(files):
            logger.info(f"Processing file {i+1}/{len(files)}: {file_path.name}")
            try:
                if await self.process_single_file(file_path):
                    successful += 1
                    logger.info(f"✓ File {i+1} processed successfully")
                else:
                    failed += 1
                    logger.error(f"✗ File {i+1} processing failed")
            except Exception as e:
                failed += 1
                logger.error(f"✗ Exception processing file {i+1}: {e}")
                import traceback
                traceback.print_exc()

        # Print detailed summary
        mode = "WITH AI" if self.enable_ai else "CONTENT ONLY (No AI)"
        logger.info("\n" + "=" * 80)
        logger.info(f"DETAILED TEST SUMMARY - {mode}")
        logger.info("=" * 80)
        logger.info(f"Total files processed: {len(files)}")
        logger.info(f"Successful: {successful}")
        logger.info(f"Failed: {failed}")
        logger.info(f"Output directory: {self.output_dir}")
        if self.enable_ai:
            logger.info("AI processing: ENABLED")
        else:
            logger.info("AI processing: DISABLED")
        logger.info("=" * 80)

        # List all processed documents with results
        logger.info("\nPROCESSING RESULTS:")
        logger.info("-" * 80)

        for result in self.processing_results:
            status_icon = "✅" if result['status'] == 'OK' else "❌"
            logger.info(f"{status_icon} {result['filename']} - {result['status']}")

            if result['status'] == 'OK':
                if result['content_items'] == 1:
                    logger.info(f"  └─ Generated: {result['output_files'][0]} ({result['total_content_size']} bytes)")
                else:
                    logger.info(f"  └─ Generated {result['content_items']} files ({result['total_content_size']} total bytes):")
                    for output_file in result['output_files']:
                        logger.info(f"      └─ {output_file}")
            else:
                error_msg = result.get('error', 'Unknown error')
                logger.info(f"  └─ Error: {error_msg}")

        logger.info("-" * 80)
        logger.info("=" * 80)

def parse_arguments():
    """Parse command line arguments."""
    parser = argparse.ArgumentParser(description='Document Extraction Test Script')
    parser.add_argument('--no-ai', '--content-only', action='store_true',
                        help='Run in content-only mode without AI processing')
    parser.add_argument('--input-dir', type=str, default='d:/temp/test-extraction',
                        help='Input directory containing files to process (default: d:/temp/test-extraction)')
    parser.add_argument('--output-dir', type=str,
                        help='Output directory for extracted content (auto-generated if not specified)')
    parser.add_argument('--verbose', '-v', action='store_true',
                        help='Enable verbose logging')

    return parser.parse_args()

async def main():
    """Main function to run the tests."""
    # Parse command line arguments
    args = parse_arguments()

    # Set logging level based on verbosity
    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)
    else:
        logging.getLogger().setLevel(logging.INFO)

    logger.info("DocumentExtraction Test Script")
    logger.info("=" * 50)
    logger.info(f"Source: {args.input_dir}")

    # Determine output directory
    if args.output_dir:
        output_dir = args.output_dir
    else:
        if args.no_ai:
            output_dir = f"{args.input_dir}/extracted-raw"
        else:
            output_dir = f"{args.input_dir}/extracted"

    logger.info(f"Output: {output_dir}")
    logger.info("=" * 50)

    # Check dependencies first
    if not check_dependencies():
        logger.error("Please install missing dependencies before running tests.")
        return

    # Check module imports
    if not check_module_imports():
        logger.error("Cannot import required modules. Please check your setup.")
        return

    # Determine mode based on command line arguments
    if args.no_ai:
        enable_ai = False
        logger.info("Running in CONTENT ONLY mode (no AI processing)")
    else:
        # Interactive mode: ask user for choice
        print("\n" + "=" * 50)
        print("SELECT EXTRACTION MODE:")
        print("=" * 50)
        print("1. With AI processing (default)")
        print("2. Content only (no AI processing)")
        print("=" * 50)

        try:
            choice = input("Enter your choice (1 or 2, default is 1): ").strip()
            if choice == "2":
                enable_ai = False
                output_dir = f"{args.input_dir}/extracted-raw"
                logger.info("Selected: Content only mode (no AI processing)")
            else:
                enable_ai = True
                output_dir = f"{args.input_dir}/extracted"
                logger.info("Selected: AI processing mode")
        except (EOFError, KeyboardInterrupt):
            # Default to AI mode if input fails
            enable_ai = True
            output_dir = f"{args.input_dir}/extracted"
            logger.info("Defaulting to AI processing mode")

    # Run tests with selected mode
    tester = DocumentExtractionTester(
        input_dir=args.input_dir,
        output_dir=output_dir,
        enable_ai=enable_ai
    )
    await tester.run_tests()

if __name__ == "__main__":
    # Check if command line arguments are provided for automated testing
    if len(sys.argv) > 1:
        # Parse arguments and run directly
        asyncio.run(main())
    else:
        # Interactive mode: ask user for choice
        asyncio.run(main())

# Convenience function for easy content-only extraction
async def extract_documents_content_only(input_folder: str, output_folder: str = None):
    """
    Convenience function to extract documents without AI processing.

    Args:
        input_folder: Path to folder containing documents to extract
        output_folder: Path to folder where extracted content will be stored (optional)

    Example:
        # Extract from d:/temp to d:/temp/extracted-raw
        asyncio.run(extract_documents_content_only("d:/temp"))

        # Extract from custom folders
        asyncio.run(extract_documents_content_only("c:/my_docs", "c:/my_docs/extracted"))
    """
    if output_folder is None:
        output_folder = f"{input_folder}/extracted-raw"

    logger.info(f"Running content-only extraction from {input_folder} to {output_folder}")

    # Check dependencies and imports
    if not check_dependencies():
        logger.error("Missing dependencies. Please install required packages.")
        return False

    if not check_module_imports():
        logger.error("Cannot import required modules. Please check your setup.")
        return False

    # Create tester and run
    tester = DocumentExtractionTester(
        input_dir=input_folder,
        output_dir=output_folder,
        enable_ai=False
    )

    await tester.run_tests()
    return True

# Example usage (uncomment to use):
# if __name__ == "__main__":
#     # For content-only extraction from d:/temp to d:/temp/extracted-raw
#     asyncio.run(extract_documents_content_only("d:/temp"))

test_excel_processing.py (new file, 189 lines)

@@ -0,0 +1,189 @@
#!/usr/bin/env python3
"""
Simple test script for enhanced Excel processing functionality.
This script tests the DocumentExtraction class with Excel files.
"""

import os
import sys
import asyncio
import logging
from pathlib import Path

# Configure logging
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Add the gateway directory to the path
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', '..'))

async def test_excel_processing():
    """Test Excel processing functionality."""
    try:
        # Import required modules
        from modules.chat.documents.documentExtraction import DocumentExtraction
        from modules.chat.serviceCenter import ServiceCenter
        from modules.interfaces.interfaceAppModel import User, UserPrivilege, AuthAuthority
        from modules.interfaces.interfaceChatModel import ChatWorkflow
        from datetime import datetime, UTC

        logger.info("Testing Excel processing functionality...")

        # Create mock service center
        mock_user = User(
            id="test_user_001",
            username="testuser",
            email="test@example.com",
            fullName="Test User",
            language="en",
            enabled=True,
            privilege=UserPrivilege.USER,
            authenticationAuthority=AuthAuthority.LOCAL,
            mandateId="test_mandate_001"
        )

        current_time = datetime.now(UTC).isoformat()
        mock_workflow = ChatWorkflow(
            id="test_workflow_001",
            mandateId="test_mandate_001",
            status="active",
            name="Test Excel Processing Workflow",
            currentRound=1,
            lastActivity=current_time,
            startedAt=current_time,
            logs=[],
            messages=[],
            stats=None,
            tasks=[]
        )

        service_center = ServiceCenter(mock_user, mock_workflow)
        logger.info("✓ ServiceCenter created successfully")

        # Create DocumentExtraction instance
        extractor = DocumentExtraction(service_center)
        logger.info("✓ DocumentExtraction created successfully")

        # Test with a sample Excel file if available
        test_file_path = "d:/temp/test-extraction/test.xlsx"

        if os.path.exists(test_file_path):
            logger.info(f"Found test file: {test_file_path}")

            # Read the file
            with open(test_file_path, 'rb') as f:
                file_data = f.read()

            logger.info(f"File size: {len(file_data)} bytes")

            # Process the Excel file
            logger.info("Processing Excel file...")
            result = await extractor.processFileData(
                fileData=file_data,
                filename="test.xlsx",
                mimeType="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
                base64Encoded=False,
                prompt=None,
                enableAI=False
            )

            logger.info(f"✓ Excel processing completed successfully!")
            logger.info(f"Generated {len(result.contents)} content items:")

            for i, content_item in enumerate(result.contents):
                logger.info(f"  Item {i+1}: {content_item.label}")
                logger.info(f"    MIME type: {content_item.metadata.mimeType}")
                logger.info(f"    Size: {content_item.metadata.size} bytes")
                if content_item.data:
                    logger.info(f"    Data preview: {content_item.data[:100]}...")
                else:
                    logger.info(f"    Data: None")

        else:
            logger.info("No test Excel file found. Creating a simple test...")

            # Test the openpyxl library directly
            try:
                import openpyxl
                from openpyxl import Workbook

                # Create a test workbook
                wb = Workbook()
                ws = wb.active
                ws.title = "Test Sheet"

                # Add some test data
                ws['A1'] = "Name"
                ws['B1'] = "Age"
                ws['C1'] = "City"
                ws['A2'] = "John Doe"
                ws['B2'] = 30
                ws['C2'] = "New York"
                ws['A3'] = "Jane Smith"
                ws['B3'] = 25
                ws['C3'] = "Los Angeles"

                # Test properties
                wb.properties.title = "Test Workbook"
                wb.properties.creator = "Test User"
                wb.properties.subject = "Test Subject"

                logger.info("✓ Test workbook created successfully")
                logger.info(f"  Title: {wb.properties.title}")
                logger.info(f"  Creator: {wb.properties.creator}")
                logger.info(f"  Subject: {wb.properties.subject}")
                logger.info(f"  Sheets: {wb.sheetnames}")

                # Test the DocumentExtraction with this workbook
                from io import BytesIO

                # Save to bytes
                buffer = BytesIO()
                wb.save(buffer)
                buffer.seek(0)
                file_data = buffer.getvalue()

                logger.info(f"Test workbook size: {len(file_data)} bytes")

                # Process with DocumentExtraction
                result = await extractor.processFileData(
                    fileData=file_data,
                    filename="test_workbook.xlsx",
                    mimeType="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
                    base64Encoded=False,
                    prompt=None,
                    enableAI=False
                )

                logger.info(f"✓ Test workbook processing completed successfully!")
                logger.info(f"Generated {len(result.contents)} content items:")

                for i, content_item in enumerate(result.contents):
                    logger.info(f"  Item {i+1}: {content_item.label}")
                    logger.info(f"    MIME type: {content_item.metadata.mimeType}")
                    logger.info(f"    Size: {content_item.metadata.size} bytes")
                    if content_item.data:
                        logger.info(f"    Data preview: {content_item.data[:200]}...")
                    else:
                        logger.info(f"    Data: None")

            except ImportError as e:
                logger.error(f"openpyxl not available: {e}")
            except Exception as e:
                logger.error(f"Error testing Excel functionality: {e}")

        logger.info("Excel processing test completed!")

    except ImportError as e:
        logger.error(f"Failed to import required modules: {e}")
        logger.error("Make sure you're running this script from the gateway directory")
    except Exception as e:
        logger.error(f"Unexpected error: {e}")
        import traceback
        traceback.print_exc()

if __name__ == "__main__":
    asyncio.run(test_excel_processing())