stats table included

This commit is contained in:
parent 86fe43e987
commit d1aac4099d

30 changed files with 455 additions and 5169 deletions
@@ -1,114 +0,0 @@
# Document Extraction Test

This test procedure validates the DocumentManager's ability to extract content from files using AI-powered analysis.

## Files Created

- `test_document_extraction.py` - Main test script
- `test_sample_document.txt` - Sample document for testing
- `run_document_test.ps1` - PowerShell wrapper script
- `test_document_extraction.log` - Generated log file (cleared on each run)

## Usage

### Method 1: Using PowerShell Script (Recommended)

```powershell
# Test with default sample file
.\run_document_test.ps1

# Test with custom file
.\run_document_test.ps1 "path\to\your\document.pdf"
```

### Method 2: Direct Python Execution

```bash
# Test with default sample file
python test_document_extraction.py test_sample_document.txt

# Test with custom file
python test_document_extraction.py "path/to/your/document.docx"
```

## Test Features

1. **File Validation**: Checks if the specified file exists
2. **MIME Type Detection**: Automatically detects the file type based on its extension
3. **Content Extraction**: Uses the DocumentManager to extract content
4. **AI Processing**: Applies the prompt "summarize the content and give list of the major topics"
5. **Comprehensive Logging**: Logs all steps and results to `test_document_extraction.log`
6. **Log Cleanup**: Clears the log file on each test run
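The MIME type detection step (item 2 above) is extension-based. As a rough illustration of how such a lookup can work with only the Python standard library (the `detect_mime_type` helper name and the binary fallback are assumptions for this sketch, not part of the test script):

```python
import mimetypes

def detect_mime_type(file_path: str) -> str:
    """Guess a MIME type from the file extension (hypothetical helper)."""
    mime_type, _ = mimetypes.guess_type(file_path)
    # Unknown extensions fall back to generic binary handling,
    # mirroring the "fallback to binary processing" behaviour listed below.
    return mime_type or "application/octet-stream"

print(detect_mime_type("test_sample_document.txt"))  # text/plain
print(detect_mime_type("report.pdf"))                # application/pdf
```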
## Supported File Types

- Text files (.txt, .md)
- CSV files (.csv)
- JSON files (.json)
- XML files (.xml)
- HTML files (.html, .htm)
- Images (.jpg, .jpeg, .png, .gif, .svg)
- PDF files (.pdf)
- Office documents (.docx, .xlsx, .pptx)
- And more (fallback to binary processing)

## Test Output

The test generates detailed logs including:

- File information (path, size, MIME type)
- Extraction process details
- Extracted content summary
- AI-processed results
- Error details if any issues occur

## Example Output

```
=== STARTING DOCUMENT EXTRACTION TEST ===
File information: {
  "file_path": "test_sample_document.txt",
  "filename": "test_sample_document.txt",
  "mime_type": "text/plain",
  "file_size_bytes": 2048,
  "file_size_mb": 0.0
}
Document extraction completed successfully: {
  "extracted_content_id": "test-doc-1234567890",
  "content_items_count": 1,
  "object_type": "ExtractedContent"
}
COMPLETE EXTRACTED CONTENT: {
  "total_length": 1500,
  "content": "PowerOn System Architecture Overview... [AI processed summary]"
}
```
## Error Handling

The test includes comprehensive error handling for:

- File not found errors
- File reading errors
- Document processing errors
- AI processing errors
- Import errors

All errors are logged with detailed information for debugging.

## Configuration

The test uses the same configuration as other tests:

- Environment variable: `POWERON_CONFIG_FILE = 'test_config.ini'`
- Log file: `test_document_extraction.log`
- Log level: DEBUG
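For orientation, here is a minimal sketch of how a test script could pick up the configuration listed above before importing the application modules; only the `POWERON_CONFIG_FILE` variable, the log file name, and the DEBUG level come from this document, the rest is illustrative:

```python
import logging
import os

# Point the application at the test configuration before importing it.
os.environ["POWERON_CONFIG_FILE"] = "test_config.ini"

# Start each run with a fresh DEBUG-level log, as described above.
logging.basicConfig(
    filename="test_document_extraction.log",
    filemode="w",  # truncate the previous run's log
    level=logging.DEBUG,
)
logging.getLogger(__name__).debug("Test configuration loaded")
```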
## Dependencies

The test requires the same dependencies as the main PowerOn system:

- Python 3.8+
- Required Python packages (see requirements.txt)
- Access to AI services (if AI processing is enabled)
- Proper configuration in test_config.ini
File diff suppressed because it is too large
@@ -1,537 +0,0 @@
"""
Documentation agent for generating structured documentation.
Provides comprehensive documentation generation capabilities.
"""

import logging
from typing import Dict, Any, List, Optional
import json
import re
from datetime import datetime
import os
import hashlib
import base64
import uuid
import shutil
from pathlib import Path
import traceback
import sys
import importlib.util
import inspect
from pydantic import BaseModel

from modules.workflow.agentBase import AgentBase
from modules.interfaces.serviceChatModel import ChatContent

logger = logging.getLogger(__name__)

class AgentDocumentation(AgentBase):
    """AI-driven agent for creating documentation and structured content using multi-step generation"""

    def __init__(self):
        """Initialize the documentation agent"""
        super().__init__()
        self.name = "documentation"
        self.label = "Documentation"
        self.description = "Creates structured documentation, reports, and content using AI with multi-step generation"
        self.capabilities = [
            "report_generation",
            "documentation",
            "content_structuring",
            "technical_writing",
            "knowledge_organization"
        ]
    def setDependencies(self, serviceBase=None):
        """Set external dependencies for the agent."""
        self.setService(serviceBase)

    async def processTask(self, task: Dict[str, Any]) -> Dict[str, Any]:
        """
        Process a task by focusing on required outputs and using AI to generate them.

        Args:
            task: Task dictionary with prompt, inputDocuments, outputSpecifications

        Returns:
            Dictionary with feedback and documents
        """
        try:
            # Extract task information
            prompt = task.get("prompt", "")
            inputDocuments = task.get("inputDocuments", [])
            outputSpecs = task.get("outputSpecifications", [])

            # Check AI service
            if not self.service or not self.service.base:
                return {
                    "feedback": "The Documentation agent requires an AI service to function.",
                    "documents": []
                }

            # Extract context from input documents - focusing only on dataExtracted
            documentContext = self._extractDocumentContext(inputDocuments)

            # Create task analysis to understand the requirements
            documentationPlan = await self._analyzeTask(prompt, documentContext, outputSpecs)
            logger.debug(f"Documentation plan: {documentationPlan}")

            # Generate all required output documents
            documents = []

            # If no output specs provided, create default document
            if not outputSpecs:
                defaultFormat = documentationPlan.get("recommendedFormat", "markdown")
                defaultTitle = documentationPlan.get("title", "Documentation")
                safeTitle = self._sanitizeFilename(defaultTitle)

                outputSpecs = [
                    {"label": f"{safeTitle}.{defaultFormat}", "description": "Comprehensive documentation"}
                ]

            # Process each output specification
            for spec in outputSpecs:
                outputLabel = spec.get("label", "")
                outputDescription = spec.get("description", "")

                # Generate the document using multi-step approach
                document = await self._createDocumentMultiStep(
                    prompt,
                    documentContext,
                    outputLabel,
                    outputDescription,
                    documentationPlan
                )

                documents.append(document)

            # Generate feedback
            feedback = documentationPlan.get("feedback", f"Created {len(documents)} documents based on your requirements.")

            return {
                "feedback": feedback,
                "documents": documents
            }

        except Exception as e:
            logger.error(f"Error in documentation generation: {str(e)}", exc_info=True)
            return {
                "feedback": f"Error during documentation generation: {str(e)}",
                "documents": []
            }
    def _extractDocumentContext(self, documents: List[Dict[str, Any]]) -> str:
        """
        Extract context from input documents, focusing on dataExtracted.

        Args:
            documents: List of document objects

        Returns:
            Extracted context as text
        """
        contextParts = []

        for doc in documents:
            docName = doc.get("name", "unnamed")
            if doc.get("ext"):
                docName = f"{docName}.{doc.get('ext')}"

            contextParts.append(f"\n\n--- {docName} ---\n")

            # Process contents for dataExtracted
            for content in doc.get("contents", []):
                if content.get("dataExtracted"):
                    contextParts.append(content.get("dataExtracted", ""))

        return "\n".join(contextParts)

    def _sanitizeFilename(self, filename: str) -> str:
        """
        Sanitize a filename by removing invalid characters.

        Args:
            filename: Filename to sanitize

        Returns:
            Sanitized filename
        """
        # Replace invalid characters with underscores
        invalidChars = r'<>:"/\|?*'
        for char in invalidChars:
            filename = filename.replace(char, '_')

        # Trim filename if too long
        if len(filename) > 100:
            filename = filename[:97] + "..."

        return filename
    async def _analyzeTask(self, prompt: str, context: str, outputSpecs: List) -> Dict:
        """
        Use AI to analyze the task and create a documentation plan.

        Args:
            prompt: The task prompt
            context: Document context
            outputSpecs: Output specifications

        Returns:
            Documentation plan dictionary
        """
        analysisPrompt = f"""
        Analyze this documentation task and create a detailed plan.

        TASK: {prompt}

        DOCUMENT CONTEXT SAMPLE:
        {context[:1000]}... (truncated)

        OUTPUT REQUIREMENTS:
        {json.dumps(outputSpecs, indent=2)}

        Create a detailed documentation plan in JSON format with the following structure:
        {{
            "title": "Document Title",
            "documentType": "report|manual|guide|whitepaper|etc",
            "audience": "technical|general|executive|etc",
            "detailedStructure": [
                {{
                    "title": "Chapter/Section Title",
                    "keyPoints": ["point1", "point2", ...],
                    "subsections": ["subsection1", "subsection2", ...],
                    "importance": "high|medium|low",
                    "estimatedLength": "short|medium|long"
                }},
                ... more sections ...
            ],
            "keyTopics": ["topic1", "topic2", ...],
            "tone": "formal|conversational|instructional|etc",
            "recommendedFormat": "markdown|html|text|etc",
            "formattingRequirements": ["requirement1", "requirement2", ...],
            "executiveSummary": "Brief description of what the document will cover",
            "feedback": "Brief message explaining the documentation approach"
        }}

        Only return valid JSON. No preamble or explanations.
        """

        try:
            response = await self.service.base.callAi([
                {"role": "system", "content": "You are a documentation expert. Respond with valid JSON only."},
                {"role": "user", "content": analysisPrompt}
            ])

            # Extract JSON from response
            jsonStart = response.find('{')
            jsonEnd = response.rfind('}') + 1

            if jsonStart >= 0 and jsonEnd > jsonStart:
                plan = json.loads(response[jsonStart:jsonEnd])
                return plan
            else:
                # Fallback if JSON not found
                return {
                    "title": "Documentation (DEFAULT)",
                    "documentType": "report",
                    "audience": "general",
                    "detailedStructure": [
                        {
                            "title": "Introduction",
                            "keyPoints": ["Purpose", "Scope"],
                            "subsections": [],
                            "importance": "high",
                            "estimatedLength": "short"
                        },
                        {
                            "title": "Main Content",
                            "keyPoints": ["Core Information"],
                            "subsections": ["Key Findings", "Analysis"],
                            "importance": "high",
                            "estimatedLength": "long"
                        },
                        {
                            "title": "Conclusion",
                            "keyPoints": ["Summary", "Next Steps"],
                            "subsections": [],
                            "importance": "medium",
                            "estimatedLength": "short"
                        }
                    ],
                    "keyTopics": ["General Information"],
                    "tone": "formal",
                    "recommendedFormat": "markdown",
                    "formattingRequirements": ["Clear headings", "Professional formatting"],
                    "executiveSummary": "A comprehensive documentation covering the requested topics.",
                    "feedback": "Created documentation based on your requirements."
                }

        except Exception as e:
            logger.warning(f"Error creating documentation plan: {str(e)}")
            return {
                "title": "Documentation",
                "documentType": "report",
                "audience": "general",
                "detailedStructure": [
                    {
                        "title": "Introduction",
                        "keyPoints": ["Purpose", "Scope"],
                        "subsections": [],
                        "importance": "high",
                        "estimatedLength": "short"
                    },
                    {
                        "title": "Main Content",
                        "keyPoints": ["Core Information"],
                        "subsections": ["Key Findings", "Analysis"],
                        "importance": "high",
                        "estimatedLength": "long"
                    },
                    {
                        "title": "Conclusion",
                        "keyPoints": ["Summary", "Next Steps"],
                        "subsections": [],
                        "importance": "medium",
                        "estimatedLength": "short"
                    }
                ],
                "keyTopics": ["General Information"],
                "tone": "formal",
                "recommendedFormat": "markdown",
                "formattingRequirements": ["Clear headings", "Professional formatting"],
                "executiveSummary": "A comprehensive documentation covering the requested topics.",
                "feedback": "Created documentation based on your requirements."
            }
    async def _createDocumentMultiStep(self, prompt: str, context: str, outputLabel: str,
                                       outputDescription: str, documentationPlan: Dict) -> ChatContent:
        """
        Create a document using a multi-step approach with separate AI calls for each section.

        Args:
            prompt: Original task prompt
            context: Document context
            outputLabel: Output filename
            outputDescription: Description of desired output
            documentationPlan: Documentation plan from AI

        Returns:
            ChatContent object
        """
        try:
            # Determine format from filename
            formatType = outputLabel.split('.')[-1].lower() if '.' in outputLabel else "md"

            # Map format to contentType
            contentTypeMap = {
                "md": "text/markdown",
                "markdown": "text/markdown",
                "html": "text/html",
                "txt": "text/plain",
                "text": "text/plain",
                "json": "application/json",
                "csv": "text/csv"
            }

            contentType = contentTypeMap.get(formatType, "text/plain")

            # Get document information
            title = documentationPlan.get("title", "Documentation")
            documentType = documentationPlan.get("documentType", "document")
            audience = documentationPlan.get("audience", "general")
            tone = documentationPlan.get("tone", "formal")
            keyTopics = documentationPlan.get("keyTopics", [])
            formattingRequirements = documentationPlan.get("formattingRequirements", [])

            # Get the detailed structure
            detailedStructure = documentationPlan.get("detailedStructure", [])

            # Step 1: Generate executive summary
            summaryPrompt = f"""
            Create an executive summary for a {documentType} titled "{title}".

            DOCUMENT OVERVIEW:
            - Type: {documentType}
            - Audience: {audience}
            - Key Topics: {', '.join(keyTopics)}

            TASK CONTEXT: {prompt}

            The executive summary should:
            1. Provide a concise overview of the document's purpose
            2. Highlight key points and findings
            3. Be clear and engaging for the target audience
            4. Set expectations for the document's content

            Keep the summary brief but comprehensive.
            """

            executiveSummary = await self.service.base.callAi([
                {"role": "system", "content": f"You are a documentation expert creating an executive summary in {formatType} format."},
                {"role": "user", "content": summaryPrompt}
            ], produceUserAnswer=True)

            # Step 2: Generate introduction
            introPrompt = f"""
            Create an introduction for a {documentType} titled "{title}".

            DOCUMENT OVERVIEW:
            - Type: {documentType}
            - Audience: {audience}
            - Key Topics: {', '.join(keyTopics)}

            TASK CONTEXT: {prompt}

            The introduction should:
            1. Set the context and purpose of the document
            2. Outline the scope and objectives
            3. Preview the main topics to be covered
            4. Engage the reader's interest

            Format the introduction according to {formatType} standards.
            """

            introduction = await self.service.base.callAi([
                {"role": "system", "content": f"You are a documentation expert creating an introduction in {formatType} format."},
                {"role": "user", "content": introPrompt}
            ], produceUserAnswer=True)
            # Step 3: Generate main sections
            sections = []
            for section in detailedStructure:
                sectionTitle = section.get("title", "Section")
                keyPoints = section.get("keyPoints", [])
                subsections = section.get("subsections", [])
                importance = section.get("importance", "medium")
                estimatedLength = section.get("estimatedLength", "medium")

                sectionPrompt = f"""
                Create the {sectionTitle} section for a {documentType} titled "{title}".

                SECTION DETAILS:
                - Title: {sectionTitle}
                - Key Points: {', '.join(keyPoints)}
                - Subsections: {', '.join(subsections)}
                - Importance: {importance}
                - Estimated Length: {estimatedLength}

                DOCUMENT CONTEXT:
                - Type: {documentType}
                - Audience: {audience}
                - Key Topics: {', '.join(keyTopics)}

                TASK CONTEXT: {prompt}

                The section should:
                1. Cover all key points thoroughly
                2. Include relevant subsections
                3. Maintain appropriate depth based on importance
                4. Follow the document's tone and style

                Format the section according to {formatType} standards.
                """

                sectionContent = await self.service.base.callAi([
                    {"role": "system", "content": f"You are a documentation expert creating a section in {formatType} format."},
                    {"role": "user", "content": sectionPrompt}
                ], produceUserAnswer=True)

                sections.append(sectionContent)

            # Step 4: Generate conclusion
            conclusionPrompt = f"""
            Create the conclusion for a {documentType} titled "{title}".

            DOCUMENT OVERVIEW:
            - Type: {documentType}
            - Audience: {audience}
            - Key Topics: {', '.join(keyTopics)}

            TASK CONTEXT: {prompt}

            This conclusion should:
            1. Summarize the key points covered in the document
            2. Provide closure to the topics discussed
            3. Include any relevant recommendations or next steps
            4. Leave the reader with a clear understanding of the document's significance

            The conclusion should be professional and impactful, formatted according to {formatType} standards.
            """

            conclusion = await self.service.base.callAi([
                {"role": "system", "content": f"You are a documentation expert creating a conclusion in {formatType} format."},
                {"role": "user", "content": conclusionPrompt}
            ], produceUserAnswer=True)
            # Step 5: Assemble the complete document
            if formatType in ["md", "markdown"]:
                # Markdown format
                documentContent = f"# {title}\n\n"

                if executiveSummary:
                    documentContent += f"## Executive Summary\n\n{executiveSummary}\n\n"

                documentContent += f"{introduction}\n\n"

                for i, sectionContent in enumerate(sections):
                    # Ensure section starts with heading if not already
                    sectionTitle = detailedStructure[i].get("title", f"Section {i+1}")
                    if not sectionContent.strip().startswith("#"):
                        documentContent += f"## {sectionTitle}\n\n"
                    documentContent += f"{sectionContent}\n\n"

                documentContent += f"## Conclusion\n\n{conclusion}\n"

            elif formatType == "html":
                # HTML format
                documentContent = f"<html>\n<head>\n<title>{title}</title>\n</head>\n<body>\n"
                documentContent += f"<h1>{title}</h1>\n\n"

                if executiveSummary:
                    documentContent += f"<h2>Executive Summary</h2>\n<div>{executiveSummary}</div>\n\n"

                documentContent += f"<div>{introduction}</div>\n\n"

                for i, sectionContent in enumerate(sections):
                    sectionTitle = detailedStructure[i].get("title", f"Section {i+1}")
                    documentContent += f"<h2>{sectionTitle}</h2>\n<div>{sectionContent}</div>\n\n"

                documentContent += f"<h2>Conclusion</h2>\n<div>{conclusion}</div>\n"
                documentContent += "</body>\n</html>"

            else:
                # Plain text format
                documentContent = f"{title}\n{'=' * len(title)}\n\n"

                if executiveSummary:
                    documentContent += f"EXECUTIVE SUMMARY\n{'-' * 17}\n\n{executiveSummary}\n\n"

                documentContent += f"{introduction}\n\n"

                for i, sectionContent in enumerate(sections):
                    sectionTitle = detailedStructure[i].get("title", f"Section {i+1}")
                    documentContent += f"{sectionTitle}\n{'-' * len(sectionTitle)}\n\n{sectionContent}\n\n"

                documentContent += f"CONCLUSION\n{'-' * 10}\n\n{conclusion}\n"

            # Create document object
            return self.formatAgentDocumentOutput(outputLabel, documentContent, contentType)

        except Exception as e:
            logger.error(f"Error creating document: {str(e)}", exc_info=True)

            # Create a simple error document
            if formatType in ["md", "markdown"]:
                content = f"# Error in Documentation\n\nThere was an error generating the documentation: {str(e)}"
            elif formatType == "html":
                content = f"<html><body><h1>Error in Documentation</h1><p>There was an error generating the documentation: {str(e)}</p></body></html>"
            else:
                content = f"Error in Documentation\n\nThere was an error generating the documentation: {str(e)}"

            return self.formatAgentDocumentOutput(outputLabel, content, contentType)


# Factory function for the Documentation agent
def getAgentDocumentation():
    """Returns an instance of the Documentation agent."""
    return AgentDocumentation()
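A pattern worth noting before the next file: each agent in this commit asks the model for JSON and then slices the response between the first `{` and the last `}` before parsing, falling back to a hard-coded default when that fails. A minimal sketch of that idea; the helper name and the `None` fallback are illustrative, not part of the removed code:

```python
import json
from typing import Any, Dict, Optional

def extract_json_block(response: str) -> Optional[Dict[str, Any]]:
    """Parse the first {...} block found in an AI response (illustrative helper)."""
    start = response.find('{')
    end = response.rfind('}') + 1
    if start < 0 or end <= start:
        return None  # caller substitutes its own fallback plan
    try:
        return json.loads(response[start:end])
    except json.JSONDecodeError:
        return None

print(extract_json_block('Sure! {"title": "Documentation"} Hope that helps.'))
```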
@@ -1,380 +0,0 @@
"""
Email Agent Module.
Handles email-related tasks using Microsoft Graph API.
"""

import logging
import json
from typing import Dict, Any, List, Optional, Tuple
import uuid
import os

from modules.workflow.agentBase import AgentBase
from modules.interfaces.serviceChatModel import Task, ChatDocument, ChatContent

logger = logging.getLogger(__name__)

class AgentEmail(AgentBase):
    """Agent for handling email-related tasks."""

    def __init__(self):
        """Initialize the email agent."""
        super().__init__()
        self.name = "email"
        self.label = "Email Agent"
        self.description = "Handles email composition and sending using Microsoft Graph API"
        self.capabilities = [
            "email_composition",
            "email_draft_creation",
            "email_template_generation"
        ]
        self.serviceBase = None

    def setDependencies(self, serviceBase=None):
        """Set external dependencies for the agent."""
        self.serviceBase = serviceBase
    async def processTask(self, task: Task) -> Dict[str, Any]:
        """
        Process an email-related task.

        Args:
            task: Task object containing:
                - prompt: Instructions for the agent
                - inputDocuments: List of documents to process
                - outputSpecifications: List of required output documents
                - context: Additional context including workflow info

        Returns:
            Dictionary containing:
                - feedback: Text response explaining what was done
                - documents: List of created documents
        """
        try:
            # Extract task information
            prompt = task.prompt
            inputDocuments = task.filesInput
            outputSpecs = task.filesOutput

            # Check AI service
            if not self.service.base:
                return {
                    "feedback": "The Email agent requires an AI service to function.",
                    "documents": []
                }

            # Check if Microsoft connector is available
            if not hasattr(self.service, 'msft'):
                return {
                    "feedback": "Microsoft connector not available. Please ensure Microsoft integration is properly configured.",
                    "documents": []
                }

            # Get Microsoft token
            token_data = self.service.msft.getMsftToken()
            if not token_data:
                # Create authentication trigger document
                auth_doc = self._createFrontendAuthTriggerDocument()
                return {
                    "feedback": "Microsoft authentication required. Please authenticate to continue.",
                    "documents": [auth_doc]
                }

            # Extract document data from input
            documentContents, attachments = self._processInputDocuments(inputDocuments)

            # Generate email subject and body using AI
            emailTemplate = await self._generateEmailTemplate(prompt, documentContents)

            # Create HTML preview of the email
            htmlPreview = self._createHtmlPreview(emailTemplate)

            # Attempt to create a draft email using Microsoft Graph API
            draft_result = self.service.msft.createDraftEmail(
                emailTemplate["recipient"],
                emailTemplate["subject"],
                emailTemplate["htmlBody"],
                attachments
            )

            # Prepare output documents
            documents = []

            # Process output specifications
            for spec in outputSpecs:
                label = spec.get("label", "")
                description = spec.get("description", "")

                if label.endswith(".html"):
                    # Create the HTML template file
                    templateDoc = self.formatAgentDocumentOutput(
                        label,
                        emailTemplate["htmlBody"],  # Use the actual HTML body, not the preview
                        "text/html"
                    )
                    documents.append(templateDoc)
                elif label.endswith(".json"):
                    # Create JSON template if requested
                    templateJson = json.dumps(emailTemplate, indent=2)
                    templateDoc = self.formatAgentDocumentOutput(
                        label,
                        templateJson,
                        "application/json"
                    )
                    documents.append(templateDoc)
                else:
                    # Default to preview for other cases
                    previewDoc = self.formatAgentDocumentOutput(
                        label,
                        htmlPreview,
                        "text/html"
                    )
                    documents.append(previewDoc)

            # Prepare feedback message
            if draft_result:
                feedback = f"Email draft created successfully for {emailTemplate.get('recipient')}. The subject is: '{emailTemplate['subject']}'"
                if attachments:
                    feedback += f" with {len(attachments)} attachment(s)"
                feedback += ". You can open and edit it in your Outlook draft folder."
            else:
                feedback = "Email template created but could not save as draft. HTML preview and template are available as documents."

            return {
                "feedback": feedback,
                "documents": documents
            }

        except Exception as e:
            logger.error(f"Error in email agent: {str(e)}")
            return {
                "feedback": f"Error processing email task: {str(e)}",
                "documents": []
            }
    def _createFrontendAuthTriggerDocument(self) -> ChatDocument:
        """Create a document that triggers Microsoft authentication in the frontend."""
        return ChatDocument(
            id=str(uuid.uuid4()),
            name="microsoft_auth",
            ext="html",
            data="""
            <div>
                <h2>Microsoft Authentication Required</h2>
                <p>Please click the button below to authenticate with Microsoft:</p>
                <button onclick="window.location.href='/api/auth/microsoft'">Authenticate with Microsoft</button>
            </div>
            """,
            contents=[
                ChatContent(
                    name="main",
                    data="""
                    <div>
                        <h2>Microsoft Authentication Required</h2>
                        <p>Please click the button below to authenticate with Microsoft:</p>
                        <button onclick="window.location.href='/api/auth/microsoft'">Authenticate with Microsoft</button>
                    </div>
                    """,
                    summary="Microsoft authentication trigger page",
                    metadata={
                        "contentType": "text/html",
                        "isText": True
                    }
                )
            ]
        )

    def _processInputDocuments(self, input_docs: List[ChatDocument]) -> Tuple[str, List[Dict[str, Any]]]:
        """
        Process input documents to extract content and prepare attachments.

        Args:
            input_docs: List of input documents

        Returns:
            Tuple of (document content text, list of attachments)
        """
        documentContents = []
        attachments = []

        for doc in input_docs:
            docName = doc.name
            if doc.ext:
                docName = f"{docName}.{doc.ext}"

            # Add document name to contents
            documentContents.append(f"\n\n--- {docName} ---\n")

            # Process document data directly
            if doc.data:
                # Add to attachments with proper metadata
                attachments.append({
                    "name": docName,
                    "document": {
                        "data": doc.data,
                        "mimeType": doc.contents[0].metadata.get("contentType", "application/octet-stream") if doc.contents else "application/octet-stream",
                        "base64Encoded": doc.contents[0].metadata.get("base64Encoded", False) if doc.contents else False
                    }
                })
                documentContents.append(f"Document attached: {docName}")
            else:
                documentContents.append(f"Document referenced: {docName}")

        return "\n".join(documentContents), attachments
    def formatAgentDocumentOutput(self, filename: str, content: str, contentType: str) -> ChatDocument:
        """
        Format a document for agent output.

        Args:
            filename: Output filename
            content: Document content
            contentType: MIME type of the content

        Returns:
            ChatDocument object
        """
        # Split filename into name and extension
        name, ext = os.path.splitext(filename)
        if ext.startswith('.'):
            ext = ext[1:]

        # Create document object
        return ChatDocument(
            id=str(uuid.uuid4()),
            name=name,
            ext=ext,
            data=content,
            contents=[
                ChatContent(
                    name="main",
                    data=content,
                    summary=f"Generated {filename}",
                    metadata={"contentType": contentType}
                )
            ]
        )
    async def _generateEmailTemplate(self, prompt: str, documentContents: str) -> Dict[str, Any]:
        """
        Generate email template using AI.

        Args:
            prompt: The task prompt
            documentContents: Extracted document content

        Returns:
            Email template dictionary with recipient, subject, body
        """
        emailPrompt = f"""
        Create an email based on the following request:

        REQUEST: {prompt}

        DOCUMENT CONTENTS:
        {documentContents[:2000]}... (truncated if longer)

        Generate an email template with:
        1. A relevant recipient (use placeholder or derive from content if possible)
        2. A concise but descriptive subject line
        3. A professional HTML-formatted email body
        4. Appropriate greeting and closing

        Format your response as JSON with these fields:
        - recipient: email address
        - subject: subject line
        - plainBody: plain text version
        - htmlBody: HTML formatted version

        Only return valid JSON. No preamble or explanations.
        """

        try:
            response = await self.service.base.callAi([
                {"role": "system", "content": "You are an email template specialist. Create professional emails. Respond with valid JSON only."},
                {"role": "user", "content": emailPrompt}
            ])

            # Extract JSON from response
            jsonStart = response.find('{')
            jsonEnd = response.rfind('}') + 1

            if jsonStart >= 0 and jsonEnd > jsonStart:
                template = json.loads(response[jsonStart:jsonEnd])
                return template
            else:
                # Fallback plan
                logger.warning("Unable to create an email template from the AI response, generating fallback template")
                return {
                    "recipient": "recipient@example.com",
                    "subject": "Information Regarding Your Request",
                    "plainBody": f"This email is regarding your request: {prompt}",
                    "htmlBody": f"<html><body><p>This email is regarding your request: {prompt}</p></body></html>"
                }

        except Exception as e:
            logger.warning(f"Error generating email template: {str(e)}")
            return {
                "recipient": "recipient@example.com",
                "subject": "Information Regarding Your Request",
                "plainBody": f"This email is regarding your request: {prompt}",
                "htmlBody": f"<html><body><p>This email is regarding your request: {prompt}</p></body></html>"
            }
    def _createHtmlPreview(self, emailTemplate: Dict[str, Any]) -> str:
        """
        Create an HTML preview of the email template.

        Args:
            emailTemplate: Email template dictionary

        Returns:
            HTML string for preview
        """
        html = f"""
        <!DOCTYPE html>
        <html>
        <head>
            <meta charset="UTF-8">
            <title>Email Preview: {emailTemplate.get('subject', 'Email Template')}</title>
            <style>
                body {{ font-family: Arial, sans-serif; margin: 0; padding: 0; background-color: #f5f5f5; }}
                .email-container {{ max-width: 600px; margin: 20px auto; background-color: white; border: 1px solid #ddd; border-radius: 5px; overflow: hidden; }}
                .email-header {{ background-color: #f0f0f0; padding: 15px; border-bottom: 1px solid #ddd; }}
                .email-content {{ padding: 20px; }}
                .email-footer {{ background-color: #f0f0f0; padding: 15px; border-top: 1px solid #ddd; font-size: 12px; color: #666; }}
                .field {{ margin-bottom: 10px; }}
                .field-label {{ font-weight: bold; color: #555; }}
                .email-body {{ margin-top: 20px; padding-top: 20px; border-top: 1px solid #eee; }}
            </style>
        </head>
        <body>
            <div class="email-container">
                <div class="email-header">
                    <h2>Email Template Preview</h2>
                </div>
                <div class="email-content">
                    <div class="field">
                        <div class="field-label">To:</div>
                        <div>{emailTemplate.get('recipient', 'recipient@example.com')}</div>
                    </div>
                    <div class="field">
                        <div class="field-label">Subject:</div>
                        <div>{emailTemplate.get('subject', 'No Subject')}</div>
                    </div>
                    <div class="email-body">
                        {emailTemplate.get('htmlBody', '<p>No content</p>')}
                    </div>
                </div>
                <div class="email-footer">
                    <p>This is a preview of the email template. The actual email may appear differently in various email clients.</p>
                </div>
            </div>
        </body>
        </html>
        """
        return html

def getAgentEmail() -> AgentEmail:
    """Factory function to create and return an EmailAgent instance."""
    return AgentEmail()
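The `formatAgentDocumentOutput` helper above splits the requested label into a name and an extension before building the `ChatDocument`. A small standalone sketch of that convention (the function name and sample labels are illustrative, plain standard library only):

```python
import os
from typing import Tuple

def split_label(filename: str) -> Tuple[str, str]:
    """Split "report.html" into ("report", "html"), as the email agent does."""
    name, ext = os.path.splitext(filename)
    return name, ext[1:] if ext.startswith('.') else ext

print(split_label("email_template.html"))  # ('email_template', 'html')
print(split_label("notes"))                # ('notes', '')
```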
@@ -1,348 +0,0 @@
"""
SharePoint Agent Module.
Handles SharePoint document search and data extraction using Microsoft Graph API.
"""

import logging
import json
from typing import Dict, Any, List, Optional
from modules.workflow.agentBase import AgentBase

logger = logging.getLogger(__name__)

class AgentSharepoint(AgentBase):
    """Agent for handling SharePoint document operations."""

    def __init__(self):
        """Initialize the SharePoint agent."""
        super().__init__()
        self.name = "sharepoint"
        self.label = "SharePoint Agent"
        self.description = "Searches and extracts data from SharePoint documents using Microsoft Graph API"
        self.capabilities = [
            "document_search",
            "content_extraction",
            "metadata_analysis",
            "document_processing"
        ]
    async def processTask(self, task: Dict[str, Any]) -> Dict[str, Any]:
        """
        Process a SharePoint-related task.

        Args:
            task: Task object containing:
                - prompt: Instructions for the agent
                - inputDocuments: List of documents to process
                - outputSpecifications: List of required output documents
                - context: Additional context including workflow info

        Returns:
            Dictionary containing:
                - feedback: Text response explaining what was done
                - documents: List of created documents
        """
        try:
            # Extract task information
            prompt = task.get("prompt", "")
            inputDocuments = task.get("inputDocuments", [])
            outputSpecs = task.get("outputSpecifications", [])

            # Check AI service
            if not self.service.base:
                return {
                    "feedback": "The SharePoint agent requires an AI service to function.",
                    "documents": []
                }

            # Check if Microsoft connector is available
            if not hasattr(self.service, 'msft'):
                return {
                    "feedback": "Microsoft connector not available. Please ensure Microsoft integration is properly configured.",
                    "documents": []
                }

            # Get Microsoft token
            token_data = self.service.msft.getMsftToken()
            if not token_data:
                # Create authentication trigger document
                auth_doc = self._createFrontendAuthTriggerDocument()
                return {
                    "feedback": "Microsoft authentication required. Please authenticate to continue.",
                    "documents": [auth_doc]
                }

            # Parse the search query from the prompt
            searchQuery = await self._parseSearchQuery(prompt)

            # Search SharePoint documents
            searchResults = await self._searchSharePointDocuments(searchQuery)

            # Process search results
            documents = []
            for spec in outputSpecs:
                label = spec.get("label", "")
                description = spec.get("description", "")

                if label.endswith(".json"):
                    # Create JSON summary of search results
                    summaryDoc = self._createSearchSummaryJson(searchResults, description)
                    documents.append(summaryDoc)
                elif label.endswith(".csv"):
                    # Create CSV summary of search results
                    summaryDoc = self._createSearchSummaryCsv(searchResults, description)
                    documents.append(summaryDoc)
                else:
                    # Create text summary of search results
                    summaryDoc = self._createSearchSummaryText(searchResults, description)
                    documents.append(summaryDoc)

            # Prepare feedback message
            feedback = f"Found {len(searchResults)} documents matching your search criteria. "
            if searchResults:
                feedback += "The results have been saved as documents."
            else:
                feedback += "No matching documents were found."

            return {
                "feedback": feedback,
                "documents": documents
            }

        except Exception as e:
            logger.error(f"Error in SharePoint agent: {str(e)}")
            return {
                "feedback": f"Error processing SharePoint task: {str(e)}",
                "documents": []
            }
    def _createFrontendAuthTriggerDocument(self) -> Dict[str, Any]:
        """Create a document that triggers Microsoft authentication in the frontend."""
        return self.formatAgentDocumentOutput(
            "microsoft_auth.html",
            """
            <div>
                <h2>Microsoft Authentication Required</h2>
                <p>Please click the button below to authenticate with Microsoft:</p>
                <button onclick="window.location.href='/api/auth/microsoft'">Authenticate with Microsoft</button>
            </div>
            """,
            "text/html"
        )
    async def _parseSearchQuery(self, prompt: str) -> Dict[str, Any]:
        """
        Parse the search query from the prompt using AI.

        Args:
            prompt: The task prompt

        Returns:
            Dictionary containing search parameters
        """
        try:
            # Use AI to parse the search query
            response = await self.service.base.callAi([
                {"role": "system", "content": "You are a SharePoint search query parser. Extract search parameters from the user's request."},
                {"role": "user", "content": f"""
                Parse the following SharePoint search request into structured parameters:

                {prompt}

                Return a JSON object with these fields:
                - query: The main search query
                - site: Optional SharePoint site name
                - folder: Optional folder path
                - fileTypes: List of file types to search for
                - dateRange: Optional date range for filtering
                - maxResults: Maximum number of results to return

                Only return valid JSON. No preamble or explanations.
                """}
            ])

            # Extract JSON from response
            jsonStart = response.find('{')
            jsonEnd = response.rfind('}') + 1

            if jsonStart >= 0 and jsonEnd > jsonStart:
                return json.loads(response[jsonStart:jsonEnd])
            else:
                # Fallback to simple query
                return {
                    "query": prompt,
                    "maxResults": 10
                }

        except Exception as e:
            logger.warning(f"Error parsing search query: {str(e)}")
            return {
                "query": prompt,
                "maxResults": 10
            }
    async def _searchSharePointDocuments(self, searchParams: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        Search SharePoint documents using Microsoft Graph API.

        Args:
            searchParams: Search parameters

        Returns:
            List of search results
        """
        try:
            # Get Microsoft token
            token = self.service.msft.getMsftToken()
            if not token:
                return []

            # Prepare search query
            query = searchParams.get("query", "")
            site = searchParams.get("site", "")
            folder = searchParams.get("folder", "")
            fileTypes = searchParams.get("fileTypes", [])
            maxResults = searchParams.get("maxResults", 10)

            # Build search URL
            searchUrl = "https://graph.microsoft.com/v1.0/sites/root/drives"
            if site:
                searchUrl = f"https://graph.microsoft.com/v1.0/sites/{site}/drives"

            # Get drives (document libraries)
            response = self.service.msft.makeGraphRequest("GET", searchUrl)
            if not response or "value" not in response:
                return []

            results = []
            for drive in response["value"]:
                # Search in each drive
                driveId = drive["id"]
                searchEndpoint = f"https://graph.microsoft.com/v1.0/drives/{driveId}/root/search(q='{query}')"

                # Add file type filters if specified
                if fileTypes:
                    typeFilter = " or ".join([f"fileType eq '{ft}'" for ft in fileTypes])
                    searchEndpoint += f"&filter={typeFilter}"

                # Add folder filter if specified
                if folder:
                    searchEndpoint += f"&filter=parentReference/path eq '/{folder}'"

                # Add result limit
                searchEndpoint += f"&top={maxResults}"

                # Make the search request
                searchResponse = self.service.msft.makeGraphRequest("GET", searchEndpoint)
                if searchResponse and "value" in searchResponse:
                    for item in searchResponse["value"]:
                        # Get file content
                        fileContent = await self._getFileContent(driveId, item["id"])

                        results.append({
                            "name": item["name"],
                            "id": item["id"],
                            "driveId": driveId,
                            "webUrl": item["webUrl"],
                            "lastModified": item["lastModifiedDateTime"],
                            "size": item["size"],
                            "content": fileContent
                        })

            return results

        except Exception as e:
            logger.error(f"Error searching SharePoint: {str(e)}")
            return []

    async def _getFileContent(self, driveId: str, fileId: str) -> str:
        """
        Get file content from SharePoint.

        Args:
            driveId: Drive ID
            fileId: File ID

        Returns:
            File content as string
        """
        try:
            # Get file content URL
            contentUrl = f"https://graph.microsoft.com/v1.0/drives/{driveId}/items/{fileId}/content"

            # Download file content
            response = self.service.msft.makeGraphRequest("GET", contentUrl, raw=True)
            if response:
                return response.decode('utf-8')
            return ""

        except Exception as e:
            logger.error(f"Error getting file content: {str(e)}")
            return ""
    def _createSearchSummaryJson(self, results: List[Dict[str, Any]], description: str) -> Dict[str, Any]:
        """Create a JSON summary of search results."""
        summary = {
            "description": description,
            "totalResults": len(results),
            "results": []
        }

        for result in results:
            summary["results"].append({
                "name": result["name"],
                "url": result["webUrl"],
                "lastModified": result["lastModified"],
                "size": result["size"]
            })

        return self.formatAgentDocumentOutput(
            "sharepoint_search_results.json",
            json.dumps(summary, indent=2),
            "application/json"
        )

    def _createSearchSummaryCsv(self, results: List[Dict[str, Any]], description: str) -> Dict[str, Any]:
        """Create a CSV summary of search results."""
        csvLines = ["Name,URL,Last Modified,Size (bytes)"]

        for result in results:
            name = result["name"].replace('"', '""')
            url = result["webUrl"].replace('"', '""')
            lastModified = result["lastModified"].replace('"', '""')
            size = str(result["size"])

            csvLines.append(f'"{name}","{url}","{lastModified}",{size}')

        return self.formatAgentDocumentOutput(
            "sharepoint_search_results.csv",
            "\n".join(csvLines),
            "text/csv"
        )

    def _createSearchSummaryText(self, results: List[Dict[str, Any]], description: str) -> Dict[str, Any]:
        """Create a text summary of search results."""
        textLines = [
            f"SharePoint Search Results",
            f"Description: {description}",
            f"Total Results: {len(results)}",
            "\nResults:"
        ]

        for result in results:
            textLines.extend([
                f"\nName: {result['name']}",
                f"URL: {result['webUrl']}",
                f"Last Modified: {result['lastModified']}",
                f"Size: {result['size']} bytes"
            ])

        return self.formatAgentDocumentOutput(
            "sharepoint_search_results.txt",
            "\n".join(textLines),
            "text/plain"
        )

def getAgentSharepoint() -> AgentSharepoint:
    """Factory function to create and return a SharePointAgent instance."""
    return AgentSharepoint()
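One note on the CSV summary above: the manual quote-doubling works for these fields, but the standard library `csv` module handles quoting and embedded commas or newlines automatically. A hedged sketch of that alternative (the function name and sample data are illustrative; the original helper is unchanged above):

```python
import csv
import io
from typing import Any, Dict, List

def results_to_csv(results: List[Dict[str, Any]]) -> str:
    """Render SharePoint search results as CSV via the csv module (illustrative)."""
    buffer = io.StringIO()
    writer = csv.writer(buffer, quoting=csv.QUOTE_MINIMAL)
    writer.writerow(["Name", "URL", "Last Modified", "Size (bytes)"])
    for result in results:
        writer.writerow([result["name"], result["webUrl"],
                         result["lastModified"], result["size"]])
    return buffer.getvalue()

print(results_to_csv([{"name": 'Q1 "final".xlsx',
                       "webUrl": "https://example.sharepoint.com/q1.xlsx",
                       "lastModified": "2024-01-31T10:00:00Z",
                       "size": 2048}]))
```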
@@ -1,814 +0,0 @@
"""
Web crawler agent for gathering and analyzing web content.
Provides web research and content extraction capabilities.
"""

import logging
import json
import re
import time
import os
from typing import Dict, Any, List
from urllib.parse import quote_plus, unquote

from bs4 import BeautifulSoup
import requests
import markdown

from modules.workflow.agentBase import AgentBase
from modules.shared.configuration import APP_CONFIG

logger = logging.getLogger(__name__)

class AgentWebcrawler(AgentBase):
    """AI-driven agent for web research and information retrieval"""

    def __init__(self):
        """Initialize the web crawler agent"""
        super().__init__()
        self.name = "webcrawler"
        self.label = "Web Crawler"
        self.description = "Gathers and analyzes web content using AI with multi-step research"
        self.capabilities = [
            "web_research",
            "content_gathering",
            "data_extraction",
            "information_synthesis",
            "source_verification"
        ]

        # Web crawling configuration
        self.srcApikey = APP_CONFIG.get("Agent_Webcrawler_SERPAPI_APIKEY", "")
        self.srcEngine = APP_CONFIG.get("Agent_Webcrawler_SERPAPI_ENGINE", "google")
        self.srcCountry = APP_CONFIG.get("Agent_Webcrawler_SERPAPI_COUNTRY", "auto")
        self.maxUrl = int(APP_CONFIG.get("Agent_Webcrawler_SERPAPI_MAX_URLS", "5"))
        self.maxSearchTerms = int(APP_CONFIG.get("Agent_Webcrawler_SERPAPI_MAX_SEARCH_KEYWORDS", "3"))
        self.maxResults = int(APP_CONFIG.get("Agent_Webcrawler_SERPAPI_MAX_SEARCH_RESULTS", "5"))
        self.timeout = int(APP_CONFIG.get("Agent_Webcrawler_SERPAPI_TIMEOUT", "30"))
        self.userAgent = APP_CONFIG.get("Agent_Webcrawler_SERPAPI_USER_AGENT", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36")

        if not self.srcApikey:
            logger.error("SerpAPI key not configured")

    def setDependencies(self, serviceBase=None):
        """Set external dependencies for the agent."""
        self.setService(serviceBase)
    async def processTask(self, task: Dict[str, Any]) -> Dict[str, Any]:
        """
        Process a task by focusing on required outputs and using AI to guide the research process.

        Args:
            task: Task dictionary with prompt, inputDocuments, outputSpecifications

        Returns:
            Dictionary with feedback and documents
        """
        try:
            # Extract task information
            prompt = task.get("prompt", "")
            inputDocuments = task.get("inputDocuments", [])
            outputSpecs = task.get("outputSpecifications", [])
            workflow = task.get("context", {}).get("workflow", {})

            # Check AI service
            if not self.service or not self.service.base:
                return {
                    "feedback": "The Web Crawler agent requires an AI service to function.",
                    "documents": []
                }

            # Create research plan
            if workflow:
                self.service.logAdd(workflow, "Creating research plan...", level="info", progress=35)
            researchPlan = await self._createResearchPlan(prompt)

            # Check if this is truly a web research task
            if not researchPlan.get("requiresWebResearch", True):
                return {
                    "feedback": "This task doesn't appear to require web research. Please try a different agent.",
                    "documents": []
                }

            # Gather raw material through web research
            if workflow:
                self.service.logAdd(workflow, "Gathering research material...", level="info", progress=45)
            rawResults = await self._gatherResearchMaterial(researchPlan, workflow)

            # Format results into requested output documents
            if workflow:
                self.service.logAdd(workflow, "Creating output documents...", level="info", progress=55)
            documents = await self._createOutputDocuments(
                prompt,
                rawResults,
                outputSpecs,
                researchPlan
            )

            # Generate feedback
            feedback = researchPlan.get("feedback", f"I conducted web research on '{prompt[:50]}...' and gathered information from {len(rawResults)} relevant sources.")

            return {
                "feedback": feedback,
                "documents": documents
            }

        except Exception as e:
            logger.error(f"Error during web research: {str(e)}", exc_info=True)
            return {
                "feedback": f"Error during web research: {str(e)}",
                "documents": []
            }
    async def _createResearchPlan(self, prompt: str) -> Dict[str, Any]:
        """
        Use AI to create a detailed research plan.

        Args:
            prompt: The research query

        Returns:
            Research plan dictionary
        """
        researchPrompt = f"""
        Create a detailed web research plan for this task: "{prompt}"

        Analyze the request carefully and create a structured plan in JSON format with the following elements:
        {{
            "requiresWebResearch": true/false, # Whether this genuinely requires web research
            "researchQuestions": ["question1", "question2", ...], # 2-4 specific questions to answer
            "searchTerms": ["term1", "term2", ...], # Up to {self.maxSearchTerms} effective search terms
            "directUrls": ["url1", "url2", ...], # Any URLs directly mentioned in the request (up to {self.maxUrl})
            "expectedSources": ["type1", "type2", ...], # Types of sources that would be most valuable
            "contentFocus": "what specific content to extract or focus on",
            "feedback": "explanation of how the research will be conducted"
        }}

        Respond with ONLY the JSON object, no additional text or explanations.
        """

        try:
            # Get research plan from AI
            response = await self.service.base.callAi([
                {"role": "system", "content": "You are a research expert. Respond with valid JSON only."},
                {"role": "user", "content": researchPrompt}
            ])

            # Extract JSON
            jsonStart = response.find('{')
            jsonEnd = response.rfind('}') + 1

            if jsonStart >= 0 and jsonEnd > jsonStart:
                plan = json.loads(response[jsonStart:jsonEnd])

                # Ensure we have the expected fields with defaults if missing
                if "searchTerms" not in plan:
                    plan["searchTerms"] = [prompt]
                if "directUrls" not in plan:
                    plan["directUrls"] = []
                if "researchQuestions" not in plan:
                    plan["researchQuestions"] = ["What information can be found about this topic?"]

                return plan
            else:
                # Fallback plan
                logger.warning("Unable to create a research plan from the AI response, generating fallback plan")
                return {
                    "requiresWebResearch": True,
                    "researchQuestions": ["What information can be found about this topic?"],
                    "searchTerms": [prompt],
                    "directUrls": [],
                    "expectedSources": ["Web pages", "Articles"],
                    "contentFocus": "Relevant information about the topic",
                    "feedback": f"I'll conduct web research on '{prompt}' and gather relevant information."
                }

        except Exception as e:
            logger.warning(f"Error creating research plan: {str(e)}")
            # Simple fallback plan
            return {
                "requiresWebResearch": True,
                "researchQuestions": ["What information can be found about this topic?"],
                "searchTerms": [prompt],
                "directUrls": [],
                "expectedSources": ["Web pages", "Articles"],
                "contentFocus": "Relevant information about the topic",
                "feedback": f"I'll conduct web research on '{prompt}' and gather relevant information."
            }
    async def _gatherResearchMaterial(self, researchPlan: Dict[str, Any], workflow: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        Gather research material based on the research plan.

        Args:
            researchPlan: Research plan dictionary
            workflow: Current workflow object

        Returns:
            List of research results
        """
        allResults = []

        # Process direct URLs
        directUrls = researchPlan.get("directUrls", [])[:self.maxUrl]
        for i, url in enumerate(directUrls):
            progress = 45 + int((i / len(directUrls)) * 5)  # Progress from 45% to 50%
            self.service.logAdd(workflow, f"Processing direct URL {i+1}/{len(directUrls)}...", level="info", progress=progress)
            logger.info(f"Processing direct URL: {url}")
            try:
                # Fetch and extract content
                soup = self._readUrl(url)

                if soup:
                    # Extract title and content
                    title = self._extractTitle(soup, url)
                    content = self._extractMainContent(soup)

                    # Add to results
                    allResults.append({
                        "title": title,
                        "url": url,
                        "sourceType": "directUrl",
                        "content": content,
                        "summary": ""  # Will be filled later
                    })
            except Exception as e:
                logger.warning(f"Error processing URL {url}: {str(e)}")

        # Process search terms
        searchTerms = researchPlan.get("searchTerms", [])[:self.maxSearchTerms]
        for i, term in enumerate(searchTerms):
            progress = 50 + int((i / len(searchTerms)) * 5)  # Progress from 50% to 55%
            self.service.logAdd(workflow, f"Searching term {i+1}/{len(searchTerms)}...", level="info", progress=progress)
            logger.info(f"Searching for: {term}")
            try:
                # Perform search
                searchResults = self._searchWeb(term)

                # Process each search result
                for result in searchResults:
                    # Check if URL is already in results
                    if not any(r["url"] == result["url"] for r in allResults):
                        allResults.append({
                            "title": result["title"],
                            "url": result["url"],
                            "sourceType": "searchResult",
                            "content": result["data"],
                            "snippet": result["snippet"],
                            "summary": ""  # Will be filled later
                        })

                    # Stop if we've reached the maximum results
                    if len(allResults) >= self.maxResults:
                        break
            except Exception as e:
                logger.warning(f"Error searching for {term}: {str(e)}")

            # Stop if we've reached the maximum results
            if len(allResults) >= self.maxResults:
                break

        # Create summaries for all results
        allResults = await self._summarizeAllResults(allResults, researchPlan)

        return allResults
async def _summarizeAllResults(self, results: List[Dict[str, Any]], researchPlan: Dict[str, Any]) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Create summaries for all research results.
|
||||
|
||||
Args:
|
||||
results: List of research results
|
||||
researchPlan: Research plan with questions and focus
|
||||
|
||||
Returns:
|
||||
Results with added summaries
|
||||
"""
|
||||
for i, result in enumerate(results):
|
||||
logger.info(f"Summarizing result {i+1}/{len(results)}: {result['title'][:30]}...")
|
||||
|
||||
try:
|
||||
# Limit content length to avoid token issues
|
||||
content = self._limitText(result.get("content", ""), maxChars=8000)
|
||||
researchQuestions = researchPlan.get("researchQuestions", ["What relevant information does this page contain?"])
|
||||
contentFocus = researchPlan.get("contentFocus", "Relevant information")
|
||||
|
||||
# Create summary using AI
|
||||
summaryPrompt = f"""
|
||||
Summarize this web page content based on these research questions:
|
||||
{', '.join(researchQuestions)}
|
||||
|
||||
Focus on: {contentFocus}
|
||||
|
||||
Web page: {result['url']}
|
||||
Title: {result['title']}
|
||||
|
||||
Content:
|
||||
{content}
|
||||
|
||||
Create a concise summary that:
|
||||
1. Directly answers the research questions if possible
|
||||
2. Extracts the most relevant information from the page
|
||||
3. Includes specific facts, figures, or quotes if available
|
||||
4. Is around 2000 characters long
|
||||
|
||||
Only include information actually found in the content. No fabrications or assumptions.
|
||||
"""
|
||||
|
||||
# Get summary from AI
|
||||
summary = await self.service.base.callAi([
|
||||
{"role": "system", "content": "You are a research expert. Respond with valid JSON only."},
|
||||
{"role": "user", "content": summaryPrompt}
|
||||
])
|
||||
|
||||
# Add summary to result
|
||||
result["summary"] = summary.strip()
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Error summarizing result {i+1}: {str(e)}")
|
||||
result["summary"] = f"Error creating summary: {str(e)}"
|
||||
|
||||
return results
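Results are summarized one at a time, so the per-result AI calls dominate runtime for larger result sets. A minimal sketch of a bounded-concurrency variant, assuming `callAi` tolerates concurrent invocations and that `summarizeOne` is a hypothetical coroutine that fills in `result["summary"]`:

```python
import asyncio
from typing import Any, Awaitable, Callable, Dict, List

async def summarizeConcurrently(results: List[Dict[str, Any]],
                                summarizeOne: Callable[[Dict[str, Any]], Awaitable[None]],
                                maxParallel: int = 3) -> List[Dict[str, Any]]:
    """Summarize results with at most maxParallel AI calls in flight."""
    semaphore = asyncio.Semaphore(maxParallel)

    async def runOne(result: Dict[str, Any]) -> None:
        async with semaphore:
            await summarizeOne(result)

    await asyncio.gather(*(runOne(r) for r in results))
    return results
```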
|
||||
|
||||
async def _createOutputDocuments(self, prompt: str, results: List[Dict[str, Any]],
|
||||
outputSpecs: List[Dict[str, Any]], researchPlan: Dict[str, Any]) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Create output documents based on research results and specifications.
|
||||
|
||||
Args:
|
||||
prompt: Original research prompt
|
||||
results: List of research results
|
||||
outputSpecs: Output specifications
|
||||
researchPlan: Research plan
|
||||
|
||||
Returns:
|
||||
List of output documents
|
||||
"""
|
||||
# If no output specs provided, create default output
|
||||
if not outputSpecs:
|
||||
outputSpecs = [{
|
||||
"label": "webResearchResults.md",
|
||||
"description": "Comprehensive web research results"
|
||||
}]
|
||||
|
||||
# Generate documents
|
||||
documents = []
|
||||
|
||||
# Process each output specification
|
||||
for spec in outputSpecs:
|
||||
outputLabel = spec.get("label", "")
|
||||
outputDescription = spec.get("description", "")
|
||||
|
||||
# Determine format based on file extension
|
||||
formatType = self._determineFormatType(outputLabel)
|
||||
|
||||
# Create appropriate document based on format
|
||||
if formatType == "json":
|
||||
# JSON output - structured data
|
||||
document = await self._createJsonDocument(prompt, results, researchPlan, outputLabel)
|
||||
elif formatType == "csv":
|
||||
# CSV output - tabular data
|
||||
document = await self._createCsvDocument(results, outputLabel)
|
||||
else:
|
||||
# Text-based output (markdown, html, text) - narrative report
|
||||
document = await self._createNarrativeDocument(
|
||||
prompt, results, researchPlan, formatType, outputLabel, outputDescription
|
||||
)
|
||||
|
||||
documents.append(document)
|
||||
|
||||
return documents
|
||||
|
||||
async def _createNarrativeDocument(self, prompt: str, results: List[Dict[str, Any]],
|
||||
researchPlan: Dict[str, Any], formatType: str,
|
||||
outputLabel: str, outputDescription: str) -> Dict[str, Any]:
|
||||
"""
|
||||
Create a narrative document (markdown, html, text) from research results.
|
||||
|
||||
Args:
|
||||
prompt: Original research prompt
|
||||
results: Research results
|
||||
researchPlan: Research plan
|
||||
formatType: Output format (markdown, html, text)
|
||||
outputLabel: Output filename
|
||||
outputDescription: Output description
|
||||
|
||||
Returns:
|
||||
Document object
|
||||
"""
|
||||
# Create content based on format
|
||||
if formatType == "markdown":
|
||||
contentType = "text/markdown"
|
||||
templateFormat = "markdown"
|
||||
elif formatType == "html":
|
||||
contentType = "text/html"
|
||||
templateFormat = "html"
|
||||
else:
|
||||
contentType = "text/plain"
|
||||
templateFormat = "text"
|
||||
|
||||
# Prepare research context
|
||||
researchQuestions = researchPlan.get("researchQuestions", [])
|
||||
searchTerms = researchPlan.get("searchTerms", [])
|
||||
|
||||
# Create document structure based on results
|
||||
sourcesSummary = []
|
||||
for result in results:
|
||||
sourcesSummary.append({
|
||||
"title": result.get("title", "Untitled"),
|
||||
"url": result.get("url", ""),
|
||||
"summary": result.get("summary", ""),
|
||||
"snippet": result.get("snippet", "")
|
||||
})
|
||||
|
||||
# Truncate content for prompt
|
||||
sourcesJson = json.dumps(sourcesSummary, indent=2)
|
||||
if len(sourcesJson) > 10000:
|
||||
# Logic to truncate each summary while preserving structure
|
||||
for i in range(len(sourcesSummary)):
|
||||
if len(sourcesJson) <= 10000:
|
||||
break
|
||||
# Gradually truncate summaries
|
||||
sourcesSummary[i]["summary"] = sourcesSummary[i]["summary"][:500] + "..."
|
||||
sourcesJson = json.dumps(sourcesSummary, indent=2)
|
||||
|
||||
# Create report prompt
|
||||
reportPrompt = f"""
|
||||
Create a comprehensive {formatType} research report based on the following web research:
|
||||
|
||||
TASK: {prompt}
|
||||
|
||||
RESEARCH QUESTIONS:
|
||||
{', '.join(researchQuestions)}
|
||||
|
||||
SEARCH TERMS USED:
|
||||
{', '.join(searchTerms)}
|
||||
|
||||
SOURCES AND FINDINGS:
|
||||
{sourcesJson}
|
||||
|
||||
REPORT DETAILS:
|
||||
- Format: {templateFormat}
|
||||
- Filename: {outputLabel}
|
||||
- Description: {outputDescription}
|
||||
|
||||
Create a well-structured report that:
|
||||
1. Includes an executive summary of key findings
|
||||
2. Addresses each research question directly
|
||||
3. Integrates information from all relevant sources
|
||||
4. Cites sources appropriately for each piece of information
|
||||
5. Provides a comprehensive synthesis of the research
|
||||
6. Is formatted professionally and appropriately for {templateFormat}
|
||||
|
||||
The report should be scholarly, accurate, and focused on the original research task.
|
||||
"""
|
||||
|
||||
try:
|
||||
# Generate report with AI
|
||||
reportContent = await self.service.base.callAi([
|
||||
{"role": "system", "content": "You are a research expert. Respond with valid JSON only."},
|
||||
{"role": "user", "content": reportPrompt}
|
||||
])
|
||||
|
||||
# Convert to HTML if needed
|
||||
if formatType == "html" and not reportContent.lower().startswith("<html"):
|
||||
# Check if it's markdown that needs conversion
|
||||
if reportContent.startswith("#"):
|
||||
reportContent = markdown.markdown(reportContent)
|
||||
# Wrap in basic HTML structure if needed
|
||||
if not reportContent.lower().startswith("<html"):
|
||||
reportContent = f"<html><head><title>Web Research Results</title></head><body>{reportContent}</body></html>"
|
||||
|
||||
return self.formatAgentDocumentOutput(outputLabel, reportContent, contentType)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error creating narrative document: {str(e)}")
|
||||
# Create error document
|
||||
if formatType == "markdown":
|
||||
content = f"# Web Research Error\n\nAn error occurred: {str(e)}"
|
||||
elif formatType == "html":
|
||||
content = f"<html><body><h1>Web Research Error</h1><p>An error occurred: {str(e)}</p></body></html>"
|
||||
else:
|
||||
content = f"WEB RESEARCH ERROR\n\nAn error occurred: {str(e)}"
|
||||
|
||||
return self.formatAgentDocumentOutput(outputLabel, content, contentType)
|
||||
|
||||
async def _createJsonDocument(self, prompt: str, results: List[Dict[str, Any]],
|
||||
researchPlan: Dict[str, Any], outputLabel: str) -> Dict[str, Any]:
|
||||
"""
|
||||
Create a JSON document from research results.
|
||||
|
||||
Args:
|
||||
prompt: Original research prompt
|
||||
results: Research results
|
||||
researchPlan: Research plan
|
||||
outputLabel: Output filename
|
||||
|
||||
Returns:
|
||||
Document object
|
||||
"""
|
||||
try:
|
||||
# Create structured data
|
||||
sourcesData = []
|
||||
for result in results:
|
||||
sourcesData.append({
|
||||
"title": result.get("title", "Untitled"),
|
||||
"url": result.get("url", ""),
|
||||
"summary": result.get("summary", ""),
|
||||
"snippet": result.get("snippet", ""),
|
||||
"sourceType": result.get("sourceType", "")
|
||||
})
|
||||
|
||||
# Create metadata
|
||||
metadata = {
|
||||
"query": prompt,
|
||||
"timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
|
||||
"researchQuestions": researchPlan.get("researchQuestions", []),
|
||||
"searchTerms": researchPlan.get("searchTerms", [])
|
||||
}
|
||||
|
||||
# Compile complete report object
|
||||
jsonContent = {
|
||||
"metadata": metadata,
|
||||
"summary": researchPlan.get("feedback", "Web research results"),
|
||||
"sources": sourcesData
|
||||
}
|
||||
|
||||
# Convert to JSON string
|
||||
content = json.dumps(jsonContent, indent=2)
|
||||
|
||||
return self.formatAgentDocumentOutput(outputLabel, content, "application/json")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error creating JSON document: {str(e)}")
|
||||
return self.formatAgentDocumentOutput(outputLabel, json.dumps({"error": str(e)}), "application/json")
|
||||
|
||||
async def _createCsvDocument(self, results: List[Dict[str, Any]], outputLabel: str) -> Dict[str, Any]:
|
||||
"""
|
||||
Create a CSV document from research results.
|
||||
|
||||
Args:
|
||||
results: Research results
|
||||
outputLabel: Output filename
|
||||
|
||||
Returns:
|
||||
Document object
|
||||
"""
|
||||
try:
|
||||
# Create CSV header
|
||||
csvLines = ["Title,URL,Source Type,Snippet"]
|
||||
|
||||
# Add results
|
||||
for result in results:
|
||||
# Escape CSV fields
|
||||
title = result.get("title", "").replace('"', '""')
|
||||
url = result.get("url", "").replace('"', '""')
|
||||
sourceType = result.get("sourceType", "").replace('"', '""')
|
||||
snippet = result.get("snippet", "").replace('"', '""')
|
||||
|
||||
csvLines.append(f'"{title}","{url}","{sourceType}","{snippet}"')
|
||||
|
||||
# Combine into CSV content
|
||||
content = "\n".join(csvLines)
|
||||
|
||||
return self.formatAgentDocumentOutput(outputLabel, content, "text/csv")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error creating CSV document: {str(e)}")
|
||||
return self.formatAgentDocumentOutput(outputLabel, "Error,Error\nFailed to create CSV,{0}".format(str(e)), "text/csv")
|
||||
|
||||
def _determineFormatType(self, outputLabel: str) -> str:
|
||||
"""
|
||||
Determine the format type based on the filename.
|
||||
|
||||
Args:
|
||||
outputLabel: Output filename
|
||||
|
||||
Returns:
|
||||
Format type (markdown, html, text, json, csv)
|
||||
"""
|
||||
outputLabelLower = outputLabel.lower()
|
||||
|
||||
if outputLabelLower.endswith(".md"):
|
||||
return "markdown"
|
||||
elif outputLabelLower.endswith(".html"):
|
||||
return "html"
|
||||
elif outputLabelLower.endswith(".txt"):
|
||||
return "text"
|
||||
elif outputLabelLower.endswith(".json"):
|
||||
return "json"
|
||||
elif outputLabelLower.endswith(".csv"):
|
||||
return "csv"
|
||||
else:
|
||||
# Default to markdown
|
||||
return "markdown"
|
||||
|
||||
def _searchWeb(self, query: str) -> List[Dict[str, str]]:
|
||||
"""
|
||||
Conduct a web search using SerpAPI and return the results.
|
||||
|
||||
Args:
|
||||
query: The search query
|
||||
|
||||
Returns:
|
||||
List of search results
|
||||
"""
|
||||
if not self.srcApikey:
|
||||
return []
|
||||
|
||||
# Get user language from serviceBase if available
|
||||
userLanguage = "en" # Default language
|
||||
if self.service.base.userLanguage:
|
||||
userLanguage = self.service.base.userLanguage
|
||||
|
||||
try:
|
||||
# Format the search request for SerpAPI
|
||||
params = {
|
||||
"engine": self.srcEngine,
|
||||
"q": query,
|
||||
"api_key": self.srcApikey,
|
||||
"num": self.maxResults, # Number of results to return
|
||||
"hl": userLanguage # Identified user language
|
||||
}
|
||||
|
||||
# Make the API request
|
||||
response = requests.get("https://serpapi.com/search", params=params, timeout=self.timeout)
|
||||
response.raise_for_status()
|
||||
|
||||
# Parse JSON response
|
||||
search_results = response.json()
|
||||
|
||||
# Extract organic results
|
||||
results = []
|
||||
|
||||
if "organic_results" in search_results:
|
||||
for result in search_results["organic_results"][:self.maxResults]:
|
||||
# Extract title
|
||||
title = result.get("title", "No title")
|
||||
|
||||
# Extract URL
|
||||
url = result.get("link", "No URL")
|
||||
|
||||
# Extract snippet
|
||||
snippet = result.get("snippet", "No description")
|
||||
|
||||
# Get actual page content
|
||||
try:
|
||||
targetPageSoup = self._readUrl(url)
|
||||
content = self._extractMainContent(targetPageSoup)
|
||||
except Exception as e:
|
||||
logger.warning(f"Error extracting content from {url}: {str(e)}")
|
||||
content = f"Error extracting content: {str(e)}"
|
||||
|
||||
results.append({
|
||||
'title': title,
|
||||
'url': url,
|
||||
'snippet': snippet,
|
||||
'data': content
|
||||
})
|
||||
|
||||
# Limit number of results
|
||||
if len(results) >= self.maxResults:
|
||||
break
|
||||
else:
|
||||
logger.warning(f"No organic results found in SerpAPI response for: {query}")
|
||||
|
||||
return results
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error searching with SerpAPI for {query}: {str(e)}")
|
||||
return []
|
||||
|
||||
def _readUrl(self, url: str) -> BeautifulSoup:
|
||||
"""
|
||||
Read a URL and return a BeautifulSoup parser for the content.
|
||||
|
||||
Args:
|
||||
url: The URL to read
|
||||
|
||||
Returns:
|
||||
BeautifulSoup object with the content or None on errors
|
||||
"""
|
||||
if not url or not url.startswith(('http://', 'https://')):
|
||||
return None
|
||||
|
||||
headers = {
|
||||
'User-Agent': self.userAgent,
|
||||
'Accept': 'text/html,application/xhtml+xml,application/xml',
|
||||
'Accept-Language': 'en-US,en;q=0.9',
|
||||
}
|
||||
|
||||
try:
|
||||
# Initial request
|
||||
response = requests.get(url, headers=headers, timeout=self.timeout)
|
||||
|
||||
# Handling for status 202
|
||||
if response.status_code == 202:
|
||||
# Retry with backoff
|
||||
backoffTimes = [0.5, 1.0, 2.0, 5.0]
|
||||
|
||||
for waitTime in backoffTimes:
|
||||
time.sleep(waitTime)
|
||||
response = requests.get(url, headers=headers, timeout=self.timeout)
|
||||
|
||||
if response.status_code != 202:
|
||||
break
|
||||
|
||||
# Raise for error status codes
|
||||
response.raise_for_status()
|
||||
|
||||
# Parse HTML
|
||||
return BeautifulSoup(response.text, 'html.parser')
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error reading URL {url}: {str(e)}")
|
||||
return None
|
||||
|
||||
def _extractTitle(self, soup: BeautifulSoup, url: str) -> str:
|
||||
"""
|
||||
Extract the title from a webpage.
|
||||
|
||||
Args:
|
||||
soup: BeautifulSoup object of the webpage
|
||||
url: URL of the webpage
|
||||
|
||||
Returns:
|
||||
Extracted title
|
||||
"""
|
||||
if not soup:
|
||||
return f"Error with {url}"
|
||||
|
||||
# Extract title from title tag
|
||||
titleTag = soup.find('title')
|
||||
title = titleTag.text.strip() if titleTag else "No title"
|
||||
|
||||
# Alternative: Also look for h1 tags if title tag is missing
|
||||
if title == "No title":
|
||||
h1Tag = soup.find('h1')
|
||||
if h1Tag:
|
||||
title = h1Tag.text.strip()
|
||||
|
||||
return title
|
||||
|
||||
def _extractMainContent(self, soup: BeautifulSoup, maxChars: int = 10000) -> str:
|
||||
"""
|
||||
Extract the main content from an HTML page.
|
||||
|
||||
Args:
|
||||
soup: BeautifulSoup object of the webpage
|
||||
maxChars: Maximum number of characters
|
||||
|
||||
Returns:
|
||||
Extracted main content as a string
|
||||
"""
|
||||
if not soup:
|
||||
return ""
|
||||
|
||||
# Try to find main content elements in priority order
|
||||
mainContent = None
|
||||
for selector in ['main', 'article', '#content', '.content', '#main', '.main']:
|
||||
content = soup.select_one(selector)
|
||||
if content:
|
||||
mainContent = content
|
||||
break
|
||||
|
||||
# If no main content found, use the body
|
||||
if not mainContent:
|
||||
mainContent = soup.find('body') or soup
|
||||
|
||||
# Remove script, style, nav, footer elements that don't contribute to main content
|
||||
for element in mainContent.select('script, style, nav, footer, header, aside, .sidebar, #sidebar, .comments, #comments, .advertisement, .ads, iframe'):
|
||||
element.extract()
|
||||
|
||||
# Extract text content
|
||||
textContent = mainContent.get_text(separator=' ', strip=True)
|
||||
|
||||
# Limit to maxChars
|
||||
return textContent[:maxChars]
|
||||
|
||||
def _limitText(self, text: str, maxChars: int = 10000) -> str:
|
||||
"""
|
||||
Limit text to a maximum number of characters.
|
||||
|
||||
Args:
|
||||
text: Input text
|
||||
maxChars: Maximum number of characters
|
||||
|
||||
Returns:
|
||||
Limited text
|
||||
"""
|
||||
if not text:
|
||||
return ""
|
||||
|
||||
# If text is already under the limit, return unchanged
|
||||
if len(text) <= maxChars:
|
||||
return text
|
||||
|
||||
# Otherwise limit text to maxChars
|
||||
return text[:maxChars] + "... [Content truncated due to length]"
|
||||
|
||||
|
||||
# Factory function for the Webcrawler agent
|
||||
def getAgentWebcrawler():
|
||||
"""Returns an instance of the Webcrawler agent."""
|
||||
return AgentWebcrawler()
|
||||
|
|
@ -6,7 +6,7 @@ Uses the JSON connector for data access with added language support.
|
|||
import os
|
||||
import logging
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from datetime import datetime, UTC
|
||||
from typing import Dict, Any, List, Optional, Union
|
||||
|
||||
import asyncio
|
||||
|
|
@ -327,6 +327,11 @@ class ChatObjects:
|
|||
publishedAt=createdMessage.get("publishedAt", self._getCurrentTimestamp()),
|
||||
stats=ChatStat(**createdMessage.get("stats", {})) if createdMessage.get("stats") else None
|
||||
)
|
||||
|
||||
# Update workflow stats for message creation (estimate bytes for message)
|
||||
message_size = len(createdMessage.get("message", "")) + sum(len(doc.get("filename", "")) for doc in createdMessage.get("documents", []))
|
||||
self.updateWorkflowStats(workflowId, bytesSent=0, bytesReceived=message_size)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error creating workflow message: {str(e)}")
|
||||
return None
|
||||
|
|
@ -535,6 +540,64 @@ class ChatObjects:
|
|||
# Get logs for this workflow
|
||||
return [ChatLog(**log) for log in self.db.getRecordset("workflowLogs", recordFilter={"workflowId": workflowId})]
|
||||
|
||||
def updateWorkflowStats(self, workflowId: str, bytesSent: int = 0, bytesReceived: int = 0) -> bool:
|
||||
"""Updates workflow statistics during execution with incremental values."""
|
||||
try:
|
||||
# Get current workflow
|
||||
workflow = self.getWorkflow(workflowId)
|
||||
if not workflow:
|
||||
logger.error(f"Workflow {workflowId} not found for stats update")
|
||||
return False
|
||||
|
||||
if not self._canModify("workflows", workflowId):
|
||||
logger.error(f"No permission to update workflow {workflowId} stats")
|
||||
return False
|
||||
|
||||
# Get current stats
|
||||
currentStats = workflow.stats.dict() if workflow.stats else {
|
||||
"bytesSent": 0,
|
||||
"bytesReceived": 0,
|
||||
"tokenCount": 0,
|
||||
"processingTime": 0
|
||||
}
|
||||
|
||||
# Calculate processing time from workflow start
|
||||
workflow_start = datetime.fromisoformat(workflow.startedAt.replace('Z', '+00:00'))
|
||||
current_time = datetime.now(UTC)
|
||||
processing_time = (current_time - workflow_start).total_seconds()
|
||||
|
||||
# Update stats with incremental values
|
||||
currentStats["bytesSent"] = currentStats.get("bytesSent", 0) + bytesSent
|
||||
currentStats["bytesReceived"] = currentStats.get("bytesReceived", 0) + bytesReceived
|
||||
currentStats["tokenCount"] = currentStats["bytesSent"] + currentStats["bytesReceived"]
|
||||
currentStats["processingTime"] = processing_time
|
||||
|
||||
# Update workflow in database
|
||||
self.db.recordModify("workflows", workflowId, {
|
||||
"dataStats": currentStats
|
||||
})
|
||||
|
||||
# Log to stats table
|
||||
stats_record = {
|
||||
"timestamp": self._getCurrentTimestamp(),
|
||||
"workflowId": workflowId,
|
||||
"bytesSent": bytesSent,
|
||||
"bytesReceived": bytesReceived,
|
||||
"tokenCount": bytesSent + bytesReceived,
|
||||
"processingTime": processing_time
|
||||
}
|
||||
|
||||
# Create stats record in database
|
||||
self.db.recordCreate("stats", stats_record)
|
||||
|
||||
logger.debug(f"Updated workflow {workflowId} stats: {currentStats}")
|
||||
logger.debug(f"Logged stats record: {stats_record}")
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error updating workflow stats: {str(e)}")
|
||||
return False
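For context, a usage sketch: callers pass only the incremental byte counts, and each call both updates the workflow's cumulative `dataStats` and appends one row to the `stats` table. The variable names below are assumed from the calling context, and the values are illustrative:

```python
# chatObjects and workflowId are assumed to exist in the calling scope.
chatObjects.updateWorkflowStats(workflowId, bytesSent=0, bytesReceived=512)

# Each call appends a record of roughly this shape to the "stats" table:
# {
#     "timestamp": "<current timestamp>",
#     "workflowId": "<workflowId>",
#     "bytesSent": 0,
#     "bytesReceived": 512,
#     "tokenCount": 512,          # sum of the incremental bytes
#     "processingTime": 3.2      # seconds since the workflow started
# }
```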
|
||||
|
||||
def createWorkflowLog(self, logData: Dict[str, Any]) -> ChatLog:
|
||||
"""Creates a log entry for a workflow if user has access."""
|
||||
# Check workflow access
|
||||
|
|
@ -777,14 +840,7 @@ class ChatObjects:
|
|||
# Create workflow
|
||||
workflow = self.createWorkflow(workflowData)
|
||||
|
||||
# Add log entry
|
||||
self.createWorkflowLog({
|
||||
"workflowId": workflow.id,
|
||||
"message": "Workflow started",
|
||||
"type": "info",
|
||||
"status": "running",
|
||||
"progress": 0
|
||||
})
|
||||
# The initial 'Workflow started' log entry is no longer created here
|
||||
|
||||
# Start workflow processing
|
||||
from modules.workflow.managerWorkflow import WorkflowManager
|
||||
|
|
|
|||
|
|
@ -10,9 +10,9 @@ logger = logging.getLogger(__name__)
|
|||
class MethodCoder(MethodBase):
|
||||
"""Coder method implementation for code operations"""
|
||||
|
||||
def __init__(self, serviceContainer: Any):
|
||||
def __init__(self, serviceCenter: Any):
|
||||
"""Initialize the coder method"""
|
||||
super().__init__(serviceContainer)
|
||||
super().__init__(serviceCenter)
|
||||
self.name = "coder"
|
||||
self.description = "Handle code operations like analysis, generation, and refactoring"
|
||||
|
||||
|
|
@ -87,7 +87,18 @@ class MethodCoder(MethodBase):
|
|||
)
|
||||
|
||||
# Extract text content from ExtractedContent objects
|
||||
text_contents = self.service.extractTextFromContentObjects(all_code_content)
|
||||
text_contents = []
|
||||
for content_obj in all_code_content:
|
||||
if hasattr(content_obj, 'contents') and content_obj.contents:
|
||||
# Extract text from ContentItem objects
|
||||
for content_item in content_obj.contents:
|
||||
if hasattr(content_item, 'data') and content_item.data:
|
||||
text_contents.append(content_item.data)
|
||||
elif isinstance(content_obj, str):
|
||||
text_contents.append(content_obj)
|
||||
else:
|
||||
# Fallback: convert to string representation
|
||||
text_contents.append(str(content_obj))
|
||||
|
||||
# Combine all extracted text content for analysis
|
||||
combined_content = "\n\n--- CODE SEPARATOR ---\n\n".join(text_contents)
|
||||
|
|
@ -8,7 +8,6 @@ from typing import Dict, Any, List, Optional
|
|||
import uuid
|
||||
from datetime import datetime, UTC
|
||||
|
||||
from modules.workflow.managerDocument import DocumentManager
|
||||
from modules.workflow.methodBase import MethodBase, ActionResult, action
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
|
@ -16,12 +15,11 @@ logger = logging.getLogger(__name__)
|
|||
class MethodDocument(MethodBase):
|
||||
"""Document method implementation for document operations"""
|
||||
|
||||
def __init__(self, serviceContainer: Any):
|
||||
def __init__(self, serviceCenter: Any):
|
||||
"""Initialize the document method"""
|
||||
super().__init__(serviceContainer)
|
||||
super().__init__(serviceCenter)
|
||||
self.name = "document"
|
||||
self.description = "Handle document operations like extraction and analysis"
|
||||
self.documentManager = DocumentManager(serviceCenter)
|
||||
|
||||
@action
|
||||
async def extract(self, parameters: Dict[str, Any]) -> ActionResult:
|
||||
|
|
@ -94,7 +92,18 @@ class MethodDocument(MethodBase):
|
|||
)
|
||||
|
||||
# Extract text content from ExtractedContent objects
|
||||
text_contents = self.service.extractTextFromContentObjects(all_extracted_content)
|
||||
text_contents = []
|
||||
for content_obj in all_extracted_content:
|
||||
if hasattr(content_obj, 'contents') and content_obj.contents:
|
||||
# Extract text from ContentItem objects
|
||||
for content_item in content_obj.contents:
|
||||
if hasattr(content_item, 'data') and content_item.data:
|
||||
text_contents.append(content_item.data)
|
||||
elif isinstance(content_obj, str):
|
||||
text_contents.append(content_obj)
|
||||
else:
|
||||
# Fallback: convert to string representation
|
||||
text_contents.append(str(content_obj))
|
||||
|
||||
# Combine all extracted text content
|
||||
combined_content = "\n\n--- DOCUMENT SEPARATOR ---\n\n".join(text_contents)
|
||||
|
|
|
|||
|
|
@ -16,9 +16,9 @@ logger = logging.getLogger(__name__)
|
|||
class MethodOutlook(MethodBase):
|
||||
"""Outlook method implementation for email operations"""
|
||||
|
||||
def __init__(self, serviceContainer: Any):
|
||||
def __init__(self, serviceCenter: Any):
|
||||
"""Initialize the Outlook method"""
|
||||
super().__init__(serviceContainer)
|
||||
super().__init__(serviceCenter)
|
||||
self.name = "outlook"
|
||||
self.description = "Handle Microsoft Outlook email operations"
|
||||
|
||||
|
|
|
|||
|
|
@ -16,8 +16,8 @@ logger = logging.getLogger(__name__)
|
|||
class MethodSharepoint(MethodBase):
|
||||
"""SharePoint method implementation for document operations"""
|
||||
|
||||
def __init__(self, serviceContainer: Any):
|
||||
super().__init__(serviceContainer)
|
||||
def __init__(self, serviceCenter: Any):
|
||||
super().__init__(serviceCenter)
|
||||
self.name = "sharepoint"
|
||||
self.description = "Handle Microsoft SharePoint document operations"
|
||||
|
||||
|
|
|
|||
|
|
@ -19,9 +19,9 @@ logger = logging.getLogger(__name__)
|
|||
class MethodWeb(MethodBase):
|
||||
"""Web method implementation for web operations"""
|
||||
|
||||
def __init__(self, serviceContainer: Any):
|
||||
def __init__(self, serviceCenter: Any):
|
||||
"""Initialize the web method"""
|
||||
super().__init__(serviceContainer)
|
||||
super().__init__(serviceCenter)
|
||||
self.name = "web"
|
||||
self.description = "Handle web operations like crawling and scraping"
|
||||
|
||||
|
|
@ -452,7 +452,7 @@ class MethodWeb(MethodBase):
|
|||
"query": query
|
||||
}
|
||||
else:
|
||||
# Get user language from service container if available
|
||||
# Get user language from service center if available
|
||||
userLanguage = "en" # Default language
|
||||
if hasattr(self.service, 'user') and hasattr(self.service.user, 'language'):
|
||||
userLanguage = self.service.user.language
|
||||
|
|
|
|||
|
|
@ -176,7 +176,7 @@ async def get_workflow_status(
|
|||
) -> ChatWorkflow:
|
||||
"""Get the current status of a workflow."""
|
||||
try:
|
||||
# Get service container
|
||||
# Get service center
|
||||
interfaceChat = getServiceChat(currentUser)
|
||||
|
||||
# Retrieve workflow
|
||||
|
|
@ -208,7 +208,7 @@ async def get_workflow_logs(
|
|||
) -> List[ChatLog]:
|
||||
"""Get logs for a workflow with support for selective data transfer."""
|
||||
try:
|
||||
# Get service container
|
||||
# Get service center
|
||||
interfaceChat = getServiceChat(currentUser)
|
||||
|
||||
# Verify workflow exists
|
||||
|
|
@ -251,7 +251,7 @@ async def get_workflow_messages(
|
|||
) -> List[ChatMessage]:
|
||||
"""Get messages for a workflow with support for selective data transfer."""
|
||||
try:
|
||||
# Get service container
|
||||
# Get service center
|
||||
interfaceChat = getServiceChat(currentUser)
|
||||
|
||||
# Verify workflow exists
|
||||
|
|
@ -297,7 +297,7 @@ async def start_workflow(
|
|||
Corresponds to State 1 in the state machine documentation.
|
||||
"""
|
||||
try:
|
||||
# Get service container
|
||||
# Get service center
|
||||
interfaceChat = getServiceChat(currentUser)
|
||||
|
||||
# Start or continue workflow using ChatObjects
|
||||
|
|
@ -322,7 +322,7 @@ async def stop_workflow(
|
|||
) -> ChatWorkflow:
|
||||
"""Stops a running workflow."""
|
||||
try:
|
||||
# Get service container
|
||||
# Get service center
|
||||
interfaceChat = getServiceChat(currentUser)
|
||||
|
||||
# Stop workflow using ChatObjects
|
||||
|
|
@ -347,7 +347,7 @@ async def delete_workflow(
|
|||
) -> Dict[str, Any]:
|
||||
"""Deletes a workflow and its associated data."""
|
||||
try:
|
||||
# Get service container
|
||||
# Get service center
|
||||
interfaceChat = getServiceChat(currentUser)
|
||||
|
||||
# Get raw workflow data from database to check permissions
|
||||
|
|
@ -402,7 +402,7 @@ async def delete_workflow_message(
|
|||
) -> Dict[str, Any]:
|
||||
"""Delete a message from a workflow."""
|
||||
try:
|
||||
# Get service container
|
||||
# Get service center
|
||||
interfaceChat = getServiceChat(currentUser)
|
||||
|
||||
# Verify workflow exists
|
||||
|
|
@ -453,7 +453,7 @@ async def delete_file_from_message(
|
|||
) -> Dict[str, Any]:
|
||||
"""Delete a file reference from a message in a workflow."""
|
||||
try:
|
||||
# Get service container
|
||||
# Get service center
|
||||
interfaceChat = getServiceChat(currentUser)
|
||||
|
||||
# Verify workflow exists
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ import asyncio
|
|||
import logging
|
||||
import uuid
|
||||
import json
|
||||
import time
|
||||
from typing import Dict, Any, Optional, List, Union
|
||||
from datetime import datetime, UTC
|
||||
|
||||
|
|
@ -9,7 +10,7 @@ from modules.interfaces.interfaceAppModel import User
|
|||
from modules.interfaces.interfaceChatModel import (
|
||||
TaskStatus, ChatDocument, TaskItem, TaskAction, TaskResult, ChatStat, ChatLog, ChatMessage, ChatWorkflow
|
||||
)
|
||||
from modules.workflow.serviceContainer import ServiceContainer
|
||||
from modules.workflow.serviceCenter import ServiceCenter
|
||||
from modules.interfaces.interfaceChatObjects import ChatObjects
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
|
@ -20,7 +21,7 @@ class ChatManager:
|
|||
def __init__(self, currentUser: User, chatInterface: ChatObjects):
|
||||
self.currentUser = currentUser
|
||||
self.chatInterface = chatInterface
|
||||
self.service: ServiceContainer = None
|
||||
self.service: ServiceCenter = None
|
||||
self.workflow: ChatWorkflow = None
|
||||
|
||||
# Circuit breaker for AI calls
|
||||
|
|
@ -37,7 +38,7 @@ class ChatManager:
|
|||
async def initialize(self, workflow: ChatWorkflow) -> None:
|
||||
"""Initialize chat manager with workflow"""
|
||||
self.workflow = workflow
|
||||
self.service = ServiceContainer(self.currentUser, self.workflow)
|
||||
self.service = ServiceCenter(self.currentUser, self.workflow)
|
||||
|
||||
# ===== WORKFLOW PHASES =====
|
||||
|
||||
|
|
@ -119,6 +120,12 @@ class ChatManager:
|
|||
task_actions.append(task_action)
|
||||
logger.info(f"Created task action: {task_action.execMethod}.{task_action.execAction}")
|
||||
|
||||
# Update stats for task validation (estimate bytes for action validation)
|
||||
if task_actions:
|
||||
# Calculate actual action size for stats
|
||||
action_size = self.service.calculateObjectSize(task_actions)
|
||||
self.service.updateWorkflowStats(eventLabel="action", bytesSent=action_size)
|
||||
|
||||
logger.info(f"Task action definition completed: {len(task_actions)} actions")
|
||||
return task_actions
|
||||
|
||||
|
|
@ -265,6 +272,7 @@ class ChatManager:
|
|||
async def processFileIds(self, fileIds: List[str]) -> List[ChatDocument]:
|
||||
"""Process file IDs and return ChatDocument objects"""
|
||||
documents = []
|
||||
|
||||
for fileId in fileIds:
|
||||
try:
|
||||
# Ensure service is initialized
|
||||
|
|
@ -290,6 +298,8 @@ class ChatManager:
|
|||
logger.warning(f"No file info found for file ID {fileId}")
|
||||
except Exception as e:
|
||||
logger.error(f"Error processing file ID {fileId}: {str(e)}")
|
||||
|
||||
|
||||
return documents
|
||||
|
||||
def setUserLanguage(self, language: str) -> None:
|
||||
|
|
@ -768,7 +778,8 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
|
|||
'documents_metadata': documents_metadata,
|
||||
'actionId': action_result.get('actionId', ''),
|
||||
'actionMethod': action_result.get('actionMethod', ''),
|
||||
'actionName': action_result.get('actionName', '')
|
||||
'actionName': action_result.get('actionName', ''),
|
||||
'success_indicator': 'documents' if len(documents_metadata) > 0 else 'text_result' if action_result.get('result', '').strip() else 'none'
|
||||
}
|
||||
step_result_serializable['action_results'].append(serializable_action_result)
|
||||
|
||||
|
|
@ -787,6 +798,13 @@ INSTRUCTIONS:
|
|||
4. Decide on next action: continue, retry, or fail
|
||||
5. If retry, provide specific improvements needed
|
||||
|
||||
IMPORTANT NOTES:
|
||||
- Actions can produce either text results OR documents (or both)
|
||||
- Empty result_summary is acceptable if documents were produced (documents_count > 0)
|
||||
- Focus on whether the action achieved its intended purpose, not just text output
|
||||
- Document-based actions (like file extractions) often have empty text results but successful document outputs
|
||||
- Check the 'success_indicator' field: 'documents' means success via document output, 'text_result' means success via text, 'none' means no output
|
||||
|
||||
REQUIRED JSON STRUCTURE:
|
||||
{{
|
||||
"status": "success|retry|failed",
|
||||
|
|
@ -829,7 +847,7 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
|
|||
async def _executeSingleAction(self, action: TaskAction, workflow: ChatWorkflow) -> Dict[str, Any]:
|
||||
"""Execute a single action and return result with enhanced document processing"""
|
||||
try:
|
||||
# Execute the actual method action using the service container
|
||||
# Execute the actual method action using the service center
|
||||
result = await self.service.executeAction(
|
||||
methodName=action.execMethod,
|
||||
actionName=action.execAction,
|
||||
|
|
@ -943,7 +961,7 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
|
|||
message_data = {
|
||||
"workflowId": workflow.id,
|
||||
"role": "assistant",
|
||||
"message": f"Executed {action.execMethod}.{action.execAction} successfully",
|
||||
"message": f"Executed action {action.execMethod}.{action.execAction}",
|
||||
"status": "step",
|
||||
"sequenceNr": len(workflow.messages) + 1,
|
||||
"publishedAt": datetime.now(UTC).isoformat(),
|
||||
|
|
@ -979,7 +997,7 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
|
|||
file_size = len(str(doc_data))
|
||||
mime_type = "application/octet-stream"
|
||||
|
||||
# Enhanced MIME type detection using service container
|
||||
# Enhanced MIME type detection using service center
|
||||
if mime_type == "application/octet-stream":
|
||||
mime_type = self._detectMimeTypeFromContent(document_data, document_name)
|
||||
|
||||
|
|
@ -1045,7 +1063,7 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
|
|||
|
||||
def _detectMimeTypeFromContent(self, content: Any, filename: str) -> str:
|
||||
"""
|
||||
Detect MIME type from content and filename using service container.
|
||||
Detect MIME type from content and filename using service center.
|
||||
Only returns a detected MIME type if it's better than application/octet-stream.
|
||||
|
||||
Args:
|
||||
|
|
@ -1065,7 +1083,7 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
|
|||
else:
|
||||
file_bytes = str(content).encode('utf-8')
|
||||
|
||||
# Use service container's MIME type detection
|
||||
# Use service center's MIME type detection
|
||||
detected_mime_type = self.service.detectContentTypeFromData(file_bytes, filename)
|
||||
if detected_mime_type != "application/octet-stream":
|
||||
return detected_mime_type
|
||||
|
|
@ -1076,7 +1094,7 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
|
|||
|
||||
def _detectMimeTypeFromDocument(self, document: Any, filename: str) -> str:
|
||||
"""
|
||||
Detect MIME type from document object using service container.
|
||||
Detect MIME type from document object using service center.
|
||||
Only returns a detected MIME type if it's better than application/octet-stream.
|
||||
|
||||
Args:
|
||||
|
|
@ -1094,7 +1112,7 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
|
|||
else:
|
||||
file_bytes = str(content).encode('utf-8')
|
||||
|
||||
# Use service container's MIME type detection
|
||||
# Use service center's MIME type detection
|
||||
detected_mime_type = self.service.detectContentTypeFromData(file_bytes, filename)
|
||||
if detected_mime_type != "application/octet-stream":
|
||||
return detected_mime_type
|
||||
|
|
@ -1222,8 +1240,11 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
|
|||
action_results = review_context.get('action_results', [])
|
||||
if action_results:
|
||||
# Check for common issues that warrant retry
|
||||
# Only consider empty results a problem if there are no documents produced
|
||||
has_empty_results = any(
|
||||
not result.get('result', '').strip()
|
||||
not result.get('result', '').strip() and
|
||||
not result.get('documents', []) and
|
||||
not result.get('documents_metadata', [])
|
||||
for result in action_results
|
||||
if result.get('status') == 'completed'
|
||||
)
|
||||
|
|
@ -1417,7 +1438,7 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
|
|||
assistant_messages = [msg for msg in workflow.messages if msg.role == 'assistant']
|
||||
|
||||
# Generate summary feedback
|
||||
feedback = f"Workflow completed successfully.\n\n"
|
||||
feedback = f"Workflow completed.\n\n"
|
||||
feedback += f"Processed {len(user_messages)} user inputs and generated {len(assistant_messages)} responses.\n"
|
||||
|
||||
# Add final status
|
||||
|
|
@ -1437,36 +1458,38 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
|
|||
# ===== UNIFIED WORKFLOW EXECUTION =====
|
||||
|
||||
async def executeUnifiedWorkflow(self, userInput: str, workflow: ChatWorkflow) -> Dict[str, Any]:
|
||||
"""Execute workflow using the new unified phases with retry logic"""
|
||||
"""Execute a unified workflow with all phases"""
|
||||
try:
|
||||
logger.info(f"Starting unified workflow execution for workflow {workflow.id}")
|
||||
start_time = time.time()
|
||||
|
||||
# Create user-friendly progress log
|
||||
self.chatInterface.createWorkflowLog({
|
||||
"workflowId": workflow.id,
|
||||
"message": "Starting workflow analysis and planning",
|
||||
"type": "info",
|
||||
"status": "running",
|
||||
"progress": 5,
|
||||
"agentName": "System"
|
||||
})
|
||||
# Initialize chat manager with workflow
|
||||
await self.initialize(workflow)
|
||||
|
||||
# Process file IDs if provided
|
||||
documents = []
|
||||
if hasattr(userInput, 'listFileId') and userInput.listFileId:
|
||||
documents = await self.processFileIds(userInput.listFileId)
|
||||
logger.info(f"Processed {len(documents)} documents")
|
||||
|
||||
# Calculate and update user input stats
|
||||
user_input_size = self.service.calculateUserInputSize(userInput)
|
||||
self.service.updateWorkflowStats(eventLabel="userinput", bytesReceived=user_input_size)
|
||||
|
||||
# Phase 1: High-Level Task Planning
|
||||
logger.info("=== PHASE 1: HIGH-LEVEL TASK PLANNING ===")
|
||||
task_plan = await self.planHighLevelTasks(userInput, workflow)
|
||||
if not task_plan or not task_plan.get('tasks'):
|
||||
logger.error("Failed to create task plan")
|
||||
return {
|
||||
'status': 'failed',
|
||||
'error': 'Failed to create task plan',
|
||||
'phase': 'planning'
|
||||
}
|
||||
logger.info("--- PHASE 1: HIGH-LEVEL TASK PLANNING ---")
|
||||
task_plan = await self.planHighLevelTasks(userInput.prompt, workflow)
|
||||
|
||||
# Update stats for task planning
|
||||
task_plan_size = self.service.calculateObjectSize(task_plan)
|
||||
self.service.updateWorkflowStats(eventLabel="taskplan", bytesSent=task_plan_size)
|
||||
|
||||
# Create user-friendly task plan log
|
||||
tasks_count = len(task_plan.get('tasks', []))
|
||||
task_descriptions = "\n".join([f"- {task.get('description', 'No description')}" for task in task_plan.get('tasks', [])])
|
||||
self.chatInterface.createWorkflowLog({
|
||||
"workflowId": workflow.id,
|
||||
"message": f"Planning completed: {tasks_count} tasks identified",
|
||||
"message": f"Planning completed: {tasks_count} tasks identified\n{task_descriptions}",
|
||||
"type": "info",
|
||||
"status": "running",
|
||||
"progress": 15,
|
||||
|
|
@ -1598,22 +1621,29 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
|
|||
logger.debug(f"TASK {i+1} ACTIONS CREATED: {json.dumps(task_actions_serializable, indent=2, ensure_ascii=False)}")
|
||||
|
||||
# Phase 3: Execute Task Actions
|
||||
logger.info(f"--- PHASE 3: EXECUTING ACTIONS FOR TASK {i+1} ---")
|
||||
logger.info(f"--- PHASE 3: EXECUTING TASK {i+1} ACTIONS ---")
|
||||
action_results = await self.executeTaskActions(task_actions, workflow)
|
||||
|
||||
# Update stats for action execution
|
||||
# Action stats are already handled by the service center during AI calls
|
||||
|
||||
# Create user-friendly action completion log with quality metrics
|
||||
successful_actions = sum(1 for result in action_results if result.get('status') == 'completed')
|
||||
total_actions = len(action_results)
|
||||
|
||||
if total_actions > 0:
|
||||
quality_percentage = (successful_actions / total_actions) * 100
|
||||
if successful_actions == total_actions:
|
||||
log_type = "success"
|
||||
elif successful_actions == 0:
|
||||
log_type = "error"
|
||||
else:
|
||||
log_type = "warning"
|
||||
self.chatInterface.createWorkflowLog({
|
||||
"workflowId": workflow.id,
|
||||
"message": f"Task {i+1} actions completed: {successful_actions}/{total_actions} successful ({quality_percentage:.0f}% quality)",
|
||||
"type": "success" if quality_percentage >= 80 else "warning" if quality_percentage >= 60 else "error",
|
||||
"message": f"Successful actions: {successful_actions}/{total_actions}",
|
||||
"type": log_type,
|
||||
"status": "running",
|
||||
"progress": progress + 10,
|
||||
"agentName": "System"
|
||||
"progress": progress + 10
|
||||
})
|
||||
|
||||
# Log action results (with metadata only)
|
||||
|
|
@ -1653,6 +1683,9 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
|
|||
logger.info(f"--- PHASE 4: REVIEWING TASK {i+1} COMPLETION ---")
|
||||
review_result = await self.reviewTaskCompletion(task_step, task_actions, action_results, workflow)
|
||||
|
||||
# Update stats for task review
|
||||
# Task review stats are already handled by the service center during AI calls
|
||||
|
||||
# Create user-friendly review log with quality metrics
|
||||
quality_metrics = review_result.get('quality_metrics', {})
|
||||
quality_score = quality_metrics.get('score', 0)
|
||||
|
|
@ -1662,29 +1695,62 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
|
|||
if review_status == 'success':
|
||||
self.chatInterface.createWorkflowLog({
|
||||
"workflowId": workflow.id,
|
||||
"message": f"Task {i+1} completed successfully (Quality: {quality_score:.0f}%, Confidence: {confidence:.0f}%)",
|
||||
"message": f"🎯 Task completed successfully with quality score {quality_score} and confidence {confidence}",
|
||||
"type": "success",
|
||||
"status": "running",
|
||||
"progress": progress + 20,
|
||||
"agentName": "System"
|
||||
"progress": progress + 20
|
||||
})
|
||||
elif review_status == 'retry':
|
||||
# Extract improvement details
|
||||
improvements = review_result.get('improvements', '')
|
||||
reason = review_result.get('reason', '')
|
||||
unmet_criteria = review_result.get('unmet_criteria', [])
|
||||
|
||||
# Build detailed message
|
||||
retry_details = []
|
||||
if reason:
|
||||
retry_details.append(f"Reason: {reason}")
|
||||
if improvements:
|
||||
retry_details.append(f"Improvements: {improvements}")
|
||||
if unmet_criteria:
|
||||
retry_details.append(f"Missing criteria: {', '.join(unmet_criteria[:3])}{'...' if len(unmet_criteria) > 3 else ''}")
|
||||
|
||||
retry_message = f"🔄 Task needs improvement"
|
||||
if retry_details:
|
||||
retry_message += f"\n{chr(10).join(retry_details)}"
|
||||
|
||||
self.chatInterface.createWorkflowLog({
|
||||
"workflowId": workflow.id,
|
||||
"message": f"Task {i+1} needs improvement (Quality: {quality_score:.0f}%, Confidence: {confidence:.0f}%)",
|
||||
"message": retry_message,
|
||||
"type": "warning",
|
||||
"status": "running",
|
||||
"progress": progress + 15,
|
||||
"agentName": "System"
|
||||
"progress": progress + 15
|
||||
})
|
||||
else:
|
||||
# Extract failure details
|
||||
reason = review_result.get('reason', '')
|
||||
unmet_criteria = review_result.get('unmet_criteria', [])
|
||||
missing_outputs = review_result.get('missing_outputs', [])
|
||||
|
||||
# Build detailed failure message
|
||||
failure_details = []
|
||||
if reason:
|
||||
failure_details.append(f"Reason: {reason}")
|
||||
if unmet_criteria:
|
||||
failure_details.append(f"Unmet criteria: {', '.join(unmet_criteria[:3])}{'...' if len(unmet_criteria) > 3 else ''}")
|
||||
if missing_outputs:
|
||||
failure_details.append(f"Missing outputs: {', '.join(missing_outputs[:3])}{'...' if len(missing_outputs) > 3 else ''}")
|
||||
|
||||
failure_message = f"❌ Task failed"
|
||||
if failure_details:
|
||||
failure_message += f"\n{chr(10).join(failure_details)}"
|
||||
|
||||
self.chatInterface.createWorkflowLog({
|
||||
"workflowId": workflow.id,
|
||||
"message": f"Task {i+1} failed (Quality: {quality_score:.0f}%, Confidence: {confidence:.0f}%)",
|
||||
"message": failure_message,
|
||||
"type": "error",
|
||||
"status": "running",
|
||||
"progress": progress + 15,
|
||||
"agentName": "System"
|
||||
"progress": progress + 15
|
||||
})
|
||||
|
||||
# Log review result (with metadata only)
|
||||
|
|
@ -1724,7 +1790,7 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
|
|||
previous_review_feedback = review_result.get('improvements', '')
|
||||
|
||||
retry_count += 1
|
||||
if retry_count >= max_retries:
|
||||
if retry_count > max_retries:
|
||||
logger.error(f"Task {i+1} failed after {max_retries} retries")
|
||||
task_success = False
|
||||
else:
|
||||
|
|
@ -1775,35 +1841,37 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
|
|||
|
||||
# Final workflow summary
|
||||
successful_tasks = sum(1 for result in workflow_results if result.get('task_success', False))
|
||||
total_tasks = len(workflow_results)
|
||||
total_tasks = len(task_plan['tasks'])
|
||||
|
||||
# Final workflow stats are already handled by the service center during AI calls
|
||||
|
||||
# Calculate total processing time
|
||||
total_processing_time = time.time() - start_time
|
||||
|
||||
# Create final user-friendly completion log
|
||||
if successful_tasks == total_tasks:
|
||||
self.chatInterface.createWorkflowLog({
|
||||
"workflowId": workflow.id,
|
||||
"message": f"Workflow completed successfully: {successful_tasks}/{total_tasks} tasks completed",
|
||||
"message": f"🎉 Workflow completed ({successful_tasks}/{total_tasks} tasks)",
|
||||
"type": "success",
|
||||
"status": "completed",
|
||||
"progress": 100,
|
||||
"agentName": "System"
|
||||
"progress": 100
|
||||
})
|
||||
elif successful_tasks > 0:
|
||||
self.chatInterface.createWorkflowLog({
|
||||
"workflowId": workflow.id,
|
||||
"message": f"Workflow completed partially: {successful_tasks}/{total_tasks} tasks completed",
|
||||
"message": f"⚠️ Workflow partially completed ({successful_tasks}/{total_tasks} tasks)",
|
||||
"type": "warning",
|
||||
"status": "completed",
|
||||
"progress": 100,
|
||||
"agentName": "System"
|
||||
"progress": 100
|
||||
})
|
||||
else:
|
||||
self.chatInterface.createWorkflowLog({
|
||||
"workflowId": workflow.id,
|
||||
"message": f"Workflow failed: {successful_tasks}/{total_tasks} tasks completed",
|
||||
"message": f"❌ Workflow failed ({successful_tasks}/{total_tasks} tasks)",
|
||||
"type": "error",
|
||||
"status": "failed",
|
||||
"progress": 100,
|
||||
"agentName": "System"
|
||||
"progress": 100
|
||||
})
|
||||
|
||||
# Create serializable workflow results (with metadata only)
|
||||
|
|
@ -1836,7 +1904,8 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
|
|||
'documents_metadata': documents_metadata,
|
||||
'actionId': action_result.get('actionId', ''),
|
||||
'actionMethod': action_result.get('actionMethod', ''),
|
||||
'actionName': action_result.get('actionName', '')
|
||||
'actionName': action_result.get('actionName', ''),
|
||||
'success_indicator': 'documents' if len(documents_metadata) > 0 else 'text_result' if action_result.get('result', '').strip() else 'none'
|
||||
}
|
||||
action_results_metadata.append(action_result_metadata)
|
||||
|
||||
|
|
|
|||
|
|
@ -1,73 +0,0 @@
|
|||
"""
|
||||
Document Manager Module for handling document operations and content extraction.
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
from modules.interfaces.interfaceChatModel import (
|
||||
ChatDocument,
|
||||
ExtractedContent
|
||||
)
|
||||
from modules.workflow.processorDocument import DocumentProcessor
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class DocumentManager:
|
||||
"""Manager for document operations and content extraction"""
|
||||
|
||||
def __init__(self, serviceContainer):
|
||||
self.service = serviceContainer
|
||||
# Create processor with service container for AI calls
|
||||
self._processor = DocumentProcessor(serviceContainer)
|
||||
|
||||
async def extractContentFromDocument(self, prompt: str, document: ChatDocument) -> ExtractedContent:
|
||||
"""Extract content from ChatDocument using prompt"""
|
||||
try:
|
||||
# Extract file data from ChatDocument
|
||||
if document.data:
|
||||
fileData = document.data.encode('utf-8') if isinstance(document.data, str) else document.data
|
||||
else:
|
||||
# Try to get file data from service container if document has fileId
|
||||
if hasattr(document, 'fileId') and document.fileId:
|
||||
fileData = self.service.getFileData(document.fileId)
|
||||
else:
|
||||
logger.error(f"No file data available in document: {document}")
|
||||
raise ValueError("No file data available in document")
|
||||
|
||||
# Get filename and mime type from document
|
||||
filename = document.filename if hasattr(document, 'filename') else "document"
|
||||
mimeType = document.mimeType if hasattr(document, 'mimeType') else "application/octet-stream"
|
||||
|
||||
# Process with processor
|
||||
extractedContent = await self._processor.processFileData(
|
||||
fileData=fileData,
|
||||
filename=filename,
|
||||
mimeType=mimeType,
|
||||
base64Encoded=False,
|
||||
prompt=prompt
|
||||
)
|
||||
|
||||
# Update objectId to match document ID
|
||||
extractedContent.objectId = document.id
|
||||
extractedContent.objectType = "ChatDocument"
|
||||
|
||||
return extractedContent
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error extracting from document: {str(e)}")
|
||||
raise
|
||||
|
||||
async def extractContentFromFileData(self, prompt: str, fileData: bytes, filename: str, mimeType: str, base64Encoded: bool = False, documentId: str = None) -> ExtractedContent:
|
||||
"""Extract content from file data directly using prompt"""
|
||||
try:
|
||||
return await self._processor.processFileData(
|
||||
fileData=fileData,
|
||||
filename=filename,
|
||||
mimeType=mimeType,
|
||||
base64Encoded=base64Encoded,
|
||||
prompt=prompt,
|
||||
documentId=documentId
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Error extracting from file data: {str(e)}")
|
||||
raise
|
||||
|
|
@ -20,9 +20,9 @@ def action(func):
|
|||
class MethodBase:
|
||||
"""Base class for all methods"""
|
||||
|
||||
def __init__(self, serviceContainer: Any):
|
||||
"""Initialize method with service container"""
|
||||
self.service = serviceContainer
|
||||
def __init__(self, serviceCenter: Any):
|
||||
"""Initialize method with service center"""
|
||||
self.service = serviceCenter
|
||||
self.name: str
|
||||
self.description: str
|
||||
self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}")
|
||||
|
|
|
|||
|
|
@ -32,10 +32,10 @@ class FileProcessingError(Exception):
|
|||
class DocumentProcessor:
|
||||
"""Processor for handling document operations and content extraction."""
|
||||
|
||||
def __init__(self, serviceContainer=None):
|
||||
def __init__(self, serviceCenter=None):
|
||||
"""Initialize the document processor."""
|
||||
self._neutralizer = DataAnonymizer() if APP_CONFIG.get("ENABLE_CONTENT_NEUTRALIZATION", False) else None
|
||||
self._serviceContainer = serviceContainer
|
||||
self._serviceCenter = serviceCenter
|
||||
|
||||
self.supportedTypes: Dict[str, Callable[[bytes, str, str], Awaitable[List[ContentItem]]]] = {
|
||||
'text/plain': self._processText,
|
||||
|
|
@ -136,7 +136,7 @@ class DocumentProcessor:
|
|||
|
||||
# Detect content type if needed
|
||||
if mimeType == "application/octet-stream":
|
||||
mimeType = self._serviceContainer.detectContentTypeFromData(fileData, filename)
|
||||
mimeType = self._serviceCenter.detectContentTypeFromData(fileData, filename)
|
||||
|
||||
# Process document based on type
|
||||
if mimeType not in self.supportedTypes:
|
||||
|
|
@ -527,7 +527,7 @@ class DocumentProcessor:
|
|||
# chunk is already base64 encoded string from _processImage
|
||||
# Use the original prompt directly for images (no content embedding)
|
||||
logger.debug(f"Calling image AI service for MIME type: {mimeType}")
|
||||
processedContent = await self._serviceContainer.callAiImageBasic(prompt, chunk, mimeType)
|
||||
processedContent = await self._serviceCenter.callAiImageBasic(prompt, chunk, mimeType)
|
||||
else:
|
||||
# For text content, use text AI service
|
||||
# Neutralize content if neutralizer is enabled (only for text)
|
||||
|
|
@@ -548,7 +548,7 @@ class DocumentProcessor:
                 """
 
                 logger.debug(f"Calling text AI service for MIME type: {mimeType}")
-                processedContent = await self._serviceContainer.callAiTextBasic(aiPrompt, contentToProcess)
+                processedContent = await self._serviceCenter.callAiTextBasic(aiPrompt, contentToProcess)
 
                 chunkResults.append(processedContent)
             except Exception as aiError:

@@ -8,14 +8,14 @@ from modules.interfaces.interfaceAppModel import User, UserConnection
 from modules.interfaces.interfaceChatModel import (
     TaskStatus, ChatDocument, TaskItem, TaskAction, TaskResult,
-    ChatStat, ChatLog, ChatMessage, ChatWorkflow, DocumentExchange
+    ChatStat, ChatLog, ChatMessage, ChatWorkflow, DocumentExchange, ExtractedContent
 )
 from modules.interfaces.interfaceAiCalls import AiCalls
 from modules.interfaces.interfaceChatObjects import getInterface as getChatObjects
 from modules.interfaces.interfaceChatModel import ActionResult
 from modules.interfaces.interfaceComponentObjects import getInterface as getComponentObjects
 from modules.interfaces.interfaceAppObjects import getInterface as getAppObjects
-from modules.workflow.managerDocument import DocumentManager
+from modules.workflow.processorDocument import DocumentProcessor
 from modules.workflow.methodBase import MethodBase
 import uuid
 import base64
@@ -23,8 +23,8 @@ import hashlib
 
 logger = logging.getLogger(__name__)
 
-class ServiceContainer:
-    """Service container that provides access to all services and their functions"""
+class ServiceCenter:
+    """Service center that provides access to all services and their functions"""
 
     def __init__(self, currentUser: User, workflow: ChatWorkflow):
         # Core services
@@ -39,7 +39,7 @@ class ServiceContainer:
         self.interfaceComponent = getComponentObjects(currentUser)
         self.interfaceApp = getAppObjects(currentUser)
         self.interfaceAiCalls = AiCalls()
-        self.documentManager = DocumentManager(self)
+        self.documentProcessor = DocumentProcessor(self)
 
         # Initialize methods catalog
         self.methods = {}
@@ -115,7 +115,7 @@ class ServiceContainer:
     def detectContentTypeFromData(self, fileData: bytes, filename: str) -> str:
         """
         Detect content type from file data and filename.
-        This method makes the MIME type detection function accessible through the service container.
+        This method makes the MIME type detection function accessible through the service center.
 
         Args:
             fileData: Raw file data as bytes
@@ -263,17 +263,11 @@ class ServiceContainer:
 
     # ===== Functions =====
 
-    def extractContent(self, prompt: str, document: ChatDocument) -> str:
-        """Extract content from document using prompt"""
-        return self.documentManager.extractContentFromDocument(prompt, document)
-
-    async def extractContentFromFileData(self, prompt: str, fileData: bytes, filename: str, mimeType: str, base64Encoded: bool = False, documentId: str = None) -> str:
-        """Extract content from file data directly using prompt"""
-        extracted_content = await self.documentManager.extractContentFromFileData(prompt, fileData, filename, mimeType, base64Encoded, documentId)
-        # Convert ExtractedContent to string for backward compatibility
-        if hasattr(extracted_content, 'contents'):
-            return "\n".join([item.data for item in extracted_content.contents])
-        return str(extracted_content)
+    async def extractContent(self, prompt: str, document: ChatDocument) -> ExtractedContent:
+        """Extract content from document using prompt"""
+        return await self.extractContentFromDocument(prompt, document)
 
     def getMethodsCatalog(self) -> Dict[str, Any]:
         """Get catalog of available methods and their actions"""
@@ -502,7 +496,7 @@ Instructions:
 Please provide a comprehensive summary of this conversation."""
 
             # Get summary using AI
-            return await self.interfaceAiCalls.callAiTextBasic(prompt)
+            return await self.callAiTextBasic(prompt)
 
         except Exception as e:
             logger.error(f"Error summarizing chat: {str(e)}")
@@ -535,27 +529,81 @@ Instructions:
 Please provide a clear summary of this message."""
 
             # Get summary using AI
-            return await self.interfaceAiCalls.callAiTextBasic(prompt)
+            return await self.callAiTextBasic(prompt)
 
         except Exception as e:
             logger.error(f"Error summarizing message: {str(e)}")
             return f"Error summarizing message: {str(e)}"
 
-    def callAiTextBasic(self, prompt: str, context: str = None) -> str:
+    async def callAiTextBasic(self, prompt: str, context: str = None) -> str:
         """Basic text processing using OpenAI"""
-        return self.interfaceAiCalls.callAiTextBasic(prompt, context)
+        # Calculate prompt size for stats
+        prompt_size = self.calculateObjectSize(prompt)
+        if context:
+            prompt_size += self.calculateObjectSize(context)
+
+        # Call AI
+        response = await self.interfaceAiCalls.callAiTextBasic(prompt, context)
+
+        # Calculate response size for stats
+        response_size = self.calculateObjectSize(response)
+
+        # Update stats
+        self.updateWorkflowStats(eventLabel="aicall.openai.text", bytesSent=prompt_size, bytesReceived=response_size)
+
+        return response
 
-    def callAiTextAdvanced(self, prompt: str, context: str = None) -> str:
+    async def callAiTextAdvanced(self, prompt: str, context: str = None) -> str:
         """Advanced text processing using Anthropic"""
-        return self.interfaceAiCalls.callAiTextAdvanced(prompt, context)
+        # Calculate prompt size for stats
+        prompt_size = self.calculateObjectSize(prompt)
+        if context:
+            prompt_size += self.calculateObjectSize(context)
+
+        # Call AI
+        response = await self.interfaceAiCalls.callAiTextAdvanced(prompt, context)
+
+        # Calculate response size for stats
+        response_size = self.calculateObjectSize(response)
+
+        # Update stats
+        self.updateWorkflowStats(eventLabel="aicall.anthropic.text", bytesSent=prompt_size, bytesReceived=response_size)
+
+        return response
 
-    def callAiImageBasic(self, prompt: str, imageData: str, mimeType: str) -> str:
+    async def callAiImageBasic(self, prompt: str, imageData: str, mimeType: str) -> str:
         """Basic image processing using OpenAI"""
-        return self.interfaceAiCalls.callAiImageBasic(prompt, imageData, mimeType)
+        # Calculate prompt size for stats
+        prompt_size = self.calculateObjectSize(prompt)
+        prompt_size += self.calculateObjectSize(imageData)
+
+        # Call AI
+        response = await self.interfaceAiCalls.callAiImageBasic(prompt, imageData, mimeType)
+
+        # Calculate response size for stats
+        response_size = self.calculateObjectSize(response)
+
+        # Update stats
+        self.updateWorkflowStats(eventLabel="aicall.openai.image", bytesSent=prompt_size, bytesReceived=response_size)
+
+        return response
 
-    def callAiImageAdvanced(self, prompt: str, imageData: str, mimeType: str) -> str:
+    async def callAiImageAdvanced(self, prompt: str, imageData: str, mimeType: str) -> str:
         """Advanced image processing using Anthropic"""
-        return self.interfaceAiCalls.callAiImageAdvanced(prompt, imageData, mimeType)
+        # Calculate prompt size for stats
+        prompt_size = self.calculateObjectSize(prompt)
+        prompt_size += self.calculateObjectSize(imageData)
+
+        # Call AI
+        response = await self.interfaceAiCalls.callAiImageAdvanced(prompt, imageData, mimeType)
+
+        # Calculate response size for stats
+        response_size = self.calculateObjectSize(response)
+
+        # Update stats
+        self.updateWorkflowStats(eventLabel="aicall.anthropic.image", bytesSent=prompt_size, bytesReceived=response_size)
+
+        return response
 
     def getFileInfo(self, fileId: str) -> Dict[str, Any]:
         """Get file information"""
@@ -575,6 +623,59 @@ Please provide a clear summary of this message."""
         """Get file data by ID"""
         return self.interfaceComponent.getFileData(fileId)
 
+    async def extractContentFromDocument(self, prompt: str, document: ChatDocument) -> ExtractedContent:
+        """Extract content from ChatDocument using prompt"""
+        try:
+            # Extract file data from ChatDocument
+            if document.data:
+                fileData = document.data.encode('utf-8') if isinstance(document.data, str) else document.data
+            else:
+                # Try to get file data from service center if document has fileId
+                if hasattr(document, 'fileId') and document.fileId:
+                    fileData = self.getFileData(document.fileId)
+                else:
+                    logger.error(f"No file data available in document: {document}")
+                    raise ValueError("No file data available in document")
+
+            # Get filename and mime type from document
+            filename = document.filename if hasattr(document, 'filename') else "document"
+            mimeType = document.mimeType if hasattr(document, 'mimeType') else "application/octet-stream"
+
+            # Process with document processor directly
+            extractedContent = await self.documentProcessor.processFileData(
+                fileData=fileData,
+                filename=filename,
+                mimeType=mimeType,
+                base64Encoded=False,
+                prompt=prompt,
+                documentId=document.id
+            )
+
+            # Update objectId to match document ID
+            extractedContent.objectId = document.id
+            extractedContent.objectType = "ChatDocument"
+
+            return extractedContent
+
+        except Exception as e:
+            logger.error(f"Error extracting from document: {str(e)}")
+            raise
+
+    async def extractContentFromFileData(self, prompt: str, fileData: bytes, filename: str, mimeType: str, base64Encoded: bool = False, documentId: str = None) -> ExtractedContent:
+        """Extract content from file data directly using prompt"""
+        try:
+            return await self.documentProcessor.processFileData(
+                prompt=prompt,
+                fileData=fileData,
+                filename=filename,
+                mimeType=mimeType,
+                base64Encoded=base64Encoded,
+                documentId=documentId
+            )
+        except Exception as e:
+            logger.error(f"Error extracting from file data: {str(e)}")
+            raise
+
     def createFile(self, fileName: str, mimeType: str, content: str, base64encoded: bool = False) -> str:
         """Create new file and return its ID"""
         # Convert content to bytes based on base64 flag
@@ -613,29 +714,85 @@ Please provide a clear summary of this message."""
             mimeType=mimeType
         )
 
-    def extractTextFromContentObjects(self, content_objects: List[Any]) -> List[str]:
-        """
-        Extract text content from ExtractedContent objects or other content objects.
-
-        Args:
-            content_objects: List of ExtractedContent objects or other content objects
-
-        Returns:
-            List of extracted text strings
-        """
-        text_contents = []
-        for content_obj in content_objects:
-            if hasattr(content_obj, 'contents') and content_obj.contents:
-                # Extract text from ContentItem objects
-                for content_item in content_obj.contents:
-                    if hasattr(content_item, 'data') and content_item.data:
-                        text_contents.append(content_item.data)
-            elif isinstance(content_obj, str):
-                text_contents.append(content_obj)
-            else:
-                # Fallback: convert to string representation
-                text_contents.append(str(content_obj))
-        return text_contents
+    def updateWorkflowStats(self, eventLabel: str = None, bytesSent: int = 0, bytesReceived: int = 0, tokenCount: int = 0) -> None:
+        """
+        Centralized function to update workflow statistics in database and running workflow.
+
+        Args:
+            eventLabel: Label for the event (e.g., "userinput", "taskplan", "action", "aicall<ainame>")
+            bytesSent: Bytes sent (incremental)
+            bytesReceived: Bytes received (incremental)
+            tokenCount: Token count (incremental, default 0)
+        """
+        try:
+            if hasattr(self, 'workflow') and self.workflow:
+                # Update the running workflow stats
+                self.interfaceChat.updateWorkflowStats(
+                    self.workflow.id,
+                    bytesSent=bytesSent,
+                    bytesReceived=bytesReceived
+                )
+
+            # Log the stats event
+            logger.debug(f"Workflow stats updated - Event: {eventLabel}, Sent: {bytesSent}, Received: {bytesReceived}, Tokens: {tokenCount}")
+
+        except Exception as e:
+            logger.error(f"Error updating workflow stats: {str(e)}")
+
+    def calculateObjectSize(self, obj: Any) -> int:
+        """
+        Calculate the size of an object in bytes.
+
+        Args:
+            obj: Object to calculate size for
+
+        Returns:
+            int: Size in bytes
+        """
+        try:
+            import json
+            import sys
+
+            if obj is None:
+                return 0
+
+            # Convert object to JSON string and calculate size
+            json_str = json.dumps(obj, ensure_ascii=False, default=str)
+            return len(json_str.encode('utf-8'))
+
+        except Exception as e:
+            logger.error(f"Error calculating object size: {str(e)}")
+            return 0
+
+    def calculateUserInputSize(self, userInput: Any) -> int:
+        """
+        Calculate size of user input including file sizes.
+
+        Args:
+            userInput: User input object
+
+        Returns:
+            int: Total size in bytes
+        """
+        try:
+            total_size = 0
+
+            # Calculate base user input size
+            if hasattr(userInput, 'prompt'):
+                total_size += self.calculateObjectSize(userInput.prompt)
+
+            # Add file sizes if present
+            if hasattr(userInput, 'listFileId') and userInput.listFileId:
+                for fileId in userInput.listFileId:
+                    file_info = self.getFileInfo(fileId)
+                    if file_info:
+                        total_size += file_info.get('size', 0)
+
+            return total_size
+
+        except Exception as e:
+            logger.error(f"Error calculating user input size: {str(e)}")
+            return 0
 
     async def executeAction(self, methodName: str, actionName: str, parameters: Dict[str, Any]) -> ActionResult:
         """Execute a method action"""
@@ -659,9 +816,9 @@ Please provide a clear summary of this message."""
 # Create singleton instance
 serviceObject = None
 
-def initializeServiceContainer(currentUser: User, workflow: ChatWorkflow) -> ServiceContainer:
-    """Initialize the service container singleton"""
+def initializeServiceCenter(currentUser: User, workflow: ChatWorkflow) -> ServiceCenter:
+    """Initialize the service center singleton"""
     global serviceObject
     if serviceObject is None:
-        serviceObject = ServiceContainer(currentUser, workflow)
+        serviceObject = ServiceCenter(currentUser, workflow)
     return serviceObject

@ -1,226 +0,0 @@
|
|||
# Workflow Architecture Documentation
|
||||
|
||||
## Overview
|
||||
|
||||
The workflow system has been refactored into a clear, structured approach with 5 distinct phases. This eliminates redundancies and provides better error handling, quality assessment, and maintainability.
|
||||
|
||||
## Architecture Principles
|
||||
|
||||
### 1. **Clear Phase Separation**
|
||||
Each workflow phase has a specific responsibility and clear inputs/outputs.
|
||||
|
||||
### 2. **Unified Data Model**
|
||||
Standardized on `TaskAction` objects throughout the system.
|
||||
|
||||
### 3. **Consistent Prompt Generation**
|
||||
All AI interactions use dedicated prompt generation functions.
|
||||
|
||||
### 4. **Quality Assessment**
|
||||
Each task is reviewed before proceeding to the next.
|
||||
|
||||
## Workflow Phases
|
||||
|
||||
### **Phase 1: High-Level Task Planning**
|
||||
**Function:** `planHighLevelTasks()`
|
||||
**Purpose:** Analyze user request and create a structured task plan
|
||||
**Input:** User input, available documents
|
||||
**Output:** Task plan with multiple task steps
|
||||
**Prompt Function:** `_createTaskPlanningPrompt()`
|
||||
|
||||
```python
|
||||
task_plan = await chatManager.planHighLevelTasks(userInput, workflow)
|
||||
```
|
||||
|
||||
### **Phase 2: Task Definition and Action Generation**
|
||||
**Function:** `defineTaskActions()`
|
||||
**Purpose:** Define specific actions for each task step
|
||||
**Input:** Task step, workflow context, previous results
|
||||
**Output:** List of TaskAction objects
|
||||
**Prompt Function:** `_createActionDefinitionPrompt()`
|
||||
|
||||
```python
|
||||
task_actions = await chatManager.defineTaskActions(task_step, workflow, previous_results)
|
||||
```
|
||||
|
||||
### **Phase 3: Action Execution**
|
||||
**Function:** `executeTaskActions()`
|
||||
**Purpose:** Execute all actions for a task step
|
||||
**Input:** List of TaskAction objects
|
||||
**Output:** List of action results
|
||||
**Prompt Function:** `_createActionExecutionPrompt()`
|
||||
|
||||
```python
|
||||
action_results = await chatManager.executeTaskActions(task_actions, workflow)
|
||||
```
|
||||
|
||||
### **Phase 4: Task Review and Quality Assessment**
|
||||
**Function:** `reviewTaskCompletion()`
|
||||
**Purpose:** Review task completion and decide next steps
|
||||
**Input:** Task step, actions, results
|
||||
**Output:** Review result with quality metrics
|
||||
**Prompt Function:** `_createResultReviewPrompt()`
|
||||
|
||||
```python
|
||||
review_result = await chatManager.reviewTaskCompletion(task_step, task_actions, action_results, workflow)
|
||||
```
|
||||
|
||||
### **Phase 5: Task Handover and State Management**
|
||||
**Function:** `prepareTaskHandover()`
|
||||
**Purpose:** Prepare results for next task or workflow completion
|
||||
**Input:** Task step, actions, review result
|
||||
**Output:** Handover data for next iteration
|
||||
**Prompt Function:** None (data processing only)
|
||||
|
||||
```python
|
||||
handover_data = await chatManager.prepareTaskHandover(task_step, task_actions, review_result, workflow)
|
||||
```
|
||||
|
||||
## Unified Workflow Execution
|
||||
|
||||
### **Main Entry Point**
|
||||
**Function:** `executeUnifiedWorkflow()`
|
||||
**Purpose:** Orchestrate all phases in sequence
|
||||
**Input:** User input, workflow
|
||||
**Output:** Complete workflow results
|
||||
|
||||
```python
|
||||
workflow_result = await chatManager.executeUnifiedWorkflow(userInput.prompt, workflow)
|
||||
```
|
||||
|
||||
### **Workflow Flow**
|
||||
```
|
||||
1. planHighLevelTasks() → Task Plan
|
||||
2. For each task step:
|
||||
├── defineTaskActions() → Task Actions
|
||||
├── executeTaskActions() → Action Results
|
||||
├── reviewTaskCompletion() → Review Result
|
||||
└── prepareTaskHandover() → Handover Data
|
||||
3. Return workflow summary
|
||||
```
|
||||
|
||||
## Prompt Generation Functions
|
||||
|
||||
| **Function** | **Used In** | **Purpose** |
|-------------|-------------|-------------|
| `_createTaskPlanningPrompt()` | `planHighLevelTasks()` | Generate high-level task plan |
| `_createActionDefinitionPrompt()` | `defineTaskActions()` | Generate specific actions for task |
| `_createActionExecutionPrompt()` | `executeTaskActions()` | Execute individual actions |
| `_createResultReviewPrompt()` | `reviewTaskCompletion()` | Review task completion |
|
||||
|
||||
## Data Models
|
||||
|
||||
### **TaskAction Object**
|
||||
```python
|
||||
class TaskAction:
|
||||
id: str
|
||||
execMethod: str
|
||||
execAction: str
|
||||
execParameters: Dict[str, Any]
|
||||
execResultLabel: Optional[str]
|
||||
status: TaskStatus
|
||||
error: Optional[str]
|
||||
result: Optional[str]
|
||||
# ... other fields
|
||||
```
|
||||
|
||||
### **Workflow Result Structure**
|
||||
```python
|
||||
{
|
||||
'status': 'completed' | 'partial' | 'failed',
|
||||
'successful_tasks': int,
|
||||
'total_tasks': int,
|
||||
'workflow_results': List[Dict],
|
||||
'final_results': List[str]
|
||||
}
|
||||
```
|
||||
|
||||
## Error Handling
|
||||
|
||||
### **Phase-Level Error Handling**
|
||||
Each phase has its own error handling:
|
||||
- **Planning:** Fallback to basic task plan
|
||||
- **Definition:** Skip task if no actions defined
|
||||
- **Execution:** Stop on first action failure
|
||||
- **Review:** Default to success to avoid blocking
|
||||
- **Handover:** Provide empty results on error
|
||||
|
||||
### **Circuit Breaker Pattern**
|
||||
AI calls use circuit breaker pattern to prevent cascading failures.
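The breaker implementation itself is not part of this document. Purely as an illustration of the idea, a minimal async circuit breaker could look like the sketch below; the class name, thresholds, and the notion of wrapping `callAiTextBasic` are assumptions for the example, not the actual code.

```python
import time


class CircuitBreakerOpen(Exception):
    """Raised when a call is rejected because the breaker is open."""


class CircuitBreaker:
    """Open after N consecutive failures, allow a trial call after a cooldown."""

    def __init__(self, max_failures: int = 3, reset_timeout: float = 30.0):
        self.max_failures = max_failures
        self.reset_timeout = reset_timeout
        self.failure_count = 0
        self.opened_at = None

    async def call(self, func, *args, **kwargs):
        # Reject immediately while the breaker is open and the cooldown has not expired
        if self.opened_at is not None:
            if time.monotonic() - self.opened_at < self.reset_timeout:
                raise CircuitBreakerOpen("AI service temporarily disabled after repeated failures")
            self.opened_at = None  # half-open: let one trial call through

        try:
            result = await func(*args, **kwargs)
            self.failure_count = 0  # a success closes the breaker again
            return result
        except Exception:
            self.failure_count += 1
            if self.failure_count >= self.max_failures:
                self.opened_at = time.monotonic()
            raise
```

A wrapped call would then look like `await breaker.call(service.callAiTextBasic, prompt)`.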
|
||||
|
||||
## Quality Metrics
|
||||
|
||||
### **Task Quality Assessment**
|
||||
- Success rate of actions (a sketch of computing these metrics follows this list)
|
||||
- Completion of expected outputs
|
||||
- Meeting of success criteria
|
||||
- Confidence scores
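In the workflow these numbers come from the AI review step driven by `_createResultReviewPrompt()`. Only to make the metrics concrete, here is a hedged sketch of deriving them directly from action results; the dictionary keys (`status`, `outputs`) are assumptions for the example, not the real result fields.

```python
from typing import Any, Dict, List


def assess_task_quality(action_results: List[Dict[str, Any]],
                        expected_outputs: List[str]) -> Dict[str, Any]:
    """Illustrative scoring of a task step from its action results."""
    total = len(action_results) or 1
    successful = sum(1 for r in action_results if r.get("status") == "completed")
    success_rate = successful / total

    # Which expected outputs were actually produced by any action?
    produced = {label for r in action_results for label in r.get("outputs", [])}
    missing_outputs = [o for o in expected_outputs if o not in produced]

    return {
        "success_rate": success_rate,
        "missing_outputs": missing_outputs,
        # Naive confidence: halve the score when expected outputs are missing
        "confidence": success_rate if not missing_outputs else success_rate * 0.5,
    }
```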
|
||||
|
||||
### **Workflow Quality Metrics**
|
||||
- Overall success rate
|
||||
- Task completion percentage
|
||||
- Error patterns and suggestions
|
||||
|
||||
## Benefits of Refactored Architecture
|
||||
|
||||
### **1. Clear Separation of Concerns**
|
||||
Each phase has a single responsibility and clear interfaces.
|
||||
|
||||
### **2. Better Error Handling**
|
||||
Granular error handling at each phase with appropriate fallbacks.
|
||||
|
||||
### **3. Quality Assessment**
|
||||
Built-in review and quality metrics for each task.
|
||||
|
||||
### **4. Maintainability**
|
||||
Consistent patterns and unified data models.
|
||||
|
||||
### **5. Extensibility**
|
||||
Easy to add new phases or modify existing ones.
|
||||
|
||||
### **6. Debugging**
|
||||
Clear logging and error reporting at each phase.
|
||||
|
||||
## Migration Path
|
||||
|
||||
### **Legacy Methods**
|
||||
All legacy methods are preserved for backward compatibility:
|
||||
- `createInitialTask()`
|
||||
- `createNextTask()`
|
||||
- `executeTask()`
|
||||
- `executeAction()`
|
||||
|
||||
### **New Unified Approach**
|
||||
Use `executeUnifiedWorkflow()` for new implementations.
|
||||
|
||||
## Usage Example
|
||||
|
||||
```python
|
||||
# Initialize chat manager
|
||||
await chatManager.initialize(workflow)
|
||||
|
||||
# Execute unified workflow
|
||||
workflow_result = await chatManager.executeUnifiedWorkflow(userInput.prompt, workflow)
|
||||
|
||||
# Process results
|
||||
if workflow_result['status'] == 'completed':
|
||||
print(f"Workflow completed: {workflow_result['successful_tasks']}/{workflow_result['total_tasks']} tasks")
|
||||
else:
|
||||
print(f"Workflow failed: {workflow_result['error']}")
|
||||
```
|
||||
|
||||
## Future Enhancements
|
||||
|
||||
### **1. Retry Logic**
|
||||
Add exponential backoff retry for failed tasks.
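A minimal sketch of what such a retry wrapper could look like; the helper name and the delay values are assumptions, not existing code.

```python
import asyncio
import random


async def retry_with_backoff(task_func, max_retries: int = 3, base_delay: float = 1.0):
    """Retry an async callable with exponential backoff and a little jitter."""
    for attempt in range(max_retries + 1):
        try:
            return await task_func()
        except Exception:
            if attempt == max_retries:
                raise  # give up after the final attempt
            # 1s, 2s, 4s, ... plus jitter so parallel workflows do not retry in lockstep
            delay = base_delay * (2 ** attempt) + random.uniform(0, 0.5)
            await asyncio.sleep(delay)
```

Usage would be along the lines of `await retry_with_backoff(lambda: chatManager.executeTaskActions(task_actions, workflow))`.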
|
||||
|
||||
### **2. Alternative Approaches**
|
||||
When primary method fails, try alternative approaches.
|
||||
|
||||
### **3. Parallel Execution**
|
||||
Execute independent tasks in parallel.
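Using the phase functions above, independent steps could be fanned out with `asyncio.gather`. The sketch below is only an illustration; it assumes the steps truly have no dependencies on each other, which the planner would have to guarantee first.

```python
import asyncio


async def run_steps_in_parallel(chatManager, task_steps, workflow):
    """Define and execute several independent task steps concurrently."""

    async def run_one(step):
        actions = await chatManager.defineTaskActions(step, workflow, [])
        return await chatManager.executeTaskActions(actions, workflow)

    # return_exceptions=True keeps one failing step from cancelling its siblings
    return await asyncio.gather(*(run_one(s) for s in task_steps), return_exceptions=True)
```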
|
||||
|
||||
### **4. Progress Tracking**
|
||||
Real-time progress updates during workflow execution.
|
||||
|
||||
### **5. Rollback Mechanisms**
|
||||
Undo failed operations and restore previous state.
|
||||
|
|
@@ -993,95 +993,6 @@ I would like to change the agent chat workflow. Can you please, for this, in a
|
|||
|
||||
3. New object structure for the workflow execution:
|
||||
|
||||
workflow =
|
||||
{
|
||||
// Core workflow properties
|
||||
"id": "workflow_uuid",
|
||||
"name": "Analysis Workflow",
|
||||
"mandate_id": 123,
|
||||
"user_id": 456,
|
||||
"status": "running", // "running", "failed", "stopped"
|
||||
"started_at": "2025-03-29T14:15:00.000Z",
|
||||
"last_activity": "2025-03-29T14:45:00.000Z",
|
||||
"current_round": 1,
|
||||
"waiting_for_user": false,
|
||||
|
||||
// Performance statistics (sum)
|
||||
"data_stats": {
|
||||
"total_processing_time": 3.9,
|
||||
"total_token_count": 857,
|
||||
"total_bytes_semt": 1026323,
|
||||
"total_bytes_received": 4200,
|
||||
}
|
||||
|
||||
// Messages array - main conversation history with structured message objects
|
||||
"messages": [],
|
||||
|
||||
// Logs
|
||||
"logs": [
|
||||
{
|
||||
"id": "log_uuid1",
|
||||
"message": "Workflow started",
|
||||
"type": "info",
|
||||
"timestamp": "2025-03-29T14:15:00.000Z"
|
||||
}
|
||||
]
|
||||
|
||||
}
|
||||
|
||||
"messages": [
|
||||
{
|
||||
// Core message properties
|
||||
"id": "msg_uuid", // Unique identifier for each message
|
||||
"workflow_id": "workflow_uuid", // Reference to the parent workflow
|
||||
"parent_message_id": "msg_previous_uuid", // Reference to message being responded to
|
||||
"started_at": "2025-03-29T14:30:00.000Z", // Single timestamp for message creation
|
||||
"finished_at": "2025-03-29T14:30:00.000Z", // Single timestamp for message closing, when next message is created
|
||||
"sequence_no": 1, // Optional, but useful for ordering within workflow
|
||||
|
||||
// Status information
|
||||
"status": "completed", // message status: "pending", "processing", "completed", "failed"
|
||||
|
||||
// Role instead of agent information
|
||||
"role": "system", // "system", "user", "assistant" - who created this message
|
||||
|
||||
// Metadata for statistics and accounting
|
||||
"data_stats": {
|
||||
"processing_time": 2.5, // Time taken to generate in seconds
|
||||
"token_count": 1205, // Token count (for AI models)
|
||||
"bytes_sent": 4096, // Data sent to generate this message
|
||||
"bytes_received": 8192, // Data received
|
||||
}
|
||||
|
||||
// Documents section - includes prompt and all referenced files
|
||||
"documents": [
|
||||
{
|
||||
// Document metadata
|
||||
"id": "doc_uuid",
|
||||
"source": {
|
||||
"type": "prompt", // "prompt", "file", "clipboard"
|
||||
"path": "/full/path/to/file.txt", // Storage path (for files)
|
||||
"name": "display_filename.txt",
|
||||
"size": 1024, // Size in bytes
|
||||
"lines": 42, // Line count (for text files)
|
||||
"content_type": "text/plain", // MIME type
|
||||
"upload_date": "2025-03-29T14:30:00.000Z"
|
||||
},
|
||||
|
||||
// Document contents (can have multiple parts)
|
||||
"contents": [
|
||||
{
|
||||
"label": "Main Content", // Optional label
|
||||
"type": "text", // "text", "image", "chart", etc.
|
||||
"text": "The actual text content",
|
||||
"is_extracted": true // Flag if this is extracted from original file
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
|
||||
}
|
||||
]
|
||||
|
||||
|
||||
4. The steps in a workflow (new) - please revise the code and remove all unnecessary parts.
|
||||
|
|
|
|||
|
|
@ -1,187 +0,0 @@
|
|||
# Document Management Refactoring Specification
|
||||
|
||||
## Overview
|
||||
This specification outlines the refactoring of document management in the system, focusing on proper model separation, centralized content extraction, and future-proof neutralization integration.
|
||||
|
||||
## Model Structure
|
||||
|
||||
### Base Document Models
|
||||
```python
|
||||
class ContentMetadata(BaseModel, ModelMixin):
|
||||
"""Metadata for content items"""
|
||||
size: int = Field(description="Content size in bytes")
|
||||
pages: Optional[int] = Field(None, description="Number of pages for multi-page content")
|
||||
error: Optional[str] = Field(None, description="Processing error if any")
|
||||
# Media-specific attributes
|
||||
width: Optional[int] = Field(None, description="Width in pixels for images/videos")
|
||||
height: Optional[int] = Field(None, description="Height in pixels for images/videos")
|
||||
colorMode: Optional[str] = Field(None, description="Color mode (e.g., RGB, CMYK, grayscale)")
|
||||
fps: Optional[float] = Field(None, description="Frames per second for videos")
|
||||
durationSec: Optional[float] = Field(None, description="Duration in seconds for videos/audio")
|
||||
|
||||
class ContentItem(BaseModel, ModelMixin):
|
||||
"""Individual content item from a document"""
|
||||
label: str = Field(description="Content label (e.g., tab name, tag name)")
|
||||
data: str = Field(description="Text content")
|
||||
metadata: ContentMetadata = Field(description="Content metadata")
|
||||
|
||||
class ChatDocument(BaseModel, ModelMixin):
|
||||
id: str = Field(default_factory=lambda: str(uuid.uuid4()))
|
||||
fileId: str
|
||||
filename: str
|
||||
fileSize: int
|
||||
mimeType: str
|
||||
|
||||
class TaskDocument(BaseModel, ModelMixin):
|
||||
id: str = Field(default_factory=lambda: str(uuid.uuid4()))
|
||||
filename: str
|
||||
fileSize: int
|
||||
mimeType: str
|
||||
data: str # Base64 encoded file data
|
||||
|
||||
class ExtractedContent(BaseModel, ModelMixin):
|
||||
objectId: str # Reference to source document
|
||||
objectType: str = Field(description="Type of source object ('ChatDocument' or 'TaskDocument')")
|
||||
contents: List[ContentItem]
|
||||
```
|
||||
|
||||
## Service Layer Structure
|
||||
|
||||
### Document Service
|
||||
```python
|
||||
class DocumentService:
|
||||
def __init__(self, service_container):
|
||||
self.service = service_container
|
||||
self.neutralizer_enabled = False # Flag for neutralization feature
|
||||
|
||||
async def extractFromChatDocument(self, prompt: str, document: ChatDocument) -> ExtractedContent:
|
||||
"""
|
||||
Extract content from a ChatDocument by converting it to TaskDocument first.
|
||||
"""
|
||||
# Convert ChatDocument to TaskDocument
|
||||
task_doc = await self._convertToTaskDocument(document)
|
||||
return await self.getDocumentContent(task_doc, prompt)
|
||||
|
||||
async def extractFromTaskDocument(self, prompt: str, document: TaskDocument) -> ExtractedContent:
|
||||
"""
|
||||
Extract content directly from a TaskDocument.
|
||||
"""
|
||||
return await self.getDocumentContent(document, prompt)
|
||||
|
||||
async def getDocumentContent(self, document: TaskDocument, prompt: str) -> ExtractedContent:
|
||||
"""
|
||||
Helper function for centralized content extraction.
|
||||
Handles the actual content extraction and optional neutralization.
|
||||
"""
|
||||
# Extract content based on mimeType
|
||||
content = await self._extractRawContent(document)
|
||||
|
||||
# Apply neutralization if enabled
|
||||
if self.neutralizer_enabled:
|
||||
from modules.neutralizer import neutralizer
|
||||
content = await neutralizer.process_content(content)
|
||||
|
||||
# Process content with AI using prompt
|
||||
processed_content = await self._processWithAI(content, prompt)
|
||||
|
||||
return ExtractedContent(
|
||||
objectId=document.id,
|
||||
objectType="TaskDocument",
|
||||
contents=processed_content
|
||||
)
|
||||
```
|
||||
|
||||
## Implementation Steps
|
||||
|
||||
1. **Model Cleanup**
|
||||
- Create new model classes in `interfaceChatModel.py`
|
||||
- Remove deprecated models:
|
||||
- DocumentExtraction
|
||||
- DocumentContext
|
||||
- ProcessedDocument
|
||||
- ChatContent (replaced by ContentItem)
|
||||
- Update ChatDocument to remove contents attribute
|
||||
- Convert all snake_case to camelCase in manager*.py and method*.py
|
||||
|
||||
2. **Service Implementation**
|
||||
- Create new `DocumentService` class in `serviceDocument.py`
|
||||
- Implement the three main methods:
|
||||
- extractFromChatDocument
|
||||
- extractFromTaskDocument
|
||||
- getDocumentContent (helper function)
|
||||
- Add neutralization integration with feature flag
|
||||
|
||||
3. **UserInput Processing**
|
||||
- Update `UserInputRequest` processing to use `ChatMessage`
|
||||
- Implement `processFileIds` in `interfaceChatObjects`
|
||||
- Update all references to use new model structure
|
||||
|
||||
4. **Method Module Updates**
|
||||
- Update all method*.py modules to use new service layer
|
||||
- Remove direct file access
|
||||
- Implement proper error handling and logging
|
||||
|
||||
5. **Testing and Validation**
|
||||
- Create unit tests for new models and services
|
||||
- Test document processing with various file types
|
||||
- Validate content extraction and neutralization
|
||||
- Test error handling and edge cases
|
||||
|
||||
## Files to be Removed/Modified
|
||||
|
||||
### To be Removed
|
||||
1. `DocumentExtraction` class from interfaceChatModel.py
|
||||
2. `DocumentContext` class from interfaceChatModel.py
|
||||
3. `ProcessedDocument` class from interfaceChatModel.py
|
||||
4. `ChatContent` class from interfaceChatModel.py
|
||||
5. Direct file access methods from method*.py modules
|
||||
|
||||
### To be Modified
|
||||
1. `interfaceChatModel.py`
|
||||
- Add new model classes
|
||||
- Remove deprecated classes
|
||||
- Update existing classes
|
||||
|
||||
2. `managerDocument.py`
|
||||
- Move core functionality to DocumentService
|
||||
- Update to use new model structure
|
||||
- Remove redundant methods
|
||||
|
||||
3. `method*.py` modules
|
||||
- Update to use DocumentService
|
||||
- Remove direct file access
|
||||
- Update error handling
|
||||
|
||||
4. `interfaceChatObjects.py`
|
||||
- Implement processFileIds
|
||||
- Update document handling
|
||||
|
||||
## Neutralization Integration
|
||||
|
||||
The neutralization feature is integrated into the `getDocumentContent` method with a feature flag. When enabled, it will process content through the neutralizer before sending it to AI processing.
|
||||
|
||||
```python
|
||||
# In getDocumentContent method
|
||||
if self.neutralizer_enabled:
|
||||
from modules.neutralizer import neutralizer
|
||||
content = await neutralizer.process_content(content)
|
||||
```
|
||||
|
||||
This allows for easy enabling/disabling of the feature and future expansion of neutralization capabilities.
|
||||
|
||||
## Migration Strategy
|
||||
|
||||
1. Create new models and services
|
||||
2. Implement new functionality alongside existing code
|
||||
3. Gradually migrate method modules to use new services
|
||||
4. Remove deprecated code once migration is complete
|
||||
5. Enable neutralization feature when ready
|
||||
|
||||
## Testing Requirements
|
||||
|
||||
1. Unit tests for all new model classes
|
||||
2. Integration tests for DocumentService
|
||||
3. Tests for content extraction with various file types
|
||||
4. Tests for neutralization integration
|
||||
5. Performance tests for large file handling
|
||||
6. Error handling and edge case tests
|
||||
|
|
@ -5,10 +5,10 @@
|
|||
### 1.1 Core Components
|
||||
- **WorkflowManager**: Orchestrates the overall workflow process
|
||||
- **ChatManager**: Manages chat interactions and task execution
|
||||
- **ServiceContainer**: Central state and context management
|
||||
- **ServiceCenter**: Central state and context management
|
||||
- **AgentTask**: Core data object for task execution
|
||||
|
||||
### 1.2 Service Container Structure
|
||||
### 1.2 Service Center Structure
|
||||
```python
|
||||
from enum import Enum
|
||||
from typing import Dict, List, Optional, Any, Literal
|
||||
|
|
@ -161,8 +161,8 @@ class AgentTask(BaseModel):
|
|||
"""Check if any action has failed"""
|
||||
return any(a.status == ActionStatus.FAILED for a in self.actionList)
|
||||
|
||||
class ServiceContainer:
|
||||
"""Service container with improved state management"""
|
||||
class ServiceCenter:
|
||||
"""Service center with improved state management"""
|
||||
|
||||
def __init__(self):
|
||||
self.state = {
|
||||
|
|
@ -481,7 +481,7 @@ class AgentTask:
|
|||
graph TD
|
||||
A[User Input] --> B[WorkflowManager.workflowProcess]
|
||||
B --> C[ChatManager.initialize]
|
||||
C --> D[Create ServiceContainer]
|
||||
C --> D[Create ServiceCenter]
|
||||
D --> E[Create Initial Task]
|
||||
```
|
||||
|
||||
|
|
@ -491,7 +491,7 @@ graph TD
|
|||
- Starts task processing loop
|
||||
|
||||
2. **ChatManager.initialize**
|
||||
- Creates ServiceContainer with all required components
|
||||
- Creates ServiceCenter with all required components
|
||||
- Initializes service interfaces
|
||||
- Sets up task and state management
|
||||
|
||||
|
|
@ -675,7 +675,7 @@ graph TD
|
|||
### 3.1 Method Registration
|
||||
```python
|
||||
def _registerMethods(self):
|
||||
"""Register available methods in service container"""
|
||||
"""Register available methods in service center"""
|
||||
self.service.methods = {
|
||||
"sharepoint": MethodSharepoint(self.service),
|
||||
"outlook": MethodOutlook(self.service),
|
||||
|
|
@ -862,7 +862,6 @@ gateway/
|
|||
│ │ ├── managerChat.py # Chat management and AI response validation
|
||||
│ │ ├── managerPrompt.py # AI prompt generation and management
|
||||
│ │ ├── methodBase.py # Base method class with result validation
|
||||
│ │ ├── managerDocument.py # Document operations management
|
||||
│ │ └── processorDocument.py # Document content extraction
|
||||
│ │
|
||||
│ ├── agents/ # To be refactored into methods
|
||||
|
|
@ -917,7 +916,7 @@ gateway/
|
|||
#### Phase 3: Manager Updates
|
||||
1. **Chat Manager Enhancement**
|
||||
- Integrate AI response validation
|
||||
- Update service container structure
|
||||
- Update service center structure
|
||||
- Improve error handling
|
||||
|
||||
2. **Document Manager Integration**
|
||||
|
|
|
|||
|
|
@ -1,31 +0,0 @@
|
|||
# PowerShell script to run document extraction test
|
||||
# Usage: .\run_document_test.ps1 [file_path]
|
||||
|
||||
param(
|
||||
[string]$FilePath = "test_sample_document.txt"
|
||||
)
|
||||
|
||||
Write-Host "=== PowerOn Document Extraction Test ===" -ForegroundColor Green
|
||||
Write-Host ""
|
||||
|
||||
# Check if file exists
|
||||
if (-not (Test-Path $FilePath)) {
|
||||
Write-Host "Error: File not found: $FilePath" -ForegroundColor Red
|
||||
Write-Host "Please provide a valid file path as parameter or ensure test_sample_document.txt exists." -ForegroundColor Yellow
|
||||
exit 1
|
||||
}
|
||||
|
||||
Write-Host "Testing document extraction for file: $FilePath" -ForegroundColor Cyan
|
||||
Write-Host "Log file will be: test_document_extraction.log" -ForegroundColor Cyan
|
||||
Write-Host ""
|
||||
|
||||
# Run the Python test
|
||||
try {
|
||||
python test_document_extraction.py $FilePath
|
||||
Write-Host ""
|
||||
Write-Host "Test completed successfully!" -ForegroundColor Green
|
||||
Write-Host "Check test_document_extraction.log for detailed results." -ForegroundColor Cyan
|
||||
} catch {
|
||||
Write-Host "Test failed with error: $($_.Exception.Message)" -ForegroundColor Red
|
||||
exit 1
|
||||
}
|
||||
|
|
@ -1,15 +0,0 @@
|
|||
# Test configuration for workflow testing
|
||||
DB_APP_HOST=_test_data_app
|
||||
DB_APP_DATABASE=app
|
||||
DB_APP_USER=test
|
||||
DB_APP_PASSWORD_SECRET=test123
|
||||
|
||||
DB_CHAT_HOST=_test_data_chat
|
||||
DB_CHAT_DATABASE=chat
|
||||
DB_CHAT_USER=test
|
||||
DB_CHAT_PASSWORD_SECRET=test123
|
||||
|
||||
# AI Configuration
|
||||
AI_PROVIDER=openai
|
||||
AI_MODEL=gpt-3.5-turbo
|
||||
AI_API_KEY_SECRET=test_key
|
||||
|
|
@ -1,288 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Test procedure for DocumentManager document extraction functionality.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import sys
|
||||
import os
|
||||
import json
|
||||
import argparse
|
||||
from datetime import datetime, UTC
|
||||
from pathlib import Path
|
||||
import logging
|
||||
|
||||
print("Starting test_document_extraction.py...")
|
||||
|
||||
# Configure logging FIRST, before any other imports
|
||||
import logging
|
||||
|
||||
# Clear any existing handlers to avoid duplicate logs
|
||||
for handler in logging.root.handlers[:]:
|
||||
logging.root.removeHandler(handler)
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.DEBUG,
|
||||
format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
|
||||
handlers=[
|
||||
logging.StreamHandler(sys.stdout),
|
||||
logging.FileHandler('test_document_extraction.log', mode='w', encoding='utf-8') # 'w' mode clears the file
|
||||
],
|
||||
force=True # Force reconfiguration even if already configured
|
||||
)
|
||||
|
||||
# Filter out httpcore messages
|
||||
logging.getLogger('httpcore').setLevel(logging.WARNING)
|
||||
logging.getLogger('httpx').setLevel(logging.WARNING)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Set up test configuration
|
||||
os.environ['POWERON_CONFIG_FILE'] = 'test_config.ini'
|
||||
print("Set POWERON_CONFIG_FILE environment variable")
|
||||
|
||||
try:
|
||||
# Import required modules
|
||||
from modules.interfaces.interfaceAppObjects import User, UserConnection
|
||||
from modules.interfaces.interfaceChatModel import ChatWorkflow
|
||||
from modules.workflow.managerDocument import DocumentManager
|
||||
from modules.workflow.serviceContainer import ServiceContainer
|
||||
print("All imports successful")
|
||||
except Exception as e:
|
||||
print(f"Import error: {e}")
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
sys.exit(1)
|
||||
|
||||
def log_extraction_debug(message: str, data: dict = None):
|
||||
"""Log extraction debug data with JSON dumps"""
|
||||
timestamp = datetime.now(UTC).isoformat()
|
||||
if data:
|
||||
logger.debug(f"[{timestamp}] {message}\n{json.dumps(data, indent=2, ensure_ascii=False)}")
|
||||
else:
|
||||
logger.debug(f"[{timestamp}] {message}")
|
||||
|
||||
def create_test_user() -> User:
|
||||
"""Create a test user for the document extraction"""
|
||||
return User(
|
||||
id="test-user-doc-001",
|
||||
mandateId="test-mandate-doc-001",
|
||||
username="testuser_doc",
|
||||
email="test_doc@example.com",
|
||||
fullName="Test Document User",
|
||||
enabled=True,
|
||||
language="en",
|
||||
privilege="user",
|
||||
authenticationAuthority="local"
|
||||
)
|
||||
|
||||
def create_test_workflow() -> ChatWorkflow:
|
||||
"""Create a test workflow for document extraction"""
|
||||
return ChatWorkflow(
|
||||
id="test-workflow-doc-001",
|
||||
mandateId="test-mandate-doc-001",
|
||||
status="running",
|
||||
name="Document Extraction Test Workflow",
|
||||
currentRound=1,
|
||||
lastActivity=datetime.now(UTC).isoformat(),
|
||||
startedAt=datetime.now(UTC).isoformat(),
|
||||
logs=[],
|
||||
messages=[],
|
||||
stats=None,
|
||||
tasks=[]
|
||||
)
|
||||
|
||||
def detect_mime_type(file_path: str) -> str:
|
||||
"""Detect MIME type based on file extension"""
|
||||
ext = Path(file_path).suffix.lower()
|
||||
mime_types = {
|
||||
'.txt': 'text/plain',
|
||||
'.md': 'text/markdown',
|
||||
'.csv': 'text/csv',
|
||||
'.json': 'application/json',
|
||||
'.xml': 'application/xml',
|
||||
'.js': 'application/javascript',
|
||||
'.py': 'application/x-python',
|
||||
'.svg': 'image/svg+xml',
|
||||
'.jpg': 'image/jpeg',
|
||||
'.jpeg': 'image/jpeg',
|
||||
'.png': 'image/png',
|
||||
'.gif': 'image/gif',
|
||||
'.pdf': 'application/pdf',
|
||||
'.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
|
||||
'.doc': 'application/msword',
|
||||
'.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
|
||||
'.xls': 'application/vnd.ms-excel',
|
||||
'.pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
|
||||
'.ppt': 'application/vnd.ms-powerpoint',
|
||||
'.html': 'text/html',
|
||||
'.htm': 'text/html'
|
||||
}
|
||||
return mime_types.get(ext, 'application/octet-stream')
|
||||
|
||||
async def test_document_extraction(file_path: str):
|
||||
"""Test document extraction from a file path"""
|
||||
try:
|
||||
# Clear the log file before each run
|
||||
log_file_path = "test_document_extraction.log"
|
||||
if os.path.exists(log_file_path):
|
||||
with open(log_file_path, 'w') as f:
|
||||
f.write("") # Clear the file
|
||||
logger.info(f"Cleared log file: {log_file_path}")
|
||||
|
||||
logger.info("=== STARTING DOCUMENT EXTRACTION TEST ===")
|
||||
|
||||
# Validate file path
|
||||
if not os.path.exists(file_path):
|
||||
raise FileNotFoundError(f"File not found: {file_path}")
|
||||
|
||||
# Get file info
|
||||
file_path_obj = Path(file_path)
|
||||
filename = file_path_obj.name
|
||||
mime_type = detect_mime_type(file_path)
|
||||
file_size = file_path_obj.stat().st_size
|
||||
|
||||
log_extraction_debug("File information", {
|
||||
"file_path": file_path,
|
||||
"filename": filename,
|
||||
"mime_type": mime_type,
|
||||
"file_size_bytes": file_size,
|
||||
"file_size_mb": round(file_size / (1024 * 1024), 2)
|
||||
})
|
||||
|
||||
# Read file data
|
||||
try:
|
||||
with open(file_path, 'rb') as f:
|
||||
file_data = f.read()
|
||||
log_extraction_debug("File read successfully", {
|
||||
"bytes_read": len(file_data),
|
||||
"file_encoding": "binary"
|
||||
})
|
||||
except Exception as e:
|
||||
logger.error(f"Error reading file: {str(e)}")
|
||||
raise
|
||||
|
||||
# Create test user and workflow
|
||||
test_user = create_test_user()
|
||||
test_workflow = create_test_workflow()
|
||||
|
||||
# Create service container
|
||||
service_container = ServiceContainer(test_user, test_workflow)
|
||||
log_extraction_debug("Service container created", {
|
||||
"user_id": test_user.id,
|
||||
"workflow_id": test_workflow.id
|
||||
})
|
||||
|
||||
# Create document manager
|
||||
document_manager = DocumentManager(service_container)
|
||||
log_extraction_debug("Document manager created")
|
||||
|
||||
# Define extraction prompt
|
||||
extraction_prompt = "extract the table and convert it to a csv table"
|
||||
|
||||
log_extraction_debug("Starting document extraction", {
|
||||
"prompt": extraction_prompt,
|
||||
"filename": filename,
|
||||
"mime_type": mime_type
|
||||
})
|
||||
|
||||
# Extract content from file data
|
||||
try:
|
||||
extracted_content = await document_manager.extractContentFromFileData(
|
||||
prompt=extraction_prompt,
|
||||
fileData=file_data,
|
||||
filename=filename,
|
||||
mimeType=mime_type,
|
||||
base64Encoded=False,
|
||||
documentId=f"test-doc-{datetime.now(UTC).timestamp()}"
|
||||
)
|
||||
|
||||
# Log extraction results
|
||||
extraction_result = {
|
||||
"extracted_content_id": extracted_content.id,
|
||||
"content_items_count": len(extracted_content.contents)
|
||||
}
|
||||
|
||||
# Add objectId and objectType if they exist (set by DocumentManager)
|
||||
if hasattr(extracted_content, 'objectId'):
|
||||
extraction_result["object_id"] = extracted_content.objectId
|
||||
if hasattr(extracted_content, 'objectType'):
|
||||
extraction_result["object_type"] = extracted_content.objectType
|
||||
|
||||
log_extraction_debug("Document extraction completed successfully", extraction_result)
|
||||
|
||||
# Log detailed content information
|
||||
for i, content_item in enumerate(extracted_content.contents):
|
||||
content_info = {
|
||||
"label": content_item.label,
|
||||
"data_length": len(content_item.data) if content_item.data else 0,
|
||||
"data_preview": content_item.data[:500] + "..." if content_item.data and len(content_item.data) > 500 else content_item.data
|
||||
}
|
||||
|
||||
# Add metadata if available
|
||||
if content_item.metadata:
|
||||
content_info["metadata"] = {
|
||||
"size": content_item.metadata.size,
|
||||
"mime_type": content_item.metadata.mimeType,
|
||||
"base64_encoded": content_item.metadata.base64Encoded,
|
||||
"pages": content_item.metadata.pages
|
||||
}
|
||||
|
||||
log_extraction_debug(f"CONTENT ITEM {i+1}:", content_info)
|
||||
|
||||
# Log summary of all extracted content
|
||||
all_content = "\n\n".join([item.data for item in extracted_content.contents if item.data])
|
||||
log_extraction_debug("COMPLETE EXTRACTED CONTENT:", {
|
||||
"total_length": len(all_content),
|
||||
"content": all_content
|
||||
})
|
||||
|
||||
return extracted_content
|
||||
|
||||
except Exception as e:
|
||||
log_extraction_debug("DOCUMENT EXTRACTION EXCEPTION:", {
|
||||
"error_type": type(e).__name__,
|
||||
"error_message": str(e),
|
||||
"error_args": e.args if hasattr(e, 'args') else None
|
||||
})
|
||||
raise
|
||||
|
||||
logger.info("=== DOCUMENT EXTRACTION TEST COMPLETED ===")
|
||||
return extracted_content
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"❌ Document extraction test failed with error: {str(e)}")
|
||||
log_extraction_debug("Full error details", {
|
||||
"error_type": type(e).__name__,
|
||||
"error_message": str(e)
|
||||
})
|
||||
raise
|
||||
|
||||
async def main():
|
||||
"""Main function to run the document extraction test"""
|
||||
print("Inside main()")
|
||||
logger.info("=" * 50)
|
||||
logger.info("DOCUMENT EXTRACTION TEST")
|
||||
logger.info("=" * 50)
|
||||
|
||||
# Parse command line arguments
|
||||
parser = argparse.ArgumentParser(description='Test document extraction functionality')
|
||||
parser.add_argument('file_path', help='Path to the file to extract content from')
|
||||
args = parser.parse_args()
|
||||
|
||||
try:
|
||||
extracted_content = await test_document_extraction(args.file_path)
|
||||
logger.info("=" * 50)
|
||||
logger.info("TEST COMPLETED SUCCESSFULLY")
|
||||
logger.info("=" * 50)
|
||||
return extracted_content
|
||||
except Exception as e:
|
||||
logger.error("=" * 50)
|
||||
logger.error("TEST FAILED")
|
||||
logger.error("=" * 50)
|
||||
raise
|
||||
|
||||
if __name__ == "__main__":
|
||||
print("About to run main()")
|
||||
asyncio.run(main())
|
||||
print("main() finished")
|
||||
|
|
@ -1,27 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from modules.workflow.methodBase import MethodBase
|
||||
|
||||
class TestMethod(MethodBase):
|
||||
pass
|
||||
|
||||
def test_parameter_extraction():
|
||||
test = TestMethod(None)
|
||||
test.name = 'test'
|
||||
|
||||
docstring = """Call AI service with document content
|
||||
|
||||
Parameters:
|
||||
prompt (str): The prompt to send to the AI service
|
||||
documents (List[Dict[str, Any]], optional): List of documents to include in context
|
||||
Each document should have: documentReference (str), contentExtractionPrompt (str, optional)"""
|
||||
|
||||
print("Docstring:")
|
||||
print(docstring)
|
||||
print("\nExtracted descriptions:")
|
||||
descriptions = test._extractParameterDescriptions(docstring)
|
||||
for param, desc in descriptions.items():
|
||||
print(f" {param}: {desc}")
|
||||
|
||||
if __name__ == "__main__":
|
||||
test_parameter_extraction()
|
||||
|
|
@ -1,289 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Test script for retry enhancement in managerChat.py
|
||||
Tests that previous action results and review feedback are properly passed to retry prompts.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import sys
|
||||
import os
|
||||
|
||||
# Add the gateway directory to the Python path
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'gateway'))
|
||||
|
||||
from modules.workflow.managerChat import ChatManager
|
||||
from modules.interfaces.interfaceAppModel import User
|
||||
from modules.interfaces.interfaceChatModel import ChatWorkflow, ChatMessage
|
||||
from modules.interfaces.interfaceChatObjects import ChatObjects
|
||||
|
||||
# Configure logging
|
||||
logging.basicConfig(level=logging.DEBUG)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class MockChatObjects(ChatObjects):
|
||||
"""Mock implementation of ChatObjects for testing"""
|
||||
|
||||
def createTaskAction(self, action_data):
|
||||
"""Mock task action creation"""
|
||||
class MockTaskAction:
|
||||
def __init__(self, data):
|
||||
self.id = "test_action_id"
|
||||
self.execMethod = data.get("execMethod", "unknown")
|
||||
self.execAction = data.get("execAction", "unknown")
|
||||
self.execParameters = data.get("execParameters", {})
|
||||
self.execResultLabel = data.get("execResultLabel", "")
|
||||
self.status = data.get("status", "PENDING")
|
||||
self.result = ""
|
||||
self.error = ""
|
||||
|
||||
def setSuccess(self):
|
||||
self.status = "COMPLETED"
|
||||
|
||||
def setError(self, error):
|
||||
self.status = "FAILED"
|
||||
self.error = error
|
||||
|
||||
def isSuccessful(self):
|
||||
return self.status == "COMPLETED"
|
||||
|
||||
return MockTaskAction(action_data)
|
||||
|
||||
def createChatDocument(self, document_data):
|
||||
"""Mock document creation"""
|
||||
class MockChatDocument:
|
||||
def __init__(self, data):
|
||||
self.fileId = data.get("fileId", "")
|
||||
self.filename = data.get("filename", "unknown")
|
||||
self.fileSize = data.get("fileSize", 0)
|
||||
self.mimeType = data.get("mimeType", "application/octet-stream")
|
||||
self.content = ""
|
||||
|
||||
return MockChatDocument(document_data)
|
||||
|
||||
def createWorkflowMessage(self, message_data):
|
||||
"""Mock message creation"""
|
||||
class MockWorkflowMessage:
|
||||
def __init__(self, data):
|
||||
self.workflowId = data.get("workflowId", "")
|
||||
self.role = data.get("role", "assistant")
|
||||
self.message = data.get("message", "")
|
||||
self.status = data.get("status", "step")
|
||||
self.sequenceNr = data.get("sequenceNr", 1)
|
||||
self.publishedAt = data.get("publishedAt", "")
|
||||
self.actionId = data.get("actionId", "")
|
||||
self.actionMethod = data.get("actionMethod", "")
|
||||
self.actionName = data.get("actionName", "")
|
||||
self.documentsLabel = data.get("documentsLabel", "")
|
||||
self.documents = data.get("documents", [])
|
||||
|
||||
return MockWorkflowMessage(message_data)
|
||||
|
||||
class MockServiceContainer:
|
||||
"""Mock service container for testing"""
|
||||
|
||||
def __init__(self, user, workflow):
|
||||
self.user = user
|
||||
self.workflow = workflow
|
||||
|
||||
def getMethodsList(self):
|
||||
"""Mock methods list"""
|
||||
return ["document.extract(documentList, aiPrompt)", "document.analyze(documentList, aiPrompt)"]
|
||||
|
||||
async def summarizeChat(self, messages):
|
||||
"""Mock chat summarization"""
|
        return "Mock chat history summary"

    def getDocumentReferenceList(self):
        """Mock document references"""
        return {
            'chat': [],
            'history': []
        }

    def getConnectionReferenceList(self):
        """Mock connection references"""
        return ["connection1", "connection2"]

    def getFileInfo(self, fileId):
        """Mock file info"""
        return {
            "filename": f"test_file_{fileId}.txt",
            "size": 1024,
            "mimeType": "text/plain"
        }

    def createFile(self, fileName, mimeType, content, base64encoded=False):
        """Mock file creation"""
        return f"file_id_{fileName}"

    def createDocument(self, fileName, mimeType, content, base64encoded=False):
        """Mock document creation"""
        class MockDocument:
            def __init__(self, name, mime, cont):
                self.filename = name
                self.mimeType = mime
                self.content = cont
                self.fileSize = len(cont)

        return MockDocument(fileName, mimeType, content)

    def getFileExtension(self, filename):
        """Mock file extension extraction"""
        return filename.split('.')[-1] if '.' in filename else 'txt'

    def getMimeTypeFromExtension(self, extension):
        """Mock MIME type detection"""
        mime_types = {
            'txt': 'text/plain',
            'pdf': 'application/pdf',
            'doc': 'application/msword',
            'json': 'application/json'
        }
        return mime_types.get(extension, 'application/octet-stream')

    def detectContentTypeFromData(self, file_bytes, filename):
        """Mock content type detection"""
        if filename.endswith('.txt'):
            return 'text/plain'
        elif filename.endswith('.pdf'):
            return 'application/pdf'
        elif filename.endswith('.json'):
            return 'application/json'
        return 'application/octet-stream'

    async def callAiTextBasic(self, prompt):
        """Mock AI call"""
        return '{"actions": [{"method": "document", "action": "extract", "parameters": {"documentList": ["test"], "aiPrompt": "Test prompt"}, "resultLabel": "task1_action1_test", "description": "Test action"}]}'

    async def callAiTextAdvanced(self, prompt):
        """Mock advanced AI call"""
        return '{"overview": "Test plan", "tasks": [{"id": "task_1", "description": "Test task", "dependencies": [], "expected_outputs": ["output1"], "success_criteria": ["criteria1"], "required_documents": [], "estimated_complexity": "low", "ai_prompt": "Test prompt"}]}'

    async def executeAction(self, methodName, actionName, parameters):
        """Mock action execution"""
        class MockResult:
            def __init__(self):
                self.success = True
                self.data = {
                    "result": "Mock execution result",
                    "documents": []
                }
                self.error = None

        return MockResult()


async def test_retry_enhancement():
    """Test the retry enhancement functionality"""
    logger.info("Testing retry enhancement in managerChat.py")

    # Create mock objects
    mock_user = User(id="test_user", username="testuser", email="test@example.com", mandateId="test_mandate")
    mock_chat_objects = MockChatObjects()
    mock_workflow = ChatWorkflow(
        id="test_workflow",
        userId="test_user",
        status="active",
        messages=[],
        createdAt="2024-01-01T00:00:00Z",
        updatedAt="2024-01-01T00:00:00Z",
        mandateId="test_mandate",
        currentRound=1,
        lastActivity="2024-01-01T00:00:00Z",
        startedAt="2024-01-01T00:00:00Z"
    )

    # Create chat manager
    chat_manager = ChatManager(mock_user, mock_chat_objects)

    # Mock the service container directly instead of initializing
    chat_manager.service = MockServiceContainer(mock_user, mock_workflow)
    chat_manager.workflow = mock_workflow

    # Test 1: Basic action definition without retry
    logger.info("Test 1: Basic action definition")
    task_step = {
        "id": "task_1",
        "description": "Test task",
        "expected_outputs": ["output1"],
        "success_criteria": ["criteria1"],
        "ai_prompt": "Test AI prompt"
    }

    actions = await chat_manager.defineTaskActions(task_step, mock_workflow, [])
    logger.info(f"Generated {len(actions)} actions without retry context")

    # Test 2: Action definition with retry context
    logger.info("Test 2: Action definition with retry context")
    enhanced_context = {
        'task_step': task_step,
        'workflow': mock_workflow,
        'workflow_id': mock_workflow.id,
        'available_documents': ["test_doc.txt"],
        'previous_results': ["task0_action1_results"],
        'improvements': "Previous attempt failed - ensure comprehensive extraction",
        'retry_count': 1,
        'previous_action_results': [
            {
                'actionMethod': 'document',
                'actionName': 'extract',
                'status': 'failed',
                'error': 'Empty result returned',
                'result': 'No content extracted',
                'resultLabel': 'task1_action1_failed'
            }
        ],
        'previous_review_result': {
            'status': 'retry',
            'reason': 'Incomplete extraction',
            'quality_score': 3,
            'missing_outputs': ['detailed_analysis'],
            'unmet_criteria': ['comprehensive_coverage']
        }
    }

    retry_actions = await chat_manager.defineTaskActions(task_step, mock_workflow, [], enhanced_context)
    logger.info(f"Generated {len(retry_actions)} actions with retry context")

    # Test 3: Verify retry context is properly handled
    logger.info("Test 3: Verifying retry context handling")

    # Create a test prompt to see if retry context is included
    test_prompt = await chat_manager._createActionDefinitionPrompt(enhanced_context)

    # Check if retry context is in the prompt
    if "RETRY CONTEXT" in test_prompt:
        logger.info("✓ Retry context properly included in prompt")
    else:
        logger.error("✗ Retry context not found in prompt")

    if "Previous action results that failed" in test_prompt:
        logger.info("✓ Previous action results included in prompt")
    else:
        logger.error("✗ Previous action results not found in prompt")

    if "Previous review feedback" in test_prompt:
        logger.info("✓ Previous review feedback included in prompt")
    else:
        logger.error("✗ Previous review feedback not found in prompt")

    if "Previous attempt failed" in test_prompt:
        logger.info("✓ Improvements needed included in prompt")
    else:
        logger.error("✗ Improvements needed not found in prompt")

    # Test 4: Verify fallback actions with retry context
    logger.info("Test 4: Testing fallback actions with retry context")
    fallback_actions = chat_manager._createFallbackActions(task_step, enhanced_context)
    logger.info(f"Generated {len(fallback_actions)} fallback actions with retry context")

    # Check if fallback actions include retry information
    if any("retry" in action.get("resultLabel", "") for action in fallback_actions):
        logger.info("✓ Fallback actions include retry information")
    else:
        logger.error("✗ Fallback actions missing retry information")

    logger.info("Retry enhancement test completed successfully!")


if __name__ == "__main__":
    asyncio.run(test_retry_enhancement())
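As an aside on the MIME-detection mocks above (`getMimeTypeFromExtension`, `detectContentTypeFromData`): Python's standard library already offers an extension-based lookup with an equivalent fallback, so a future replacement for these stubs could lean on it. A minimal sketch, assuming only the standard library (not part of the deleted test):

```python
# Illustrative sketch only: extension-based MIME lookup via the standard library,
# mirroring what the mock helpers above hard-code.
import mimetypes

def guess_mime(filename: str) -> str:
    # guess_type returns (type, encoding); type is None for unknown extensions.
    mime, _encoding = mimetypes.guess_type(filename)
    return mime or "application/octet-stream"

print(guess_mime("report.pdf"))   # application/pdf
print(guess_mime("notes.txt"))    # text/plain
print(guess_mime("archive.xyz"))  # application/octet-stream (fallback)
```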
@ -1,47 +0,0 @@
PowerOn System Architecture Overview

This document provides a comprehensive overview of the PowerOn system architecture, including its key components, data flow, and technical specifications.

MAJOR TOPICS:

1. System Architecture
   - Frontend Agents: Web-based user interface components
   - Gateway: Central API and workflow management system
   - Database: JSON-based data storage with component interfaces
   - AI Integration: Anthropic and OpenAI connectors for intelligent processing

2. Core Components
   - Document Manager: Handles file processing and content extraction
   - Workflow Manager: Orchestrates complex business processes
   - Service Container: Provides unified access to all system services
   - Neutralizer: Data anonymization and privacy protection

3. Data Flow Architecture
   - User authentication and authorization
   - Document upload and processing pipeline
   - AI-powered content analysis and extraction
   - Workflow execution and task management
   - Result generation and storage

4. Technical Specifications
   - Python-based backend with async/await support
   - RESTful API design with JSON data exchange
   - Modular component architecture
   - Extensible method system for business logic
   - Comprehensive logging and monitoring

5. Security Features
   - Multi-authentication authority support (Local, Microsoft, Google)
   - Token-based session management
   - Data encryption and anonymization
   - Role-based access control
   - Audit trail and compliance features

6. Integration Capabilities
   - SharePoint document management
   - Email system integration (Outlook)
   - Web crawling and data collection
   - AI service integration (Anthropic, OpenAI)
   - Custom method development framework

The PowerOn system is designed to provide a comprehensive platform for intelligent document processing, workflow automation, and AI-powered business process management. It combines modern web technologies with advanced AI capabilities to deliver a robust and scalable solution for enterprise document management and workflow automation.
@ -1,23 +0,0 @@
#!/usr/bin/env python3

from modules.workflow.serviceContainer import ServiceContainer
from modules.interfaces.interfaceAppObjects import User
from modules.interfaces.interfaceChatModel import ChatWorkflow


def test_signatures():
    user = User(id='test', mandateId='test', username='test', email='test@test.com',
                fullName='Test User', enabled=True, language='en', privilege='user',
                authenticationAuthority='local')
    workflow = ChatWorkflow(id='test', mandateId='test', status='running', name='Test',
                            currentRound=1, lastActivity='2025-01-01T00:00:00Z',
                            startedAt='2025-01-01T00:00:00Z', logs=[], messages=[],
                            stats=None, tasks=[])
    service = ServiceContainer(user, workflow)

    print("Method signatures:")
    methodList = service.getMethodsList()
    for sig in methodList[:5]:  # Show first 5
        print(f"  {sig}")


if __name__ == "__main__":
    test_signatures()
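The script above relies on `ServiceContainer.getMethodsList()`, whose return format is not visible in this diff. For context only, a generic way to build comparable signature strings for any object is the standard `inspect` module; the sketch below is illustrative and does not reflect the real `getMethodsList()` implementation:

```python
# Hedged sketch: list public method signatures of an arbitrary object with inspect.
# Not the ServiceContainer API; shown only to illustrate what such a listing can look like.
import inspect

def list_method_signatures(obj) -> list[str]:
    signatures = []
    for name, member in inspect.getmembers(obj, callable):
        if name.startswith("_"):
            continue  # skip private/dunder members
        try:
            signatures.append(f"{name}{inspect.signature(member)}")
        except (TypeError, ValueError):
            signatures.append(f"{name}(...)")  # some builtins have no introspectable signature
    return signatures

# Example with a throwaway class:
class Demo:
    def greet(self, name: str) -> str:
        return f"hello {name}"

for sig in list_method_signatures(Demo())[:5]:
    print(f"  {sig}")
```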
488 test_workflow.py
@ -1,488 +0,0 @@
#!/usr/bin/env python3
"""
Test routine for WorkflowManager.workflowProcess() with new unified workflow architecture
"""

import asyncio
import sys
import os
import json
from datetime import datetime, UTC, timedelta
import uuid
from typing import List

print("Starting test_workflow.py...")

# Configure logging FIRST, before any other imports
import logging

# Clear any existing handlers to avoid duplicate logs
for handler in logging.root.handlers[:]:
    logging.root.removeHandler(handler)

logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
    handlers=[
        logging.StreamHandler(sys.stdout),
        logging.FileHandler('test_workflow.log', mode='w', encoding='utf-8')  # 'w' mode clears the file
    ],
    force=True  # Force reconfiguration even if already configured
)

# Filter out httpcore messages
logging.getLogger('httpcore').setLevel(logging.WARNING)
logging.getLogger('httpx').setLevel(logging.WARNING)

logger = logging.getLogger(__name__)

# Set up test configuration
os.environ['POWERON_CONFIG_FILE'] = 'test_config.ini'
print("Set POWERON_CONFIG_FILE environment variable")

try:
    # Simple imports from modules (same as app.py)
    from modules.interfaces.interfaceAppObjects import User, UserConnection
    from modules.interfaces.interfaceChatObjects import ChatObjects
    from modules.interfaces.interfaceChatModel import UserInputRequest, ChatWorkflow
    from modules.workflow.managerWorkflow import WorkflowManager
    print("All imports successful")
except Exception as e:
    print(f"Import error: {e}")
    import traceback
    traceback.print_exc()
    sys.exit(1)


def log_workflow_debug(message: str, data: dict = None):
    """Log workflow debug data with JSON dumps"""
    timestamp = datetime.now(UTC).isoformat()
    if data:
        logger.debug(f"[{timestamp}] {message}\n{json.dumps(data, indent=2, ensure_ascii=False)}")
    else:
        logger.debug(f"[{timestamp}] {message}")


def create_test_user() -> User:
    """Create a test user for the workflow"""
    return User(
        id="test-user-001",
        mandateId="test-mandate-001",
        username="testuser",
        email="test@example.com",
        fullName="Test User",
        enabled=True,
        language="en",
        privilege="user",
        authenticationAuthority="local"
    )


def create_test_workflow() -> ChatWorkflow:
    """Create a test workflow"""
    return ChatWorkflow(
        id="test-workflow-001",
        mandateId="test-mandate-001",
        status="running",
        name="Candidate Evaluation and Selection Workflow",
        currentRound=1,
        lastActivity=datetime.now(UTC).isoformat(),
        startedAt=datetime.now(UTC).isoformat(),
        logs=[],
        messages=[],
        stats=None,
        tasks=[]
    )


def create_test_user_input() -> UserInputRequest:
    """Create test user input with a candidate evaluation task"""
    return UserInputRequest(
        prompt="""I have following list of job profiles from candidates (3 job profiles as text files) and want to know, who is best suited for the position of product designer (file with criteria). Create an evaluation matrix and rate all candidates according to the matrix, then produce a presentation for the management to decide and store it on the SharePoint for an available account.

Please ensure the evaluation includes:
- Technical skills assessment
- Experience level evaluation
- Cultural fit analysis
- Portfolio quality review
- Communication skills assessment
- Overall suitability score

The output should be suitable for executive review and include both detailed analysis and clear recommendations.""",
        listFileId=["candidate_1_profile.txt", "candidate_2_profile.txt", "candidate_3_profile.txt", "product_designer_criteria.txt"],
        userLanguage="en"
    )


def create_test_files(chat_interface) -> List[str]:
    """Create test files in the database for candidate evaluation"""
    test_files = []

    # Import the component interface
    from modules.interfaces.interfaceComponentObjects import getInterface as getComponentObjects

    # Get component interface with the same user context
    component_interface = getComponentObjects(chat_interface.currentUser)

    # Candidate 1 Profile
    candidate_1_content = """CANDIDATE 1: Sarah Johnson
Position: Senior Product Designer
Experience: 8 years

TECHNICAL SKILLS:
- Figma, Sketch, Adobe Creative Suite (Expert)
- Prototyping tools: Framer, Principle (Advanced)
- Design systems and component libraries (Expert)
- User research and usability testing (Advanced)
- HTML/CSS/JavaScript basics (Intermediate)

EXPERIENCE:
- Senior Product Designer at TechCorp (3 years)
- Product Designer at StartupXYZ (3 years)
- UI/UX Designer at DesignAgency (2 years)

PORTFOLIO HIGHLIGHTS:
- Redesigned e-commerce platform increasing conversion by 25%
- Created comprehensive design system for 50+ product team
- Led user research for mobile banking app with 1M+ users

COMMUNICATION SKILLS:
- Excellent presentation skills
- Experience presenting to C-level executives
- Strong stakeholder management
- Mentored 5 junior designers

CULTURAL FIT:
- Collaborative team player
- Proactive problem solver
- Adapts quickly to new environments
- Values user-centered design approach"""

    # Candidate 2 Profile
    candidate_2_content = """CANDIDATE 2: Michael Chen
Position: Product Designer
Experience: 5 years

TECHNICAL SKILLS:
- Figma, Sketch, Adobe Creative Suite (Advanced)
- Prototyping tools: InVision, Marvel (Intermediate)
- Design systems (Intermediate)
- User research (Intermediate)
- No coding experience

EXPERIENCE:
- Product Designer at MidSizeTech (3 years)
- Junior Designer at CreativeStudio (2 years)

PORTFOLIO HIGHLIGHTS:
- Designed mobile app for local restaurant chain
- Created brand identity for startup
- Improved user flow for SaaS dashboard

COMMUNICATION SKILLS:
- Good presentation skills
- Works well in small teams
- Some experience with stakeholders
- Learning to mentor others

CULTURAL FIT:
- Quiet but dedicated worker
- Detail-oriented
- Prefers structured environments
- Focuses on visual design quality"""

    # Candidate 3 Profile
    candidate_3_content = """CANDIDATE 3: Emma Rodriguez
Position: UX/UI Designer
Experience: 6 years

TECHNICAL SKILLS:
- Figma, Sketch, Adobe Creative Suite (Advanced)
- Prototyping tools: Framer, Axure (Advanced)
- Design systems (Advanced)
- User research and analytics (Expert)
- Basic React/JavaScript (Intermediate)

EXPERIENCE:
- UX/UI Designer at EnterpriseCorp (4 years)
- UX Designer at ConsultingFirm (2 years)

PORTFOLIO HIGHLIGHTS:
- Led UX research for enterprise software used by 10K+ users
- Implemented data-driven design improvements increasing user satisfaction by 30%
- Created accessibility-compliant design system
- Conducted international user research studies

COMMUNICATION SKILLS:
- Outstanding presentation and storytelling skills
- Experience with international stakeholders
- Strong analytical communication
- Excellent at translating user insights to business value

CULTURAL FIT:
- Natural leader and team motivator
- Strategic thinker
- Adapts well to change
- Passionate about user advocacy"""

    # Product Designer Criteria
    criteria_content = """PRODUCT DESIGNER POSITION CRITERIA
Company: ValueOn
Department: Product Development
Level: Senior

REQUIRED SKILLS:
- Expert proficiency in Figma and modern design tools
- Strong understanding of user-centered design principles
- Experience with design systems and component libraries
- Ability to conduct user research and usability testing
- Basic understanding of front-end development (HTML/CSS/JavaScript)

REQUIRED EXPERIENCE:
- Minimum 5 years in product design
- Experience working with cross-functional teams
- Portfolio demonstrating complex product design solutions
- Experience with SaaS or enterprise software preferred

COMMUNICATION REQUIREMENTS:
- Excellent presentation skills
- Ability to communicate design decisions to stakeholders
- Experience presenting to management/executives
- Strong collaboration and feedback skills

CULTURAL FIT:
- Team-oriented and collaborative
- Proactive and self-motivated
- Adaptable to fast-paced environment
- Passionate about user experience

RESPONSIBILITIES:
- Lead design for core product features
- Collaborate with product managers and engineers
- Conduct user research and usability testing
- Create and maintain design system
- Present design solutions to stakeholders
- Mentor junior designers

EVALUATION WEIGHTS:
- Technical Skills: 30%
- Experience: 25%
- Communication: 20%
- Cultural Fit: 15%
- Portfolio Quality: 10%"""

    # Create files in database
    file_contents = [
        ("candidate_1_profile.txt", candidate_1_content),
        ("candidate_2_profile.txt", candidate_2_content),
        ("candidate_3_profile.txt", candidate_3_content),
        ("product_designer_criteria.txt", criteria_content)
    ]

    for filename, content in file_contents:
        try:
            # Create file in database using the component interface
            file_item = component_interface.saveUploadedFile(
                fileContent=content.encode('utf-8'),
                fileName=filename
            )
            test_files.append(file_item.id)
            log_workflow_debug(f"Created test file: {filename}", {
                "file_id": file_item.id,
                "filename": filename,
                "content_length": len(content)
            })
        except Exception as e:
            log_workflow_debug(f"Error creating test file {filename}: {str(e)}")
            # Create a dummy file ID if creation fails
            test_files.append(f"file_{filename.replace('.', '_')}")

    return test_files

async def test_workflow_process():
    """Test the workflowProcess function with new unified workflow architecture"""
    print("Inside test_workflow_process()")
    try:
        logger.info("=== STARTING UNIFIED WORKFLOW PROCESS TEST ===")

        # Create test data
        test_user = create_test_user()
        test_workflow = create_test_workflow()
        test_user_input = create_test_user_input()

        log_workflow_debug("Test data created", {
            "user_id": test_user.id,
            "workflow_id": test_workflow.id,
            "user_input_prompt": test_user_input.prompt,
            "file_ids": test_user_input.listFileId
        })

        # Create test user in database through AppObjects interface
        from modules.interfaces.interfaceAppObjects import getRootInterface
        from modules.interfaces.interfaceAppModel import AuthAuthority, ConnectionStatus, Token, UserPrivilege

        root_interface = getRootInterface()
        created_user = root_interface.createUser(
            username=test_user.username,
            password="testpassword123",  # Required for local authentication
            email=test_user.email,
            fullName=test_user.fullName,
            language=test_user.language,
            enabled=test_user.enabled,
            privilege=UserPrivilege.USER,
            authenticationAuthority=AuthAuthority.LOCAL
        )
        log_workflow_debug("Created test user in database", {
            "user_id": created_user.id,
            "username": created_user.username,
            "email": created_user.email
        })

        # Create test connection through AppObjects interface
        from modules.interfaces.interfaceAppObjects import getInterface as getAppObjects
        app_interface = getAppObjects(created_user)
        test_connection = app_interface.addUserConnection(
            userId=created_user.id,
            authority=AuthAuthority.MSFT,
            externalId="msft-user-123",
            externalUsername="testuser@example.com",
            externalEmail="testuser@example.com",
            status=ConnectionStatus.ACTIVE
        )
        log_workflow_debug("Created test connection", {
            "connection_id": test_connection.id,
            "authority": test_connection.authority,
            "external_username": test_connection.externalUsername
        })

        # Create test token for the connection
        test_token = Token(
            userId=created_user.id,
            authority=AuthAuthority.MSFT,
            tokenAccess="test-access-token-123",
            tokenRefresh="test-refresh-token-456",
            tokenType="bearer",
            expiresAt=datetime.now(UTC).timestamp() + 3600,  # 1 hour from now
            createdAt=datetime.now(UTC)
        )
        app_interface.saveToken(test_token)
        log_workflow_debug("Created test token", {
            "token_id": test_token.id,
            "authority": test_token.authority,
            "expires_at": test_token.expiresAt
        })

        # Create test workflow in database through ChatObjects interface
        from modules.interfaces.interfaceChatObjects import getInterface as getChatObjects

        chat_interface = getChatObjects(created_user)
        workflow_data = {
            "name": test_workflow.name,
            "status": test_workflow.status,
            "mandateId": created_user.mandateId,
            "currentRound": test_workflow.currentRound,
            "startedAt": test_workflow.startedAt,
            "lastActivity": test_workflow.lastActivity
        }
        created_workflow = chat_interface.createWorkflow(workflow_data)
        log_workflow_debug("Created test workflow in database", {
            "workflow_id": created_workflow.id,
            "name": created_workflow.name,
            "status": created_workflow.status
        })

        # Update the test_workflow object with the created workflow's ID
        test_workflow.id = created_workflow.id

        # Create test files in database
        logger.info("Creating test files for candidate evaluation...")
        test_file_ids = create_test_files(chat_interface)
        log_workflow_debug("Test files created", {
            "file_count": len(test_file_ids),
            "file_ids": test_file_ids
        })

        # Update user input with real file IDs
        test_user_input.listFileId = test_file_ids
        log_workflow_debug("Updated user input with file IDs", {
            "file_ids": test_user_input.listFileId
        })

        # Initialize WorkflowManager
        workflow_manager = WorkflowManager(chat_interface, created_user)
        logger.info("WorkflowManager initialized")

        # Test the workflowProcess function
        logger.info("Calling workflowProcess with unified workflow architecture...")

        try:
            # Execute the unified workflow process
            await workflow_manager.workflowProcess(test_user_input, test_workflow)

            # Log workflow results
            log_workflow_debug("Workflow process completed successfully", {
                "workflow_id": test_workflow.id,
                "workflow_status": test_workflow.status,
                "message_count": len(test_workflow.messages),
                "final_messages": [
                    {
                        "role": msg.role,
                        "message": msg.message[:200] + "..." if len(msg.message) > 200 else msg.message,
                        "status": msg.status,
                        "sequence_nr": msg.sequenceNr
                    } for msg in test_workflow.messages[-3:]  # Last 3 messages
                ]
            })

            # Log detailed workflow messages
            for i, message in enumerate(test_workflow.messages):
                log_workflow_debug(f"WORKFLOW MESSAGE {i+1}:", {
                    "role": message.role,
                    "message": message.message,
                    "status": message.status,
                    "sequence_nr": message.sequenceNr,
                    "published_at": message.publishedAt,
                    "document_count": len(message.documents) if hasattr(message, 'documents') else 0
                })

        except Exception as e:
            import traceback
            error_details = {
                "error_type": type(e).__name__,
                "error_message": str(e),
                "error_args": e.args if hasattr(e, 'args') else None,
                "traceback": traceback.format_exc()
            }
            log_workflow_debug("WORKFLOW PROCESS EXCEPTION:", error_details)
            raise

        logger.info("=== UNIFIED WORKFLOW PROCESS TEST COMPLETED ===")
        return test_workflow

    except Exception as e:
        logger.error(f"❌ Test failed with error: {str(e)}")
        log_workflow_debug("Full error details", {
            "error_type": type(e).__name__,
            "error_message": str(e)
        })
        raise


async def main():
    print("Inside main()")
    logger.info("=" * 50)
    logger.info("CANDIDATE EVALUATION UNIFIED WORKFLOW TEST")
    logger.info("=" * 50)

    try:
        workflow = await test_workflow_process()
        logger.info("=" * 50)
        logger.info("TEST COMPLETED SUCCESSFULLY")
        logger.info("=" * 50)
        return workflow
    except Exception as e:
        logger.error("=" * 50)
        logger.error("TEST FAILED")
        logger.error("=" * 50)
        raise


if __name__ == "__main__":
    print("About to run main()")
    asyncio.run(main())
    print("main() finished")
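The `product_designer_criteria.txt` fixture above weights the evaluation as Technical Skills 30%, Experience 25%, Communication 20%, Cultural Fit 15%, and Portfolio Quality 10%. For readers checking the "overall suitability score" the test prompt asks the workflow to produce, a weighted sum over those categories is the natural reading; the sketch below uses made-up per-criterion scores purely for illustration and is not part of the deleted test:

```python
# Illustrative only: weighted suitability score using the evaluation weights
# from the criteria fixture. The candidate scores here are placeholders.
WEIGHTS = {
    "technical_skills": 0.30,
    "experience": 0.25,
    "communication": 0.20,
    "cultural_fit": 0.15,
    "portfolio_quality": 0.10,
}

def suitability_score(scores: dict) -> float:
    """Weighted sum of per-criterion scores, each on a 0-10 scale."""
    return sum(WEIGHTS[criterion] * scores.get(criterion, 0.0) for criterion in WEIGHTS)

example_scores = {
    "technical_skills": 8,
    "experience": 7,
    "communication": 9,
    "cultural_fit": 8,
    "portfolio_quality": 7,
}
print(round(suitability_score(example_scores), 2))  # 7.85
```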