stats table included

This commit is contained in:
ValueOn AG 2025-07-10 20:03:50 +02:00
parent 86fe43e987
commit d1aac4099d
30 changed files with 455 additions and 5169 deletions

View file

@ -1,114 +0,0 @@
# Document Extraction Test
This test procedure validates the DocumentManager's ability to extract content from files using AI-powered analysis.
## Files Created
- `test_document_extraction.py` - Main test script
- `test_sample_document.txt` - Sample document for testing
- `run_document_test.ps1` - PowerShell wrapper script
- `test_document_extraction.log` - Generated log file (cleared on each run)
## Usage
### Method 1: Using PowerShell Script (Recommended)
```powershell
# Test with default sample file
.\run_document_test.ps1
# Test with custom file
.\run_document_test.ps1 "path\to\your\document.pdf"
```
### Method 2: Direct Python Execution
```bash
# Test with default sample file
python test_document_extraction.py test_sample_document.txt
# Test with custom file
python test_document_extraction.py "path/to/your/document.docx"
```
## Test Features
1. **File Validation**: Checks if the specified file exists
2. **MIME Type Detection**: Automatically detects file type based on extension
3. **Content Extraction**: Uses the DocumentManager to extract content
4. **AI Processing**: Applies the prompt "summarize the content and give list of the major topics"
5. **Comprehensive Logging**: Logs all steps and results to `test_document_extraction.log`
6. **Log Cleanup**: Clears the log file on each test run
## Supported File Types
- Text files (.txt, .md)
- CSV files (.csv)
- JSON files (.json)
- XML files (.xml)
- HTML files (.html, .htm)
- Images (.jpg, .jpeg, .png, .gif, .svg)
- PDF files (.pdf)
- Office documents (.docx, .xlsx, .pptx)
- And more (fallback to binary processing)
## Test Output
The test generates detailed logs including:
- File information (path, size, MIME type)
- Extraction process details
- Extracted content summary
- AI-processed results
- Error details if any issues occur
## Example Output
```
=== STARTING DOCUMENT EXTRACTION TEST ===
File information: {
"file_path": "test_sample_document.txt",
"filename": "test_sample_document.txt",
"mime_type": "text/plain",
"file_size_bytes": 2048,
"file_size_mb": 0.0
}
Document extraction completed successfully: {
"extracted_content_id": "test-doc-1234567890",
"content_items_count": 1,
"object_type": "ExtractedContent"
}
COMPLETE EXTRACTED CONTENT: {
"total_length": 1500,
"content": "PowerOn System Architecture Overview... [AI processed summary]"
}
```
## Error Handling
The test includes comprehensive error handling for:
- File not found errors
- File reading errors
- Document processing errors
- AI processing errors
- Import errors
All errors are logged with detailed information for debugging.
## Configuration
The test uses the same configuration as other tests:
- Environment variable: `POWERON_CONFIG_FILE = 'test_config.ini'`
- Log file: `test_document_extraction.log`
- Log level: DEBUG
## Dependencies
The test requires the same dependencies as the main PowerOn system:
- Python 3.8+
- Required Python packages (see requirements.txt)
- Access to AI services (if AI processing is enabled)
- Proper configuration in test_config.ini

File diff suppressed because it is too large Load diff

View file

@ -1,537 +0,0 @@
"""
Documentation agent for generating structured documentation.
Provides comprehensive documentation generation capabilities.
"""
import logging
from typing import Dict, Any, List, Optional
import json
import re
from datetime import datetime
import os
import hashlib
import base64
import uuid
import shutil
from pathlib import Path
import traceback
import sys
import importlib.util
import inspect
from pydantic import BaseModel
from modules.workflow.agentBase import AgentBase
from modules.interfaces.serviceChatModel import ChatContent
logger = logging.getLogger(__name__)
class AgentDocumentation(AgentBase):
    """AI-driven agent for creating documentation and structured content using multi-step generation"""

    def __init__(self):
        """Initialize the documentation agent"""
        super().__init__()
        # Identity and UI metadata used by the workflow framework.
        self.name = "documentation"
        self.label = "Documentation"
        self.description = "Creates structured documentation, reports, and content using AI with multi-step generation"
        # Capability tags advertised to the task router.
        self.capabilities = [
            "report_generation",
            "documentation",
            "content_structuring",
            "technical_writing",
            "knowledge_organization"
        ]

    def setDependencies(self, serviceBase=None):
        """Set external dependencies for the agent."""
        # Delegates to AgentBase.setService; presumably this stores the service
        # container later read as ``self.service`` — TODO confirm in AgentBase.
        self.setService(serviceBase)
async def processTask(self, task: Dict[str, Any]) -> Dict[str, Any]:
    """
    Process a task by focusing on required outputs and using AI to generate them.

    Args:
        task: Task dictionary with prompt, inputDocuments, outputSpecifications

    Returns:
        Dictionary with ``feedback`` (str) and ``documents`` (list)
    """
    try:
        # Pull the relevant fields out of the task payload.
        taskPrompt = task.get("prompt", "")
        sourceDocuments = task.get("inputDocuments", [])
        requestedOutputs = task.get("outputSpecifications", [])

        # An AI backend is mandatory for this agent.
        if not self.service or not self.service.base:
            return {
                "feedback": "The Documentation agent requires an AI service to function.",
                "documents": []
            }

        # Only the extracted text of the input documents is relevant here.
        contextText = self._extractDocumentContext(sourceDocuments)

        # Let the AI draft a plan covering structure, tone and format.
        plan = await self._analyzeTask(taskPrompt, contextText, requestedOutputs)
        logger.debug(f"Documentation plan: {plan}")

        # Without explicit output specs, fall back to a single default document
        # named after the plan's title and recommended format.
        if not requestedOutputs:
            fallbackFormat = plan.get("recommendedFormat", "markdown")
            cleanTitle = self._sanitizeFilename(plan.get("title", "Documentation"))
            requestedOutputs = [
                {"label": f"{cleanTitle}.{fallbackFormat}", "description": "Comprehensive documentation"}
            ]

        # Generate one document per output specification.
        producedDocuments = []
        for outputSpec in requestedOutputs:
            producedDocuments.append(
                await self._createDocumentMultiStep(
                    taskPrompt,
                    contextText,
                    outputSpec.get("label", ""),
                    outputSpec.get("description", ""),
                    plan
                )
            )

        return {
            "feedback": plan.get("feedback", f"Created {len(producedDocuments)} documents based on your requirements."),
            "documents": producedDocuments
        }
    except Exception as e:
        logger.error(f"Error in documentation generation: {str(e)}", exc_info=True)
        return {
            "feedback": f"Error during documentation generation: {str(e)}",
            "documents": []
        }
def _extractDocumentContext(self, documents: List[Dict[str, Any]]) -> str:
"""
Extract context from input documents, focusing on dataExtracted.
Args:
documents: List of document objects
Returns:
Extracted context as text
"""
contextParts = []
for doc in documents:
docName = doc.get("name", "unnamed")
if doc.get("ext"):
docName = f"{docName}.{doc.get('ext')}"
contextParts.append(f"\n\n--- {docName} ---\n")
# Process contents for dataExtracted
for content in doc.get("contents", []):
if content.get("dataExtracted"):
contextParts.append(content.get("dataExtracted", ""))
return "\n".join(contextParts)
def _sanitizeFilename(self, filename: str) -> str:
"""
Sanitize a filename by removing invalid characters.
Args:
filename: Filename to sanitize
Returns:
Sanitized filename
"""
# Replace invalid characters with underscores
invalidChars = r'<>:"/\|?*'
for char in invalidChars:
filename = filename.replace(char, '_')
# Trim filename if too long
if len(filename) > 100:
filename = filename[:97] + "..."
return filename
async def _analyzeTask(self, prompt: str, context: str, outputSpecs: List) -> Dict:
    """
    Use AI to analyze the task and create a documentation plan.

    Args:
        prompt: The task prompt
        context: Document context
        outputSpecs: Output specifications

    Returns:
        Documentation plan dictionary. When the AI response contains no
        parseable JSON, or the call raises, a static fallback plan is
        returned instead (see _fallbackDocumentationPlan).
    """
    analysisPrompt = f"""
    Analyze this documentation task and create a detailed plan.
    TASK: {prompt}
    DOCUMENT CONTEXT SAMPLE:
    {context[:1000]}... (truncated)
    OUTPUT REQUIREMENTS:
    {json.dumps(outputSpecs, indent=2)}
    Create a detailed documentation plan in JSON format with the following structure:
    {{
        "title": "Document Title",
        "documentType": "report|manual|guide|whitepaper|etc",
        "audience": "technical|general|executive|etc",
        "detailedStructure": [
            {{
                "title": "Chapter/Section Title",
                "keyPoints": ["point1", "point2", ...],
                "subsections": ["subsection1", "subsection2", ...],
                "importance": "high|medium|low",
                "estimatedLength": "short|medium|long"
            }},
            ... more sections ...
        ],
        "keyTopics": ["topic1", "topic2", ...],
        "tone": "formal|conversational|instructional|etc",
        "recommendedFormat": "markdown|html|text|etc",
        "formattingRequirements": ["requirement1", "requirement2", ...],
        "executiveSummary": "Brief description of what the document will cover",
        "feedback": "Brief message explaining the documentation approach"
    }}
    Only return valid JSON. No preamble or explanations.
    """
    try:
        response = await self.service.base.callAi([
            {"role": "system", "content": "You are a documentation expert. Respond with valid JSON only."},
            {"role": "user", "content": analysisPrompt}
        ])
        # Tolerate prose around the JSON by slicing the outermost braces.
        jsonStart = response.find('{')
        jsonEnd = response.rfind('}') + 1
        if jsonStart >= 0 and jsonEnd > jsonStart:
            return json.loads(response[jsonStart:jsonEnd])
        # No JSON found in the response — use the canned plan. The "(DEFAULT)"
        # marker distinguishes this path from the exception path below.
        return self._fallbackDocumentationPlan("Documentation (DEFAULT)")
    except Exception as e:
        logger.warning(f"Error creating documentation plan: {str(e)}")
        return self._fallbackDocumentationPlan("Documentation")

def _fallbackDocumentationPlan(self, title: str) -> Dict:
    """
    Return the static default plan used when AI planning is unavailable.

    Previously this ~30-line dict was duplicated verbatim in both failure
    branches of _analyzeTask (differing only in the title); it now lives in
    one place.

    Args:
        title: Title to embed in the plan

    Returns:
        Default documentation plan dictionary
    """
    return {
        "title": title,
        "documentType": "report",
        "audience": "general",
        "detailedStructure": [
            {
                "title": "Introduction",
                "keyPoints": ["Purpose", "Scope"],
                "subsections": [],
                "importance": "high",
                "estimatedLength": "short"
            },
            {
                "title": "Main Content",
                "keyPoints": ["Core Information"],
                "subsections": ["Key Findings", "Analysis"],
                "importance": "high",
                "estimatedLength": "long"
            },
            {
                "title": "Conclusion",
                "keyPoints": ["Summary", "Next Steps"],
                "subsections": [],
                "importance": "medium",
                "estimatedLength": "short"
            }
        ],
        "keyTopics": ["General Information"],
        "tone": "formal",
        "recommendedFormat": "markdown",
        "formattingRequirements": ["Clear headings", "Professional formatting"],
        "executiveSummary": "A comprehensive documentation covering the requested topics.",
        "feedback": "Created documentation based on your requirements."
    }
async def _createDocumentMultiStep(self, prompt: str, context: str, outputLabel: str,
                                   outputDescription: str, documentationPlan: Dict) -> ChatContent:
    """
    Create a document using a multi-step approach with separate AI calls for each section.

    Args:
        prompt: Original task prompt
        context: Document context
        outputLabel: Output filename
        outputDescription: Description of desired output
        documentationPlan: Documentation plan from AI

    Returns:
        ChatContent object; on failure, a small error document in the same
        format is returned instead of raising.
    """
    # Resolve the output format BEFORE the try block: the except handler
    # below reads formatType/contentType, and in the original code they were
    # assigned inside the try — an early failure therefore raised NameError
    # while building the error document, masking the real exception.
    formatType = outputLabel.split('.')[-1].lower() if '.' in outputLabel else "md"
    # Map format to contentType
    contentTypeMap = {
        "md": "text/markdown",
        "markdown": "text/markdown",
        "html": "text/html",
        "txt": "text/plain",
        "text": "text/plain",
        "json": "application/json",
        "csv": "text/csv"
    }
    contentType = contentTypeMap.get(formatType, "text/plain")
    try:
        # Pull plan metadata with safe defaults.
        title = documentationPlan.get("title", "Documentation")
        documentType = documentationPlan.get("documentType", "document")
        audience = documentationPlan.get("audience", "general")
        keyTopics = documentationPlan.get("keyTopics", [])
        detailedStructure = documentationPlan.get("detailedStructure", [])

        # Step 1: Generate executive summary
        summaryPrompt = f"""
        Create an executive summary for a {documentType} titled "{title}".
        DOCUMENT OVERVIEW:
        - Type: {documentType}
        - Audience: {audience}
        - Key Topics: {', '.join(keyTopics)}
        TASK CONTEXT: {prompt}
        The executive summary should:
        1. Provide a concise overview of the document's purpose
        2. Highlight key points and findings
        3. Be clear and engaging for the target audience
        4. Set expectations for the document's content
        Keep the summary brief but comprehensive.
        """
        executiveSummary = await self.service.base.callAi([
            {"role": "system", "content": f"You are a documentation expert creating an executive summary in {formatType} format."},
            {"role": "user", "content": summaryPrompt}
        ], produceUserAnswer = True)

        # Step 2: Generate introduction
        introPrompt = f"""
        Create an introduction for a {documentType} titled "{title}".
        DOCUMENT OVERVIEW:
        - Type: {documentType}
        - Audience: {audience}
        - Key Topics: {', '.join(keyTopics)}
        TASK CONTEXT: {prompt}
        The introduction should:
        1. Set the context and purpose of the document
        2. Outline the scope and objectives
        3. Preview the main topics to be covered
        4. Engage the reader's interest
        Format the introduction according to {formatType} standards.
        """
        introduction = await self.service.base.callAi([
            {"role": "system", "content": f"You are a documentation expert creating an introduction in {formatType} format."},
            {"role": "user", "content": introPrompt}
        ], produceUserAnswer = True)

        # Step 3: Generate main sections, one AI call per planned section.
        sections = []
        for section in detailedStructure:
            sectionTitle = section.get("title", "Section")
            keyPoints = section.get("keyPoints", [])
            subsections = section.get("subsections", [])
            importance = section.get("importance", "medium")
            estimatedLength = section.get("estimatedLength", "medium")
            sectionPrompt = f"""
            Create the {sectionTitle} section for a {documentType} titled "{title}".
            SECTION DETAILS:
            - Title: {sectionTitle}
            - Key Points: {', '.join(keyPoints)}
            - Subsections: {', '.join(subsections)}
            - Importance: {importance}
            - Estimated Length: {estimatedLength}
            DOCUMENT CONTEXT:
            - Type: {documentType}
            - Audience: {audience}
            - Key Topics: {', '.join(keyTopics)}
            TASK CONTEXT: {prompt}
            The section should:
            1. Cover all key points thoroughly
            2. Include relevant subsections
            3. Maintain appropriate depth based on importance
            4. Follow the document's tone and style
            Format the section according to {formatType} standards.
            """
            sectionContent = await self.service.base.callAi([
                {"role": "system", "content": f"You are a documentation expert creating a section in {formatType} format."},
                {"role": "user", "content": sectionPrompt}
            ], produceUserAnswer = True)
            sections.append(sectionContent)

        # Step 4: Generate conclusion
        conclusionPrompt = f"""
        Create the conclusion for a {documentType} titled "{title}".
        DOCUMENT OVERVIEW:
        - Type: {documentType}
        - Audience: {audience}
        - Key Topics: {', '.join(keyTopics)}
        TASK CONTEXT: {prompt}
        This conclusion should:
        1. Summarize the key points covered in the document
        2. Provide closure to the topics discussed
        3. Include any relevant recommendations or next steps
        4. Leave the reader with a clear understanding of the document's significance
        The conclusion should be professional and impactful, formatted according to {formatType} standards.
        """
        conclusion = await self.service.base.callAi([
            {"role": "system", "content": f"You are a documentation expert creating a conclusion in {formatType} format."},
            {"role": "user", "content": conclusionPrompt}
        ], produceUserAnswer = True)

        # Step 5: Assemble the complete document in the requested format.
        documentContent = self._assembleDocument(
            formatType, title, executiveSummary, introduction,
            sections, conclusion, detailedStructure
        )
        # Create document object
        return self.formatAgentDocumentOutput(outputLabel, documentContent, contentType)
    except Exception as e:
        logger.error(f"Error creating document: {str(e)}", exc_info=True)
        # Create a simple error document in the already-resolved format.
        if formatType in ["md", "markdown"]:
            content = f"# Error in Documentation\n\nThere was an error generating the documentation: {str(e)}"
        elif formatType == "html":
            content = f"<html><body><h1>Error in Documentation</h1><p>There was an error generating the documentation: {str(e)}</p></body></html>"
        else:
            content = f"Error in Documentation\n\nThere was an error generating the documentation: {str(e)}"
        return self.formatAgentDocumentOutput(outputLabel, content, contentType)

def _assembleDocument(self, formatType: str, title: str, executiveSummary: str,
                      introduction: str, sections: List[str], conclusion: str,
                      detailedStructure: List[Dict]) -> str:
    """
    Stitch the generated parts into a single document string.

    Supports markdown and html explicitly, with a plain-text fallback for
    every other format. Extracted from _createDocumentMultiStep to keep the
    generation flow readable.

    Args:
        formatType: Lower-cased file extension ("md", "html", ...)
        title: Document title
        executiveSummary: Optional summary text (skipped when falsy)
        introduction: Introduction text
        sections: Generated section bodies, parallel to detailedStructure
        conclusion: Conclusion text
        detailedStructure: Plan sections providing titles for headings

    Returns:
        The assembled document content
    """
    if formatType in ["md", "markdown"]:
        # Markdown format
        documentContent = f"# {title}\n\n"
        if executiveSummary:
            documentContent += f"## Executive Summary\n\n{executiveSummary}\n\n"
        documentContent += f"{introduction}\n\n"
        for i, sectionContent in enumerate(sections):
            # Prepend a heading unless the AI already produced one.
            sectionTitle = detailedStructure[i].get("title", f"Section {i+1}")
            if not sectionContent.strip().startswith("#"):
                documentContent += f"## {sectionTitle}\n\n"
            documentContent += f"{sectionContent}\n\n"
        documentContent += f"## Conclusion\n\n{conclusion}\n"
    elif formatType == "html":
        # HTML format
        documentContent = f"<html>\n<head>\n<title>{title}</title>\n</head>\n<body>\n"
        documentContent += f"<h1>{title}</h1>\n\n"
        if executiveSummary:
            documentContent += f"<h2>Executive Summary</h2>\n<div>{executiveSummary}</div>\n\n"
        documentContent += f"<div>{introduction}</div>\n\n"
        for i, sectionContent in enumerate(sections):
            sectionTitle = detailedStructure[i].get("title", f"Section {i+1}")
            documentContent += f"<h2>{sectionTitle}</h2>\n<div>{sectionContent}</div>\n\n"
        documentContent += f"<h2>Conclusion</h2>\n<div>{conclusion}</div>\n"
        documentContent += "</body>\n</html>"
    else:
        # Plain text format
        documentContent = f"{title}\n{'=' * len(title)}\n\n"
        if executiveSummary:
            documentContent += f"EXECUTIVE SUMMARY\n{'-' * 17}\n\n{executiveSummary}\n\n"
        documentContent += f"{introduction}\n\n"
        for i, sectionContent in enumerate(sections):
            sectionTitle = detailedStructure[i].get("title", f"Section {i+1}")
            documentContent += f"{sectionTitle}\n{'-' * len(sectionTitle)}\n\n{sectionContent}\n\n"
        documentContent += f"CONCLUSION\n{'-' * 10}\n\n{conclusion}\n"
    return documentContent
# Factory function for the Documentation agent
def getAgentDocumentation():
    """Factory: build and return a new Documentation agent instance."""
    agent = AgentDocumentation()
    return agent

View file

@ -1,380 +0,0 @@
"""
Email Agent Module.
Handles email-related tasks using Microsoft Graph API.
"""
import logging
import json
from typing import Dict, Any, List, Optional, Tuple
import uuid
import os
from modules.workflow.agentBase import AgentBase
from modules.interfaces.serviceChatModel import Task, ChatDocument, ChatContent
logger = logging.getLogger(__name__)
class AgentEmail(AgentBase):
    """Agent for handling email-related tasks."""

    def __init__(self):
        """Initialize the email agent."""
        super().__init__()
        # Identity and UI metadata used by the workflow framework.
        self.name = "email"
        self.label = "Email Agent"
        self.description = "Handles email composition and sending using Microsoft Graph API"
        # Capability tags advertised to the task router.
        self.capabilities = [
            "email_composition",
            "email_draft_creation",
            "email_template_generation"
        ]
        # NOTE(review): processTask reads ``self.service`` (not this attribute);
        # presumably AgentBase initializes ``self.service`` — confirm wiring.
        self.serviceBase = None
def setDependencies(self, serviceBase=None):
    """Set external dependencies for the agent.

    Bug fix: this method only stored ``self.serviceBase``, but every use
    site in this class reads ``self.service`` (e.g. ``self.service.base``
    and ``self.service.msft`` in processTask), and the sibling agents wire
    dependencies via ``self.setService``. Do the same here, keeping the old
    attribute assignment for backward compatibility.

    Args:
        serviceBase: Service container providing AI and connector access
    """
    self.serviceBase = serviceBase
    self.setService(serviceBase)
async def processTask(self, task: Task) -> Dict[str, Any]:
    """
    Process an email-related task.

    Args:
        task: Task object; the fields used here are:
            - prompt: Instructions for the agent
            - filesInput: List of documents to process (become attachments)
            - filesOutput: List of required output document specs

    Returns:
        Dictionary containing:
            - feedback: Text response explaining what was done
            - documents: List of created documents
    """
    try:
        # Extract task information
        prompt = task.prompt
        inputDocuments = task.filesInput
        outputSpecs = task.filesOutput
        # Check AI service. Bug fix: also guard against self.service being
        # None (matches AgentDocumentation) so a missing service container
        # yields a clean message instead of an AttributeError.
        if not self.service or not self.service.base:
            return {
                "feedback": "The Email agent requires an AI service to function.",
                "documents": []
            }
        # Check if Microsoft connector is available
        if not hasattr(self.service, 'msft'):
            return {
                "feedback": "Microsoft connector not available. Please ensure Microsoft integration is properly configured.",
                "documents": []
            }
        # Get Microsoft token; without one, return a document that prompts
        # the frontend to start the auth flow.
        token_data = self.service.msft.getMsftToken()
        if not token_data:
            auth_doc = self._createFrontendAuthTriggerDocument()
            return {
                "feedback": "Microsoft authentication required. Please authenticate to continue.",
                "documents": [auth_doc]
            }
        # Extract document data from input
        documentContents, attachments = self._processInputDocuments(inputDocuments)
        # Generate email subject and body using AI
        emailTemplate = await self._generateEmailTemplate(prompt, documentContents)
        # Create HTML preview of the email
        htmlPreview = self._createHtmlPreview(emailTemplate)
        # Attempt to create a draft email using Microsoft Graph API
        draft_result = self.service.msft.createDraftEmail(
            emailTemplate["recipient"],
            emailTemplate["subject"],
            emailTemplate["htmlBody"],
            attachments
        )
        # Prepare output documents according to each requested label's extension.
        documents = []
        for spec in outputSpecs:
            label = spec.get("label", "")
            description = spec.get("description", "")
            if label.endswith(".html"):
                # Create the HTML template file
                templateDoc = self.formatAgentDocumentOutput(
                    label,
                    emailTemplate["htmlBody"],  # the actual HTML body, not the preview
                    "text/html"
                )
                documents.append(templateDoc)
            elif label.endswith(".json"):
                # Create JSON template if requested
                templateJson = json.dumps(emailTemplate, indent=2)
                templateDoc = self.formatAgentDocumentOutput(
                    label,
                    templateJson,
                    "application/json"
                )
                documents.append(templateDoc)
            else:
                # Default to preview for other cases
                previewDoc = self.formatAgentDocumentOutput(
                    label,
                    htmlPreview,
                    "text/html"
                )
                documents.append(previewDoc)
        # Prepare feedback message
        if draft_result:
            feedback = f"Email draft created successfully for {emailTemplate.get('recipient')}. The subject is: '{emailTemplate['subject']}'"
            if attachments:
                feedback += f" with {len(attachments)} attachment(s)"
            feedback += ". You can open and edit it in your Outlook draft folder."
        else:
            feedback = "Email template created but could not save as draft. HTML preview and template are available as documents."
        return {
            "feedback": feedback,
            "documents": documents
        }
    except Exception as e:
        logger.error(f"Error in email agent: {str(e)}")
        return {
            "feedback": f"Error processing email task: {str(e)}",
            "documents": []
        }
def _createFrontendAuthTriggerDocument(self) -> ChatDocument:
    """Create a document that triggers Microsoft authentication in the frontend."""
    # The same HTML snippet serves as both the document payload and the
    # single content item, so define it once.
    authHtml = """
    <div>
    <h2>Microsoft Authentication Required</h2>
    <p>Please click the button below to authenticate with Microsoft:</p>
    <button onclick="window.location.href='/api/auth/microsoft'">Authenticate with Microsoft</button>
    </div>
    """
    return ChatDocument(
        id=str(uuid.uuid4()),
        name="microsoft_auth",
        ext="html",
        data=authHtml,
        contents=[
            ChatContent(
                name="main",
                data=authHtml,
                summary="Microsoft authentication trigger page",
                metadata={
                    "contentType": "text/html",
                    "isText": True
                }
            )
        ]
    )
def _processInputDocuments(self, input_docs: List[ChatDocument]) -> Tuple[str, List[Dict[str, Any]]]:
    """
    Process input documents to extract content and prepare attachments.

    Args:
        input_docs: List of input documents

    Returns:
        Tuple of (document content text, list of attachments)
    """
    textParts = []
    attachmentList = []
    for document in input_docs:
        # Build a display name of the form "name.ext" when an extension exists.
        fullName = f"{document.name}.{document.ext}" if document.ext else document.name
        textParts.append(f"\n\n--- {fullName} ---\n")
        if document.data:
            # Metadata (when present) lives on the first content item.
            meta = document.contents[0].metadata if document.contents else {}
            attachmentList.append({
                "name": fullName,
                "document": {
                    "data": document.data,
                    "mimeType": meta.get("contentType", "application/octet-stream"),
                    "base64Encoded": meta.get("base64Encoded", False)
                }
            })
            textParts.append(f"Document attached: {fullName}")
        else:
            textParts.append(f"Document referenced: {fullName}")
    return "\n".join(textParts), attachmentList
def formatAgentDocumentOutput(self, filename: str, content: str, contentType: str) -> ChatDocument:
    """
    Format a document for agent output.

    Args:
        filename: Output filename
        content: Document content
        contentType: MIME type of the content

    Returns:
        ChatDocument object with a single "main" content item
    """
    # Split filename into name and extension (extension without the dot).
    name, ext = os.path.splitext(filename)
    if ext.startswith('.'):
        ext = ext[1:]
    # Create document object
    return ChatDocument(
        id=str(uuid.uuid4()),
        name=name,
        ext=ext,
        data=content,
        contents=[
            ChatContent(
                name="main",
                data=content,
                # NOTE(review): this f-string has no placeholders, so the
                # summary is always the literal "Generated (unknown)";
                # presumably it was meant to interpolate the filename —
                # confirm intended text before changing.
                summary=f"Generated (unknown)",
                metadata={"contentType": contentType}
            )
        ]
    )
async def _generateEmailTemplate(self, prompt: str, documentContents: str) -> Dict[str, Any]:
    """
    Generate email template using AI.

    Args:
        prompt: The task prompt
        documentContents: Extracted document content

    Returns:
        Email template dictionary with recipient, subject, plainBody and
        htmlBody fields; a generic fallback template is returned when the
        AI output is unusable or the call fails.
    """
    def fallbackTemplate() -> Dict[str, Any]:
        # Single source of truth for the fallback template (it was
        # previously duplicated verbatim in both failure branches).
        return {
            "recipient": "recipient@example.com",
            "subject": "Information Regarding Your Request",
            "plainBody": f"This email is regarding your request: {prompt}",
            "htmlBody": f"<html><body><p>This email is regarding your request: {prompt}</p></body></html>"
        }

    emailPrompt = f"""
    Create an email based on the following request:
    REQUEST: {prompt}
    DOCUMENT CONTENTS:
    {documentContents[:2000]}... (truncated if longer)
    Generate an email template with:
    1. A relevant recipient (use placeholder or derive from content if possible)
    2. A concise but descriptive subject line
    3. A professional HTML-formatted email body
    4. Appropriate greeting and closing
    Format your response as JSON with these fields:
    - recipient: email address
    - subject: subject line
    - plainBody: plain text version
    - htmlBody: HTML formatted version
    Only return valid JSON. No preamble or explanations.
    """
    try:
        response = await self.service.base.callAi([
            {"role": "system", "content": "You are an email template specialist. Create professional emails. Respond with valid JSON only."},
            {"role": "user", "content": emailPrompt}
        ])
        # Tolerate prose around the JSON by slicing the outermost braces.
        jsonStart = response.find('{')
        jsonEnd = response.rfind('}') + 1
        if jsonStart >= 0 and jsonEnd > jsonStart:
            return json.loads(response[jsonStart:jsonEnd])
        # No JSON found in the AI response — use the fallback template.
        logger.warning("Unable to create email template from AI response, using fallback template")
        return fallbackTemplate()
    except Exception as e:
        logger.warning(f"Error generating email template: {str(e)}")
        return fallbackTemplate()
def _createHtmlPreview(self, emailTemplate: Dict[str, Any]) -> str:
"""
Create an HTML preview of the email template.
Args:
emailTemplate: Email template dictionary
Returns:
HTML string for preview
"""
html = f"""
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>Email Preview: {emailTemplate.get('subject', 'Email Template')}</title>
<style>
body {{ font-family: Arial, sans-serif; margin: 0; padding: 0; background-color: #f5f5f5; }}
.email-container {{ max-width: 600px; margin: 20px auto; background-color: white; border: 1px solid #ddd; border-radius: 5px; overflow: hidden; }}
.email-header {{ background-color: #f0f0f0; padding: 15px; border-bottom: 1px solid #ddd; }}
.email-content {{ padding: 20px; }}
.email-footer {{ background-color: #f0f0f0; padding: 15px; border-top: 1px solid #ddd; font-size: 12px; color: #666; }}
.field {{ margin-bottom: 10px; }}
.field-label {{ font-weight: bold; color: #555; }}
.email-body {{ margin-top: 20px; padding-top: 20px; border-top: 1px solid #eee; }}
</style>
</head>
<body>
<div class="email-container">
<div class="email-header">
<h2>Email Template Preview</h2>
</div>
<div class="email-content">
<div class="field">
<div class="field-label">To:</div>
<div>{emailTemplate.get('recipient', 'recipient@example.com')}</div>
</div>
<div class="field">
<div class="field-label">Subject:</div>
<div>{emailTemplate.get('subject', 'No Subject')}</div>
</div>
<div class="email-body">
{emailTemplate.get('htmlBody', '<p>No content</p>')}
</div>
</div>
<div class="email-footer">
<p>This is a preview of the email template. The actual email may appear differently in various email clients.</p>
</div>
</div>
</body>
</html>
"""
return html
def getAgentEmail() -> AgentEmail:
    """Factory function to create and return an EmailAgent instance."""
    instance = AgentEmail()
    return instance

View file

@ -1,348 +0,0 @@
"""
SharePoint Agent Module.
Handles SharePoint document search and data extraction using Microsoft Graph API.
"""
import logging
import json
from typing import Dict, Any, List, Optional
from modules.workflow.agentBase import AgentBase
logger = logging.getLogger(__name__)
class AgentSharepoint(AgentBase):
    """Agent for handling SharePoint document operations."""

    def __init__(self):
        """Initialize the SharePoint agent."""
        super().__init__()
        # Identity and UI metadata used by the workflow framework.
        self.name = "sharepoint"
        self.label = "SharePoint Agent"
        self.description = "Searches and extracts data from SharePoint documents using Microsoft Graph API"
        # Capability tags advertised to the task router.
        self.capabilities = [
            "document_search",
            "content_extraction",
            "metadata_analysis",
            "document_processing"
        ]
async def processTask(self, task: Dict[str, Any]) -> Dict[str, Any]:
    """
    Process a SharePoint-related task.

    Args:
        task: Task dictionary containing:
            - prompt: Instructions for the agent
            - inputDocuments: List of documents to process
            - outputSpecifications: List of required output documents

    Returns:
        Dictionary containing:
            - feedback: Text response explaining what was done
            - documents: List of created documents
    """
    try:
        # Extract task information
        prompt = task.get("prompt", "")
        inputDocuments = task.get("inputDocuments", [])
        outputSpecs = task.get("outputSpecifications", [])
        # Check AI service. Bug fix: also guard against self.service being
        # None (matches AgentDocumentation) so a missing service container
        # yields a clean message instead of an AttributeError.
        if not self.service or not self.service.base:
            return {
                "feedback": "The SharePoint agent requires an AI service to function.",
                "documents": []
            }
        # Check if Microsoft connector is available
        if not hasattr(self.service, 'msft'):
            return {
                "feedback": "Microsoft connector not available. Please ensure Microsoft integration is properly configured.",
                "documents": []
            }
        # Get Microsoft token; without one, return a document that prompts
        # the frontend to start the auth flow.
        token_data = self.service.msft.getMsftToken()
        if not token_data:
            auth_doc = self._createFrontendAuthTriggerDocument()
            return {
                "feedback": "Microsoft authentication required. Please authenticate to continue.",
                "documents": [auth_doc]
            }
        # Parse the search query from the prompt
        searchQuery = await self._parseSearchQuery(prompt)
        # Search SharePoint documents
        searchResults = await self._searchSharePointDocuments(searchQuery)
        # Render the results in the format implied by each output label.
        documents = []
        for spec in outputSpecs:
            label = spec.get("label", "")
            description = spec.get("description", "")
            if label.endswith(".json"):
                # Create JSON summary of search results
                documents.append(self._createSearchSummaryJson(searchResults, description))
            elif label.endswith(".csv"):
                # Create CSV summary of search results
                documents.append(self._createSearchSummaryCsv(searchResults, description))
            else:
                # Create text summary of search results
                documents.append(self._createSearchSummaryText(searchResults, description))
        # Prepare feedback message
        feedback = f"Found {len(searchResults)} documents matching your search criteria. "
        if searchResults:
            feedback += "The results have been saved as documents."
        else:
            feedback += "No matching documents were found."
        return {
            "feedback": feedback,
            "documents": documents
        }
    except Exception as e:
        logger.error(f"Error in SharePoint agent: {str(e)}")
        return {
            "feedback": f"Error processing SharePoint task: {str(e)}",
            "documents": []
        }
def _createFrontendAuthTriggerDocument(self) -> Dict[str, Any]:
    """Build the HTML document that tells the frontend to start Microsoft auth.

    Returns:
        Agent document dict (via formatAgentDocumentOutput) wrapping a small
        HTML page with a button that redirects to /api/auth/microsoft.
    """
    # Static auth prompt; the frontend renders this and the button kicks off
    # the Microsoft OAuth flow.
    authPromptHtml = """
    <div>
        <h2>Microsoft Authentication Required</h2>
        <p>Please click the button below to authenticate with Microsoft:</p>
        <button onclick="window.location.href='/api/auth/microsoft'">Authenticate with Microsoft</button>
    </div>
    """
    return self.formatAgentDocumentOutput(
        "microsoft_auth.html",
        authPromptHtml,
        "text/html",
    )
async def _parseSearchQuery(self, prompt: str) -> Dict[str, Any]:
    """
    Parse the search query from the prompt using AI.

    Args:
        prompt: The task prompt

    Returns:
        Dictionary containing search parameters. On any failure (AI error,
        no JSON in the response, malformed JSON) a minimal fallback of
        {"query": prompt, "maxResults": 10} is returned.
    """
    # Single fallback object — previously this literal was duplicated in the
    # "no JSON found" branch and the exception handler.
    fallback = {
        "query": prompt,
        "maxResults": 10
    }
    try:
        # Use AI to parse the search query
        response = await self.service.base.callAi([
            {"role": "system", "content": "You are a SharePoint search query parser. Extract search parameters from the user's request."},
            {"role": "user", "content": f"""
            Parse the following SharePoint search request into structured parameters:
            {prompt}
            Return a JSON object with these fields:
            - query: The main search query
            - site: Optional SharePoint site name
            - folder: Optional folder path
            - fileTypes: List of file types to search for
            - dateRange: Optional date range for filtering
            - maxResults: Maximum number of results to return
            Only return valid JSON. No preamble or explanations.
            """}
        ])
        # Extract the first {...} span from the response; json.loads errors
        # fall through to the except and return the fallback.
        jsonStart = response.find('{')
        jsonEnd = response.rfind('}') + 1
        if jsonStart >= 0 and jsonEnd > jsonStart:
            return json.loads(response[jsonStart:jsonEnd])
        return fallback
    except Exception as e:
        logger.warning(f"Error parsing search query: {str(e)}")
        return fallback
async def _searchSharePointDocuments(self, searchParams: Dict[str, Any]) -> List[Dict[str, Any]]:
    """
    Search SharePoint documents using Microsoft Graph API.

    Args:
        searchParams: Search parameters (query, site, folder, fileTypes,
            maxResults) as produced by _parseSearchQuery.

    Returns:
        List of result dicts (name, id, driveId, webUrl, lastModified, size,
        content); empty list when not authenticated or on any error.
    """
    # Local import keeps this fix self-contained within the method.
    from urllib.parse import quote

    try:
        # Get Microsoft token (authentication gate only; requests go through
        # the connector which attaches credentials itself)
        token = self.service.msft.getMsftToken()
        if not token:
            return []
        # Prepare search parameters
        query = searchParams.get("query", "")
        site = searchParams.get("site", "")
        folder = searchParams.get("folder", "")
        fileTypes = searchParams.get("fileTypes", [])
        maxResults = searchParams.get("maxResults", 10)
        # Build drives URL (document libraries of the root site, or of the
        # explicitly named site)
        searchUrl = "https://graph.microsoft.com/v1.0/sites/root/drives"
        if site:
            searchUrl = f"https://graph.microsoft.com/v1.0/sites/{site}/drives"
        # Get drives (document libraries)
        response = self.service.msft.makeGraphRequest("GET", searchUrl)
        if not response or "value" not in response:
            return []
        results = []
        for drive in response["value"]:
            # Search in each drive
            driveId = drive["id"]
            # BUG FIX: the user query is URL-encoded now; raw interpolation
            # broke the request for queries containing spaces, '&' or quotes.
            searchEndpoint = f"https://graph.microsoft.com/v1.0/drives/{driveId}/root/search(q='{quote(query)}')"
            # BUG FIX: query options used to be appended with '&' even though
            # no '?' had started the query string, and fileTypes/folder each
            # added their own conflicting 'filter=' parameter. Build the
            # option list once and join correctly instead.
            filterClauses = []
            if fileTypes:
                filterClauses.append(" or ".join([f"fileType eq '{ft}'" for ft in fileTypes]))
            if folder:
                filterClauses.append(f"parentReference/path eq '/{folder}'")
            queryOptions = []
            if filterClauses:
                queryOptions.append("filter=" + " and ".join(f"({clause})" for clause in filterClauses))
            # Add result limit (applied per drive, as before)
            queryOptions.append(f"top={maxResults}")
            searchEndpoint += "?" + "&".join(queryOptions)
            # Make the search request
            searchResponse = self.service.msft.makeGraphRequest("GET", searchEndpoint)
            if searchResponse and "value" in searchResponse:
                for item in searchResponse["value"]:
                    # Download the file body so callers can summarize it
                    fileContent = await self._getFileContent(driveId, item["id"])
                    results.append({
                        "name": item["name"],
                        "id": item["id"],
                        "driveId": driveId,
                        "webUrl": item["webUrl"],
                        "lastModified": item["lastModifiedDateTime"],
                        "size": item["size"],
                        "content": fileContent
                    })
        return results
    except Exception as e:
        logger.error(f"Error searching SharePoint: {str(e)}")
        return []
async def _getFileContent(self, driveId: str, fileId: str) -> str:
    """
    Get file content from SharePoint.

    Args:
        driveId: Drive ID
        fileId: File ID

    Returns:
        File content decoded as UTF-8, or "" when the download yields
        nothing or any error occurs (including non-UTF-8 bytes).
    """
    contentUrl = f"https://graph.microsoft.com/v1.0/drives/{driveId}/items/{fileId}/content"
    try:
        # raw=True returns the undecoded response body bytes
        rawBody = self.service.msft.makeGraphRequest("GET", contentUrl, raw=True)
        if not rawBody:
            return ""
        return rawBody.decode('utf-8')
    except Exception as e:
        logger.error(f"Error getting file content: {str(e)}")
        return ""
def _createSearchSummaryJson(self, results: List[Dict[str, Any]], description: str) -> Dict[str, Any]:
    """Create a JSON summary document of the search results."""
    # Project each hit down to the public metadata fields.
    summary = {
        "description": description,
        "totalResults": len(results),
        "results": [
            {
                "name": hit["name"],
                "url": hit["webUrl"],
                "lastModified": hit["lastModified"],
                "size": hit["size"],
            }
            for hit in results
        ],
    }
    return self.formatAgentDocumentOutput(
        "sharepoint_search_results.json",
        json.dumps(summary, indent=2),
        "application/json",
    )
def _createSearchSummaryCsv(self, results: List[Dict[str, Any]], description: str) -> Dict[str, Any]:
    """Create a CSV summary document of the search results.

    Note: `description` is accepted for signature parity with the other
    summary builders but does not appear in the CSV body.
    """
    def escapeField(value: str) -> str:
        # CSV escaping: double any embedded quotes; fields are wrapped below.
        return value.replace('"', '""')

    rows = ["Name,URL,Last Modified,Size (bytes)"]
    rows.extend(
        f'"{escapeField(hit["name"])}","{escapeField(hit["webUrl"])}","{escapeField(hit["lastModified"])}",{hit["size"]}'
        for hit in results
    )
    return self.formatAgentDocumentOutput(
        "sharepoint_search_results.csv",
        "\n".join(rows),
        "text/csv",
    )
def _createSearchSummaryText(self, results: List[Dict[str, Any]], description: str) -> Dict[str, Any]:
    """Create a plain-text summary document of the search results."""
    # Header block, then one four-line entry per result.
    lines = [
        "SharePoint Search Results",
        f"Description: {description}",
        f"Total Results: {len(results)}",
        "\nResults:",
    ]
    for hit in results:
        lines.append(f"\nName: {hit['name']}")
        lines.append(f"URL: {hit['webUrl']}")
        lines.append(f"Last Modified: {hit['lastModified']}")
        lines.append(f"Size: {hit['size']} bytes")
    return self.formatAgentDocumentOutput(
        "sharepoint_search_results.txt",
        "\n".join(lines),
        "text/plain",
    )
def getAgentSharepoint() -> AgentSharepoint:
    """Factory function to create and return a new AgentSharepoint instance."""
    return AgentSharepoint()

View file

@ -1,814 +0,0 @@
"""
Web crawler agent for gathering and analyzing web content.
Provides web research and content extraction capabilities.
"""
import logging
import json
import re
import time
import os
from typing import Dict, Any, List
from urllib.parse import quote_plus, unquote
from bs4 import BeautifulSoup
import requests
import markdown
from modules.workflow.agentBase import AgentBase
from modules.shared.configuration import APP_CONFIG
logger = logging.getLogger(__name__)
class AgentWebcrawler(AgentBase):
"""AI-driven agent for web research and information retrieval"""
def __init__(self):
    """Initialize the web crawler agent: identity, capabilities, and SerpAPI config."""
    super().__init__()
    # Agent identity shown to the workflow engine / UI.
    self.name = "webcrawler"
    self.label = "Web Crawler"
    self.description = "Gathers and analyzes web content using AI with multi-step research"
    self.capabilities = [
        "web_research",
        "content_gathering",
        "data_extraction",
        "information_synthesis",
        "source_verification",
    ]
    # Web crawling configuration, read once from the application config.
    cfg = APP_CONFIG.get
    self.srcApikey = cfg("Agent_Webcrawler_SERPAPI_APIKEY", "")
    self.srcEngine = cfg("Agent_Webcrawler_SERPAPI_ENGINE", "google")
    self.srcCountry = cfg("Agent_Webcrawler_SERPAPI_COUNTRY", "auto")
    self.maxUrl = int(cfg("Agent_Webcrawler_SERPAPI_MAX_URLS", "5"))
    self.maxSearchTerms = int(cfg("Agent_Webcrawler_SERPAPI_MAX_SEARCH_KEYWORDS", "3"))
    self.maxResults = int(cfg("Agent_Webcrawler_SERPAPI_MAX_SEARCH_RESULTS", "5"))
    self.timeout = int(cfg("Agent_Webcrawler_SERPAPI_TIMEOUT", "30"))
    self.userAgent = cfg("Agent_Webcrawler_SERPAPI_USER_AGENT", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36")
    # Without an API key, _searchWeb returns no results — flag it loudly.
    if not self.srcApikey:
        logger.error("SerpAPI key not configured")
def setDependencies(self, serviceBase=None):
    """Set external dependencies for the agent.

    Args:
        serviceBase: Service container passed through to setService; the
            agent later reads self.service.base (AI calls) and
            self.service.logAdd (progress logging) from it.
    """
    self.setService(serviceBase)
async def processTask(self, task: Dict[str, Any]) -> Dict[str, Any]:
    """
    Process a task by focusing on required outputs and using AI to guide the research process.

    Pipeline: plan research (AI) -> gather material (direct URLs + web
    search) -> render the material into the requested output documents.

    Args:
        task: Task dictionary with prompt, inputDocuments, outputSpecifications
            and optionally context.workflow (used for progress logging).

    Returns:
        Dictionary with "feedback" (str) and "documents" (list); on any
        error the feedback carries the message and documents is empty.
    """
    try:
        # Extract task information
        prompt = task.get("prompt", "")
        inputDocuments = task.get("inputDocuments", [])
        outputSpecs = task.get("outputSpecifications", [])
        workflow = task.get("context", {}).get("workflow", {})
        # Check AI service — every later phase calls self.service.base
        if not self.service or not self.service.base:
            return {
                "feedback": "The Web Crawler agent requires an AI service to function.",
                "documents": []
            }
        # Create research plan (progress jumps 35 -> 45 -> 55 across phases)
        if workflow:
            self.service.logAdd(workflow, "Creating research plan...", level="info", progress=35)
        researchPlan = await self._createResearchPlan(prompt)
        # Check if this is truly a web research task; bail out early otherwise
        if not researchPlan.get("requiresWebResearch", True):
            return {
                "feedback": "This task doesn't appear to require web research. Please try a different agent.",
                "documents": []
            }
        # Gather raw material through web research
        if workflow:
            self.service.logAdd(workflow, "Gathering research material...", level="info", progress=45)
        rawResults = await self._gatherResearchMaterial(researchPlan, workflow)
        # Format results into requested output documents
        if workflow:
            self.service.logAdd(workflow, "Creating output documents...", level="info", progress=55)
        documents = await self._createOutputDocuments(
            prompt,
            rawResults,
            outputSpecs,
            researchPlan
        )
        # Generate feedback (prefer the plan's own explanation when present)
        feedback = researchPlan.get("feedback", f"I conducted web research on '{prompt[:50]}...' and gathered information from {len(rawResults)} relevant sources.")
        return {
            "feedback": feedback,
            "documents": documents
        }
    except Exception as e:
        logger.error(f"Error during web research: {str(e)}", exc_info=True)
        return {
            "feedback": f"Error during web research: {str(e)}",
            "documents": []
        }
async def _createResearchPlan(self, prompt: str) -> Dict[str, Any]:
    """
    Use AI to create a detailed research plan.

    Args:
        prompt: The research query

    Returns:
        Research plan dictionary. When the AI response contains no parseable
        JSON, or any error occurs, a conservative fallback plan is returned
        that simply searches the prompt verbatim.
    """
    # One shared fallback plan — previously this literal was duplicated
    # verbatim in the "no JSON" branch and the exception handler.
    fallbackPlan = {
        "requiresWebResearch": True,
        "researchQuestions": ["What information can be found about this topic?"],
        "searchTerms": [prompt],
        "directUrls": [],
        "expectedSources": ["Web pages", "Articles"],
        "contentFocus": "Relevant information about the topic",
        "feedback": f"I'll conduct web research on '{prompt}' and gather relevant information."
    }
    researchPrompt = f"""
    Create a detailed web research plan for this task: "{prompt}"
    Analyze the request carefully and create a structured plan in JSON format with the following elements:
    {{
        "requiresWebResearch": true/false, # Whether this genuinely requires web research
        "researchQuestions": ["question1", "question2", ...], # 2-4 specific questions to answer
        "searchTerms": ["term1", "term2", ...], # Up to {self.maxSearchTerms} effective search terms
        "directUrls": ["url1", "url2", ...], # Any URLs directly mentioned in the request (up to {self.maxUrl})
        "expectedSources": ["type1", "type2", ...], # Types of sources that would be most valuable
        "contentFocus": "what specific content to extract or focus on",
        "feedback": "explanation of how the research will be conducted"
    }}
    Respond with ONLY the JSON object, no additional text or explanations.
    """
    try:
        # Get research plan from AI
        response = await self.service.base.callAi([
            {"role": "system", "content": "You are a research expert. Respond with valid JSON only."},
            {"role": "user", "content": researchPrompt}
        ])
        # Extract the first {...} span from the response
        jsonStart = response.find('{')
        jsonEnd = response.rfind('}') + 1
        if jsonStart >= 0 and jsonEnd > jsonStart:
            plan = json.loads(response[jsonStart:jsonEnd])
            # Ensure required fields exist even if the AI omitted them
            plan.setdefault("searchTerms", [prompt])
            plan.setdefault("directUrls", [])
            plan.setdefault("researchQuestions", ["What information can be found about this topic?"])
            return plan
        logger.warning(f"Not able creating research plan, generating fallback plan")
        return fallbackPlan
    except Exception as e:
        logger.warning(f"Error creating research plan: {str(e)}")
        return fallbackPlan
async def _gatherResearchMaterial(self, researchPlan: Dict[str, Any], workflow: Dict[str, Any]) -> List[Dict[str, Any]]:
    """
    Gather research material based on the research plan.

    Processes direct URLs first, then search terms, stopping once
    self.maxResults entries have been collected; finally every entry is
    summarized via AI.

    Args:
        researchPlan: Research plan dictionary (directUrls, searchTerms, ...)
        workflow: Current workflow object (for progress logging)

    Returns:
        List of research results, each with title/url/sourceType/content
        and an AI-generated summary.
    """
    allResults = []
    # Process direct URLs (capped at maxUrl)
    directUrls = researchPlan.get("directUrls", [])[:self.maxUrl]
    for i, url in enumerate(directUrls):
        progress = 45 + int((i / len(directUrls)) * 5) # Progress from 45% to 50%
        self.service.logAdd(workflow, f"Processing direct URL {i+1}/{len(directUrls)}...", level="info", progress=progress)
        logger.info(f"Processing direct URL: {url}")
        try:
            # Fetch and extract content (None soup on fetch failure is skipped)
            soup = self._readUrl(url)
            if soup:
                # Extract title and content
                title = self._extractTitle(soup, url)
                content = self._extractMainContent(soup)
                # Add to results
                allResults.append({
                    "title": title,
                    "url": url,
                    "sourceType": "directUrl",
                    "content": content,
                    "summary": "" # Will be filled later
                })
        except Exception as e:
            logger.warning(f"Error processing URL {url}: {str(e)}")
    # Process search terms (capped at maxSearchTerms)
    searchTerms = researchPlan.get("searchTerms", [])[:self.maxSearchTerms]
    for i, term in enumerate(searchTerms):
        progress = 50 + int((i / len(searchTerms)) * 5) # Progress from 50% to 55%
        self.service.logAdd(workflow, f"Searching term {i+1}/{len(searchTerms)}...", level="info", progress=progress)
        logger.info(f"Searching for: {term}")
        try:
            # Perform search
            searchResults = self._searchWeb(term)
            # Process each search result
            for result in searchResults:
                # Deduplicate by URL against everything gathered so far
                if not any(r["url"] == result["url"] for r in allResults):
                    allResults.append({
                        "title": result["title"],
                        "url": result["url"],
                        "sourceType": "searchResult",
                        "content": result["data"],
                        "snippet": result["snippet"],
                        "summary": "" # Will be filled later
                    })
                # Stop if we've reached the maximum results
                if len(allResults) >= self.maxResults:
                    break
        except Exception as e:
            logger.warning(f"Error searching for {term}: {str(e)}")
        # Stop if we've reached the maximum results
        if len(allResults) >= self.maxResults:
            break
    # Create summaries for all results
    allResults = await self._summarizeAllResults(allResults, researchPlan)
    return allResults
async def _summarizeAllResults(self, results: List[Dict[str, Any]], researchPlan: Dict[str, Any]) -> List[Dict[str, Any]]:
    """
    Create summaries for all research results.

    Args:
        results: List of research results
        researchPlan: Research plan with questions and focus

    Returns:
        The same list, with each entry's "summary" field filled (or an
        error note on failure).
    """
    for i, result in enumerate(results):
        logger.info(f"Summarizing result {i+1}/{len(results)}: {result['title'][:30]}...")
        try:
            # Limit content length to avoid token issues
            content = self._limitText(result.get("content", ""), maxChars=8000)
            researchQuestions = researchPlan.get("researchQuestions", ["What relevant information does this page contain?"])
            contentFocus = researchPlan.get("contentFocus", "Relevant information")
            # Create summary using AI
            summaryPrompt = f"""
            Summarize this web page content based on these research questions:
            {', '.join(researchQuestions)}
            Focus on: {contentFocus}
            Web page: {result['url']}
            Title: {result['title']}
            Content:
            {content}
            Create a concise summary that:
            1. Directly answers the research questions if possible
            2. Extracts the most relevant information from the page
            3. Includes specific facts, figures, or quotes if available
            4. Is around 2000 characters long
            Only include information actually found in the content. No fabrications or assumptions.
            """
            # BUG FIX: the system prompt used to demand "valid JSON only"
            # (copy-pasted from the plan parser) although this call must
            # produce a plain-text summary.
            summary = await self.service.base.callAi([
                {"role": "system", "content": "You are a research expert. Write a concise, factual plain-text summary."},
                {"role": "user", "content": summaryPrompt}
            ])
            # Add summary to result
            result["summary"] = summary.strip()
        except Exception as e:
            logger.warning(f"Error summarizing result {i+1}: {str(e)}")
            result["summary"] = f"Error creating summary: {str(e)}"
    return results
async def _createOutputDocuments(self, prompt: str, results: List[Dict[str, Any]],
                               outputSpecs: List[Dict[str, Any]], researchPlan: Dict[str, Any]) -> List[Dict[str, Any]]:
    """
    Create output documents based on research results and specifications.

    Args:
        prompt: Original research prompt
        results: List of research results
        outputSpecs: Output specifications (label + description per doc);
            when empty, a single markdown report is produced by default
        researchPlan: Research plan

    Returns:
        List of output documents, one per specification
    """
    # Fall back to a single default markdown report when no specs are given.
    specs = outputSpecs or [{
        "label": "webResearchResults.md",
        "description": "Comprehensive web research results"
    }]
    documents = []
    for spec in specs:
        label = spec.get("label", "")
        description = spec.get("description", "")
        # The file extension decides which renderer handles the spec.
        formatType = self._determineFormatType(label)
        if formatType == "json":
            # Structured data output
            doc = await self._createJsonDocument(prompt, results, researchPlan, label)
        elif formatType == "csv":
            # Tabular output
            doc = await self._createCsvDocument(results, label)
        else:
            # Narrative report (markdown, html, or plain text)
            doc = await self._createNarrativeDocument(
                prompt, results, researchPlan, formatType, label, description
            )
        documents.append(doc)
    return documents
async def _createNarrativeDocument(self, prompt: str, results: List[Dict[str, Any]],
                                 researchPlan: Dict[str, Any], formatType: str,
                                 outputLabel: str, outputDescription: str) -> Dict[str, Any]:
    """
    Create a narrative document (markdown, html, text) from research results.

    Args:
        prompt: Original research prompt
        results: Research results
        researchPlan: Research plan
        formatType: Output format (markdown, html, text)
        outputLabel: Output filename
        outputDescription: Output description

    Returns:
        Document object; on AI failure an error document in the requested
        format is returned instead.
    """
    # Map the requested format onto content type and template name.
    if formatType == "markdown":
        contentType = "text/markdown"
        templateFormat = "markdown"
    elif formatType == "html":
        contentType = "text/html"
        templateFormat = "html"
    else:
        contentType = "text/plain"
        templateFormat = "text"
    # Prepare research context
    researchQuestions = researchPlan.get("researchQuestions", [])
    searchTerms = researchPlan.get("searchTerms", [])
    # Condense each source to the fields the report writer needs.
    sourcesSummary = []
    for result in results:
        sourcesSummary.append({
            "title": result.get("title", "Untitled"),
            "url": result.get("url", ""),
            "summary": result.get("summary", ""),
            "snippet": result.get("snippet", "")
        })
    # Truncate content for prompt: clip summaries one by one until the
    # serialized sources fit the 10000-char budget (best effort — a single
    # pass over the list).
    sourcesJson = json.dumps(sourcesSummary, indent=2)
    if len(sourcesJson) > 10000:
        for i in range(len(sourcesSummary)):
            if len(sourcesJson) <= 10000:
                break
            sourcesSummary[i]["summary"] = sourcesSummary[i]["summary"][:500] + "..."
            sourcesJson = json.dumps(sourcesSummary, indent=2)
    # Create report prompt
    reportPrompt = f"""
    Create a comprehensive {formatType} research report based on the following web research:
    TASK: {prompt}
    RESEARCH QUESTIONS:
    {', '.join(researchQuestions)}
    SEARCH TERMS USED:
    {', '.join(searchTerms)}
    SOURCES AND FINDINGS:
    {sourcesJson}
    REPORT DETAILS:
    - Format: {templateFormat}
    - Filename: {outputLabel}
    - Description: {outputDescription}
    Create a well-structured report that:
    1. Includes an executive summary of key findings
    2. Addresses each research question directly
    3. Integrates information from all relevant sources
    4. Cites sources appropriately for each piece of information
    5. Provides a comprehensive synthesis of the research
    6. Is formatted professionally and appropriately for {templateFormat}
    The report should be scholarly, accurate, and focused on the original research task.
    """
    try:
        # BUG FIX: the system prompt used to demand "valid JSON only"
        # (copy-pasted from the plan parser) although this call must write
        # a narrative markdown/html/text report.
        reportContent = await self.service.base.callAi([
            {"role": "system", "content": "You are a research expert and report writer. Produce only the requested report content."},
            {"role": "user", "content": reportPrompt}
        ])
        # Convert to HTML if needed
        if formatType == "html" and not reportContent.lower().startswith("<html"):
            # Check if it's markdown that needs conversion
            if reportContent.startswith("#"):
                reportContent = markdown.markdown(reportContent)
            # Wrap in basic HTML structure if needed
            if not reportContent.lower().startswith("<html"):
                reportContent = f"<html><head><title>Web Research Results</title></head><body>{reportContent}</body></html>"
        return self.formatAgentDocumentOutput(outputLabel, reportContent, contentType)
    except Exception as e:
        logger.error(f"Error creating narrative document: {str(e)}")
        # Create error document in the requested format
        if formatType == "markdown":
            content = f"# Web Research Error\n\nAn error occurred: {str(e)}"
        elif formatType == "html":
            content = f"<html><body><h1>Web Research Error</h1><p>An error occurred: {str(e)}</p></body></html>"
        else:
            content = f"WEB RESEARCH ERROR\n\nAn error occurred: {str(e)}"
        return self.formatAgentDocumentOutput(outputLabel, content, contentType)
async def _createJsonDocument(self, prompt: str, results: List[Dict[str, Any]],
                            researchPlan: Dict[str, Any], outputLabel: str) -> Dict[str, Any]:
    """
    Create a JSON document from research results.

    Args:
        prompt: Original research prompt
        results: Research results
        researchPlan: Research plan
        outputLabel: Output filename

    Returns:
        Document object; on failure a JSON document carrying the error.
    """
    try:
        # Per-source metadata projection.
        sourcesData = [
            {
                "title": entry.get("title", "Untitled"),
                "url": entry.get("url", ""),
                "summary": entry.get("summary", ""),
                "snippet": entry.get("snippet", ""),
                "sourceType": entry.get("sourceType", ""),
            }
            for entry in results
        ]
        # Full report object: run metadata, overall summary, and sources.
        jsonContent = {
            "metadata": {
                "query": prompt,
                "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
                "researchQuestions": researchPlan.get("researchQuestions", []),
                "searchTerms": researchPlan.get("searchTerms", []),
            },
            "summary": researchPlan.get("feedback", "Web research results"),
            "sources": sourcesData,
        }
        content = json.dumps(jsonContent, indent=2)
        return self.formatAgentDocumentOutput(outputLabel, content, "application/json")
    except Exception as e:
        logger.error(f"Error creating JSON document: {str(e)}")
        return self.formatAgentDocumentOutput(outputLabel, json.dumps({"error": str(e)}), "application/json")
async def _createCsvDocument(self, results: List[Dict[str, Any]], outputLabel: str) -> Dict[str, Any]:
    """
    Create a CSV document from research results.

    Args:
        results: Research results
        outputLabel: Output filename

    Returns:
        Document object; on failure a two-row CSV describing the error.
    """
    try:
        def escapeField(value: str) -> str:
            # CSV escaping: double embedded quotes; fields are wrapped below.
            return value.replace('"', '""')

        csvLines = ["Title,URL,Source Type,Snippet"]
        csvLines.extend(
            f'"{escapeField(entry.get("title", ""))}","{escapeField(entry.get("url", ""))}","{escapeField(entry.get("sourceType", ""))}","{escapeField(entry.get("snippet", ""))}"'
            for entry in results
        )
        return self.formatAgentDocumentOutput(outputLabel, "\n".join(csvLines), "text/csv")
    except Exception as e:
        logger.error(f"Error creating CSV document: {str(e)}")
        return self.formatAgentDocumentOutput(outputLabel, "Error,Error\nFailed to create CSV,{0}".format(str(e)), "text/csv")
def _determineFormatType(self, outputLabel: str) -> str:
"""
Determine the format type based on the filename.
Args:
outputLabel: Output filename
Returns:
Format type (markdown, html, text, json, csv)
"""
outputLabelLower = outputLabel.lower()
if outputLabelLower.endswith(".md"):
return "markdown"
elif outputLabelLower.endswith(".html"):
return "html"
elif outputLabelLower.endswith(".txt"):
return "text"
elif outputLabelLower.endswith(".json"):
return "json"
elif outputLabelLower.endswith(".csv"):
return "csv"
else:
# Default to markdown
return "markdown"
def _searchWeb(self, query: str) -> List[Dict[str, str]]:
    """
    Conduct a web search using SerpAPI and return the results.

    Each organic hit is additionally fetched and its main text extracted
    into the 'data' field, so this call can be slow (one HTTP request per
    result on top of the search itself).

    Args:
        query: The search query

    Returns:
        List of dicts with 'title', 'url', 'snippet', 'data'; empty list
        when no API key is configured or on any error.
    """
    if not self.srcApikey:
        return []
    # Get user language from serviceBase if available
    # NOTE(review): assumes self.service and self.service.base are set when
    # this runs (guarded in processTask) — confirm for other call paths.
    userLanguage = "en" # Default language
    if self.service.base.userLanguage:
        userLanguage = self.service.base.userLanguage
    try:
        # Format the search request for SerpAPI
        params = {
            "engine": self.srcEngine,
            "q": query,
            "api_key": self.srcApikey,
            "num": self.maxResults, # Number of results to return
            "hl": userLanguage # Identified user language
        }
        # Make the API request
        response = requests.get("https://serpapi.com/search", params=params, timeout=self.timeout)
        response.raise_for_status()
        # Parse JSON response
        search_results = response.json()
        # Extract organic results
        results = []
        if "organic_results" in search_results:
            for result in search_results["organic_results"][:self.maxResults]:
                # Extract title
                title = result.get("title", "No title")
                # Extract URL
                url = result.get("link", "No URL")
                # Extract snippet
                snippet = result.get("snippet", "No description")
                # Get actual page content; a failed fetch is recorded inline
                # rather than dropping the hit
                try:
                    targetPageSoup = self._readUrl(url)
                    content = self._extractMainContent(targetPageSoup)
                except Exception as e:
                    logger.warning(f"Error extracting content from {url}: {str(e)}")
                    content = f"Error extracting content: {str(e)}"
                results.append({
                    'title': title,
                    'url': url,
                    'snippet': snippet,
                    'data': content
                })
                # Limit number of results (defensive; the slice above caps too)
                if len(results) >= self.maxResults:
                    break
        else:
            logger.warning(f"No organic results found in SerpAPI response for: {query}")
        return results
    except Exception as e:
        logger.error(f"Error searching with SerpAPI for {query}: {str(e)}")
        return []
def _readUrl(self, url: str) -> BeautifulSoup:
    """
    Read a URL and return a BeautifulSoup parser for the content.

    Args:
        url: The URL to read; must start with http:// or https://

    Returns:
        BeautifulSoup object with the content or None on errors
    """
    # Reject empty or non-HTTP(S) URLs up front
    if not url or not url.startswith(('http://', 'https://')):
        return None
    headers = {
        'User-Agent': self.userAgent,
        'Accept': 'text/html,application/xhtml+xml,application/xml',
        'Accept-Language': 'en-US,en;q=0.9',
    }
    try:
        # Initial request
        response = requests.get(url, headers=headers, timeout=self.timeout)
        # Handling for status 202 (Accepted — content not ready yet)
        if response.status_code == 202:
            # Retry with increasing waits (up to ~8.5s extra in total)
            backoffTimes = [0.5, 1.0, 2.0, 5.0]
            for waitTime in backoffTimes:
                time.sleep(waitTime)
                response = requests.get(url, headers=headers, timeout=self.timeout)
                if response.status_code != 202:
                    break
        # Raise for error status codes
        response.raise_for_status()
        # Parse HTML
        return BeautifulSoup(response.text, 'html.parser')
    except Exception as e:
        logger.error(f"Error reading URL {url}: {str(e)}")
        return None
def _extractTitle(self, soup: BeautifulSoup, url: str) -> str:
    """
    Extract the title from a webpage.

    Prefers the <title> tag; falls back to the first <h1> when no title
    tag is present.

    Args:
        soup: BeautifulSoup object of the webpage (may be None on fetch error)
        url: URL of the webpage (used in the error placeholder)

    Returns:
        Extracted title, or an error placeholder when soup is None
    """
    if not soup:
        return f"Error with {url}"
    titleTag = soup.find('title')
    if titleTag:
        title = titleTag.text.strip()
    else:
        title = "No title"
    # Fall back to the first heading when the title tag gave nothing.
    if title == "No title":
        headingTag = soup.find('h1')
        if headingTag:
            title = headingTag.text.strip()
    return title
def _extractMainContent(self, soup: BeautifulSoup, maxChars: int = 10000) -> str:
    """
    Extract the main content from an HTML page.

    Args:
        soup: BeautifulSoup object of the webpage (may be None)
        maxChars: Maximum number of characters to return

    Returns:
        Extracted main content as a string ("" when soup is None)
    """
    if not soup:
        return ""
    # Probe common main-content containers in priority order.
    container = None
    for selector in ('main', 'article', '#content', '.content', '#main', '.main'):
        candidate = soup.select_one(selector)
        if candidate:
            container = candidate
            break
    # Fall back to the whole body (or document) when nothing matched.
    if container is None:
        container = soup.find('body') or soup
    # Strip boilerplate elements that don't contribute to the main content.
    noiseSelector = 'script, style, nav, footer, header, aside, .sidebar, #sidebar, .comments, #comments, .advertisement, .ads, iframe'
    for element in container.select(noiseSelector):
        element.extract()
    # Flatten to text and clamp to the character budget.
    return container.get_text(separator=' ', strip=True)[:maxChars]
def _limitText(self, text: str, maxChars: int = 10000) -> str:
"""
Limit text to a maximum number of characters.
Args:
text: Input text
maxChars: Maximum number of characters
Returns:
Limited text
"""
if not text:
return ""
# If text is already under the limit, return unchanged
if len(text) <= maxChars:
return text
# Otherwise limit text to maxChars
return text[:maxChars] + "... [Content truncated due to length]"
# Factory entry point used by the agent registry.
def getAgentWebcrawler():
    """Build and return a fresh Webcrawler agent instance."""
    return AgentWebcrawler()

View file

@ -6,7 +6,7 @@ Uses the JSON connector for data access with added language support.
import os
import logging
import uuid
from datetime import datetime
from datetime import datetime, UTC
from typing import Dict, Any, List, Optional, Union
import asyncio
@ -327,6 +327,11 @@ class ChatObjects:
publishedAt=createdMessage.get("publishedAt", self._getCurrentTimestamp()),
stats=ChatStat(**createdMessage.get("stats", {})) if createdMessage.get("stats") else None
)
# Update workflow stats for message creation (estimate bytes for message)
message_size = len(createdMessage.get("message", "")) + sum(len(doc.get("filename", "")) for doc in createdMessage.get("documents", []))
self.updateWorkflowStats(workflowId, bytesSent=0, bytesReceived=message_size)
except Exception as e:
logger.error(f"Error creating workflow message: {str(e)}")
return None
@ -535,6 +540,64 @@ class ChatObjects:
# Get logs for this workflow
return [ChatLog(**log) for log in self.db.getRecordset("workflowLogs", recordFilter={"workflowId": workflowId})]
def updateWorkflowStats(self, workflowId: str, bytesSent: int = 0, bytesReceived: int = 0) -> bool:
    """Updates workflow statistics during execution with incremental values.

    Byte counters are added to the workflow's running totals; processing
    time is recomputed as absolute elapsed seconds since the workflow
    started. A per-call increment record is also appended to the "stats"
    table.

    Args:
        workflowId: ID of the workflow to update.
        bytesSent: Bytes to add to the running bytesSent total.
        bytesReceived: Bytes to add to the running bytesReceived total.

    Returns:
        True when persisted; False when the workflow is missing, the
        caller lacks modify permission, or any error occurs.
    """
    try:
        # Get current workflow
        workflow = self.getWorkflow(workflowId)
        if not workflow:
            logger.error(f"Workflow {workflowId} not found for stats update")
            return False
        if not self._canModify("workflows", workflowId):
            logger.error(f"No permission to update workflow {workflowId} stats")
            return False
        # Get current stats (zeroed totals when none exist yet)
        currentStats = workflow.stats.dict() if workflow.stats else {
            "bytesSent": 0,
            "bytesReceived": 0,
            "tokenCount": 0,
            "processingTime": 0
        }
        # Calculate processing time from workflow start
        # (startedAt is ISO-8601; a trailing 'Z' is normalized for fromisoformat)
        workflow_start = datetime.fromisoformat(workflow.startedAt.replace('Z', '+00:00'))
        current_time = datetime.now(UTC)
        processing_time = (current_time - workflow_start).total_seconds()
        # Update stats with incremental values
        currentStats["bytesSent"] = currentStats.get("bytesSent", 0) + bytesSent
        currentStats["bytesReceived"] = currentStats.get("bytesReceived", 0) + bytesReceived
        # NOTE(review): token count is approximated as total bytes
        # sent+received — confirm this proxy is intended.
        currentStats["tokenCount"] = currentStats["bytesSent"] + currentStats["bytesReceived"]
        currentStats["processingTime"] = processing_time
        # Update workflow in database (stored under the "dataStats" field)
        self.db.recordModify("workflows", workflowId, {
            "dataStats": currentStats
        })
        # Log to stats table (the per-call increments, not the running totals)
        stats_record = {
            "timestamp": self._getCurrentTimestamp(),
            "workflowId": workflowId,
            "bytesSent": bytesSent,
            "bytesReceived": bytesReceived,
            "tokenCount": bytesSent + bytesReceived,
            "processingTime": processing_time
        }
        # Create stats record in database
        self.db.recordCreate("stats", stats_record)
        logger.debug(f"Updated workflow {workflowId} stats: {currentStats}")
        logger.debug(f"Logged stats record: {stats_record}")
        return True
    except Exception as e:
        logger.error(f"Error updating workflow stats: {str(e)}")
        return False
def createWorkflowLog(self, logData: Dict[str, Any]) -> ChatLog:
"""Creates a log entry for a workflow if user has access."""
# Check workflow access
@ -777,14 +840,7 @@ class ChatObjects:
# Create workflow
workflow = self.createWorkflow(workflowData)
# Add log entry
self.createWorkflowLog({
"workflowId": workflow.id,
"message": "Workflow started",
"type": "info",
"status": "running",
"progress": 0
})
# Remove the 'Workflow started' log entry
# Start workflow processing
from modules.workflow.managerWorkflow import WorkflowManager

View file

@ -10,9 +10,9 @@ logger = logging.getLogger(__name__)
class MethodCoder(MethodBase):
"""Coder method implementation for code operations"""
def __init__(self, serviceContainer: Any):
def __init__(self, serviceCenter: Any):
"""Initialize the coder method"""
super().__init__(serviceContainer)
super().__init__(serviceCenter)
self.name = "coder"
self.description = "Handle code operations like analysis, generation, and refactoring"
@ -87,7 +87,18 @@ class MethodCoder(MethodBase):
)
# Extract text content from ExtractedContent objects
text_contents = self.service.extractTextFromContentObjects(all_code_content)
text_contents = []
for content_obj in all_code_content:
if hasattr(content_obj, 'contents') and content_obj.contents:
# Extract text from ContentItem objects
for content_item in content_obj.contents:
if hasattr(content_item, 'data') and content_item.data:
text_contents.append(content_item.data)
elif isinstance(content_obj, str):
text_contents.append(content_obj)
else:
# Fallback: convert to string representation
text_contents.append(str(content_obj))
# Combine all extracted text content for analysis
combined_content = "\n\n--- CODE SEPARATOR ---\n\n".join(text_contents)

View file

@ -8,7 +8,6 @@ from typing import Dict, Any, List, Optional
import uuid
from datetime import datetime, UTC
from modules.workflow.managerDocument import DocumentManager
from modules.workflow.methodBase import MethodBase, ActionResult, action
logger = logging.getLogger(__name__)
@ -16,12 +15,11 @@ logger = logging.getLogger(__name__)
class MethodDocument(MethodBase):
"""Document method implementation for document operations"""
def __init__(self, serviceContainer: Any):
def __init__(self, serviceCenter: Any):
"""Initialize the document method"""
super().__init__(serviceContainer)
super().__init__(serviceCenter)
self.name = "document"
self.description = "Handle document operations like extraction and analysis"
self.documentManager = DocumentManager(serviceContainer)
@action
async def extract(self, parameters: Dict[str, Any]) -> ActionResult:
@ -94,7 +92,18 @@ class MethodDocument(MethodBase):
)
# Extract text content from ExtractedContent objects
text_contents = self.service.extractTextFromContentObjects(all_extracted_content)
text_contents = []
for content_obj in all_extracted_content:
if hasattr(content_obj, 'contents') and content_obj.contents:
# Extract text from ContentItem objects
for content_item in content_obj.contents:
if hasattr(content_item, 'data') and content_item.data:
text_contents.append(content_item.data)
elif isinstance(content_obj, str):
text_contents.append(content_obj)
else:
# Fallback: convert to string representation
text_contents.append(str(content_obj))
# Combine all extracted text content
combined_content = "\n\n--- DOCUMENT SEPARATOR ---\n\n".join(text_contents)

View file

@ -16,9 +16,9 @@ logger = logging.getLogger(__name__)
class MethodOutlook(MethodBase):
"""Outlook method implementation for email operations"""
def __init__(self, serviceContainer: Any):
def __init__(self, serviceCenter: Any):
"""Initialize the Outlook method"""
super().__init__(serviceContainer)
super().__init__(serviceCenter)
self.name = "outlook"
self.description = "Handle Microsoft Outlook email operations"

View file

@ -16,8 +16,8 @@ logger = logging.getLogger(__name__)
class MethodSharepoint(MethodBase):
"""SharePoint method implementation for document operations"""
def __init__(self, serviceContainer: Any):
super().__init__(serviceContainer)
def __init__(self, serviceCenter: Any):
super().__init__(serviceCenter)
self.name = "sharepoint"
self.description = "Handle Microsoft SharePoint document operations"

View file

@ -19,9 +19,9 @@ logger = logging.getLogger(__name__)
class MethodWeb(MethodBase):
"""Web method implementation for web operations"""
def __init__(self, serviceContainer: Any):
def __init__(self, serviceCenter: Any):
"""Initialize the web method"""
super().__init__(serviceContainer)
super().__init__(serviceCenter)
self.name = "web"
self.description = "Handle web operations like crawling and scraping"
@ -452,7 +452,7 @@ class MethodWeb(MethodBase):
"query": query
}
else:
# Get user language from service container if available
# Get user language from service center if available
userLanguage = "en" # Default language
if hasattr(self.service, 'user') and hasattr(self.service.user, 'language'):
userLanguage = self.service.user.language

View file

@ -176,7 +176,7 @@ async def get_workflow_status(
) -> ChatWorkflow:
"""Get the current status of a workflow."""
try:
# Get service container
# Get service center
interfaceChat = getServiceChat(currentUser)
# Retrieve workflow
@ -208,7 +208,7 @@ async def get_workflow_logs(
) -> List[ChatLog]:
"""Get logs for a workflow with support for selective data transfer."""
try:
# Get service container
# Get service center
interfaceChat = getServiceChat(currentUser)
# Verify workflow exists
@ -251,7 +251,7 @@ async def get_workflow_messages(
) -> List[ChatMessage]:
"""Get messages for a workflow with support for selective data transfer."""
try:
# Get service container
# Get service center
interfaceChat = getServiceChat(currentUser)
# Verify workflow exists
@ -297,7 +297,7 @@ async def start_workflow(
Corresponds to State 1 in the state machine documentation.
"""
try:
# Get service container
# Get service center
interfaceChat = getServiceChat(currentUser)
# Start or continue workflow using ChatObjects
@ -322,7 +322,7 @@ async def stop_workflow(
) -> ChatWorkflow:
"""Stops a running workflow."""
try:
# Get service container
# Get service center
interfaceChat = getServiceChat(currentUser)
# Stop workflow using ChatObjects
@ -347,7 +347,7 @@ async def delete_workflow(
) -> Dict[str, Any]:
"""Deletes a workflow and its associated data."""
try:
# Get service container
# Get service center
interfaceChat = getServiceChat(currentUser)
# Get raw workflow data from database to check permissions
@ -402,7 +402,7 @@ async def delete_workflow_message(
) -> Dict[str, Any]:
"""Delete a message from a workflow."""
try:
# Get service container
# Get service center
interfaceChat = getServiceChat(currentUser)
# Verify workflow exists
@ -453,7 +453,7 @@ async def delete_file_from_message(
) -> Dict[str, Any]:
"""Delete a file reference from a message in a workflow."""
try:
# Get service container
# Get service center
interfaceChat = getServiceChat(currentUser)
# Verify workflow exists

View file

@ -2,6 +2,7 @@ import asyncio
import logging
import uuid
import json
import time
from typing import Dict, Any, Optional, List, Union
from datetime import datetime, UTC
@ -9,7 +10,7 @@ from modules.interfaces.interfaceAppModel import User
from modules.interfaces.interfaceChatModel import (
TaskStatus, ChatDocument, TaskItem, TaskAction, TaskResult, ChatStat, ChatLog, ChatMessage, ChatWorkflow
)
from modules.workflow.serviceContainer import ServiceContainer
from modules.workflow.serviceCenter import ServiceCenter
from modules.interfaces.interfaceChatObjects import ChatObjects
logger = logging.getLogger(__name__)
@ -20,7 +21,7 @@ class ChatManager:
def __init__(self, currentUser: User, chatInterface: ChatObjects):
self.currentUser = currentUser
self.chatInterface = chatInterface
self.service: ServiceContainer = None
self.service: ServiceCenter = None
self.workflow: ChatWorkflow = None
# Circuit breaker for AI calls
@ -37,7 +38,7 @@ class ChatManager:
async def initialize(self, workflow: ChatWorkflow) -> None:
"""Initialize chat manager with workflow"""
self.workflow = workflow
self.service = ServiceContainer(self.currentUser, self.workflow)
self.service = ServiceCenter(self.currentUser, self.workflow)
# ===== WORKFLOW PHASES =====
@ -119,6 +120,12 @@ class ChatManager:
task_actions.append(task_action)
logger.info(f"Created task action: {task_action.execMethod}.{task_action.execAction}")
# Update stats for task validation (estimate bytes for action validation)
if task_actions:
# Calculate actual action size for stats
action_size = self.service.calculateObjectSize(task_actions)
self.service.updateWorkflowStats(eventLabel="action", bytesSent=action_size)
logger.info(f"Task action definition completed: {len(task_actions)} actions")
return task_actions
@ -265,6 +272,7 @@ class ChatManager:
async def processFileIds(self, fileIds: List[str]) -> List[ChatDocument]:
"""Process file IDs and return ChatDocument objects"""
documents = []
for fileId in fileIds:
try:
# Ensure service is initialized
@ -290,6 +298,8 @@ class ChatManager:
logger.warning(f"No file info found for file ID {fileId}")
except Exception as e:
logger.error(f"Error processing file ID {fileId}: {str(e)}")
return documents
def setUserLanguage(self, language: str) -> None:
@ -768,7 +778,8 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
'documents_metadata': documents_metadata,
'actionId': action_result.get('actionId', ''),
'actionMethod': action_result.get('actionMethod', ''),
'actionName': action_result.get('actionName', '')
'actionName': action_result.get('actionName', ''),
'success_indicator': 'documents' if len(documents_metadata) > 0 else 'text_result' if action_result.get('result', '').strip() else 'none'
}
step_result_serializable['action_results'].append(serializable_action_result)
@ -787,6 +798,13 @@ INSTRUCTIONS:
4. Decide on next action: continue, retry, or fail
5. If retry, provide specific improvements needed
IMPORTANT NOTES:
- Actions can produce either text results OR documents (or both)
- Empty result_summary is acceptable if documents were produced (documents_count > 0)
- Focus on whether the action achieved its intended purpose, not just text output
- Document-based actions (like file extractions) often have empty text results but successful document outputs
- Check the 'success_indicator' field: 'documents' means success via document output, 'text_result' means success via text, 'none' means no output
REQUIRED JSON STRUCTURE:
{{
"status": "success|retry|failed",
@ -829,7 +847,7 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
async def _executeSingleAction(self, action: TaskAction, workflow: ChatWorkflow) -> Dict[str, Any]:
"""Execute a single action and return result with enhanced document processing"""
try:
# Execute the actual method action using the service container
# Execute the actual method action using the service center
result = await self.service.executeAction(
methodName=action.execMethod,
actionName=action.execAction,
@ -943,7 +961,7 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
message_data = {
"workflowId": workflow.id,
"role": "assistant",
"message": f"Executed {action.execMethod}.{action.execAction} successfully",
"message": f"Executed action {action.execMethod}.{action.execAction}",
"status": "step",
"sequenceNr": len(workflow.messages) + 1,
"publishedAt": datetime.now(UTC).isoformat(),
@ -979,7 +997,7 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
file_size = len(str(doc_data))
mime_type = "application/octet-stream"
# Enhanced MIME type detection using service container
# Enhanced MIME type detection using service center
if mime_type == "application/octet-stream":
mime_type = self._detectMimeTypeFromContent(document_data, document_name)
@ -1045,7 +1063,7 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
def _detectMimeTypeFromContent(self, content: Any, filename: str) -> str:
"""
Detect MIME type from content and filename using service container.
Detect MIME type from content and filename using service center.
Only returns a detected MIME type if it's better than application/octet-stream.
Args:
@ -1065,7 +1083,7 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
else:
file_bytes = str(content).encode('utf-8')
# Use service container's MIME type detection
# Use service center's MIME type detection
detected_mime_type = self.service.detectContentTypeFromData(file_bytes, filename)
if detected_mime_type != "application/octet-stream":
return detected_mime_type
@ -1076,7 +1094,7 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
def _detectMimeTypeFromDocument(self, document: Any, filename: str) -> str:
"""
Detect MIME type from document object using service container.
Detect MIME type from document object using service center.
Only returns a detected MIME type if it's better than application/octet-stream.
Args:
@ -1094,7 +1112,7 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
else:
file_bytes = str(content).encode('utf-8')
# Use service container's MIME type detection
# Use service center's MIME type detection
detected_mime_type = self.service.detectContentTypeFromData(file_bytes, filename)
if detected_mime_type != "application/octet-stream":
return detected_mime_type
@ -1222,8 +1240,11 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
action_results = review_context.get('action_results', [])
if action_results:
# Check for common issues that warrant retry
# Only consider empty results a problem if there are no documents produced
has_empty_results = any(
not result.get('result', '').strip()
not result.get('result', '').strip() and
not result.get('documents', []) and
not result.get('documents_metadata', [])
for result in action_results
if result.get('status') == 'completed'
)
@ -1417,7 +1438,7 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
assistant_messages = [msg for msg in workflow.messages if msg.role == 'assistant']
# Generate summary feedback
feedback = f"Workflow completed successfully.\n\n"
feedback = f"Workflow completed.\n\n"
feedback += f"Processed {len(user_messages)} user inputs and generated {len(assistant_messages)} responses.\n"
# Add final status
@ -1437,36 +1458,38 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
# ===== UNIFIED WORKFLOW EXECUTION =====
async def executeUnifiedWorkflow(self, userInput: str, workflow: ChatWorkflow) -> Dict[str, Any]:
"""Execute workflow using the new unified phases with retry logic"""
"""Execute a unified workflow with all phases"""
try:
logger.info(f"Starting unified workflow execution for workflow {workflow.id}")
start_time = time.time()
# Create user-friendly progress log
self.chatInterface.createWorkflowLog({
"workflowId": workflow.id,
"message": "Starting workflow analysis and planning",
"type": "info",
"status": "running",
"progress": 5,
"agentName": "System"
})
# Initialize chat manager with workflow
await self.initialize(workflow)
# Process file IDs if provided
documents = []
if hasattr(userInput, 'listFileId') and userInput.listFileId:
documents = await self.processFileIds(userInput.listFileId)
logger.info(f"Processed {len(documents)} documents")
# Calculate and update user input stats
user_input_size = self.service.calculateUserInputSize(userInput)
self.service.updateWorkflowStats(eventLabel="userinput", bytesReceived=user_input_size)
# Phase 1: High-Level Task Planning
logger.info("=== PHASE 1: HIGH-LEVEL TASK PLANNING ===")
task_plan = await self.planHighLevelTasks(userInput, workflow)
if not task_plan or not task_plan.get('tasks'):
logger.error("Failed to create task plan")
return {
'status': 'failed',
'error': 'Failed to create task plan',
'phase': 'planning'
}
logger.info("--- PHASE 1: HIGH-LEVEL TASK PLANNING ---")
task_plan = await self.planHighLevelTasks(userInput.prompt, workflow)
# Update stats for task planning
task_plan_size = self.service.calculateObjectSize(task_plan)
self.service.updateWorkflowStats(eventLabel="taskplan", bytesSent=task_plan_size)
# Create user-friendly task plan log
tasks_count = len(task_plan.get('tasks', []))
task_descriptions = "\n".join([f"- {task.get('description', 'No description')}" for task in task_plan.get('tasks', [])])
self.chatInterface.createWorkflowLog({
"workflowId": workflow.id,
"message": f"Planning completed: {tasks_count} tasks identified",
"message": f"Planning completed: {tasks_count} tasks identified\n{task_descriptions}",
"type": "info",
"status": "running",
"progress": 15,
@ -1598,22 +1621,29 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
logger.debug(f"TASK {i+1} ACTIONS CREATED: {json.dumps(task_actions_serializable, indent=2, ensure_ascii=False)}")
# Phase 3: Execute Task Actions
logger.info(f"--- PHASE 3: EXECUTING ACTIONS FOR TASK {i+1} ---")
logger.info(f"--- PHASE 3: EXECUTING TASK {i+1} ACTIONS ---")
action_results = await self.executeTaskActions(task_actions, workflow)
# Update stats for action execution
# Action stats are already handled by the service center during AI calls
# Create user-friendly action completion log with quality metrics
successful_actions = sum(1 for result in action_results if result.get('status') == 'completed')
total_actions = len(action_results)
if total_actions > 0:
quality_percentage = (successful_actions / total_actions) * 100
if successful_actions == total_actions:
log_type = "success"
elif successful_actions == 0:
log_type = "error"
else:
log_type = "warning"
self.chatInterface.createWorkflowLog({
"workflowId": workflow.id,
"message": f"Task {i+1} actions completed: {successful_actions}/{total_actions} successful ({quality_percentage:.0f}% quality)",
"type": "success" if quality_percentage >= 80 else "warning" if quality_percentage >= 60 else "error",
"message": f"Successful actions: {successful_actions}/{total_actions}",
"type": log_type,
"status": "running",
"progress": progress + 10,
"agentName": "System"
"progress": progress + 10
})
# Log action results (with metadata only)
@ -1653,6 +1683,9 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
logger.info(f"--- PHASE 4: REVIEWING TASK {i+1} COMPLETION ---")
review_result = await self.reviewTaskCompletion(task_step, task_actions, action_results, workflow)
# Update stats for task review
# Task review stats are already handled by the service center during AI calls
# Create user-friendly review log with quality metrics
quality_metrics = review_result.get('quality_metrics', {})
quality_score = quality_metrics.get('score', 0)
@ -1662,29 +1695,62 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
if review_status == 'success':
self.chatInterface.createWorkflowLog({
"workflowId": workflow.id,
"message": f"Task {i+1} completed successfully (Quality: {quality_score:.0f}%, Confidence: {confidence:.0f}%)",
"message": f"🎯 Task completed successfully with quality score {quality_score} and confidence {confidence}",
"type": "success",
"status": "running",
"progress": progress + 20,
"agentName": "System"
"progress": progress + 20
})
elif review_status == 'retry':
# Extract improvement details
improvements = review_result.get('improvements', '')
reason = review_result.get('reason', '')
unmet_criteria = review_result.get('unmet_criteria', [])
# Build detailed message
retry_details = []
if reason:
retry_details.append(f"Reason: {reason}")
if improvements:
retry_details.append(f"Improvements: {improvements}")
if unmet_criteria:
retry_details.append(f"Missing criteria: {', '.join(unmet_criteria[:3])}{'...' if len(unmet_criteria) > 3 else ''}")
retry_message = f"🔄 Task needs improvement"
if retry_details:
retry_message += f"\n{chr(10).join(retry_details)}"
self.chatInterface.createWorkflowLog({
"workflowId": workflow.id,
"message": f"Task {i+1} needs improvement (Quality: {quality_score:.0f}%, Confidence: {confidence:.0f}%)",
"message": retry_message,
"type": "warning",
"status": "running",
"progress": progress + 15,
"agentName": "System"
"progress": progress + 15
})
else:
# Extract failure details
reason = review_result.get('reason', '')
unmet_criteria = review_result.get('unmet_criteria', [])
missing_outputs = review_result.get('missing_outputs', [])
# Build detailed failure message
failure_details = []
if reason:
failure_details.append(f"Reason: {reason}")
if unmet_criteria:
failure_details.append(f"Unmet criteria: {', '.join(unmet_criteria[:3])}{'...' if len(unmet_criteria) > 3 else ''}")
if missing_outputs:
failure_details.append(f"Missing outputs: {', '.join(missing_outputs[:3])}{'...' if len(missing_outputs) > 3 else ''}")
failure_message = f"❌ Task failed"
if failure_details:
failure_message += f"\n{chr(10).join(failure_details)}"
self.chatInterface.createWorkflowLog({
"workflowId": workflow.id,
"message": f"Task {i+1} failed (Quality: {quality_score:.0f}%, Confidence: {confidence:.0f}%)",
"message": failure_message,
"type": "error",
"status": "running",
"progress": progress + 15,
"agentName": "System"
"progress": progress + 15
})
# Log review result (with metadata only)
@ -1724,7 +1790,7 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
previous_review_feedback = review_result.get('improvements', '')
retry_count += 1
if retry_count >= max_retries:
if retry_count > max_retries:
logger.error(f"Task {i+1} failed after {max_retries} retries")
task_success = False
else:
@ -1775,35 +1841,37 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
# Final workflow summary
successful_tasks = sum(1 for result in workflow_results if result.get('task_success', False))
total_tasks = len(workflow_results)
total_tasks = len(task_plan['tasks'])
# Final workflow stats are already handled by the service center during AI calls
# Calculate total processing time
total_processing_time = time.time() - start_time
# Create final user-friendly completion log
if successful_tasks == total_tasks:
self.chatInterface.createWorkflowLog({
"workflowId": workflow.id,
"message": f"Workflow completed successfully: {successful_tasks}/{total_tasks} tasks completed",
"message": f"🎉 Workflow completed ({successful_tasks}/{total_tasks} tasks)",
"type": "success",
"status": "completed",
"progress": 100,
"agentName": "System"
"progress": 100
})
elif successful_tasks > 0:
self.chatInterface.createWorkflowLog({
"workflowId": workflow.id,
"message": f"Workflow completed partially: {successful_tasks}/{total_tasks} tasks completed",
"message": f"⚠️ Workflow partially completed ({successful_tasks}/{total_tasks} tasks)",
"type": "warning",
"status": "completed",
"progress": 100,
"agentName": "System"
"progress": 100
})
else:
self.chatInterface.createWorkflowLog({
"workflowId": workflow.id,
"message": f"Workflow failed: {successful_tasks}/{total_tasks} tasks completed",
"message": f"Workflow failed ({successful_tasks}/{total_tasks} tasks)",
"type": "error",
"status": "failed",
"progress": 100,
"agentName": "System"
"progress": 100
})
# Create serializable workflow results (with metadata only)
@ -1836,7 +1904,8 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
'documents_metadata': documents_metadata,
'actionId': action_result.get('actionId', ''),
'actionMethod': action_result.get('actionMethod', ''),
'actionName': action_result.get('actionName', '')
'actionName': action_result.get('actionName', ''),
'success_indicator': 'documents' if len(documents_metadata) > 0 else 'text_result' if action_result.get('result', '').strip() else 'none'
}
action_results_metadata.append(action_result_metadata)

View file

@ -1,73 +0,0 @@
"""
Document Manager Module for handling document operations and content extraction.
"""
import logging
from modules.interfaces.interfaceChatModel import (
ChatDocument,
ExtractedContent
)
from modules.workflow.processorDocument import DocumentProcessor
logger = logging.getLogger(__name__)
class DocumentManager:
    """Manager for document operations and content extraction.

    Thin facade over DocumentProcessor: it resolves the raw bytes, filename
    and MIME type from a ChatDocument (or accepts them directly) and then
    delegates the AI-assisted extraction to the processor.
    """

    def __init__(self, serviceContainer):
        self.service = serviceContainer
        # The processor needs the service container so it can issue AI calls.
        self._processor = DocumentProcessor(serviceContainer)

    async def extractContentFromDocument(self, prompt: str, document: ChatDocument) -> ExtractedContent:
        """Extract content from ChatDocument using prompt.

        Resolves the document's payload (inline data, or fetched through the
        service layer via fileId), runs the processor on it, and stamps the
        result with the source document's identity.

        Raises:
            ValueError: when neither inline data nor a fileId is available.
        """
        try:
            if document.data:
                # Inline payload: normalise str payloads to bytes.
                payload = document.data.encode('utf-8') if isinstance(document.data, str) else document.data
            elif getattr(document, 'fileId', None):
                # No inline payload; fetch the bytes through the service layer.
                payload = self.service.getFileData(document.fileId)
            else:
                logger.error(f"No file data available in document: {document}")
                raise ValueError("No file data available in document")

            # Fall back to generic values when the document lacks metadata.
            name = getattr(document, 'filename', "document")
            contentType = getattr(document, 'mimeType', "application/octet-stream")

            extracted = await self._processor.processFileData(
                fileData=payload,
                filename=name,
                mimeType=contentType,
                base64Encoded=False,
                prompt=prompt
            )

            # Tie the extraction result back to its source document.
            extracted.objectId = document.id
            extracted.objectType = "ChatDocument"
            return extracted
        except Exception as e:
            logger.error(f"Error extracting from document: {str(e)}")
            raise

    async def extractContentFromFileData(self, prompt: str, fileData: bytes, filename: str, mimeType: str, base64Encoded: bool = False, documentId: str = None) -> ExtractedContent:
        """Extract content from file data directly using prompt.

        Pure pass-through to the processor; logs and re-raises on failure.
        """
        try:
            return await self._processor.processFileData(
                fileData=fileData,
                filename=filename,
                mimeType=mimeType,
                base64Encoded=base64Encoded,
                prompt=prompt,
                documentId=documentId
            )
        except Exception as e:
            logger.error(f"Error extracting from file data: {str(e)}")
            raise

View file

@ -20,9 +20,9 @@ def action(func):
class MethodBase:
"""Base class for all methods"""
def __init__(self, serviceContainer: Any):
"""Initialize method with service container"""
self.service = serviceContainer
def __init__(self, serviceCenter: Any):
"""Initialize method with service center"""
self.service = serviceCenter
self.name: str
self.description: str
self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}")

View file

@ -32,10 +32,10 @@ class FileProcessingError(Exception):
class DocumentProcessor:
"""Processor for handling document operations and content extraction."""
def __init__(self, serviceContainer=None):
def __init__(self, serviceCenter=None):
"""Initialize the document processor."""
self._neutralizer = DataAnonymizer() if APP_CONFIG.get("ENABLE_CONTENT_NEUTRALIZATION", False) else None
self._serviceContainer = serviceContainer
self._serviceCenter = serviceCenter
self.supportedTypes: Dict[str, Callable[[bytes, str, str], Awaitable[List[ContentItem]]]] = {
'text/plain': self._processText,
@ -136,7 +136,7 @@ class DocumentProcessor:
# Detect content type if needed
if mimeType == "application/octet-stream":
mimeType = self._serviceContainer.detectContentTypeFromData(fileData, filename)
mimeType = self._serviceCenter.detectContentTypeFromData(fileData, filename)
# Process document based on type
if mimeType not in self.supportedTypes:
@ -527,7 +527,7 @@ class DocumentProcessor:
# chunk is already base64 encoded string from _processImage
# Use the original prompt directly for images (no content embedding)
logger.debug(f"Calling image AI service for MIME type: {mimeType}")
processedContent = await self._serviceContainer.callAiImageBasic(prompt, chunk, mimeType)
processedContent = await self._serviceCenter.callAiImageBasic(prompt, chunk, mimeType)
else:
# For text content, use text AI service
# Neutralize content if neutralizer is enabled (only for text)
@ -548,7 +548,7 @@ class DocumentProcessor:
"""
logger.debug(f"Calling text AI service for MIME type: {mimeType}")
processedContent = await self._serviceContainer.callAiTextBasic(aiPrompt, contentToProcess)
processedContent = await self._serviceCenter.callAiTextBasic(aiPrompt, contentToProcess)
chunkResults.append(processedContent)
except Exception as aiError:

View file

@ -8,14 +8,14 @@ from modules.interfaces.interfaceAppModel import User, UserConnection
from modules.interfaces.interfaceChatModel import (
TaskStatus, ChatDocument, TaskItem, TaskAction, TaskResult,
ChatStat, ChatLog, ChatMessage, ChatWorkflow, DocumentExchange
ChatStat, ChatLog, ChatMessage, ChatWorkflow, DocumentExchange, ExtractedContent
)
from modules.interfaces.interfaceAiCalls import AiCalls
from modules.interfaces.interfaceChatObjects import getInterface as getChatObjects
from modules.interfaces.interfaceChatModel import ActionResult
from modules.interfaces.interfaceComponentObjects import getInterface as getComponentObjects
from modules.interfaces.interfaceAppObjects import getInterface as getAppObjects
from modules.workflow.managerDocument import DocumentManager
from modules.workflow.processorDocument import DocumentProcessor
from modules.workflow.methodBase import MethodBase
import uuid
import base64
@ -23,8 +23,8 @@ import hashlib
logger = logging.getLogger(__name__)
class ServiceContainer:
"""Service container that provides access to all services and their functions"""
class ServiceCenter:
"""Service center that provides access to all services and their functions"""
def __init__(self, currentUser: User, workflow: ChatWorkflow):
# Core services
@ -39,7 +39,7 @@ class ServiceContainer:
self.interfaceComponent = getComponentObjects(currentUser)
self.interfaceApp = getAppObjects(currentUser)
self.interfaceAiCalls = AiCalls()
self.documentManager = DocumentManager(self)
self.documentProcessor = DocumentProcessor(self)
# Initialize methods catalog
self.methods = {}
@ -115,7 +115,7 @@ class ServiceContainer:
def detectContentTypeFromData(self, fileData: bytes, filename: str) -> str:
"""
Detect content type from file data and filename.
This method makes the MIME type detection function accessible through the service container.
This method makes the MIME type detection function accessible through the service center.
Args:
fileData: Raw file data as bytes
@ -263,17 +263,11 @@ class ServiceContainer:
# ===== Functions =====
def extractContent(self, prompt: str, document: ChatDocument) -> str:
def extractContent(self, prompt: str, document: ChatDocument) -> ExtractedContent:
"""Extract content from document using prompt"""
return self.documentManager.extractContentFromDocument(prompt, document)
return self.extractContentFromDocument(prompt, document)
async def extractContentFromFileData(self, prompt: str, fileData: bytes, filename: str, mimeType: str, base64Encoded: bool = False, documentId: str = None) -> str:
"""Extract content from file data directly using prompt"""
extracted_content = await self.documentManager.extractContentFromFileData(prompt, fileData, filename, mimeType, base64Encoded, documentId)
# Convert ExtractedContent to string for backward compatibility
if hasattr(extracted_content, 'contents'):
return "\n".join([item.data for item in extracted_content.contents])
return str(extracted_content)
def getMethodsCatalog(self) -> Dict[str, Any]:
"""Get catalog of available methods and their actions"""
@ -502,7 +496,7 @@ Instructions:
Please provide a comprehensive summary of this conversation."""
# Get summary using AI
return await self.interfaceAiCalls.callAiTextBasic(prompt)
return await self.callAiTextBasic(prompt)
except Exception as e:
logger.error(f"Error summarizing chat: {str(e)}")
@ -535,27 +529,81 @@ Instructions:
Please provide a clear summary of this message."""
# Get summary using AI
return await self.interfaceAiCalls.callAiTextBasic(prompt)
return await self.callAiTextBasic(prompt)
except Exception as e:
logger.error(f"Error summarizing message: {str(e)}")
return f"Error summarizing message: {str(e)}"
def callAiTextBasic(self, prompt: str, context: str = None) -> str:
async def callAiTextBasic(self, prompt: str, context: str = None) -> str:
"""Basic text processing using OpenAI"""
return self.interfaceAiCalls.callAiTextBasic(prompt, context)
# Calculate prompt size for stats
prompt_size = self.calculateObjectSize(prompt)
if context:
prompt_size += self.calculateObjectSize(context)
def callAiTextAdvanced(self, prompt: str, context: str = None) -> str:
# Call AI
response = await self.interfaceAiCalls.callAiTextBasic(prompt, context)
# Calculate response size for stats
response_size = self.calculateObjectSize(response)
# Update stats
self.updateWorkflowStats(eventLabel="aicall.openai.text", bytesSent=prompt_size, bytesReceived=response_size)
return response
async def callAiTextAdvanced(self, prompt: str, context: str = None) -> str:
"""Advanced text processing using Anthropic"""
return self.interfaceAiCalls.callAiTextAdvanced(prompt, context)
# Calculate prompt size for stats
prompt_size = self.calculateObjectSize(prompt)
if context:
prompt_size += self.calculateObjectSize(context)
def callAiImageBasic(self, prompt: str, imageData: str, mimeType: str) -> str:
# Call AI
response = await self.interfaceAiCalls.callAiTextAdvanced(prompt, context)
# Calculate response size for stats
response_size = self.calculateObjectSize(response)
# Update stats
self.updateWorkflowStats(eventLabel="aicall.anthropic.text", bytesSent=prompt_size, bytesReceived=response_size)
return response
async def callAiImageBasic(self, prompt: str, imageData: str, mimeType: str) -> str:
"""Basic image processing using OpenAI"""
return self.interfaceAiCalls.callAiImageBasic(prompt, imageData, mimeType)
# Calculate prompt size for stats
prompt_size = self.calculateObjectSize(prompt)
prompt_size += self.calculateObjectSize(imageData)
def callAiImageAdvanced(self, prompt: str, imageData: str, mimeType: str) -> str:
# Call AI
response = await self.interfaceAiCalls.callAiImageBasic(prompt, imageData, mimeType)
# Calculate response size for stats
response_size = self.calculateObjectSize(response)
# Update stats
self.updateWorkflowStats(eventLabel="aicall.openai.image", bytesSent=prompt_size, bytesReceived=response_size)
return response
async def callAiImageAdvanced(self, prompt: str, imageData: str, mimeType: str) -> str:
"""Advanced image processing using Anthropic"""
return self.interfaceAiCalls.callAiImageAdvanced(prompt, imageData, mimeType)
# Calculate prompt size for stats
prompt_size = self.calculateObjectSize(prompt)
prompt_size += self.calculateObjectSize(imageData)
# Call AI
response = await self.interfaceAiCalls.callAiImageAdvanced(prompt, imageData, mimeType)
# Calculate response size for stats
response_size = self.calculateObjectSize(response)
# Update stats
self.updateWorkflowStats(eventLabel="aicall.anthropic.image", bytesSent=prompt_size, bytesReceived=response_size)
return response
def getFileInfo(self, fileId: str) -> Dict[str, Any]:
"""Get file information"""
@ -575,6 +623,59 @@ Please provide a clear summary of this message."""
"""Get file data by ID"""
return self.interfaceComponent.getFileData(fileId)
async def extractContentFromDocument(self, prompt: str, document: ChatDocument) -> ExtractedContent:
    """Extract content from a ChatDocument using the given prompt.

    Resolves the raw file bytes either from the document's inline data
    or, as a fallback, from the file store via its fileId, then delegates
    extraction to the document processor.

    Args:
        prompt: Instruction describing what to extract.
        document: Source ChatDocument.

    Returns:
        ExtractedContent tagged with the document's id and objectType
        "ChatDocument".

    Raises:
        ValueError: If neither inline data nor a fileId is available.
    """
    try:
        # Use getattr consistently: some document variants carry inline
        # data, others only a fileId — direct attribute access would
        # raise AttributeError on models without a 'data' field.
        data = getattr(document, 'data', None)
        if data:
            fileData = data.encode('utf-8') if isinstance(data, str) else data
        elif getattr(document, 'fileId', None):
            # Fall back to the service center's file store.
            fileData = self.getFileData(document.fileId)
        else:
            logger.error(f"No file data available in document: {document}")
            raise ValueError("No file data available in document")
        # Defaults mirror the original defensive hasattr checks.
        filename = getattr(document, 'filename', "document")
        mimeType = getattr(document, 'mimeType', "application/octet-stream")
        # Process with document processor directly
        extractedContent = await self.documentProcessor.processFileData(
            fileData=fileData,
            filename=filename,
            mimeType=mimeType,
            base64Encoded=False,
            prompt=prompt,
            documentId=document.id
        )
        # Tag the result so downstream consumers can trace its origin.
        extractedContent.objectId = document.id
        extractedContent.objectType = "ChatDocument"
        return extractedContent
    except Exception as e:
        logger.error(f"Error extracting from document: {str(e)}")
        raise
async def extractContentFromFileData(self, prompt: str, fileData: bytes, filename: str, mimeType: str, base64Encoded: bool = False, documentId: str = None) -> ExtractedContent:
    """Extract content from raw file bytes using the given prompt.

    Thin async wrapper around the document processor; any failure is
    logged and re-raised unchanged for the caller to handle.
    """
    try:
        result = await self.documentProcessor.processFileData(
            prompt=prompt,
            fileData=fileData,
            filename=filename,
            mimeType=mimeType,
            base64Encoded=base64Encoded,
            documentId=documentId,
        )
        return result
    except Exception as e:
        logger.error(f"Error extracting from file data: {str(e)}")
        raise
def createFile(self, fileName: str, mimeType: str, content: str, base64encoded: bool = False) -> str:
"""Create new file and return its ID"""
# Convert content to bytes based on base64 flag
@ -613,29 +714,85 @@ Please provide a clear summary of this message."""
mimeType=mimeType
)
def extractTextFromContentObjects(self, content_objects: List[Any]) -> List[str]:
def updateWorkflowStats(self, eventLabel: str = None, bytesSent: int = 0, bytesReceived: int = 0, tokenCount: int = 0) -> None:
"""
Extract text content from ExtractedContent objects or other content objects.
Centralized function to update workflow statistics in database and running workflow.
Args:
content_objects: List of ExtractedContent objects or other content objects
eventLabel: Label for the event (e.g., "userinput", "taskplan", "action", "aicall<ainame>")
bytesSent: Bytes sent (incremental)
bytesReceived: Bytes received (incremental)
tokenCount: Token count (incremental, default 0)
"""
try:
if hasattr(self, 'workflow') and self.workflow:
# Update the running workflow stats
self.interfaceChat.updateWorkflowStats(
self.workflow.id,
bytesSent=bytesSent,
bytesReceived=bytesReceived
)
# Log the stats event
logger.debug(f"Workflow stats updated - Event: {eventLabel}, Sent: {bytesSent}, Received: {bytesReceived}, Tokens: {tokenCount}")
except Exception as e:
logger.error(f"Error updating workflow stats: {str(e)}")
def calculateObjectSize(self, obj: Any) -> int:
"""
Calculate the size of an object in bytes.
Args:
obj: Object to calculate size for
Returns:
List of extracted text strings
int: Size in bytes
"""
text_contents = []
for content_obj in content_objects:
if hasattr(content_obj, 'contents') and content_obj.contents:
# Extract text from ContentItem objects
for content_item in content_obj.contents:
if hasattr(content_item, 'data') and content_item.data:
text_contents.append(content_item.data)
elif isinstance(content_obj, str):
text_contents.append(content_obj)
else:
# Fallback: convert to string representation
text_contents.append(str(content_obj))
return text_contents
try:
import json
import sys
if obj is None:
return 0
# Convert object to JSON string and calculate size
json_str = json.dumps(obj, ensure_ascii=False, default=str)
return len(json_str.encode('utf-8'))
except Exception as e:
logger.error(f"Error calculating object size: {str(e)}")
return 0
def calculateUserInputSize(self, userInput: Any) -> int:
    """
    Calculate size of user input including attached file sizes.

    Args:
        userInput: User input object

    Returns:
        int: Total size in bytes (0 on any error)
    """
    try:
        size = 0
        # Size of the textual prompt, if the object carries one.
        if hasattr(userInput, 'prompt'):
            size += self.calculateObjectSize(userInput.prompt)
        # Add the stored size of every referenced file.
        file_ids = getattr(userInput, 'listFileId', None) or []
        for file_id in file_ids:
            info = self.getFileInfo(file_id)
            if info:
                size += info.get('size', 0)
        return size
    except Exception as e:
        logger.error(f"Error calculating user input size: {str(e)}")
        return 0
async def executeAction(self, methodName: str, actionName: str, parameters: Dict[str, Any]) -> ActionResult:
"""Execute a method action"""
@ -659,9 +816,9 @@ Please provide a clear summary of this message."""
# Create singleton instance
serviceObject = None
def initializeServiceContainer(currentUser: User, workflow: ChatWorkflow) -> ServiceContainer:
"""Initialize the service container singleton"""
def initializeServiceCenter(currentUser: User, workflow: ChatWorkflow) -> ServiceCenter:
"""Initialize the service center singleton"""
global serviceObject
if serviceObject is None:
serviceObject = ServiceContainer(currentUser, workflow)
serviceObject = ServiceCenter(currentUser, workflow)
return serviceObject

View file

@ -1,226 +0,0 @@
# Workflow Architecture Documentation
## Overview
The workflow system has been refactored into a clear, structured approach with 5 distinct phases. This eliminates redundancies and provides better error handling, quality assessment, and maintainability.
## Architecture Principles
### 1. **Clear Phase Separation**
Each workflow phase has a specific responsibility and clear inputs/outputs.
### 2. **Unified Data Model**
Standardized on `TaskAction` objects throughout the system.
### 3. **Consistent Prompt Generation**
All AI interactions use dedicated prompt generation functions.
### 4. **Quality Assessment**
Each task is reviewed before proceeding to the next.
## Workflow Phases
### **Phase 1: High-Level Task Planning**
**Function:** `planHighLevelTasks()`
**Purpose:** Analyze user request and create a structured task plan
**Input:** User input, available documents
**Output:** Task plan with multiple task steps
**Prompt Function:** `_createTaskPlanningPrompt()`
```python
task_plan = await chatManager.planHighLevelTasks(userInput, workflow)
```
### **Phase 2: Task Definition and Action Generation**
**Function:** `defineTaskActions()`
**Purpose:** Define specific actions for each task step
**Input:** Task step, workflow context, previous results
**Output:** List of TaskAction objects
**Prompt Function:** `_createActionDefinitionPrompt()`
```python
task_actions = await chatManager.defineTaskActions(task_step, workflow, previous_results)
```
### **Phase 3: Action Execution**
**Function:** `executeTaskActions()`
**Purpose:** Execute all actions for a task step
**Input:** List of TaskAction objects
**Output:** List of action results
**Prompt Function:** `_createActionExecutionPrompt()`
```python
action_results = await chatManager.executeTaskActions(task_actions, workflow)
```
### **Phase 4: Task Review and Quality Assessment**
**Function:** `reviewTaskCompletion()`
**Purpose:** Review task completion and decide next steps
**Input:** Task step, actions, results
**Output:** Review result with quality metrics
**Prompt Function:** `_createResultReviewPrompt()`
```python
review_result = await chatManager.reviewTaskCompletion(task_step, task_actions, action_results, workflow)
```
### **Phase 5: Task Handover and State Management**
**Function:** `prepareTaskHandover()`
**Purpose:** Prepare results for next task or workflow completion
**Input:** Task step, actions, review result
**Output:** Handover data for next iteration
**Prompt Function:** None (data processing only)
```python
handover_data = await chatManager.prepareTaskHandover(task_step, task_actions, review_result, workflow)
```
## Unified Workflow Execution
### **Main Entry Point**
**Function:** `executeUnifiedWorkflow()`
**Purpose:** Orchestrate all phases in sequence
**Input:** User input, workflow
**Output:** Complete workflow results
```python
workflow_result = await chatManager.executeUnifiedWorkflow(userInput.prompt, workflow)
```
### **Workflow Flow**
```
1. planHighLevelTasks() → Task Plan
2. For each task step:
├── defineTaskActions() → Task Actions
├── executeTaskActions() → Action Results
├── reviewTaskCompletion() → Review Result
└── prepareTaskHandover() → Handover Data
3. Return workflow summary
```
## Prompt Generation Functions
| **Function** | **Used In** | **Purpose** |
|-------------|-------------|-------------|
| `_createTaskPlanningPrompt()` | `planHighLevelTasks()` | Generate high-level task plan |
| `_createActionDefinitionPrompt()` | `defineTaskActions()` | Generate specific actions for task |
| `_createActionExecutionPrompt()` | `executeTaskActions()` | Execute individual actions |
| `_createResultReviewPrompt()` | `reviewTaskCompletion()` | Review task completion |
## Data Models
### **TaskAction Object**
```python
class TaskAction:
id: str
execMethod: str
execAction: str
execParameters: Dict[str, Any]
execResultLabel: Optional[str]
status: TaskStatus
error: Optional[str]
result: Optional[str]
# ... other fields
```
### **Workflow Result Structure**
```python
{
'status': 'completed' | 'partial' | 'failed',
'successful_tasks': int,
'total_tasks': int,
'workflow_results': List[Dict],
'final_results': List[str]
}
```
## Error Handling
### **Phase-Level Error Handling**
Each phase has its own error handling:
- **Planning:** Fallback to basic task plan
- **Definition:** Skip task if no actions defined
- **Execution:** Stop on first action failure
- **Review:** Default to success to avoid blocking
- **Handover:** Provide empty results on error
### **Circuit Breaker Pattern**
AI calls use circuit breaker pattern to prevent cascading failures.
## Quality Metrics
### **Task Quality Assessment**
- Success rate of actions
- Completion of expected outputs
- Meeting of success criteria
- Confidence scores
### **Workflow Quality Metrics**
- Overall success rate
- Task completion percentage
- Error patterns and suggestions
## Benefits of Refactored Architecture
### **1. Clear Separation of Concerns**
Each phase has a single responsibility and clear interfaces.
### **2. Better Error Handling**
Granular error handling at each phase with appropriate fallbacks.
### **3. Quality Assessment**
Built-in review and quality metrics for each task.
### **4. Maintainability**
Consistent patterns and unified data models.
### **5. Extensibility**
Easy to add new phases or modify existing ones.
### **6. Debugging**
Clear logging and error reporting at each phase.
## Migration Path
### **Legacy Methods**
All legacy methods are preserved for backward compatibility:
- `createInitialTask()`
- `createNextTask()`
- `executeTask()`
- `executeAction()`
### **New Unified Approach**
Use `executeUnifiedWorkflow()` for new implementations.
## Usage Example
```python
# Initialize chat manager
await chatManager.initialize(workflow)
# Execute unified workflow
workflow_result = await chatManager.executeUnifiedWorkflow(userInput.prompt, workflow)
# Process results
if workflow_result['status'] == 'completed':
print(f"Workflow completed: {workflow_result['successful_tasks']}/{workflow_result['total_tasks']} tasks")
else:
print(f"Workflow failed: {workflow_result['error']}")
```
## Future Enhancements
### **1. Retry Logic**
Add exponential backoff retry for failed tasks.
### **2. Alternative Approaches**
When primary method fails, try alternative approaches.
### **3. Parallel Execution**
Execute independent tasks in parallel.
### **4. Progress Tracking**
Real-time progress updates during workflow execution.
### **5. Rollback Mechanisms**
Undo failed operations and restore previous state.

View file

@ -993,95 +993,6 @@ Ich möchte den agentenchat workflow ändern. kannst du mir bitte dazu in einem
3. Neue Objektstruktur für den workflow ablauf:
workflow =
{
// Core workflow properties
"id": "workflow_uuid",
"name": "Analysis Workflow",
"mandate_id": 123,
"user_id": 456,
"status": "running", // "running", "failed", "stopped"
"started_at": "2025-03-29T14:15:00.000Z",
"last_activity": "2025-03-29T14:45:00.000Z",
"current_round": 1,
"waiting_for_user": false,
// Performance statistics (sum)
"data_stats": {
"total_processing_time": 3.9,
"total_token_count": 857,
"total_bytes_sent": 1026323,
"total_bytes_received": 4200,
}
// Messages array - main conversation history with structured message objects
"messages": [],
// Logs
"logs": [
{
"id": "log_uuid1",
"message": "Workflow started",
"type": "info",
"timestamp": "2025-03-29T14:15:00.000Z"
}
]
}
"messages": [
{
// Core message properties
"id": "msg_uuid", // Unique identifier for each message
"workflow_id": "workflow_uuid", // Reference to the parent workflow
"parent_message_id": "msg_previous_uuid", // Reference to message being responded to
"started_at": "2025-03-29T14:30:00.000Z", // Single timestamp for message creation
"finished_at": "2025-03-29T14:30:00.000Z", // Single timestamp for message closing, when next message is created
"sequence_no": 1, // Optional, but useful for ordering within workflow
// Status information
"status": "completed", // message status: "pending", "processing", "completed", "failed"
// Role instead of agent information
"role": "system", // "system", "user", "assistant" - who created this message
// Metadata for statistics and accounting
"data_stats": {
"processing_time": 2.5, // Time taken to generate in seconds
"token_count": 1205, // Token count (for AI models)
"bytes_sent": 4096, // Data sent to generate this message
"bytes_received": 8192, // Data received
}
// Documents section - includes prompt and all referenced files
"documents": [
{
// Document metadata
"id": "doc_uuid",
"source": {
"type": "prompt", // "prompt", "file", "clipboard"
"path": "/full/path/to/file.txt", // Storage path (for files)
"name": "display_filename.txt",
"size": 1024, // Size in bytes
"lines": 42, // Line count (for text files)
"content_type": "text/plain", // MIME type
"upload_date": "2025-03-29T14:30:00.000Z"
},
// Document contents (can have multiple parts)
"contents": [
{
"label": "Main Content", // Optional label
"type": "text", // "text", "image", "chart", etc.
"text": "The actual text content",
"is_extracted": true // Flag if this is extracted from original file
}
]
}
],
}
]
4. Die Schritte in einem Workflow (neu) - bitte den code revidieren und alle unnötigen teile entfernen.

View file

@ -1,187 +0,0 @@
# Document Management Refactoring Specification
## Overview
This specification outlines the refactoring of document management in the system, focusing on proper model separation, centralized content extraction, and future-proof neutralization integration.
## Model Structure
### Base Document Models
```python
class ContentMetadata(BaseModel, ModelMixin):
"""Metadata for content items"""
size: int = Field(description="Content size in bytes")
pages: Optional[int] = Field(None, description="Number of pages for multi-page content")
error: Optional[str] = Field(None, description="Processing error if any")
# Media-specific attributes
width: Optional[int] = Field(None, description="Width in pixels for images/videos")
height: Optional[int] = Field(None, description="Height in pixels for images/videos")
colorMode: Optional[str] = Field(None, description="Color mode (e.g., RGB, CMYK, grayscale)")
fps: Optional[float] = Field(None, description="Frames per second for videos")
durationSec: Optional[float] = Field(None, description="Duration in seconds for videos/audio")
class ContentItem(BaseModel, ModelMixin):
"""Individual content item from a document"""
label: str = Field(description="Content label (e.g., tab name, tag name)")
data: str = Field(description="Text content")
metadata: ContentMetadata = Field(description="Content metadata")
class ChatDocument(BaseModel, ModelMixin):
id: str = Field(default_factory=lambda: str(uuid.uuid4()))
fileId: str
filename: str
fileSize: int
mimeType: str
class TaskDocument(BaseModel, ModelMixin):
id: str = Field(default_factory=lambda: str(uuid.uuid4()))
filename: str
fileSize: int
mimeType: str
data: str # Base64 encoded file data
class ExtractedContent(BaseModel, ModelMixin):
objectId: str # Reference to source document
objectType: str = Field(description="Type of source object ('ChatDocument' or 'TaskDocument')")
contents: List[ContentItem]
```
## Service Layer Structure
### Document Service
```python
class DocumentService:
def __init__(self, service_container):
self.service = service_container
self.neutralizer_enabled = False # Flag for neutralization feature
async def extractFromChatDocument(self, prompt: str, document: ChatDocument) -> ExtractedContent:
"""
Extract content from a ChatDocument by converting it to TaskDocument first.
"""
# Convert ChatDocument to TaskDocument
task_doc = await self._convertToTaskDocument(document)
return await self.getDocumentContent(task_doc, prompt)
async def extractFromTaskDocument(self, prompt: str, document: TaskDocument) -> ExtractedContent:
"""
Extract content directly from a TaskDocument.
"""
return await self.getDocumentContent(document, prompt)
async def getDocumentContent(self, document: TaskDocument, prompt: str) -> ExtractedContent:
"""
Helper function for centralized content extraction.
Handles the actual content extraction and optional neutralization.
"""
# Extract content based on mimeType
content = await self._extractRawContent(document)
# Apply neutralization if enabled
if self.neutralizer_enabled:
from modules.neutralizer import neutralizer
content = await neutralizer.process_content(content)
# Process content with AI using prompt
processed_content = await self._processWithAI(content, prompt)
return ExtractedContent(
objectId=document.id,
objectType="TaskDocument",
contents=processed_content
)
```
## Implementation Steps
1. **Model Cleanup**
- Create new model classes in `interfaceChatModel.py`
- Remove deprecated models:
- DocumentExtraction
- DocumentContext
- ProcessedDocument
- ChatContent (replaced by ContentItem)
- Update ChatDocument to remove contents attribute
- Convert all snake_case to camelCase in manager*.py and method*.py
2. **Service Implementation**
- Create new `DocumentService` class in `serviceDocument.py`
- Implement the three main methods:
- extractFromChatDocument
- extractFromTaskDocument
- getDocumentContent (helper function)
- Add neutralization integration with feature flag
3. **UserInput Processing**
- Update `UserInputRequest` processing to use `ChatMessage`
- Implement `processFileIds` in `interfaceChatObjects`
- Update all references to use new model structure
4. **Method Module Updates**
- Update all method*.py modules to use new service layer
- Remove direct file access
- Implement proper error handling and logging
5. **Testing and Validation**
- Create unit tests for new models and services
- Test document processing with various file types
- Validate content extraction and neutralization
- Test error handling and edge cases
## Files to be Removed/Modified
### To be Removed
1. `DocumentExtraction` class from interfaceChatModel.py
2. `DocumentContext` class from interfaceChatModel.py
3. `ProcessedDocument` class from interfaceChatModel.py
4. `ChatContent` class from interfaceChatModel.py
5. Direct file access methods from method*.py modules
### To be Modified
1. `interfaceChatModel.py`
- Add new model classes
- Remove deprecated classes
- Update existing classes
2. `managerDocument.py`
- Move core functionality to DocumentService
- Update to use new model structure
- Remove redundant methods
3. `method*.py` modules
- Update to use DocumentService
- Remove direct file access
- Update error handling
4. `interfaceChatObjects.py`
- Implement processFileIds
- Update document handling
## Neutralization Integration
The neutralization feature is integrated into the `getDocumentContent` method with a feature flag. When enabled, it will process content through the neutralizer before sending it to AI processing.
```python
# In getDocumentContent method
if self.neutralizer_enabled:
from modules.neutralizer import neutralizer
content = await neutralizer.process_content(content)
```
This allows for easy enabling/disabling of the feature and future expansion of neutralization capabilities.
## Migration Strategy
1. Create new models and services
2. Implement new functionality alongside existing code
3. Gradually migrate method modules to use new services
4. Remove deprecated code once migration is complete
5. Enable neutralization feature when ready
## Testing Requirements
1. Unit tests for all new model classes
2. Integration tests for DocumentService
3. Tests for content extraction with various file types
4. Tests for neutralization integration
5. Performance tests for large file handling
6. Error handling and edge case tests

View file

@ -5,10 +5,10 @@
### 1.1 Core Components
- **WorkflowManager**: Orchestrates the overall workflow process
- **ChatManager**: Manages chat interactions and task execution
- **ServiceContainer**: Central state and context management
- **ServiceCenter**: Central state and context management
- **AgentTask**: Core data object for task execution
### 1.2 Service Container Structure
### 1.2 Service center Structure
```python
from enum import Enum
from typing import Dict, List, Optional, Any, Literal
@ -161,8 +161,8 @@ class AgentTask(BaseModel):
"""Check if any action has failed"""
return any(a.status == ActionStatus.FAILED for a in self.actionList)
class ServiceContainer:
"""Service container with improved state management"""
class ServiceCenter:
"""Service center with improved state management"""
def __init__(self):
self.state = {
@ -481,7 +481,7 @@ class AgentTask:
graph TD
A[User Input] --> B[WorkflowManager.workflowProcess]
B --> C[ChatManager.initialize]
C --> D[Create ServiceContainer]
C --> D[Create ServiceCenter]
D --> E[Create Initial Task]
```
@ -491,7 +491,7 @@ graph TD
- Starts task processing loop
2. **ChatManager.initialize**
- Creates ServiceContainer with all required components
- Creates ServiceCenter with all required components
- Initializes service interfaces
- Sets up task and state management
@ -675,7 +675,7 @@ graph TD
### 3.1 Method Registration
```python
def _registerMethods(self):
"""Register available methods in service container"""
"""Register available methods in service center"""
self.service.methods = {
"sharepoint": MethodSharepoint(self.service),
"outlook": MethodOutlook(self.service),
@ -862,7 +862,6 @@ gateway/
│ │ ├── managerChat.py # Chat management and AI response validation
│ │ ├── managerPrompt.py # AI prompt generation and management
│ │ ├── methodBase.py # Base method class with result validation
│ │ ├── managerDocument.py # Document operations management
│ │ └── processorDocument.py # Document content extraction
│ │
│ ├── agents/ # To be refactored into methods
@ -917,7 +916,7 @@ gateway/
#### Phase 3: Manager Updates
1. **Chat Manager Enhancement**
- Integrate AI response validation
- Update service container structure
- Update service center structure
- Improve error handling
2. **Document Manager Integration**

View file

@ -1,31 +0,0 @@
# PowerShell script to run document extraction test
# Usage: .\run_document_test.ps1 [file_path]
# Accept an optional file path; defaults to the bundled sample document.
param(
[string]$FilePath = "test_sample_document.txt"
)
Write-Host "=== PowerOn Document Extraction Test ===" -ForegroundColor Green
Write-Host ""
# Check if file exists
if (-not (Test-Path $FilePath)) {
Write-Host "Error: File not found: $FilePath" -ForegroundColor Red
Write-Host "Please provide a valid file path as parameter or ensure test_sample_document.txt exists." -ForegroundColor Yellow
exit 1
}
Write-Host "Testing document extraction for file: $FilePath" -ForegroundColor Cyan
Write-Host "Log file will be: test_document_extraction.log" -ForegroundColor Cyan
Write-Host ""
# Run the Python test
try {
# Delegate the actual extraction work to the Python test harness.
python test_document_extraction.py $FilePath
Write-Host ""
Write-Host "Test completed successfully!" -ForegroundColor Green
Write-Host "Check test_document_extraction.log for detailed results." -ForegroundColor Cyan
} catch {
# Surface the failure reason and return a non-zero exit code for CI.
Write-Host "Test failed with error: $($_.Exception.Message)" -ForegroundColor Red
exit 1
}

View file

@ -1,15 +0,0 @@
# Test configuration for workflow testing
DB_APP_HOST=_test_data_app
DB_APP_DATABASE=app
DB_APP_USER=test
DB_APP_PASSWORD_SECRET=test123
DB_CHAT_HOST=_test_data_chat
DB_CHAT_DATABASE=chat
DB_CHAT_USER=test
DB_CHAT_PASSWORD_SECRET=test123
# AI Configuration
AI_PROVIDER=openai
AI_MODEL=gpt-3.5-turbo
AI_API_KEY_SECRET=test_key

View file

@ -1,288 +0,0 @@
#!/usr/bin/env python3
"""
Test procedure for DocumentManager document extraction functionality.
"""
import asyncio
import sys
import os
import json
import argparse
from datetime import datetime, UTC
from pathlib import Path
import logging
print("Starting test_document_extraction.py...")
# Configure logging FIRST, before any other imports
import logging
# Clear any existing handlers to avoid duplicate logs
for handler in logging.root.handlers[:]:
logging.root.removeHandler(handler)
logging.basicConfig(
level=logging.DEBUG,
format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
handlers=[
logging.StreamHandler(sys.stdout),
logging.FileHandler('test_document_extraction.log', mode='w', encoding='utf-8') # 'w' mode clears the file
],
force=True # Force reconfiguration even if already configured
)
# Filter out httpcore messages
logging.getLogger('httpcore').setLevel(logging.WARNING)
logging.getLogger('httpx').setLevel(logging.WARNING)
logger = logging.getLogger(__name__)
# Set up test configuration
os.environ['POWERON_CONFIG_FILE'] = 'test_config.ini'
print("Set POWERON_CONFIG_FILE environment variable")
try:
# Import required modules
from modules.interfaces.interfaceAppObjects import User, UserConnection
from modules.interfaces.interfaceChatModel import ChatWorkflow
from modules.workflow.managerDocument import DocumentManager
from modules.workflow.serviceContainer import ServiceContainer
print("All imports successful")
except Exception as e:
print(f"Import error: {e}")
import traceback
traceback.print_exc()
sys.exit(1)
def log_extraction_debug(message: str, data: dict = None):
    """Emit a timestamped debug entry, optionally with a JSON-dumped payload."""
    stamp = datetime.now(UTC).isoformat()
    if data:
        payload = json.dumps(data, indent=2, ensure_ascii=False)
        logger.debug(f"[{stamp}] {message}\n{payload}")
    else:
        # No (or empty) payload: log the message line alone.
        logger.debug(f"[{stamp}] {message}")
def create_test_user() -> User:
    """Build the fixed User fixture used by the document extraction test."""
    user_fields = {
        "id": "test-user-doc-001",
        "mandateId": "test-mandate-doc-001",
        "username": "testuser_doc",
        "email": "test_doc@example.com",
        "fullName": "Test Document User",
        "enabled": True,
        "language": "en",
        "privilege": "user",
        "authenticationAuthority": "local",
    }
    return User(**user_fields)
def create_test_workflow() -> ChatWorkflow:
    """Build the fixed ChatWorkflow fixture for the document extraction test."""
    workflow_fields = {
        "id": "test-workflow-doc-001",
        "mandateId": "test-mandate-doc-001",
        "status": "running",
        "name": "Document Extraction Test Workflow",
        "currentRound": 1,
        # Timestamps are taken separately, matching the original call order.
        "lastActivity": datetime.now(UTC).isoformat(),
        "startedAt": datetime.now(UTC).isoformat(),
        "logs": [],
        "messages": [],
        "stats": None,
        "tasks": [],
    }
    return ChatWorkflow(**workflow_fields)
# Extension -> MIME type table, built once at import time instead of on
# every call to detect_mime_type.
_MIME_TYPES = {
    '.txt': 'text/plain',
    '.md': 'text/markdown',
    '.csv': 'text/csv',
    '.json': 'application/json',
    '.xml': 'application/xml',
    '.js': 'application/javascript',
    '.py': 'application/x-python',
    '.svg': 'image/svg+xml',
    '.jpg': 'image/jpeg',
    '.jpeg': 'image/jpeg',
    '.png': 'image/png',
    '.gif': 'image/gif',
    '.pdf': 'application/pdf',
    '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
    '.doc': 'application/msword',
    '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
    '.xls': 'application/vnd.ms-excel',
    '.pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
    '.ppt': 'application/vnd.ms-powerpoint',
    '.html': 'text/html',
    '.htm': 'text/html',
}


def detect_mime_type(file_path: str) -> str:
    """Detect the MIME type of a file from its extension.

    Args:
        file_path: Path to the file; only the suffix is examined, the
            file itself is never opened.

    Returns:
        The mapped MIME type for the (case-insensitive) extension, or
        'application/octet-stream' for unknown extensions.
    """
    ext = Path(file_path).suffix.lower()
    return _MIME_TYPES.get(ext, 'application/octet-stream')
async def test_document_extraction(file_path: str):
    """Run the end-to-end document-extraction test against *file_path*.

    Clears the log file, validates and reads the input file, builds test
    fixtures, and asks the DocumentManager to extract content with a fixed
    prompt. Returns the extracted-content object on success; re-raises any
    failure after logging full error details.
    """
    try:
        # Start from an empty log so each run's output stands alone.
        log_file_path = "test_document_extraction.log"
        if os.path.exists(log_file_path):
            with open(log_file_path, 'w') as f:
                f.write("")  # Clear the file
            logger.info(f"Cleared log file: {log_file_path}")
        logger.info("=== STARTING DOCUMENT EXTRACTION TEST ===")
        # Validate file path
        if not os.path.exists(file_path):
            raise FileNotFoundError(f"File not found: {file_path}")
        # Get file info
        file_path_obj = Path(file_path)
        filename = file_path_obj.name
        mime_type = detect_mime_type(file_path)
        file_size = file_path_obj.stat().st_size
        log_extraction_debug("File information", {
            "file_path": file_path,
            "filename": filename,
            "mime_type": mime_type,
            "file_size_bytes": file_size,
            "file_size_mb": round(file_size / (1024 * 1024), 2)
        })
        # Read file data as raw bytes; decoding is the extractor's job.
        try:
            with open(file_path, 'rb') as f:
                file_data = f.read()
            log_extraction_debug("File read successfully", {
                "bytes_read": len(file_data),
                "file_encoding": "binary"
            })
        except Exception as e:
            logger.error(f"Error reading file: {str(e)}")
            raise
        # Create test user and workflow fixtures
        test_user = create_test_user()
        test_workflow = create_test_workflow()
        # Create service container
        service_container = ServiceContainer(test_user, test_workflow)
        log_extraction_debug("Service container created", {
            "user_id": test_user.id,
            "workflow_id": test_workflow.id
        })
        # Create document manager
        document_manager = DocumentManager(service_container)
        log_extraction_debug("Document manager created")
        # Define extraction prompt
        # NOTE(review): the README describes a "summarize the content" prompt,
        # but the code uses a table-extraction prompt — confirm which is intended.
        extraction_prompt = "extract the table and convert it to a csv table"
        log_extraction_debug("Starting document extraction", {
            "prompt": extraction_prompt,
            "filename": filename,
            "mime_type": mime_type
        })
        # Extract content from file data
        try:
            extracted_content = await document_manager.extractContentFromFileData(
                prompt=extraction_prompt,
                fileData=file_data,
                filename=filename,
                mimeType=mime_type,
                base64Encoded=False,
                documentId=f"test-doc-{datetime.now(UTC).timestamp()}"
            )
            # Log extraction results
            extraction_result = {
                "extracted_content_id": extracted_content.id,
                "content_items_count": len(extracted_content.contents)
            }
            # Add objectId and objectType if they exist (set by DocumentManager)
            if hasattr(extracted_content, 'objectId'):
                extraction_result["object_id"] = extracted_content.objectId
            if hasattr(extracted_content, 'objectType'):
                extraction_result["object_type"] = extracted_content.objectType
            log_extraction_debug("Document extraction completed successfully", extraction_result)
            # Log detailed content information
            for i, content_item in enumerate(extracted_content.contents):
                content_info = {
                    "label": content_item.label,
                    "data_length": len(content_item.data) if content_item.data else 0,
                    "data_preview": content_item.data[:500] + "..." if content_item.data and len(content_item.data) > 500 else content_item.data
                }
                # Add metadata if available
                if content_item.metadata:
                    content_info["metadata"] = {
                        "size": content_item.metadata.size,
                        "mime_type": content_item.metadata.mimeType,
                        "base64_encoded": content_item.metadata.base64Encoded,
                        "pages": content_item.metadata.pages
                    }
                log_extraction_debug(f"CONTENT ITEM {i+1}:", content_info)
            # Log summary of all extracted content
            all_content = "\n\n".join([item.data for item in extracted_content.contents if item.data])
            log_extraction_debug("COMPLETE EXTRACTED CONTENT:", {
                "total_length": len(all_content),
                "content": all_content
            })
            # Fix: this banner and return used to sit *after* the inner
            # try/except, where the early return/raise made them unreachable.
            logger.info("=== DOCUMENT EXTRACTION TEST COMPLETED ===")
            return extracted_content
        except Exception as e:
            log_extraction_debug("DOCUMENT EXTRACTION EXCEPTION:", {
                "error_type": type(e).__name__,
                "error_message": str(e),
                "error_args": e.args if hasattr(e, 'args') else None
            })
            raise
    except Exception as e:
        logger.error(f"❌ Document extraction test failed with error: {str(e)}")
        log_extraction_debug("Full error details", {
            "error_type": type(e).__name__,
            "error_message": str(e)
        })
        raise
async def main():
    """CLI entry point: parse the file-path argument and run the extraction test."""
    print("Inside main()")
    sep = "=" * 50
    logger.info(sep)
    logger.info("DOCUMENT EXTRACTION TEST")
    logger.info(sep)
    # Parse command line arguments
    cli = argparse.ArgumentParser(description='Test document extraction functionality')
    cli.add_argument('file_path', help='Path to the file to extract content from')
    options = cli.parse_args()
    try:
        result = await test_document_extraction(options.file_path)
        logger.info(sep)
        logger.info("TEST COMPLETED SUCCESSFULLY")
        logger.info(sep)
        return result
    except Exception:
        logger.error(sep)
        logger.error("TEST FAILED")
        logger.error(sep)
        raise
if __name__ == "__main__":
    # Script entry point: drive the async main() to completion.
    print("About to run main()")
    asyncio.run(main())
    print("main() finished")

View file

@ -1,27 +0,0 @@
#!/usr/bin/env python3
from modules.workflow.methodBase import MethodBase
class TestMethod(MethodBase):
    # Minimal concrete subclass so MethodBase helpers can be exercised directly.
    pass
def test_parameter_extraction():
    """Exercise MethodBase._extractParameterDescriptions on a sample docstring."""
    probe = TestMethod(None)
    probe.name = 'test'
    docstring = """Call AI service with document content
    Parameters:
        prompt (str): The prompt to send to the AI service
        documents (List[Dict[str, Any]], optional): List of documents to include in context
            Each document should have: documentReference (str), contentExtractionPrompt (str, optional)"""
    print("Docstring:")
    print(docstring)
    print("\nExtracted descriptions:")
    extracted = probe._extractParameterDescriptions(docstring)
    for name, text in extracted.items():
        print(f"  {name}: {text}")
if __name__ == "__main__":
    # Run the docstring-parsing check directly as a script.
    test_parameter_extraction()

View file

@ -1,289 +0,0 @@
#!/usr/bin/env python3
"""
Test script for retry enhancement in managerChat.py
Tests that previous action results and review feedback are properly passed to retry prompts.
"""
import asyncio
import logging
import sys
import os
# Add the gateway directory to the Python path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'gateway'))
from modules.workflow.managerChat import ChatManager
from modules.interfaces.interfaceAppModel import User
from modules.interfaces.interfaceChatModel import ChatWorkflow, ChatMessage
from modules.interfaces.interfaceChatObjects import ChatObjects
# Configure logging at DEBUG level so mock interactions are fully visible.
logging.basicConfig(level=logging.DEBUG)
# Module-level logger used by the test routines below.
logger = logging.getLogger(__name__)
class MockChatObjects(ChatObjects):
    """Stand-in ChatObjects that fabricates lightweight stub objects for tests."""

    def createTaskAction(self, action_data):
        """Return a stub task action populated from *action_data*."""

        class _TaskActionStub:
            def __init__(self, payload):
                self.id = "test_action_id"
                self.execMethod = payload.get("execMethod", "unknown")
                self.execAction = payload.get("execAction", "unknown")
                self.execParameters = payload.get("execParameters", {})
                self.execResultLabel = payload.get("execResultLabel", "")
                self.status = payload.get("status", "PENDING")
                self.result = ""
                self.error = ""

            def setSuccess(self):
                self.status = "COMPLETED"

            def setError(self, error):
                self.status = "FAILED"
                self.error = error

            def isSuccessful(self):
                return self.status == "COMPLETED"

        return _TaskActionStub(action_data)

    def createChatDocument(self, document_data):
        """Return a stub chat document populated from *document_data*."""

        class _ChatDocumentStub:
            def __init__(self, payload):
                self.fileId = payload.get("fileId", "")
                self.filename = payload.get("filename", "unknown")
                self.fileSize = payload.get("fileSize", 0)
                self.mimeType = payload.get("mimeType", "application/octet-stream")
                self.content = ""

        return _ChatDocumentStub(document_data)

    def createWorkflowMessage(self, message_data):
        """Return a stub workflow message populated from *message_data*."""

        class _WorkflowMessageStub:
            def __init__(self, payload):
                self.workflowId = payload.get("workflowId", "")
                self.role = payload.get("role", "assistant")
                self.message = payload.get("message", "")
                self.status = payload.get("status", "step")
                self.sequenceNr = payload.get("sequenceNr", 1)
                self.publishedAt = payload.get("publishedAt", "")
                self.actionId = payload.get("actionId", "")
                self.actionMethod = payload.get("actionMethod", "")
                self.actionName = payload.get("actionName", "")
                self.documentsLabel = payload.get("documentsLabel", "")
                self.documents = payload.get("documents", [])

        return _WorkflowMessageStub(message_data)
class MockServiceContainer:
    """Stand-in ServiceContainer that returns canned data for ChatManager tests."""

    def __init__(self, user, workflow):
        self.user = user
        self.workflow = workflow

    def getMethodsList(self):
        """Canned list of method signatures."""
        return ["document.extract(documentList, aiPrompt)", "document.analyze(documentList, aiPrompt)"]

    async def summarizeChat(self, messages):
        """Canned chat-history summary."""
        return "Mock chat history summary"

    def getDocumentReferenceList(self):
        """Canned (empty) document reference buckets."""
        return {
            'chat': [],
            'history': []
        }

    def getConnectionReferenceList(self):
        """Canned connection names."""
        return ["connection1", "connection2"]

    def getFileInfo(self, fileId):
        """Canned file metadata derived from *fileId*."""
        return {
            "filename": f"test_file_{fileId}.txt",
            "size": 1024,
            "mimeType": "text/plain"
        }

    def createFile(self, fileName, mimeType, content, base64encoded=False):
        """Pretend to persist a file; echo a deterministic id."""
        return f"file_id_{fileName}"

    def createDocument(self, fileName, mimeType, content, base64encoded=False):
        """Build a stub document carrying the given name, type, and content."""

        class _DocumentStub:
            def __init__(self, name, mime, body):
                self.filename = name
                self.mimeType = mime
                self.content = body
                self.fileSize = len(body)

        return _DocumentStub(fileName, mimeType, content)

    def getFileExtension(self, filename):
        """Extension after the last dot, defaulting to 'txt'."""
        return filename.rsplit('.', 1)[-1] if '.' in filename else 'txt'

    def getMimeTypeFromExtension(self, extension):
        """Map a handful of known extensions to MIME types."""
        known = {
            'txt': 'text/plain',
            'pdf': 'application/pdf',
            'doc': 'application/msword',
            'json': 'application/json'
        }
        return known.get(extension, 'application/octet-stream')

    def detectContentTypeFromData(self, file_bytes, filename):
        """Guess a MIME type from the filename suffix only (bytes are ignored)."""
        for suffix, mime in (('.txt', 'text/plain'),
                             ('.pdf', 'application/pdf'),
                             ('.json', 'application/json')):
            if filename.endswith(suffix):
                return mime
        return 'application/octet-stream'

    async def callAiTextBasic(self, prompt):
        """Canned action-definition JSON."""
        return '{"actions": [{"method": "document", "action": "extract", "parameters": {"documentList": ["test"], "aiPrompt": "Test prompt"}, "resultLabel": "task1_action1_test", "description": "Test action"}]}'

    async def callAiTextAdvanced(self, prompt):
        """Canned planning JSON."""
        return '{"overview": "Test plan", "tasks": [{"id": "task_1", "description": "Test task", "dependencies": [], "expected_outputs": ["output1"], "success_criteria": ["criteria1"], "required_documents": [], "estimated_complexity": "low", "ai_prompt": "Test prompt"}]}'

    async def executeAction(self, methodName, actionName, parameters):
        """Pretend to run an action; always succeed with a fixed payload."""

        class _ResultStub:
            def __init__(self):
                self.success = True
                self.data = {
                    "result": "Mock execution result",
                    "documents": []
                }
                self.error = None

        return _ResultStub()
async def test_retry_enhancement():
    """Test the retry enhancement functionality.

    Verifies that ChatManager passes previous action results and review
    feedback into retry prompts, and that fallback actions carry retry info.
    Uses mock objects throughout; no external services are contacted.
    """
    logger.info("Testing retry enhancement in managerChat.py")
    # Create mock objects
    mock_user = User(id="test_user", username="testuser", email="test@example.com", mandateId="test_mandate")
    mock_chat_objects = MockChatObjects()
    mock_workflow = ChatWorkflow(
        id="test_workflow",
        userId="test_user",
        status="active",
        messages=[],
        createdAt="2024-01-01T00:00:00Z",
        updatedAt="2024-01-01T00:00:00Z",
        mandateId="test_mandate",
        currentRound=1,
        lastActivity="2024-01-01T00:00:00Z",
        startedAt="2024-01-01T00:00:00Z"
    )
    # Create chat manager
    chat_manager = ChatManager(mock_user, mock_chat_objects)
    # Mock the service container directly instead of initializing
    # (avoids the real initialization path, which needs live services).
    chat_manager.service = MockServiceContainer(mock_user, mock_workflow)
    chat_manager.workflow = mock_workflow
    # Test 1: Basic action definition without retry
    logger.info("Test 1: Basic action definition")
    task_step = {
        "id": "task_1",
        "description": "Test task",
        "expected_outputs": ["output1"],
        "success_criteria": ["criteria1"],
        "ai_prompt": "Test AI prompt"
    }
    actions = await chat_manager.defineTaskActions(task_step, mock_workflow, [])
    logger.info(f"Generated {len(actions)} actions without retry context")
    # Test 2: Action definition with retry context
    logger.info("Test 2: Action definition with retry context")
    # enhanced_context mimics the state ChatManager builds after a failed
    # attempt: prior results, review verdict, and requested improvements.
    enhanced_context = {
        'task_step': task_step,
        'workflow': mock_workflow,
        'workflow_id': mock_workflow.id,
        'available_documents': ["test_doc.txt"],
        'previous_results': ["task0_action1_results"],
        'improvements': "Previous attempt failed - ensure comprehensive extraction",
        'retry_count': 1,
        'previous_action_results': [
            {
                'actionMethod': 'document',
                'actionName': 'extract',
                'status': 'failed',
                'error': 'Empty result returned',
                'result': 'No content extracted',
                'resultLabel': 'task1_action1_failed'
            }
        ],
        'previous_review_result': {
            'status': 'retry',
            'reason': 'Incomplete extraction',
            'quality_score': 3,
            'missing_outputs': ['detailed_analysis'],
            'unmet_criteria': ['comprehensive_coverage']
        }
    }
    retry_actions = await chat_manager.defineTaskActions(task_step, mock_workflow, [], enhanced_context)
    logger.info(f"Generated {len(retry_actions)} actions with retry context")
    # Test 3: Verify retry context is properly handled
    logger.info("Test 3: Verifying retry context handling")
    # Create a test prompt to see if retry context is included
    test_prompt = await chat_manager._createActionDefinitionPrompt(enhanced_context)
    # Check if retry context is in the prompt (substring checks only; the
    # exact prompt wording is owned by ChatManager).
    if "RETRY CONTEXT" in test_prompt:
        logger.info("✓ Retry context properly included in prompt")
    else:
        logger.error("✗ Retry context not found in prompt")
    if "Previous action results that failed" in test_prompt:
        logger.info("✓ Previous action results included in prompt")
    else:
        logger.error("✗ Previous action results not found in prompt")
    if "Previous review feedback" in test_prompt:
        logger.info("✓ Previous review feedback included in prompt")
    else:
        logger.error("✗ Previous review feedback not found in prompt")
    if "Previous attempt failed" in test_prompt:
        logger.info("✓ Improvements needed included in prompt")
    else:
        logger.error("✗ Improvements needed not found in prompt")
    # Test 4: Verify fallback actions with retry context
    logger.info("Test 4: Testing fallback actions with retry context")
    fallback_actions = chat_manager._createFallbackActions(task_step, enhanced_context)
    logger.info(f"Generated {len(fallback_actions)} fallback actions with retry context")
    # Check if fallback actions include retry information
    if any("retry" in action.get("resultLabel", "") for action in fallback_actions):
        logger.info("✓ Fallback actions include retry information")
    else:
        logger.error("✗ Fallback actions missing retry information")
    logger.info("Retry enhancement test completed successfully!")
if __name__ == "__main__":
    # Run the retry-enhancement test directly as a script.
    asyncio.run(test_retry_enhancement())

View file

@ -1,47 +0,0 @@
PowerOn System Architecture Overview
This document provides a comprehensive overview of the PowerOn system architecture, including its key components, data flow, and technical specifications.
MAJOR TOPICS:
1. System Architecture
- Frontend Agents: Web-based user interface components
- Gateway: Central API and workflow management system
- Database: JSON-based data storage with component interfaces
- AI Integration: Anthropic and OpenAI connectors for intelligent processing
2. Core Components
- Document Manager: Handles file processing and content extraction
- Workflow Manager: Orchestrates complex business processes
- Service Container: Provides unified access to all system services
- Neutralizer: Data anonymization and privacy protection
3. Data Flow Architecture
- User authentication and authorization
- Document upload and processing pipeline
- AI-powered content analysis and extraction
- Workflow execution and task management
- Result generation and storage
4. Technical Specifications
- Python-based backend with async/await support
- RESTful API design with JSON data exchange
- Modular component architecture
- Extensible method system for business logic
- Comprehensive logging and monitoring
5. Security Features
- Multi-authentication authority support (Local, Microsoft, Google)
- Token-based session management
- Data encryption and anonymization
- Role-based access control
- Audit trail and compliance features
6. Integration Capabilities
- SharePoint document management
- Email system integration (Outlook)
- Web crawling and data collection
- AI service integration (Anthropic, OpenAI)
- Custom method development framework
The PowerOn system is designed to provide a comprehensive platform for intelligent document processing, workflow automation, and AI-powered business process management. It combines modern web technologies with advanced AI capabilities to deliver a robust and scalable solution for enterprise document management and workflow automation.

View file

@ -1,23 +0,0 @@
#!/usr/bin/env python3
from modules.workflow.serviceContainer import ServiceContainer
from modules.interfaces.interfaceAppObjects import User
from modules.interfaces.interfaceChatModel import ChatWorkflow
def test_signatures():
    """Print the first few method signatures exposed by a ServiceContainer."""
    fixture_user = User(id='test', mandateId='test', username='test', email='test@test.com',
                        fullName='Test User', enabled=True, language='en', privilege='user',
                        authenticationAuthority='local')
    fixture_workflow = ChatWorkflow(id='test', mandateId='test', status='running', name='Test',
                                    currentRound=1, lastActivity='2025-01-01T00:00:00Z',
                                    startedAt='2025-01-01T00:00:00Z', logs=[], messages=[],
                                    stats=None, tasks=[])
    container = ServiceContainer(fixture_user, fixture_workflow)
    print("Method signatures:")
    for signature in container.getMethodsList()[:5]:  # only the first five
        print(f"  {signature}")
if __name__ == "__main__":
    # Run the signature dump directly as a script.
    test_signatures()

View file

@ -1,488 +0,0 @@
#!/usr/bin/env python3
"""
Test routine for WorkflowManager.workflowProcess() with new unified workflow architecture
"""
import asyncio
import sys
import os
import json
from datetime import datetime, UTC, timedelta
import uuid
from typing import List
print("Starting test_workflow.py...")
# Configure logging FIRST, before any other imports, so module-level loggers
# created by imported modules inherit this configuration.
import logging
# Clear any existing handlers to avoid duplicate logs
for handler in logging.root.handlers[:]:
    logging.root.removeHandler(handler)
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
    handlers=[
        logging.StreamHandler(sys.stdout),
        logging.FileHandler('test_workflow.log', mode='w', encoding='utf-8') # 'w' mode clears the file
    ],
    force=True # Force reconfiguration even if already configured
)
# Filter out noisy HTTP client debug messages
logging.getLogger('httpcore').setLevel(logging.WARNING)
logging.getLogger('httpx').setLevel(logging.WARNING)
logger = logging.getLogger(__name__)
# Point the application at the test configuration before project imports run.
os.environ['POWERON_CONFIG_FILE'] = 'test_config.ini'
print("Set POWERON_CONFIG_FILE environment variable")
try:
    # Simple imports from modules (same as app.py)
    from modules.interfaces.interfaceAppObjects import User, UserConnection
    from modules.interfaces.interfaceChatObjects import ChatObjects
    from modules.interfaces.interfaceChatModel import UserInputRequest, ChatWorkflow
    from modules.workflow.managerWorkflow import WorkflowManager
    print("All imports successful")
except Exception as e:
    # Fail fast with a traceback if project imports are broken.
    print(f"Import error: {e}")
    import traceback
    traceback.print_exc()
    sys.exit(1)
def log_workflow_debug(message: str, data: dict = None):
    """Emit a timestamped debug entry, optionally followed by pretty-printed JSON."""
    stamp = datetime.now(UTC).isoformat()
    line = f"[{stamp}] {message}"
    if data:
        # Append the payload on its own lines so log entries stay greppable.
        line += "\n" + json.dumps(data, indent=2, ensure_ascii=False)
    logger.debug(line)
def create_test_user() -> User:
    """Build the fixed User fixture used by the workflow test."""
    fixture = dict(
        id="test-user-001",
        mandateId="test-mandate-001",
        username="testuser",
        email="test@example.com",
        fullName="Test User",
        enabled=True,
        language="en",
        privilege="user",
        authenticationAuthority="local",
    )
    return User(**fixture)
def create_test_workflow() -> ChatWorkflow:
    """Build a running ChatWorkflow fixture for the candidate-evaluation test."""
    fields = dict(
        id="test-workflow-001",
        mandateId="test-mandate-001",
        status="running",
        name="Candidate Evaluation and Selection Workflow",
        currentRound=1,
        # Two separate now() calls, mirroring the original construction.
        lastActivity=datetime.now(UTC).isoformat(),
        startedAt=datetime.now(UTC).isoformat(),
        logs=[],
        messages=[],
        stats=None,
        tasks=[],
    )
    return ChatWorkflow(**fields)
def create_test_user_input() -> UserInputRequest:
    """Create test user input with a candidate evaluation task.

    The prompt is the verbatim end-user request that drives the workflow;
    listFileId holds placeholder names that are replaced with real file ids
    after create_test_files() runs.
    """
    return UserInputRequest(
        prompt="""I have following list of job profiles from candidates (3 job profiles as text files) and want to know, who is best suited for the position of product designer (file with criteria). Create an evaluation matrix and rate all candidates according to the matrix, then produce a presentation for the management to decide and store it on the SharePoint for an available account.
Please ensure the evaluation includes:
- Technical skills assessment
- Experience level evaluation
- Cultural fit analysis
- Portfolio quality review
- Communication skills assessment
- Overall suitability score
The output should be suitable for executive review and include both detailed analysis and clear recommendations.""",
        listFileId=["candidate_1_profile.txt", "candidate_2_profile.txt", "candidate_3_profile.txt", "product_designer_criteria.txt"],
        userLanguage="en"
    )
def create_test_files(chat_interface) -> List[str]:
    """Create test files in the database for candidate evaluation.

    Builds three candidate profiles plus the position criteria, saves each
    through the component interface, and returns the list of file ids. On a
    save failure a deterministic dummy id is appended instead so the caller
    can proceed (best-effort by design).
    """
    test_files = []
    # Import the component interface
    from modules.interfaces.interfaceComponentObjects import getInterface as getComponentObjects
    # Get component interface with the same user context
    component_interface = getComponentObjects(chat_interface.currentUser)
    # Candidate 1 Profile
    candidate_1_content = """CANDIDATE 1: Sarah Johnson
Position: Senior Product Designer
Experience: 8 years
TECHNICAL SKILLS:
- Figma, Sketch, Adobe Creative Suite (Expert)
- Prototyping tools: Framer, Principle (Advanced)
- Design systems and component libraries (Expert)
- User research and usability testing (Advanced)
- HTML/CSS/JavaScript basics (Intermediate)
EXPERIENCE:
- Senior Product Designer at TechCorp (3 years)
- Product Designer at StartupXYZ (3 years)
- UI/UX Designer at DesignAgency (2 years)
PORTFOLIO HIGHLIGHTS:
- Redesigned e-commerce platform increasing conversion by 25%
- Created comprehensive design system for 50+ product team
- Led user research for mobile banking app with 1M+ users
COMMUNICATION SKILLS:
- Excellent presentation skills
- Experience presenting to C-level executives
- Strong stakeholder management
- Mentored 5 junior designers
CULTURAL FIT:
- Collaborative team player
- Proactive problem solver
- Adapts quickly to new environments
- Values user-centered design approach"""
    # Candidate 2 Profile
    candidate_2_content = """CANDIDATE 2: Michael Chen
Position: Product Designer
Experience: 5 years
TECHNICAL SKILLS:
- Figma, Sketch, Adobe Creative Suite (Advanced)
- Prototyping tools: InVision, Marvel (Intermediate)
- Design systems (Intermediate)
- User research (Intermediate)
- No coding experience
EXPERIENCE:
- Product Designer at MidSizeTech (3 years)
- Junior Designer at CreativeStudio (2 years)
PORTFOLIO HIGHLIGHTS:
- Designed mobile app for local restaurant chain
- Created brand identity for startup
- Improved user flow for SaaS dashboard
COMMUNICATION SKILLS:
- Good presentation skills
- Works well in small teams
- Some experience with stakeholders
- Learning to mentor others
CULTURAL FIT:
- Quiet but dedicated worker
- Detail-oriented
- Prefers structured environments
- Focuses on visual design quality"""
    # Candidate 3 Profile
    candidate_3_content = """CANDIDATE 3: Emma Rodriguez
Position: UX/UI Designer
Experience: 6 years
TECHNICAL SKILLS:
- Figma, Sketch, Adobe Creative Suite (Advanced)
- Prototyping tools: Framer, Axure (Advanced)
- Design systems (Advanced)
- User research and analytics (Expert)
- Basic React/JavaScript (Intermediate)
EXPERIENCE:
- UX/UI Designer at EnterpriseCorp (4 years)
- UX Designer at ConsultingFirm (2 years)
PORTFOLIO HIGHLIGHTS:
- Led UX research for enterprise software used by 10K+ users
- Implemented data-driven design improvements increasing user satisfaction by 30%
- Created accessibility-compliant design system
- Conducted international user research studies
COMMUNICATION SKILLS:
- Outstanding presentation and storytelling skills
- Experience with international stakeholders
- Strong analytical communication
- Excellent at translating user insights to business value
CULTURAL FIT:
- Natural leader and team motivator
- Strategic thinker
- Adapts well to change
- Passionate about user advocacy"""
    # Product Designer Criteria
    criteria_content = """PRODUCT DESIGNER POSITION CRITERIA
Company: ValueOn
Department: Product Development
Level: Senior
REQUIRED SKILLS:
- Expert proficiency in Figma and modern design tools
- Strong understanding of user-centered design principles
- Experience with design systems and component libraries
- Ability to conduct user research and usability testing
- Basic understanding of front-end development (HTML/CSS/JavaScript)
REQUIRED EXPERIENCE:
- Minimum 5 years in product design
- Experience working with cross-functional teams
- Portfolio demonstrating complex product design solutions
- Experience with SaaS or enterprise software preferred
COMMUNICATION REQUIREMENTS:
- Excellent presentation skills
- Ability to communicate design decisions to stakeholders
- Experience presenting to management/executives
- Strong collaboration and feedback skills
CULTURAL FIT:
- Team-oriented and collaborative
- Proactive and self-motivated
- Adaptable to fast-paced environment
- Passionate about user experience
RESPONSIBILITIES:
- Lead design for core product features
- Collaborate with product managers and engineers
- Conduct user research and usability testing
- Create and maintain design system
- Present design solutions to stakeholders
- Mentor junior designers
EVALUATION WEIGHTS:
- Technical Skills: 30%
- Experience: 25%
- Communication: 20%
- Cultural Fit: 15%
- Portfolio Quality: 10%"""
    # Create files in database
    file_contents = [
        ("candidate_1_profile.txt", candidate_1_content),
        ("candidate_2_profile.txt", candidate_2_content),
        ("candidate_3_profile.txt", candidate_3_content),
        ("product_designer_criteria.txt", criteria_content)
    ]
    for filename, content in file_contents:
        try:
            # Create file in database using the component interface
            file_item = component_interface.saveUploadedFile(
                fileContent=content.encode('utf-8'),
                fileName=filename
            )
            test_files.append(file_item.id)
            # Fix: log message previously contained the literal "(unknown)"
            # instead of interpolating the filename.
            log_workflow_debug(f"Created test file: {filename}", {
                "file_id": file_item.id,
                "filename": filename,
                "content_length": len(content)
            })
        except Exception as e:
            # Fix: same missing interpolation in the error message.
            log_workflow_debug(f"Error creating test file {filename}: {str(e)}")
            # Create a dummy file ID if creation fails
            test_files.append(f"file_{filename.replace('.', '_')}")
    return test_files
async def test_workflow_process():
    """Test the workflowProcess function with the unified workflow architecture.

    Creates a user, connection, token, workflow, and candidate files in the
    database, then runs WorkflowManager.workflowProcess end-to-end and logs
    the resulting workflow messages. Returns the workflow on success.

    Fix: the docstring used to sit after a print(), making it a no-op string
    expression rather than the function's docstring.
    """
    print("Inside test_workflow_process()")
    try:
        logger.info("=== STARTING UNIFIED WORKFLOW PROCESS TEST ===")
        # Create test data
        test_user = create_test_user()
        test_workflow = create_test_workflow()
        test_user_input = create_test_user_input()
        log_workflow_debug("Test data created", {
            "user_id": test_user.id,
            "workflow_id": test_workflow.id,
            "user_input_prompt": test_user_input.prompt,
            "file_ids": test_user_input.listFileId
        })
        # Create test user in database through AppObjects interface
        from modules.interfaces.interfaceAppObjects import getRootInterface
        from modules.interfaces.interfaceAppModel import AuthAuthority, ConnectionStatus, Token, UserPrivilege
        root_interface = getRootInterface()
        created_user = root_interface.createUser(
            username=test_user.username,
            password="testpassword123", # Required for local authentication
            email=test_user.email,
            fullName=test_user.fullName,
            language=test_user.language,
            enabled=test_user.enabled,
            privilege=UserPrivilege.USER,
            authenticationAuthority=AuthAuthority.LOCAL
        )
        log_workflow_debug("Created test user in database", {
            "user_id": created_user.id,
            "username": created_user.username,
            "email": created_user.email
        })
        # Create test connection through AppObjects interface
        from modules.interfaces.interfaceAppObjects import getInterface as getAppObjects
        app_interface = getAppObjects(created_user)
        test_connection = app_interface.addUserConnection(
            userId=created_user.id,
            authority=AuthAuthority.MSFT,
            externalId="msft-user-123",
            externalUsername="testuser@example.com",
            externalEmail="testuser@example.com",
            status=ConnectionStatus.ACTIVE
        )
        log_workflow_debug("Created test connection", {
            "connection_id": test_connection.id,
            "authority": test_connection.authority,
            "external_username": test_connection.externalUsername
        })
        # Create test token for the connection
        test_token = Token(
            userId=created_user.id,
            authority=AuthAuthority.MSFT,
            tokenAccess="test-access-token-123",
            tokenRefresh="test-refresh-token-456",
            tokenType="bearer",
            expiresAt=datetime.now(UTC).timestamp() + 3600, # 1 hour from now
            createdAt=datetime.now(UTC)
        )
        app_interface.saveToken(test_token)
        log_workflow_debug("Created test token", {
            "token_id": test_token.id,
            "authority": test_token.authority,
            "expires_at": test_token.expiresAt
        })
        # Create test workflow in database through ChatObjects interface
        from modules.interfaces.interfaceChatObjects import getInterface as getChatObjects
        chat_interface = getChatObjects(created_user)
        workflow_data = {
            "name": test_workflow.name,
            "status": test_workflow.status,
            "mandateId": created_user.mandateId,
            "currentRound": test_workflow.currentRound,
            "startedAt": test_workflow.startedAt,
            "lastActivity": test_workflow.lastActivity
        }
        created_workflow = chat_interface.createWorkflow(workflow_data)
        log_workflow_debug("Created test workflow in database", {
            "workflow_id": created_workflow.id,
            "name": created_workflow.name,
            "status": created_workflow.status
        })
        # Update the test_workflow object with the created workflow's ID
        test_workflow.id = created_workflow.id
        # Create test files in database
        logger.info("Creating test files for candidate evaluation...")
        test_file_ids = create_test_files(chat_interface)
        log_workflow_debug("Test files created", {
            "file_count": len(test_file_ids),
            "file_ids": test_file_ids
        })
        # Update user input with real file IDs
        test_user_input.listFileId = test_file_ids
        log_workflow_debug("Updated user input with file IDs", {
            "file_ids": test_user_input.listFileId
        })
        # Initialize WorkflowManager
        workflow_manager = WorkflowManager(chat_interface, created_user)
        logger.info("WorkflowManager initialized")
        # Test the workflowProcess function
        logger.info("Calling workflowProcess with unified workflow architecture...")
        try:
            # Execute the unified workflow process
            await workflow_manager.workflowProcess(test_user_input, test_workflow)
            # Log workflow results
            log_workflow_debug("Workflow process completed successfully", {
                "workflow_id": test_workflow.id,
                "workflow_status": test_workflow.status,
                "message_count": len(test_workflow.messages),
                "final_messages": [
                    {
                        "role": msg.role,
                        "message": msg.message[:200] + "..." if len(msg.message) > 200 else msg.message,
                        "status": msg.status,
                        "sequence_nr": msg.sequenceNr
                    } for msg in test_workflow.messages[-3:] # Last 3 messages
                ]
            })
            # Log detailed workflow messages
            for i, message in enumerate(test_workflow.messages):
                log_workflow_debug(f"WORKFLOW MESSAGE {i+1}:", {
                    "role": message.role,
                    "message": message.message,
                    "status": message.status,
                    "sequence_nr": message.sequenceNr,
                    "published_at": message.publishedAt,
                    "document_count": len(message.documents) if hasattr(message, 'documents') else 0
                })
            # Fix: this banner and return used to sit *after* the inner
            # try/except, where the early return/raise made them unreachable.
            logger.info("=== UNIFIED WORKFLOW PROCESS TEST COMPLETED ===")
            return test_workflow
        except Exception as e:
            import traceback
            error_details = {
                "error_type": type(e).__name__,
                "error_message": str(e),
                "error_args": e.args if hasattr(e, 'args') else None,
                "traceback": traceback.format_exc()
            }
            log_workflow_debug("WORKFLOW PROCESS EXCEPTION:", error_details)
            raise
    except Exception as e:
        logger.error(f"❌ Test failed with error: {str(e)}")
        log_workflow_debug("Full error details", {
            "error_type": type(e).__name__,
            "error_message": str(e)
        })
        raise
async def main():
    """CLI entry point: run the unified workflow test and report the outcome."""
    print("Inside main()")
    sep = "=" * 50
    logger.info(sep)
    logger.info("CANDIDATE EVALUATION UNIFIED WORKFLOW TEST")
    logger.info(sep)
    try:
        result = await test_workflow_process()
        logger.info(sep)
        logger.info("TEST COMPLETED SUCCESSFULLY")
        logger.info(sep)
        return result
    except Exception:
        logger.error(sep)
        logger.error("TEST FAILED")
        logger.error(sep)
        raise
if __name__ == "__main__":
    # Script entry point: drive the async main() to completion.
    print("About to run main()")
    asyncio.run(main())
    print("main() finished")