Merge branch 'main' into dev-ida

Commit 4453883f7c by idittrich-valueon, 2025-05-14 12:42:53 +02:00 (committed via GitHub)
51 changed files with 1926 additions and 7224 deletions

app.py

@@ -114,7 +114,14 @@ app.add_middleware(
baseDir = pathlib.Path(__file__).parent
staticFolder = baseDir / "static"
os.makedirs(staticFolder, exist_ok=True)
app.mount("/static", StaticFiles(directory=str(staticFolder)), name="static")
# Mount static files with proper configuration
app.mount("/static", StaticFiles(directory=str(staticFolder), html=True), name="static")
# Add favicon route
@app.get("/favicon.ico")
async def favicon():
return FileResponse(str(staticFolder / "favicon.ico"), media_type="image/x-icon")
# General Elements
@app.get("/", tags=["General"])

@@ -1,569 +0,0 @@
import json
import os
from typing import List, Dict, Any, Optional, Union
import logging
logger = logging.getLogger(__name__)
class DatabaseConnector:
"""
A connector for JSON-based data storage.
Provides generic database operations with tenant and user context support.
"""
def __init__(self, dbHost: str, dbDatabase: str, dbUser: str = None, dbPassword: str = None,
mandateId: int = None, userId: int = None, skipInitialIdLookup: bool = False):
"""
Initializes the JSON database connector.
Args:
dbHost: Directory for the JSON files
dbDatabase: Database name
dbUser: Username for authentication (optional)
dbPassword: API key for authentication (optional)
mandateId: Context parameter for the tenant
userId: Context parameter for the user
skipInitialIdLookup: When True, skips looking up initial IDs for mandateId and userId
"""
# Store the input parameters
self.dbHost = dbHost
self.dbDatabase = dbDatabase
self.dbUser = dbUser
self.dbPassword = dbPassword
self.skipInitialIdLookup = skipInitialIdLookup
# Check if context parameters are set
if mandateId is None or userId is None:
raise ValueError("mandateId and userId must be set")
# Ensure the database directory exists
self.dbFolder = os.path.join(self.dbHost, self.dbDatabase)
os.makedirs(self.dbFolder, exist_ok=True)
# Cache for loaded data
self._tablesCache = {}
# Initialize system table
self._systemTableName = "_system"
self._initializeSystemTable()
# Temporarily store mandateId and userId
self._mandateId = mandateId
self._userId = userId
# If mandateId or userId are 0 and we're not skipping ID lookup, try to use the initial IDs
if not skipInitialIdLookup:
if mandateId == 0:
initialMandateId = self.getInitialId("mandates")
if initialMandateId is not None:
self._mandateId = initialMandateId
logger.info(f"Using initial mandateId: {initialMandateId} instead of 0")
if userId == 0:
initialUserId = self.getInitialId("users")
if initialUserId is not None:
self._userId = initialUserId
logger.info(f"Using initial userId: {initialUserId} instead of 0")
# Set the effective IDs as properties
self.mandateId = self._mandateId
self.userId = self._userId
logger.info(f"DatabaseConnector initialized for directory: {self.dbFolder}")
logger.debug(f"Context: mandateId={self.mandateId}, userId={self.userId}")
def _initializeSystemTable(self):
"""Initializes the system table if it doesn't exist yet."""
systemTablePath = self._getTablePath(self._systemTableName)
if not os.path.exists(systemTablePath):
emptySystemTable = {}
self._saveSystemTable(emptySystemTable)
logger.info(f"System table initialized in {systemTablePath}")
else:
# Load existing system table to ensure it's available
self._loadSystemTable()
logger.debug(f"Existing system table loaded from {systemTablePath}")
def _loadSystemTable(self) -> Dict[str, int]:
"""Loads the system table with the initial IDs."""
# Check if system table is in cache
if f"_{self._systemTableName}" in self._tablesCache:
return self._tablesCache[f"_{self._systemTableName}"]
systemTablePath = self._getTablePath(self._systemTableName)
try:
if os.path.exists(systemTablePath):
with open(systemTablePath, 'r', encoding='utf-8') as f:
data = json.load(f)
# Store in cache with special prefix to avoid collision with regular tables
self._tablesCache[f"_{self._systemTableName}"] = data
return data
else:
self._tablesCache[f"_{self._systemTableName}"] = {}
return {}
except Exception as e:
logger.error(f"Error loading the system table: {e}")
self._tablesCache[f"_{self._systemTableName}"] = {}
return {}
def _saveSystemTable(self, data: Dict[str, int]) -> bool:
"""Saves the system table with the initial IDs."""
systemTablePath = self._getTablePath(self._systemTableName)
try:
with open(systemTablePath, 'w', encoding='utf-8') as f:
json.dump(data, f, indent=2, ensure_ascii=False)
# Update cache
self._tablesCache[f"_{self._systemTableName}"] = data
return True
except Exception as e:
logger.error(f"Error saving the system table: {e}")
return False
def _getTablePath(self, table: str) -> str:
"""Returns the full path to a table file"""
return os.path.join(self.dbFolder, f"{table}.json")
def _loadTable(self, table: str) -> List[Dict[str, Any]]:
"""Loads a table from the corresponding JSON file"""
path = self._getTablePath(table)
# If the table is the system table, load it directly
if table == self._systemTableName:
return [] # The system table is not treated like normal tables
# If the table is already in the cache, use the cache
if table in self._tablesCache:
return self._tablesCache[table]
# Otherwise load the file
try:
if os.path.exists(path):
with open(path, 'r', encoding='utf-8') as f:
data = json.load(f)
self._tablesCache[table] = data
# If data was loaded and no initial ID is registered yet,
# register the ID of the first record (if available)
if data and not self.hasInitialId(table):
if "id" in data[0]:
self._registerInitialId(table, data[0]["id"])
logger.info(f"Initial ID {data[0]['id']} for table {table} retroactively registered")
return data
else:
# If the file doesn't exist, create an empty table
logger.info(f"New table {table}")
self._tablesCache[table] = []
self._saveTable(table, [])
return []
except Exception as e:
logger.error(f"Error loading table {table}: {e}")
return []
def _saveTable(self, table: str, data: List[Dict[str, Any]]) -> bool:
"""Saves a table to the corresponding JSON file"""
# The system table is handled specially
if table == self._systemTableName:
return False
path = self._getTablePath(table)
try:
with open(path, 'w', encoding='utf-8') as f:
json.dump(data, f, indent=2, ensure_ascii=False)
# Update the cache
self._tablesCache[table] = data
return True
except Exception as e:
logger.error(f"Error saving table {table}: {e}")
return False
def _filterByContext(self, records: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""
Filters records by tenant and user context,
if these fields exist in the record.
"""
filteredRecords = []
for record in records:
# Check if mandateId exists in the record and is not null
hasMandate = "mandateId" in record and record["mandateId"] is not None and record["mandateId"] != ""
# Check if userId exists in the record and is not null
hasUser = "userId" in record and record["userId"] is not None and record["userId"] != ""
# If both exist, filter accordingly
if hasMandate and hasUser:
if record["mandateId"] == self.mandateId:
filteredRecords.append(record)
# If only mandateId exists
elif hasMandate and not hasUser:
if record["mandateId"] == self.mandateId:
filteredRecords.append(record)
# If neither mandateId nor userId exist, add the record
elif not hasMandate and not hasUser:
filteredRecords.append(record)
return filteredRecords
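A short worked example of what _filterByContext keeps and drops (values assumed, with self.mandateId == 1):

# {"mandateId": 1, "userId": 2}  -> kept    (mandate matches; userId not compared)
# {"mandateId": 2}               -> dropped (different tenant)
# {"name": "global"}             -> kept    (no context fields at all)
# {"userId": 2}                  -> dropped (userId without mandateId matches no branch)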
def _applyRecordFilter(self, records: List[Dict[str, Any]], recordFilter: Dict[str, Any] = None) -> List[Dict[str, Any]]:
"""Applies a record filter to the records"""
if not recordFilter:
return records
filteredRecords = []
for record in records:
match = True
for field, value in recordFilter.items():
# Check if the field exists
if field not in record:
match = False
break
# Handle type conversion for integer comparisons both ways
if isinstance(value, int) and isinstance(record[field], str) and record[field].isdigit():
# Filter value is int, record value is string
if value != int(record[field]):
match = False
break
elif isinstance(value, str) and value.isdigit() and isinstance(record[field], int):
# Filter value is string, record value is int
if record[field] != int(value):
match = False
break
# Otherwise direct comparison
elif record[field] != value:
match = False
break
if match:
filteredRecords.append(record)
return filteredRecords
def _registerInitialId(self, table: str, initialId: int) -> bool:
"""
Registers the initial ID for a table.
Args:
table: Name of the table
initialId: The initial ID
Returns:
True on success, False on error
"""
try:
# Load the current system table
systemData = self._loadSystemTable()
# Only register if not already present
if table not in systemData:
systemData[table] = initialId
success = self._saveSystemTable(systemData)
if success:
logger.info(f"Initial ID {initialId} for table {table} registered")
return success
return True # If already present, this is not an error
except Exception as e:
logger.error(f"Error registering the initial ID for table {table}: {e}")
return False
def _removeInitialId(self, table: str) -> bool:
"""
Removes the initial ID for a table from the system table.
Args:
table: Name of the table
Returns:
True on success, False on error
"""
try:
# Load the current system table
systemData = self._loadSystemTable()
# Remove the entry if it exists
if table in systemData:
del systemData[table]
success = self._saveSystemTable(systemData)
if success:
logger.info(f"Initial ID for table {table} removed from system table")
return success
return True # If not present, this is not an error
except Exception as e:
logger.error(f"Error removing initial ID for table {table}: {e}")
return False
# Public API
def getTables(self) -> List[str]:
"""
Returns a list of all available tables.
Returns:
List of table names
"""
tables = []
try:
for filename in os.listdir(self.dbFolder):
if filename.endswith('.json') and not filename.startswith('_'):
tableName = filename[:-5] # Remove the .json extension
tables.append(tableName)
except Exception as e:
logger.error(f"Error reading the database directory: {e}")
return tables
def getFields(self, table: str) -> List[str]:
"""
Returns a list of all fields in a table.
Args:
table: Name of the table
Returns:
List of field names
"""
# Load the table data
data = self._loadTable(table)
if not data:
return []
# Take the first record as a reference for the fields
fields = list(data[0].keys()) if data else []
return fields
def getSchema(self, table: str, language: str = None) -> Dict[str, Dict[str, Any]]:
"""
Returns a schema object for a table with data types and labels.
Args:
table: Name of the table
language: Language for the labels (optional)
Returns:
Schema object with fields, data types and labels
"""
# Load the table data
data = self._loadTable(table)
schema = {}
if not data:
return schema
# Take the first record as a reference for the fields and data types
firstRecord = data[0]
for field, value in firstRecord.items():
# Determine the data type
dataType = type(value).__name__
# Create label (default is the field name)
label = field
schema[field] = {
"type": dataType,
"label": label
}
return schema
def getRecordset(self, table: str, fieldFilter: List[str] = None, recordFilter: Dict[str, Any] = None) -> List[Dict[str, Any]]:
"""
Returns a list of records from a table, filtered by criteria.
Args:
table: Name of the table
fieldFilter: Filter for fields (which fields should be returned)
recordFilter: Filter for records (which records should be returned)
Returns:
List of filtered records
"""
# Load the table data
data = self._loadTable(table)
logger.debug(f"getRecordset: data volume of {len(data)} bytes")
# Filter by tenant and user context
filteredData = self._filterByContext(data)
# Apply recordFilter if available
if recordFilter:
filteredData = self._applyRecordFilter(filteredData, recordFilter)
# If fieldFilter is available, reduce the fields
if fieldFilter and isinstance(fieldFilter, list):
result = []
for record in filteredData:
filteredRecord = {}
for field in fieldFilter:
if field in record:
filteredRecord[field] = record[field]
result.append(filteredRecord)
return result
return filteredData
def recordCreate(self, table: str, recordData: Dict[str, Any]) -> Dict[str, Any]:
"""
Creates a new record in the table.
Args:
table: Name of the table
recordData: Data for the new record
Returns:
The created record
"""
# Load the table data
data = self._loadTable(table)
# Add mandateId and userId if not present or 0
if "mandateId" not in recordData or recordData["mandateId"] == 0:
recordData["mandateId"] = self.mandateId
if "userId" not in recordData or recordData["userId"] == 0:
recordData["userId"] = self.userId
# Determine the next ID if not present
if "id" not in recordData:
nextId = 1
if data:
nextId = max(record["id"] for record in data if "id" in record) + 1
recordData["id"] = nextId
# If the table is empty and a system ID should be registered
if not data:
self._registerInitialId(table, recordData["id"])
logger.info(f"Initial ID {recordData['id']} for table {table} has been registered")
# Add the new record
data.append(recordData)
# Save the updated table
if self._saveTable(table, data):
return recordData
else:
raise ValueError(f"Error creating the record in table {table}")
def recordDelete(self, table: str, recordId: Union[str, int]) -> bool:
"""
Deletes a record from the table.
Args:
table: Name of the table
recordId: ID of the record to delete
Returns:
True on success, False on error
"""
# Load table data
data = self._loadTable(table)
# Search for the record
for i, record in enumerate(data):
if "id" in record and record["id"] == recordId:
# Check if the record belongs to the current mandate
if "mandateId" in record and record["mandateId"] != self.mandateId:
raise ValueError("Not your mandate")
# Check if it's an initial record
initialId = self.getInitialId(table)
if initialId is not None and initialId == recordId:
# Remove this entry from the system table
self._removeInitialId(table)
logger.info(f"Initial ID {recordId} for table {table} has been removed from the system table")
# Delete the record
del data[i]
# Save the updated table
return self._saveTable(table, data)
# Record not found
return False
def recordModify(self, table: str, recordId: Union[str, int], recordData: Dict[str, Any]) -> Dict[str, Any]:
"""
Modifies a record in the table.
Args:
table: Name of the table
recordId: ID of the record to modify
recordData: New data for the record
Returns:
The updated record
"""
# Load table data
data = self._loadTable(table)
# Search for the record
for i, record in enumerate(data):
if "id" in record and record["id"] == recordId:
# Check if the record belongs to the current mandate
if "mandateId" in record and record["mandateId"] != self.mandateId:
raise ValueError("Not your mandate")
# Prevent changing the ID
if "id" in recordData and recordData["id"] != recordId:
raise ValueError(f"The ID of a record in table {table} cannot be changed")
# Update the record
for key, value in recordData.items():
data[i][key] = value
# Save the updated table
if self._saveTable(table, data):
return data[i]
else:
raise ValueError(f"Error updating record in table {table}")
# Record not found
raise ValueError(f"Record with ID {recordId} not found in table {table}")
def hasInitialId(self, table: str) -> bool:
"""
Checks if an initial ID is registered for a table.
Args:
table: Name of the table
Returns:
True if an initial ID is registered, otherwise False
"""
systemData = self._loadSystemTable()
return table in systemData
def getInitialId(self, table: str) -> Optional[int]:
"""
Returns the initial ID for a table.
Args:
table: Name of the table
Returns:
The initial ID or None if not present
"""
systemData = self._loadSystemTable()
initialId = systemData.get(table)
logger.debug(f"Database '{self.dbDatabase}': Initial ID for table '{table}' is {initialId}")
if initialId is None:
logger.debug(f"No initial ID found for table {table}")
return initialId
def getAllInitialIds(self) -> Dict[str, int]:
"""
Returns all registered initial IDs.
Returns:
Dictionary with table names as keys and initial IDs as values
"""
systemData = self._loadSystemTable()
return systemData.copy() # Return a copy to protect the original
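For orientation, a minimal usage sketch of the removed connector; the directory, table, and field names are illustrative assumptions, not taken from the diff:

# Sketch only: exercises the public API defined above.
db = DatabaseConnector(dbHost="/tmp/jsondb", dbDatabase="demo", mandateId=1, userId=1)
created = db.recordCreate("contacts", {"name": "Alice"})           # id, mandateId, userId filled in
rows = db.getRecordset("contacts", recordFilter={"name": "Alice"})
updated = db.recordModify("contacts", created["id"], {"name": "Alice B."})
deleted = db.recordDelete("contacts", created["id"])               # True on success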

@@ -3,7 +3,7 @@
# System Configuration
APP_ENV_TYPE = dev
APP_ENV_LABEL = Development Instance Patrick
APP_API_URL = http://localhost:8080
APP_API_URL = http://localhost:8000
# Database Configuration System
DB_SYSTEM_HOST=D:/Temp/_powerondb
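For reference, a sketch of how these values presumably reach APP_CONFIG (imported by AgentEmail below from modules.configuration); the python-dotenv mechanics here are an assumption:

# Sketch only: mirrors APP_CONFIG.get("APP_API_URL", "(no-url)") as used in AgentEmail.
import os
from dotenv import load_dotenv

load_dotenv()  # reads a .env file like the one shown above
APP_CONFIG = {key: value for key, value in os.environ.items() if key.startswith(("APP_", "DB_"))}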

@@ -38,11 +38,10 @@ class AgentAnalyst(AgentBase):
def setDependencies(self, mydom=None):
"""Set external dependencies for the agent."""
self.mydom = mydom
async def processTask(self, task: Dict[str, Any]) -> Dict[str, Any]:
"""
Process a task by focusing on required outputs and using AI to generate them.
Process a task by focusing on required outputs and using AI to guide the analysis process.
Args:
task: Task dictionary with prompt, inputDocuments, outputSpecifications
@@ -53,62 +52,49 @@ class AgentAnalyst(AgentBase):
try:
# Extract task information
prompt = task.get("prompt", "")
inputDocuments = task.get("inputDocuments", [])
outputSpecs = task.get("outputSpecifications", [])
workflow = task.get("context", {}).get("workflow", {})
# Check AI service
if not self.mydom:
return {
"feedback": "The Analyst agent requires an AI service to function.",
"feedback": "The Analyst agent requires an AI service to function effectively.",
"documents": []
}
# Extract data from documents - focusing only on dataExtracted
datasets, documentContext = self._extractData(inputDocuments)
# Create analysis plan
if workflow:
self.workflowManager.logAdd(workflow, "Extracting data from documents...", level="info", progress=35)
analysisPlan = await self._createAnalysisPlan(prompt)
# Generate task analysis to understand what's needed
analysisPlan = await self._analyzeTask(prompt, documentContext, datasets, outputSpecs)
# Check if this is truly an analysis task
if not analysisPlan.get("requiresAnalysis", True):
return {
"feedback": "This task doesn't appear to require analysis. Please try a different agent.",
"documents": []
}
# Generate all required output documents
documents = []
# Analyze data
if workflow:
self.workflowManager.logAdd(workflow, "Analyzing task requirements...", level="info", progress=45)
analysisResults = await self._analyzeData(task, analysisPlan)
# Normalize missing output specs to an empty list
if not outputSpecs:
outputSpecs = []
# Format results into requested output documents
totalSpecs = len(outputSpecs)
for i, spec in enumerate(outputSpecs):
progress = 50 + int((i / totalSpecs) * 40) # Progress from 50% to 90%
if self.workflowManager:
self.workflowManager.logAdd(workflow, f"Creating output {i+1}/{totalSpecs}...", level="info", progress=progress)
# Process each output specification
for spec in outputSpecs:
outputLabel = spec.get("label", "")
outputDescription = spec.get("description", "")
# Determine type based on file extension
outputType = outputLabel.split('.')[-1].lower() if '.' in outputLabel else "txt"
# Generate appropriate content based on output type
if outputType in ['png', 'jpg', 'jpeg', 'svg']:
# Create visualization
document = await self._createVisualization(
datasets, prompt, outputLabel, analysisPlan, outputDescription
)
documents.append(document)
elif outputType in ['csv', 'json', 'xlsx']:
# Create data document
document = await self._createDataDocument(
datasets, prompt, outputLabel, analysisPlan, outputDescription
)
documents.append(document)
else:
# Create text document (report, analysis, etc.)
document = await self._createTextDocument(
datasets, documentContext, prompt, outputLabel,
outputType, analysisPlan, outputDescription
)
documents.append(document)
documents = await self._createOutputDocuments(
prompt,
analysisResults,
outputSpecs,
analysisPlan
)
# Generate feedback
feedback = f"{analysisPlan.get('analysisApproach')}"
if analysisPlan.get("keyInsights"):
feedback += f"\n\n{analysisPlan.get('keyInsights')}"
feedback = analysisPlan.get("feedback", f"I analyzed '{prompt[:50]}...' and generated {len(documents)} output documents.")
return {
"feedback": feedback,
@@ -116,7 +102,7 @@ class AgentAnalyst(AgentBase):
}
except Exception as e:
logger.error(f"Error in analysis: {str(e)}", exc_info=True)
logger.error(f"Error during analysis: {str(e)}", exc_info=True)
return {
"feedback": f"Error during analysis: {str(e)}",
"documents": []
@@ -196,69 +182,74 @@ class AgentAnalyst(AgentBase):
return datasets, documentContext
async def _analyzeTask(self, prompt: str, context: str, datasets: Dict, outputSpecs: List) -> Dict:
async def _analyzeTask(self, prompt: str, documentContext: str, datasets: Dict[str, Any], outputSpecs: List[Dict[str, Any]]) -> Dict[str, Any]:
"""
Use AI to analyze the task and create a plan for analysis.
Analyze the task requirements using AI.
Args:
prompt: The task prompt
context: Document context text
datasets: Dictionary of extracted datasets
documentContext: Context from input documents
datasets: Available datasets
outputSpecs: Output specifications
Returns:
Analysis plan dictionary
"""
# Prepare dataset information
datasetInfo = {}
for name, df in datasets.items():
try:
datasetInfo[name] = {
"shape": df.shape,
"columns": df.columns.tolist(),
"dtypes": {col: str(df[col].dtype) for col in df.columns},
"sample": df.head(3).to_dict(orient='records')
}
except Exception:
datasetInfo[name] = {"error": "Could not process dataset"}
# Create analysis prompt
analysisPrompt = f"""
Analyze this data analysis task and create a plan.
Analyze this data analysis task and create a detailed plan:
TASK: {prompt}
AVAILABLE DATA:
{json.dumps(datasetInfo, indent=2)}
DOCUMENT CONTEXT:
{context[:1000]}... (truncated)
{documentContext}
OUTPUT REQUIREMENTS:
AVAILABLE DATASETS:
{json.dumps(datasets, indent=2)}
REQUIRED OUTPUTS:
{json.dumps(outputSpecs, indent=2)}
Create a detailed analysis plan in JSON format with the following structure:
Create a detailed analysis plan in JSON format with:
{{
"analysisType": "statistical|trend|comparative|predictive|cluster|general",
"keyQuestions": ["question1", "question2"],
"recommendedVisualizations": [{{
"type": "chart_type",
"dataSource": "dataset_name",
"variables": ["col1", "col2"],
"purpose": "explanation"
}}],
"keyInsights": "brief summary of initial insights",
"analysisApproach": "brief description of recommended approach"
"analysisSteps": [
{{
"step": "step description",
"purpose": "why this step is needed",
"datasets": ["dataset1", "dataset2"],
"techniques": ["technique1", "technique2"],
"outputs": ["output1", "output2"]
}}
],
"visualizations": [
{{
"type": "visualization type",
"purpose": "what it shows",
"datasets": ["dataset1"],
"settings": {{"key": "value"}}
}}
],
"insights": [
{{
"type": "insight type",
"description": "what to look for",
"datasets": ["dataset1"]
}}
],
"feedback": "explanation of the analysis approach"
}}
Only return valid JSON. No preamble or explanations.
Respond with ONLY the JSON object, no additional text or explanations.
"""
try:
response = await self.mydom.callAi([
{"role": "system", "content": "You are a data analysis expert. Respond with valid JSON only."},
{"role": "user", "content": analysisPrompt}
], produceUserAnswer = True)
# Extract JSON from response
try:
# Get analysis plan from AI
response = await self.mydom.callAi([
{"role": "system", "content": "You are a data analysis expert. Create detailed analysis plans. Respond with valid JSON only."},
{"role": "user", "content": analysisPrompt}
], produceUserAnswer=True)
# Extract JSON
jsonStart = response.find('{')
jsonEnd = response.rfind('}') + 1
@@ -266,154 +257,367 @@ class AgentAnalyst(AgentBase):
plan = json.loads(response[jsonStart:jsonEnd])
return plan
else:
# Fallback if JSON not found
# Fallback plan
logger.warning(f"Not able creating analysis plan, generating fallback plan")
return {
"analysisType": "general",
"keyQuestions": ["What insights can be extracted from this data?"],
"recommendedVisualizations": [],
"keyInsights": "Analysis plan could not be created",
"analysisApproach": "General exploratory analysis"
"analysisSteps": [
{
"step": "Basic data analysis",
"purpose": "Understand the data structure and content",
"datasets": list(datasets.keys()),
"techniques": ["summary statistics", "data visualization"],
"outputs": ["summary report", "basic visualizations"]
}
],
"visualizations": [
{
"type": "basic charts",
"purpose": "Show data distribution and relationships",
"datasets": list(datasets.keys()),
"settings": {}
}
],
"insights": [
{
"type": "basic insights",
"description": "Key findings from the data",
"datasets": list(datasets.keys())
}
],
"feedback": f"I'll analyze the data and provide insights about {prompt}"
}
except Exception as e:
logger.warning(f"Error creating analysis plan: {str(e)}")
# Simple fallback plan
return {
"analysisType": "general",
"keyQuestions": ["What insights can be extracted from this data?"],
"recommendedVisualizations": [],
"keyInsights": "Analysis plan could not be created",
"analysisApproach": "General exploratory analysis"
"analysisSteps": [
{
"step": "Basic data analysis",
"purpose": "Understand the data structure and content",
"datasets": list(datasets.keys()),
"techniques": ["summary statistics", "data visualization"],
"outputs": ["summary report", "basic visualizations"]
}
],
"visualizations": [
{
"type": "basic charts",
"purpose": "Show data distribution and relationships",
"datasets": list(datasets.keys()),
"settings": {}
}
],
"insights": [
{
"type": "basic insights",
"description": "Key findings from the data",
"datasets": list(datasets.keys())
}
],
"feedback": f"I'll analyze the data and provide insights about {prompt}"
}
async def _createAnalysisPlan(self, prompt: str) -> Dict[str, Any]:
"""
Create an analysis plan based on the task prompt.
Args:
prompt: The task prompt
Returns:
Analysis plan dictionary
"""
try:
# Create analysis prompt
analysisPrompt = f"""
Analyze this data analysis task and create a detailed plan:
TASK: {prompt}
Create a detailed analysis plan in JSON format with:
{{
"requiresAnalysis": true/false,
"analysisSteps": [
{{
"step": "step description",
"purpose": "why this step is needed",
"techniques": ["technique1", "technique2"],
"outputs": ["output1", "output2"]
}}
],
"visualizations": [
{{
"type": "visualization type",
"purpose": "what it shows",
"settings": {{"key": "value"}}
}}
],
"insights": [
{{
"type": "insight type",
"description": "what to look for"
}}
],
"feedback": "explanation of the analysis approach"
}}
Respond with ONLY the JSON object, no additional text or explanations.
"""
# Get analysis plan from AI
response = await self.mydom.callAi([
{"role": "system", "content": "You are a data analysis expert. Create detailed analysis plans. Respond with valid JSON only."},
{"role": "user", "content": analysisPrompt}
], produceUserAnswer=True)
# Extract JSON
jsonStart = response.find('{')
jsonEnd = response.rfind('}') + 1
if jsonStart >= 0 and jsonEnd > jsonStart:
plan = json.loads(response[jsonStart:jsonEnd])
return plan
else:
# Fallback plan
logger.warning(f"Not able creating analysis plan, generating fallback plan")
return {
"requiresAnalysis": True,
"analysisSteps": [
{
"step": "Basic data analysis",
"purpose": "Understand the data structure and content",
"techniques": ["summary statistics", "data visualization"],
"outputs": ["summary report", "basic visualizations"]
}
],
"visualizations": [
{
"type": "basic charts",
"purpose": "Show data distribution and relationships",
"settings": {}
}
],
"insights": [
{
"type": "basic insights",
"description": "Key findings from the data"
}
],
"feedback": f"I'll analyze the data and provide insights about {prompt}"
}
except Exception as e:
logger.warning(f"Error creating analysis plan: {str(e)}")
# Simple fallback plan
return {
"requiresAnalysis": True,
"analysisSteps": [
{
"step": "Basic data analysis",
"purpose": "Understand the data structure and content",
"techniques": ["summary statistics", "data visualization"],
"outputs": ["summary report", "basic visualizations"]
}
],
"visualizations": [
{
"type": "basic charts",
"purpose": "Show data distribution and relationships",
"settings": {}
}
],
"insights": [
{
"type": "basic insights",
"description": "Key findings from the data"
}
],
"feedback": f"I'll analyze the data and provide insights about {prompt}"
}
async def _createVisualization(self, datasets: Dict, prompt: str, outputLabel: str,
analysisPlan: Dict, description: str) -> Dict:
"""
Create visualization document using AI guidance.
Create a visualization based on the analysis plan.
Args:
datasets: Dictionary of datasets
prompt: Original task prompt
outputLabel: Output filename
analysisPlan: Analysis plan from AI
outputLabel: Output file label
analysisPlan: Analysis plan
description: Output description
Returns:
Visualization document
Document dictionary with visualization
"""
# Determine format from filename
formatType = outputLabel.split('.')[-1].lower()
if formatType not in ['png', 'jpg', 'jpeg', 'svg']:
formatType = 'png'
# If no datasets available, create error message image
if not datasets:
plt.figure(figsize=(10, 6))
plt.text(0.5, 0.5, "No data available for visualization",
ha='center', va='center', fontsize=14)
plt.tight_layout()
imgData = self._getImageBase64(formatType)
plt.close()
return {
"label": outputLabel,
"content": imgData,
"metadata": {
"contentType": f"image/{formatType}"
}
}
# Get recommended visualization from plan
recommendedViz = analysisPlan.get("recommendedVisualizations", [])
# Prepare dataset info for the first dataset if none specified
if not recommendedViz and datasets:
name, df = next(iter(datasets.items()))
recommendedViz = [{
"type": "auto",
"dataSource": name,
"variables": df.columns.tolist()[:5],
"purpose": "general analysis"
}]
# Create visualization code prompt
vizPrompt = f"""
Generate Python matplotlib/seaborn code to create a visualization for:
TASK: {prompt}
VISUALIZATION REQUIREMENTS:
- Output format: {formatType}
- Filename: {outputLabel}
- Description: {description}
RECOMMENDED VISUALIZATION:
{json.dumps(recommendedViz, indent=2)}
AVAILABLE DATASETS:
"""
# Add dataset info for recommended sources
for viz in recommendedViz:
dataSource = viz.get("dataSource")
if dataSource in datasets:
df = datasets[dataSource]
vizPrompt += f"\nDataset '{dataSource}':\n"
vizPrompt += f"- Shape: {df.shape}\n"
vizPrompt += f"- Columns: {df.columns.tolist()}\n"
vizPrompt += f"- Sample data: {df.head(3).to_dict(orient='records')}\n"
vizPrompt += """
Generate ONLY Python code that:
1. Uses matplotlib and/or seaborn to create a clear visualization
2. Sets figure size to (10, 6)
3. Includes appropriate titles, labels, and legend
4. Uses professional color schemes
5. Handles any missing data gracefully
Return ONLY executable Python code, no explanations or markdown.
"""
try:
# Get visualization code from AI
vizCode = await self.mydom.callAi([
{"role": "system", "content": "You are a data visualization expert. Provide only executable Python code."},
{"role": "user", "content": vizPrompt}
], produceUserAnswer = True)
# Get visualization recommendations
vizRecommendations = analysisPlan.get("visualizations", [])
# Clean code
vizCode = vizCode.replace("```python", "").replace("```", "").strip()
if not vizRecommendations:
# Generate visualization recommendations if none provided
self.mydom.logAdd(analysisPlan.get("workflowId"), "Generating visualization recommendations...", level="info", progress=50)
vizPrompt = f"""
Based on this data and task, recommend appropriate visualizations.
# Execute visualization code
plt.figure(figsize=(10, 6))
TASK: {prompt}
DESCRIPTION: {description}
# Make local variables available to the code
localVars = {
"plt": plt,
"sns": sns,
"pd": pd,
"np": __import__('numpy')
}
DATASETS:
{json.dumps({name: {"shape": df.shape, "columns": df.columns.tolist()}
for name, df in datasets.items()}, indent=2)}
# Add datasets to local variables
for name, df in datasets.items():
# Create a sanitized variable name
varName = ''.join(c if c.isalnum() else '_' for c in name)
localVars[varName] = df
Recommend visualizations in JSON format:
{{
"visualizations": [
{{
"type": "chart_type",
"dataSource": "dataset_name",
"variables": ["col1", "col2"],
"purpose": "explanation"
}}
]
}}
"""
# Also add with standard names for simpler code
if "df" not in localVars:
localVars["df"] = df
elif "df2" not in localVars:
localVars["df2"] = df
response = await self.mydom.callAi([
{"role": "system", "content": "You are a data visualization expert. Recommend appropriate visualizations based on the data and task."},
{"role": "user", "content": vizPrompt}
])
# Execute the visualization code
exec(vizCode, globals(), localVars)
# Extract JSON
jsonStart = response.find('{')
jsonEnd = response.rfind('}') + 1
# Capture the image
imgData = self._getImageBase64(formatType)
plt.close()
if jsonStart >= 0 and jsonEnd > jsonStart:
vizData = json.loads(response[jsonStart:jsonEnd])
vizRecommendations = vizData.get("visualizations", [])
return self.formatAgentDocumentOutput(outputLabel, imgData, f"image/{formatType}")
# Determine format from filename
formatType = outputLabel.split('.')[-1].lower()
if formatType not in ['png', 'jpg', 'jpeg', 'svg']:
formatType = 'png'
# If no datasets available, create error message image
if not datasets:
plt.figure(figsize=(10, 6))
plt.text(0.5, 0.5, "No data available for visualization",
ha='center', va='center', fontsize=14)
plt.tight_layout()
imgData = self._getImageBase64(formatType)
plt.close()
return {
"label": outputLabel,
"content": imgData,
"metadata": {
"contentType": f"image/{formatType}"
}
}
# Prepare dataset info for the first dataset if none specified
if not vizRecommendations and datasets:
name, df = next(iter(datasets.items()))
vizRecommendations = [{
"type": "auto",
"dataSource": name,
"variables": df.columns.tolist()[:5],
"purpose": "general analysis"
}]
# Create visualization code prompt
vizPrompt = f"""
Generate Python matplotlib/seaborn code to create a visualization for:
TASK: {prompt}
VISUALIZATION REQUIREMENTS:
- Output format: {formatType}
- Filename: {outputLabel}
- Description: {description}
RECOMMENDED VISUALIZATION:
{json.dumps(vizRecommendations, indent=2)}
AVAILABLE DATASETS:
"""
# Add dataset info for recommended sources
for viz in vizRecommendations:
dataSource = viz.get("dataSource")
if dataSource in datasets:
df = datasets[dataSource]
vizPrompt += f"\nDataset '{dataSource}':\n"
vizPrompt += f"- Shape: {df.shape}\n"
vizPrompt += f"- Columns: {df.columns.tolist()}\n"
vizPrompt += f"- Sample data: {df.head(3).to_dict(orient='records')}\n"
vizPrompt += """
Generate ONLY Python code that:
1. Uses matplotlib and/or seaborn to create a clear visualization
2. Sets figure size to (10, 6)
3. Includes appropriate titles, labels, and legend
4. Uses professional color schemes
5. Handles any missing data gracefully
Return ONLY executable Python code, no explanations or markdown.
"""
try:
# Get visualization code from AI
vizCode = await self.mydom.callAi([
{"role": "system", "content": "You are a data visualization expert. Provide only executable Python code."},
{"role": "user", "content": vizPrompt}
], produceUserAnswer = True)
# Clean code
vizCode = vizCode.replace("```python", "").replace("```", "").strip()
# Execute visualization code
plt.figure(figsize=(10, 6))
# Make local variables available to the code
localVars = {
"plt": plt,
"sns": sns,
"pd": pd,
"np": __import__('numpy')
}
# Add datasets to local variables
for name, df in datasets.items():
# Create a sanitized variable name
varName = ''.join(c if c.isalnum() else '_' for c in name)
localVars[varName] = df
# Also add with standard names for simpler code
if "df" not in localVars:
localVars["df"] = df
elif "df2" not in localVars:
localVars["df2"] = df
# Execute the visualization code
exec(vizCode, globals(), localVars)
# Capture the image
imgData = self._getImageBase64(formatType)
plt.close()
return self.formatAgentDocumentOutput(outputLabel, imgData, f"image/{formatType}")
except Exception as e:
logger.error(f"Error creating visualization: {str(e)}", exc_info=True)
# Create error message image
plt.figure(figsize=(10, 6))
plt.text(0.5, 0.5, f"Visualization error: {str(e)}",
ha='center', va='center', fontsize=12)
plt.tight_layout()
imgData = self._getImageBase64(formatType)
plt.close()
return self.formatAgentDocumentOutput(outputLabel, imgData, f"image/{formatType}")
except Exception as e:
logger.error(f"Error creating visualization: {str(e)}", exc_info=True)
@@ -664,6 +868,102 @@ class AgentAnalyst(AgentBase):
# Convert to base64
return base64.b64encode(imageData).decode('utf-8')
async def _analyzeData(self, task: Dict[str, Any], analysisPlan: Dict[str, Any]) -> Dict[str, Any]:
"""
Analyze data based on the analysis plan.
Args:
task: Task dictionary with input documents and specifications
analysisPlan: Analysis plan from _createAnalysisPlan
Returns:
Analysis results dictionary
"""
try:
# Extract data from input documents
inputDocuments = task.get("inputDocuments", [])
datasets, documentContext = self._extractData(inputDocuments)
# Get task information
prompt = task.get("prompt", "")
outputSpecs = task.get("outputSpecifications", [])
# Analyze task requirements
analysisResults = await self._analyzeTask(prompt, documentContext, datasets, outputSpecs)
# Add datasets and context to results
analysisResults["datasets"] = datasets
analysisResults["documentContext"] = documentContext
return analysisResults
except Exception as e:
logger.error(f"Error analyzing data: {str(e)}", exc_info=True)
return {
"error": str(e),
"datasets": {},
"documentContext": ""
}
async def _createOutputDocuments(self, prompt: str, analysisResults: Dict[str, Any],
outputSpecs: List[Dict[str, Any]], analysisPlan: Dict[str, Any]) -> List[Dict[str, Any]]:
"""
Create output documents based on analysis results.
Args:
prompt: Original task prompt
analysisResults: Results from data analysis
outputSpecs: List of output specifications
analysisPlan: Analysis plan from _createAnalysisPlan
Returns:
List of document objects
"""
documents = []
datasets = analysisResults.get("datasets", {})
documentContext = analysisResults.get("documentContext", "")
# Process each output specification
for spec in outputSpecs:
outputLabel = spec.get("label", "")
outputDescription = spec.get("description", "")
# Determine format from filename
formatType = outputLabel.split('.')[-1].lower() if '.' in outputLabel else "txt"
try:
# Create appropriate document based on format
if formatType in ["png", "jpg", "jpeg", "svg"]:
# Visualization output
document = await self._createVisualization(
datasets, prompt, outputLabel, analysisPlan, outputDescription
)
elif formatType in ["csv", "json", "xlsx"]:
# Data document output
document = await self._createDataDocument(
datasets, prompt, outputLabel, analysisPlan, outputDescription
)
else:
# Text document output (markdown, html, text)
document = await self._createTextDocument(
datasets, documentContext, prompt, outputLabel, formatType,
analysisPlan, outputDescription
)
documents.append(document)
except Exception as e:
logger.error(f"Error creating output document {outputLabel}: {str(e)}", exc_info=True)
# Create error document
errorDoc = self.formatAgentDocumentOutput(
outputLabel,
f"Error creating document: {str(e)}",
"text/plain"
)
documents.append(errorDoc)
return documents
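The find('{') / rfind('}') parsing recurs in several of these agents; a small shared helper capturing it might look like this (name and placement assumed):

import json
from typing import Any, Dict, Optional

def extractJsonObject(response: str) -> Optional[Dict[str, Any]]:
    """Return the outermost JSON object in an AI response, or None if absent or invalid."""
    jsonStart = response.find('{')
    jsonEnd = response.rfind('}') + 1
    if jsonStart < 0 or jsonEnd <= jsonStart:
        return None
    try:
        return json.loads(response[jsonStart:jsonEnd])
    except json.JSONDecodeError:
        return None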
# Factory function for the Analyst agent
def getAgentAnalyst():

@@ -33,7 +33,6 @@ class AgentCoach(AgentBase):
def setDependencies(self, mydom=None):
"""Set external dependencies for the agent."""
self.mydom = mydom
async def processTask(self, task: Dict[str, Any]) -> Dict[str, Any]:
"""

@@ -41,7 +41,6 @@ class AgentCoder(AgentBase):
def setDependencies(self, mydom=None):
"""Set external dependencies for the agent."""
self.mydom = mydom
async def processTask(self, task: Dict[str, Any]) -> Dict[str, Any]:
"""

@@ -30,7 +30,6 @@ class AgentDocumentation(AgentBase):
def setDependencies(self, mydom=None):
"""Set external dependencies for the agent."""
self.mydom = mydom
async def processTask(self, task: Dict[str, Any]) -> Dict[str, Any]:
"""

@@ -7,8 +7,8 @@ import logging
import json
import base64
import os
import msal
import requests
import msal
from typing import Dict, Any, List, Optional
from modules.configuration import APP_CONFIG
@@ -41,15 +41,11 @@ class AgentEmail(AgentBase):
self.authority = None
self.scopes = ["Mail.ReadWrite", "User.Read"]
# Token storage directory
self.token_dir = './token_storage'
if not os.path.exists(self.token_dir):
os.makedirs(self.token_dir)
logger.info(f"Created token storage directory: {self.token_dir}")
# API base URL for Microsoft authentication
self.api_base_url = APP_CONFIG.get("APP_API_URL", "(no-url)")
def setDependencies(self, mydom=None):
"""Set external dependencies for the agent."""
self.mydom = mydom
self._loadConfiguration()
def _loadConfiguration(self):
@@ -84,6 +80,7 @@ class AgentEmail(AgentBase):
# Extract task information
prompt = task.get("prompt", "")
inputDocuments = task.get("inputDocuments", [])
outputSpecs = task.get("outputSpecifications", [])
# Check AI service
if not self.mydom:
@@ -131,22 +128,36 @@ class AgentEmail(AgentBase):
# Prepare output documents
documents = []
# Add HTML preview document
previewDoc = self.formatAgentDocumentOutput(
"email_preview.html",
htmlPreview,
"text/html"
)
documents.append(previewDoc)
# Process output specifications
for spec in outputSpecs:
label = spec.get("label", "")
description = spec.get("description", "")
# Add email template as JSON for reference
templateJson = json.dumps(emailTemplate, indent=2)
templateDoc = self.formatAgentDocumentOutput(
"email_template.json",
templateJson,
"application/json"
)
documents.append(templateDoc)
if label.endswith(".html"):
# Create the HTML template file
templateDoc = self.formatAgentDocumentOutput(
label,
emailTemplate["htmlBody"], # Use the actual HTML body, not the preview
"text/html"
)
documents.append(templateDoc)
elif label.endswith(".json"):
# Create JSON template if requested
templateJson = json.dumps(emailTemplate, indent=2)
templateDoc = self.formatAgentDocumentOutput(
label,
templateJson,
"application/json"
)
documents.append(templateDoc)
else:
# Default to preview for other cases
previewDoc = self.formatAgentDocumentOutput(
label,
htmlPreview,
"text/html"
)
documents.append(previewDoc)
# Prepare feedback message
if draft_result:
@@ -233,28 +244,20 @@ class AgentEmail(AgentBase):
# Add document name to contents
documentContents.append(f"\n\n--- {docName} ---\n")
# Process contents
hasAttachment = False
for content in doc.get("contents", []):
# Add extracted text to document contents
if content.get("dataExtracted"):
documentContents.append(content.get("dataExtracted", ""))
# Prepare attachment if it has content data
if content.get("data"):
# Check if this content should be an attachment
# Typically files like PDFs, images, etc.
contentType = content.get("contentType", "")
if (not contentType.startswith("text/") or
contentType in ["application/pdf", "application/msword"]):
hasAttachment = True
# If document has content to attach, add to attachments
if hasAttachment:
# Process document data directly
if doc.get("data"):
# Add to attachments with proper metadata
attachments.append({
"name": docName,
"document": doc
"document": {
"data": doc["data"],
"mimeType": doc.get("mimeType", "application/octet-stream"),
"base64Encoded": doc.get("base64Encoded", False)
}
})
documentContents.append(f"Document attached: {docName}")
else:
documentContents.append(f"Document referenced: {docName}")
return "\n".join(documentContents), attachments
@@ -294,7 +297,7 @@ class AgentEmail(AgentBase):
try:
response = await self.mydom.callAi([
{"role": "system", "content": "You are an email template specialist. Respond with valid JSON only."},
{"role": "system", "content": "You are an email template specialist. Create professional emails. Respond with valid JSON only."},
{"role": "user", "content": emailPrompt}
], produceUserAnswer=True)
@@ -306,7 +309,8 @@ class AgentEmail(AgentBase):
template = json.loads(response[jsonStart:jsonEnd])
return template
else:
# Fallback if JSON not found
# Fallback plan
logger.warning(f"Not able creating email template, generating fallback plan")
return {
"recipient": "recipient@example.com",
"subject": "Information Regarding Your Request",
@@ -377,125 +381,86 @@ class AgentEmail(AgentBase):
"""
return html
def _getCurrentUserToken(self):
def _getCurrentUserToken(self) -> tuple:
"""
Get the current user's token from the token store.
Does not attempt to initiate authentication flow.
Returns:
Tuple of (user info, access token) or (None, None) if no valid token
Get the current user's Microsoft token using the current user context.
Returns tuple of (user_info, access_token) or (None, None) if not authenticated.
"""
try:
# Check if we have any token files
if not os.path.exists(self.token_dir) or not os.listdir(self.token_dir):
logger.warning("No token files found. User needs to authenticate with Microsoft.")
if not self.mydom:
logger.error("No mydom interface available")
return None, None
# Find the most recently modified token file
token_files = [os.path.join(self.token_dir, f) for f in os.listdir(self.token_dir) if f.endswith('.json')]
if not token_files:
# Get token data from database
token_data = self.mydom.getMsftToken()
if not token_data:
logger.info("No Microsoft token found for user")
return None, None
most_recent = max(token_files, key=os.path.getmtime)
user_id = os.path.basename(most_recent).split('.')[0]
# Verify token is still valid
if not self._verifyToken(token_data.get("access_token")):
logger.info("Token invalid, attempting refresh")
if not self._refreshToken(token_data):
logger.info("Token refresh failed")
return None, None
# Get updated token data after refresh
token_data = self.mydom.getMsftToken()
# Load the token
token_data = self._loadTokenFromFile(user_id)
if not token_data or not token_data.get("access_token"):
logger.warning(f"No valid token data for user {user_id}")
return None, None
return token_data.get("user_info"), token_data.get("access_token")
# Get user info from token
user_info = self._getUserInfoFromToken(token_data["access_token"])
if not user_info:
# Try to refresh the token
if self._refreshToken(user_id):
# Load the refreshed token
token_data = self._loadTokenFromFile(user_id)
if token_data and token_data.get("access_token"):
user_info = self._getUserInfoFromToken(token_data["access_token"])
if user_info:
return user_info, token_data["access_token"]
logger.warning(f"Could not get user info for user {user_id}")
return None, None
return user_info, token_data["access_token"]
except Exception as e:
logger.error(f"Error getting current user token: {str(e)}")
return None, None
def _loadTokenFromFile(self, user_id):
"""Load token data from a file"""
filename = os.path.join(self.token_dir, f"{user_id}.json")
if os.path.exists(filename):
try:
with open(filename, 'r') as f:
return json.load(f)
except Exception as e:
logger.error(f"Error loading token file: {str(e)}")
return None
return None
def _getUserInfoFromToken(self, access_token):
"""Get user information using the access token"""
headers = {
'Authorization': f'Bearer {access_token}',
'Content-Type': 'application/json'
}
def _verifyToken(self, token: str) -> bool:
"""Verify the access token is valid"""
try:
headers = {
'Authorization': f'Bearer {token}',
'Content-Type': 'application/json'
}
response = requests.get('https://graph.microsoft.com/v1.0/me', headers=headers)
if response.status_code == 200:
user_data = response.json()
return {
"name": user_data.get("displayName", ""),
"email": user_data.get("userPrincipalName", ""),
"id": user_data.get("id", "")
}
else:
logger.error(f"Error getting user info: {response.status_code} - {response.text}")
return None
except Exception as e:
logger.error(f"Exception getting user info: {str(e)}")
return None
return response.status_code == 200
def _refreshToken(self, user_id):
except Exception as e:
logger.error(f"Error verifying token: {str(e)}")
return False
def _refreshToken(self, token_data: Dict[str, Any]) -> bool:
"""Refresh the access token using the stored refresh token"""
token_data = self._loadTokenFromFile(user_id)
if not token_data or not token_data.get("refresh_token"):
logger.warning("No refresh token available")
return False
msal_app = msal.ConfidentialClientApplication(
self.client_id,
authority=self.authority,
client_credential=self.client_secret
)
result = msal_app.acquire_token_by_refresh_token(
token_data["refresh_token"],
scopes=self.scopes
)
if "error" in result:
logger.error(f"Error refreshing token: {result.get('error')}")
return False
# Update tokens in storage
token_data["access_token"] = result["access_token"]
if "refresh_token" in result:
token_data["refresh_token"] = result["refresh_token"]
# Save the updated token
filename = os.path.join(self.token_dir, f"{user_id}.json")
try:
with open(filename, 'w') as f:
json.dump(token_data, f)
logger.info(f"Token saved for user: {user_id}")
if not token_data or not token_data.get("refresh_token"):
logger.warning("No refresh token available")
return False
msal_app = msal.ConfidentialClientApplication(
self.client_id,
authority=self.authority,
client_credential=self.client_secret
)
result = msal_app.acquire_token_by_refresh_token(
token_data["refresh_token"],
scopes=self.scopes
)
if "error" in result:
logger.error(f"Error refreshing token: {result.get('error')}")
return False
# Update token data
token_data["access_token"] = result["access_token"]
if "refresh_token" in result:
token_data["refresh_token"] = result["refresh_token"]
# Save updated token
self.mydom.saveMsftToken(token_data)
logger.info("Access token refreshed successfully")
return True
except Exception as e:
logger.error(f"Error saving token file: {str(e)}")
logger.error(f"Error refreshing token: {str(e)}")
return False
def _createDraftEmail(self, recipient, subject, body, attachments=None):
@@ -522,8 +487,8 @@ class AgentEmail(AgentBase):
def _createGraphDraftEmail(self, access_token, recipient, subject, body, attachments=None):
"""
Create a draft email using Microsoft Graph API with fixed attachment handling.
Directly uses the document's data attribute for attachments.
Create a draft email using Microsoft Graph API.
Treats all files as binary attachments without content analysis.
Args:
access_token: Microsoft Graph access token
@@ -540,7 +505,7 @@ class AgentEmail(AgentBase):
'Content-Type': 'application/json'
}
# Prepare email data
# Prepare email data with proper structure
email_data = {
'subject': subject,
'body': {
@@ -561,90 +526,80 @@ class AgentEmail(AgentBase):
email_data['attachments'] = []
for attachment in attachments:
# Get the document object
doc = attachment.get('document', {})
file_name = attachment.get('name', 'attachment.file')
logger.info(f"Processing attachment: {file_name}")
# Directly access the data attribute from the document
if 'data' in doc:
file_content = doc['data']
is_base64 = doc.get('base64Encoded', False)
# Get the document data directly
file_content = doc.get('data')
if not file_content:
logger.warning(f"No data found for attachment: {file_name}")
continue
# Determine content type
content_type = "application/octet-stream"
if 'mimeType' in doc:
content_type = doc['mimeType']
elif 'contentType' in doc:
content_type = doc['contentType']
# Get content type from document metadata
mime_type = doc.get('mimeType', 'application/octet-stream')
is_base64 = doc.get('base64Encoded', False)
# Check if we need to encode the content
if not is_base64:
logger.info(f"Base64 encoding content for {file_name}")
# Handle content encoding
try:
if is_base64:
# Content is already base64 encoded
content_bytes = file_content
else:
# Content needs to be base64 encoded
if isinstance(file_content, str):
try:
# Check if already valid base64
base64.b64decode(file_content)
logger.info("Content appears to be valid base64 already")
except Exception:
# Not valid base64, encode it
logger.info("Encoding string content to base64")
file_content = base64.b64encode(file_content.encode('utf-8')).decode('utf-8')
# For text files, encode the string to bytes first
content_bytes = base64.b64encode(file_content.encode('utf-8')).decode('utf-8')
elif isinstance(file_content, bytes):
logger.info("Encoding bytes content to base64")
file_content = base64.b64encode(file_content).decode('utf-8')
# For binary files, encode directly
content_bytes = base64.b64encode(file_content).decode('utf-8')
else:
logger.warning(f"Unexpected content type for {file_name}")
continue
# Calculate size from decoded content
decoded_size = len(base64.b64decode(content_bytes))
# Add attachment to email data
logger.info(f"Adding attachment: {file_name} ({content_type})")
logger.info(f"Adding attachment: {file_name} ({mime_type}, size: {decoded_size} bytes)")
attachment_data = {
'@odata.type': '#microsoft.graph.fileAttachment',
'name': file_name,
'contentType': content_type,
'contentBytes': file_content
'contentType': mime_type,
'contentBytes': content_bytes,
'isInline': False,
'size': decoded_size
}
email_data['attachments'].append(attachment_data)
logger.info(f"Successfully added attachment: {file_name}")
else:
logger.warning(f"Document does not contain 'data' attribute: {file_name}")
# Try to find data in the fileId
if 'fileId' in doc:
logger.info(f"Found fileId: {doc['fileId']} - could implement fileId-based attachment lookup here")
# Future enhancement: implement file lookup by fileId
# Try to create draft using drafts folder endpoint (Option 1)
except Exception as e:
logger.error(f"Error processing attachment {file_name}: {str(e)}")
continue
# Try to create draft using drafts folder endpoint
try:
logger.info("Attempting to create draft email using drafts folder endpoint")
logger.info("Attempting to create draft email using messages endpoint")
logger.info(f"Email data structure: subject={subject}, recipient={recipient}, " +
f"has_attachments={bool(email_data.get('attachments'))}, " +
f"attachment_count={len(email_data.get('attachments', []))}")
f"has_attachments={bool(email_data.get('attachments'))}, " +
f"attachment_count={len(email_data.get('attachments', []))}")
# Create the draft message
response = requests.post(
'https://graph.microsoft.com/v1.0/me/mailFolders/drafts/messages',
'https://graph.microsoft.com/v1.0/me/messages',
headers=headers,
json=email_data
)
if response.status_code >= 200 and response.status_code < 300:
logger.info("Successfully created draft email using drafts folder endpoint")
logger.info("Successfully created draft email using messages endpoint")
return response.json()
else:
logger.error(f"Drafts folder method failed: {response.status_code} - {response.text}")
# Try fallback method with messages endpoint (Option 2)
logger.info("Trying fallback with messages endpoint")
response = requests.post(
'https://graph.microsoft.com/v1.0/me/messages',
headers=headers,
json=email_data
)
if response.status_code >= 200 and response.status_code < 300:
logger.info("Successfully created draft email using messages endpoint")
return response.json()
else:
logger.error(f"Messages endpoint method also failed: {response.status_code} - {response.text}")
return None
logger.error(f"Messages endpoint method failed: {response.status_code} - {response.text}")
logger.error(f"Request headers: {headers}")
logger.error(f"Request body: {json.dumps(email_data, indent=2)}")
return None
except Exception as e:
logger.error(f"Exception creating draft email: {str(e)}", exc_info=True)

@@ -52,7 +52,6 @@ class AgentWebcrawler(AgentBase):
def setDependencies(self, mydom=None):
"""Set external dependencies for the agent."""
self.mydom = mydom
async def processTask(self, task: Dict[str, Any]) -> Dict[str, Any]:
"""
@@ -68,6 +67,7 @@ class AgentWebcrawler(AgentBase):
# Extract task information
prompt = task.get("prompt", "")
outputSpecs = task.get("outputSpecifications", [])
workflow = task.get("context", {}).get("workflow", {})
# Check AI service
if not self.mydom:
@@ -77,6 +77,8 @@ class AgentWebcrawler(AgentBase):
}
# Create research plan
if workflow:
self.workflowManager.logAdd(workflow, "Creating research plan...", level="info", progress=35)
researchPlan = await self._createResearchPlan(prompt)
# Check if this is truly a web research task
@@ -87,9 +89,13 @@ class AgentWebcrawler(AgentBase):
}
# Gather raw material through web research
rawResults = await self._gatherResearchMaterial(researchPlan)
if workflow:
self.workflowManager.logAdd(workflow, "Gathering research material...", level="info", progress=45)
rawResults = await self._gatherResearchMaterial(researchPlan, workflow)
# Format results into requested output documents
if workflow:
self.workflowManager.logAdd(workflow, "Creating output documents...", level="info", progress=55)
documents = await self._createOutputDocuments(
prompt,
rawResults,
@ -142,9 +148,9 @@ class AgentWebcrawler(AgentBase):
try:
# Get research plan from AI
response = await self.mydom.callAi([
{"role": "system", "content": "You are a web research planning expert. Create precise research plans in JSON format only."},
{"role": "system", "content": "You are a web research planning expert. Create precise research plans. Respond with valid JSON only."},
{"role": "user", "content": researchPrompt}
], produceUserAnswer=True)
# Extract JSON
jsonStart = response.find('{')
@ -188,12 +194,13 @@ class AgentWebcrawler(AgentBase):
"feedback": f"I'll conduct web research on '{prompt}' and gather relevant information."
}
async def _gatherResearchMaterial(self, researchPlan: Dict[str, Any], workflow: Dict[str, Any]) -> List[Dict[str, Any]]:
"""
Gather research material based on the research plan.
Args:
researchPlan: Research plan dictionary
workflow: Current workflow object
Returns:
List of research results
@ -202,7 +209,10 @@ class AgentWebcrawler(AgentBase):
# Process direct URLs
directUrls = researchPlan.get("directUrls", [])[:self.maxUrl]
for i, url in enumerate(directUrls):
progress = 45 + int((i / len(directUrls)) * 5) # Progress from 45% to 50%
if hasattr(self, 'workflowManager') and self.workflowManager:
self.workflowManager.logAdd(workflow, f"Processing direct URL {i+1}/{len(directUrls)}...", level="info", progress=progress)
logger.info(f"Processing direct URL: {url}")
try:
# Fetch and extract content
@ -226,7 +236,10 @@ class AgentWebcrawler(AgentBase):
# Process search terms
searchTerms = researchPlan.get("searchTerms", [])[:self.maxSearchTerms]
for i, term in enumerate(searchTerms):
progress = 50 + int((i / len(searchTerms)) * 5) # Progress from 50% to 55%
if hasattr(self, 'workflowManager') and self.workflowManager:
self.workflowManager.logAdd(workflow, f"Searching term {i+1}/{len(searchTerms)}...", level="info", progress=progress)
logger.info(f"Searching for: {term}")
try:
# Perform search
@ -255,7 +268,7 @@ class AgentWebcrawler(AgentBase):
if len(allResults) >= self.maxResults:
break
# Create summaries for all results
allResults = await self._summarizeAllResults(allResults, researchPlan)
return allResults
@ -302,18 +315,14 @@ class AgentWebcrawler(AgentBase):
Only include information actually found in the content. No fabrications or assumptions.
"""
# Get summary from AI
summary = await self.mydom.callAi([
{"role": "system", "content": "You are a web content summarization expert. Create concise summaries."},
{"role": "user", "content": summaryPrompt}
], produceUserAnswer=True)
# Add summary to result
result["summary"] = summary.strip()
except Exception as e:
logger.warning(f"Error summarizing result {i+1}: {str(e)}")

View file

@ -17,6 +17,10 @@ pdfExtractorLoaded = False
officeExtractorLoaded = False
imageProcessorLoaded = False
class FileProcessingError(Exception):
"""Custom exception for file processing errors."""
pass
def getDocumentContents(fileMetadata: Dict[str, Any], fileContent: bytes) -> List[Dict[str, Any]]:
"""
Main function for extracting content from a file based on its MIME type.
@ -38,8 +42,50 @@ def getDocumentContents(fileMetadata: Dict[str, Any], fileContent: bytes) -> Lis
# Extract content based on MIME type
contents = []
# Try to detect actual file type from content for unknown MIME types
if mimeType == "application/octet-stream":
# Check file extension first
ext = os.path.splitext(fileName)[1].lower()
if ext:
# Map common extensions to MIME types
ext_to_mime = {
'.txt': 'text/plain',
'.md': 'text/markdown',
'.csv': 'text/csv',
'.json': 'application/json',
'.xml': 'application/xml',
'.js': 'application/javascript',
'.py': 'application/x-python',
'.svg': 'image/svg+xml',
'.jpg': 'image/jpeg',
'.jpeg': 'image/jpeg',
'.png': 'image/png',
'.gif': 'image/gif',
'.pdf': 'application/pdf',
'.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
'.doc': 'application/msword',
'.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
'.xls': 'application/vnd.ms-excel',
'.pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
'.ppt': 'application/vnd.ms-powerpoint'
}
if ext in ext_to_mime:
mimeType = ext_to_mime[ext]
logger.info(f"Detected MIME type {mimeType} from extension {ext}")
else:
logger.warning(f"Unknown file extension {ext} for file {fileName}")
# Try to detect if it's text content
try:
text_content = fileContent.decode('utf-8')
logger.info(f"Successfully decoded file {fileName} as text")
contents.extend(extractTextContent(fileName, fileContent, "text/plain"))
except UnicodeDecodeError:
logger.info(f"File {fileName} is not text, treating as binary")
contents.extend(extractBinaryContent(fileName, fileContent, mimeType))
# Text-based formats (excluding CSV which has its own handler)
if mimeType == "text/csv":
elif mimeType == "text/csv":
contents.extend(extractCsvContent(fileName, fileContent))
# Then handle other text-based formats
@ -86,6 +132,7 @@ def getDocumentContents(fileMetadata: Dict[str, Any], fileContent: bytes) -> Lis
# Binary data as fallback for unknown formats
else:
logger.warning(f"Unknown MIME type {mimeType} for file {fileName}, treating as binary")
contents.extend(extractBinaryContent(fileName, fileContent, mimeType))
# Fallback when no content could be extracted
@ -99,7 +146,7 @@ def getDocumentContents(fileMetadata: Dict[str, Any], fileContent: bytes) -> Lis
"sequenceNr": 1,
"name": '1_undefined',
"ext": os.path.splitext(fileName)[1][1:] if os.path.splitext(fileName)[1] else "bin",
"contentType": mimeType,
"mimeType": mimeType,
"data": encoded_data,
"base64Encoded": True,
"metadata": {
@ -130,13 +177,13 @@ def getDocumentContents(fileMetadata: Dict[str, Any], fileContent: bytes) -> Lis
return contents
except Exception as e:
logger.error(f"Error during content extraction: {str(e)}")
logger.error(f"Error during content extraction for file {fileMetadata.get('name', 'unknown')}: {str(e)}", exc_info=True)
# Fallback on error - return original data
return [{
"sequenceNr": 1,
"name": fileMetadata.get("name", "unknown"),
"ext": os.path.splitext(fileMetadata.get("name", ""))[1][1:] if os.path.splitext(fileMetadata.get("name", ""))[1] else "bin",
"contentType": fileMetadata.get("mimeType", "application/octet-stream"),
"mimeType": fileMetadata.get("mimeType", "application/octet-stream"),
"data": base64.b64encode(fileContent).decode('utf-8'),
"base64Encoded": True,
"metadata": {
@ -206,7 +253,7 @@ def extractTextContent(fileName: str, fileContent: bytes, mimeType: str) -> List
"sequenceNr": 1,
"name": "1_text", # Simplified naming
"ext": fileExtension,
"contentType": "text/plain",
"mimeType": "text/plain",
"data": textContent,
"base64Encoded": False,
"metadata": {
@ -225,7 +272,7 @@ def extractTextContent(fileName: str, fileContent: bytes, mimeType: str) -> List
"sequenceNr": 1,
"name": "1_text", # Simplified naming
"ext": fileExtension,
"contentType": "text/plain",
"mimeType": "text/plain",
"data": textContent,
"base64Encoded": False,
"metadata": {
@ -242,7 +289,7 @@ def extractTextContent(fileName: str, fileContent: bytes, mimeType: str) -> List
"sequenceNr": 1,
"name": "1_binary", # Simplified naming
"ext": fileExtension,
"contentType": mimeType,
"mimeType": mimeType,
"data": base64.b64encode(fileContent).decode('utf-8'),
"base64Encoded": True,
"metadata": {
@ -256,7 +303,7 @@ def extractTextContent(fileName: str, fileContent: bytes, mimeType: str) -> List
"sequenceNr": 1,
"name": "1_binary", # Simplified naming
"ext": fileExtension,
"contentType": mimeType,
"mimeType": mimeType,
"data": base64.b64encode(fileContent).decode('utf-8'),
"base64Encoded": True,
"metadata": {
@ -282,7 +329,7 @@ def extractCsvContent(fileName: str, fileContent: bytes) -> List[Dict[str, Any]]
"sequenceNr": 1,
"name": "1_csv", # Simplified naming
"ext": "csv",
"contentType": "text/csv",
"mimeType": "text/csv",
"data": csvContent,
"base64Encoded": False,
"metadata": {
@ -302,7 +349,7 @@ def extractCsvContent(fileName: str, fileContent: bytes) -> List[Dict[str, Any]]
"sequenceNr": 1,
"name": "1_csv", # Simplified naming
"ext": "csv",
"contentType": "text/csv",
"mimeType": "text/csv",
"data": csvContent,
"base64Encoded": False,
"metadata": {
@ -319,7 +366,7 @@ def extractCsvContent(fileName: str, fileContent: bytes) -> List[Dict[str, Any]]
"sequenceNr": 1,
"name": "1_binary", # Simplified naming
"ext": "csv",
"contentType": "text/csv",
"mimeType": "text/csv",
"data": base64.b64encode(fileContent).decode('utf-8'),
"base64Encoded": True,
"metadata": {
@ -332,7 +379,7 @@ def extractCsvContent(fileName: str, fileContent: bytes) -> List[Dict[str, Any]]
"sequenceNr": 1,
"name": "1_binary", # Simplified naming
"ext": "csv",
"contentType": "text/csv",
"mimeType": "text/csv",
"data": base64.b64encode(fileContent).decode('utf-8'),
"base64Encoded": True,
"metadata": {
@ -364,7 +411,7 @@ def extractSvgContent(fileName: str, fileContent: bytes) -> List[Dict[str, Any]]
"sequenceNr": 1,
"name": "1_svg", # Simplified naming
"ext": "svg",
"contentType": "image/svg+xml",
"mimeType": "image/svg+xml",
"data": svgText,
"base64Encoded": False,
"metadata": {
@ -380,7 +427,7 @@ def extractSvgContent(fileName: str, fileContent: bytes) -> List[Dict[str, Any]]
"sequenceNr": 1,
"name": "1_text",
"ext": "svg",
"contentType": "text/plain",
"mimeType": "text/plain",
"data": svgText,
"base64Encoded": False,
"metadata": {
@ -401,7 +448,7 @@ def extractSvgContent(fileName: str, fileContent: bytes) -> List[Dict[str, Any]]
"sequenceNr": 1,
"name": "1_svg", # Simplified naming
"ext": "svg",
"contentType": "image/svg+xml",
"mimeType": "image/svg+xml",
"data": svgText,
"base64Encoded": False,
"metadata": {
@ -422,7 +469,7 @@ def extractSvgContent(fileName: str, fileContent: bytes) -> List[Dict[str, Any]]
"sequenceNr": 1,
"name": "1_binary", # Simplified naming
"ext": "svg",
"contentType": "image/svg+xml",
"mimeType": "image/svg+xml",
"data": base64.b64encode(fileContent).decode('utf-8'),
"base64Encoded": True,
"metadata": {
@ -438,7 +485,7 @@ def extractSvgContent(fileName: str, fileContent: bytes) -> List[Dict[str, Any]]
"sequenceNr": 1,
"name": "1_binary", # Simplified naming
"ext": "svg",
"contentType": "image/svg+xml",
"mimeType": "image/svg+xml",
"data": base64.b64encode(fileContent).decode('utf-8'),
"base64Encoded": True,
"metadata": {
@ -519,7 +566,7 @@ def extractImageContent(fileName: str, fileContent: bytes, mimeType: str) -> Lis
"sequenceNr": 1,
"name": "1_image", # Simplified naming
"ext": fileExtension,
"contentType": mimeType,
"mimeType": mimeType,
"data": encoded_data,
"base64Encoded": True,
"metadata": imageMetadata
@ -531,7 +578,7 @@ def extractImageContent(fileName: str, fileContent: bytes, mimeType: str) -> Lis
"sequenceNr": 2,
"name": "2_text_image_info", # Simplified naming with label
"ext": "txt",
"contentType": "text/plain",
"mimeType": "text/plain",
"data": imageDescription,
"base64Encoded": False,
"metadata": {
@ -566,7 +613,7 @@ def extractPdfContent(fileName: str, fileContent: bytes) -> List[Dict[str, Any]]
"sequenceNr": 1,
"name": "1_pdf", # Simplified naming
"ext": "pdf",
"contentType": "application/pdf",
"mimeType": "application/pdf",
"data": base64.b64encode(fileContent).decode('utf-8'),
"base64Encoded": True,
"metadata": {
@ -604,7 +651,7 @@ def extractPdfContent(fileName: str, fileContent: bytes) -> List[Dict[str, Any]]
"sequenceNr": len(contents) + 1,
"name": f"{len(contents) + 1}_text", # Simplified naming
"ext": "txt",
"contentType": "text/plain",
"mimeType": "text/plain",
"data": extractedText,
"base64Encoded": False,
"metadata": {
@ -639,7 +686,7 @@ def extractPdfContent(fileName: str, fileContent: bytes) -> List[Dict[str, Any]]
"sequenceNr": len(contents) + 1,
"name": f"{len(contents) + 1}_image_page{pageNum+1}_{imgIndex+1}", # Simplified naming with label
"ext": imageExt,
"contentType": f"image/{imageExt}",
"mimeType": f"image/{imageExt}",
"data": base64.b64encode(imageBytes).decode('utf-8'),
"base64Encoded": True,
"metadata": {
@ -667,7 +714,7 @@ def extractPdfContent(fileName: str, fileContent: bytes) -> List[Dict[str, Any]]
"sequenceNr": 1,
"name": "1_pdf", # Simplified naming
"ext": "pdf",
"contentType": "application/pdf",
"mimeType": "application/pdf",
"data": base64.b64encode(fileContent).decode('utf-8'),
"base64Encoded": True,
"metadata": {
@ -706,7 +753,7 @@ def extractWordContent(fileName: str, fileContent: bytes, mimeType: str) -> List
"sequenceNr": 1,
"name": "1_word", # Simplified naming
"ext": fileExtension,
"contentType": mimeType,
"mimeType": mimeType,
"data": base64.b64encode(fileContent).decode('utf-8'),
"base64Encoded": True,
"metadata": {
@ -743,7 +790,7 @@ def extractWordContent(fileName: str, fileContent: bytes, mimeType: str) -> List
"sequenceNr": 1,
"name": "1_text", # Simplified naming
"ext": "txt",
"contentType": "text/plain",
"mimeType": "text/plain",
"data": extractedText,
"base64Encoded": False,
"metadata": {
@ -765,7 +812,7 @@ def extractWordContent(fileName: str, fileContent: bytes, mimeType: str) -> List
"sequenceNr": 1,
"name": "1_word", # Simplified naming
"ext": fileExtension,
"contentType": mimeType,
"mimeType": mimeType,
"data": base64.b64encode(fileContent).decode('utf-8'),
"base64Encoded": True,
"metadata": {
@ -804,7 +851,7 @@ def extractExcelContent(fileName: str, fileContent: bytes, mimeType: str) -> Lis
"sequenceNr": 1,
"name": "1_excel", # Simplified naming
"ext": fileExtension,
"contentType": mimeType,
"mimeType": mimeType,
"data": base64.b64encode(fileContent).decode('utf-8'),
"base64Encoded": True,
"metadata": {
@ -845,7 +892,7 @@ def extractExcelContent(fileName: str, fileContent: bytes, mimeType: str) -> Lis
"sequenceNr": len(contents) + 1,
"name": f"{len(contents) + 1}_csv_{sheetSafeName}", # Simplified naming with sheet label
"ext": "csv",
"contentType": "text/csv",
"mimeType": "text/csv",
"data": csvContent,
"base64Encoded": False,
"metadata": {
@ -867,7 +914,7 @@ def extractExcelContent(fileName: str, fileContent: bytes, mimeType: str) -> Lis
"sequenceNr": 1,
"name": "1_excel", # Simplified naming
"ext": fileExtension,
"contentType": mimeType,
"mimeType": mimeType,
"data": base64.b64encode(fileContent).decode('utf-8'),
"base64Encoded": True,
"metadata": {
@ -897,7 +944,7 @@ def extractPowerpointContent(fileName: str, fileContent: bytes, mimeType: str) -
"sequenceNr": 1,
"name": "1_powerpoint", # Simplified naming
"ext": fileExtension,
"contentType": mimeType,
"mimeType": mimeType,
"data": base64.b64encode(fileContent).decode('utf-8'),
"base64Encoded": True,
"metadata": {
@ -923,7 +970,7 @@ def extractBinaryContent(fileName: str, fileContent: bytes, mimeType: str) -> Li
"sequenceNr": 1,
"name": "1_binary", # Simplified naming
"ext": fileExtension,
"contentType": mimeType,
"mimeType": mimeType,
"data": base64.b64encode(fileContent).decode('utf-8'),
"base64Encoded": True,
"metadata": {
@ -931,3 +978,157 @@ def extractBinaryContent(fileName: str, fileContent: bytes, mimeType: str) -> Li
"format": "binary"
}
}]
def processFile(self, fileContent: bytes, fileName: str, fileMetadata: Dict[str, Any] = None) -> List[Dict[str, Any]]:
"""
Process a file and return its contents as a list of documents.
Args:
fileContent: Binary content of the file
fileName: Name of the file
fileMetadata: Optional metadata about the file
Returns:
List of document dictionaries
"""
try:
# Get file extension and MIME type
fileExtension = os.path.splitext(fileName)[1].lower()[1:]
mimeType = fileMetadata.get("mimeType", self.mydom.getMimeType(fileName)) if fileMetadata else self.mydom.getMimeType(fileName)
# Process based on file type
if mimeType.startswith("image/"):
return self._processImageFile(fileContent, fileName, fileExtension, mimeType, fileMetadata)
elif mimeType == "application/pdf":
return self._processPdfFile(fileContent, fileName, fileMetadata)
elif mimeType == "text/csv":
return self._processCsvFile(fileContent, fileName, fileMetadata)
elif mimeType == "text/plain":
return self._processTextFile(fileContent, fileName, fileMetadata)
else:
# Default binary file handling
return [{
"name": fileName,
"ext": fileExtension,
"mimeType": mimeType,
"data": base64.b64encode(fileContent).decode('utf-8'),
"base64Encoded": True,
"metadata": {
"isText": False
}
}]
except Exception as e:
logger.error(f"Error processing file {fileName}: {str(e)}")
raise FileProcessingError(f"Error processing file: {str(e)}")
def _processImageFile(self, fileContent: bytes, fileName: str, fileExtension: str, mimeType: str, fileMetadata: Dict[str, Any] = None) -> List[Dict[str, Any]]:
"""Process an image file."""
try:
# Create image document
imageDoc = {
"name": fileName,
"ext": fileExtension,
"mimeType": mimeType,
"data": base64.b64encode(fileContent).decode('utf-8'),
"base64Encoded": True,
"metadata": {
"isText": False,
"isImage": True,
"format": fileExtension
}
}
# Add image description if available
if fileMetadata and "description" in fileMetadata:
imageDoc["metadata"]["description"] = fileMetadata["description"]
return [imageDoc]
except Exception as e:
logger.error(f"Error processing image file {fileName}: {str(e)}")
raise FileProcessingError(f"Error processing image file: {str(e)}")
def _processPdfFile(self, fileContent: bytes, fileName: str, fileMetadata: Dict[str, Any] = None) -> List[Dict[str, Any]]:
"""Process a PDF file."""
try:
# Create PDF document
pdfDoc = {
"name": fileName,
"ext": "pdf",
"mimeType": "application/pdf",
"data": base64.b64encode(fileContent).decode('utf-8'),
"base64Encoded": True,
"metadata": {
"isText": False,
"isPdf": True
}
}
return [pdfDoc]
except Exception as e:
logger.error(f"Error processing PDF file {fileName}: {str(e)}")
raise FileProcessingError(f"Error processing PDF file: {str(e)}")
def _processCsvFile(self, fileContent: bytes, fileName: str, fileMetadata: Dict[str, Any] = None) -> List[Dict[str, Any]]:
"""Process a CSV file."""
try:
# Try to decode as text first
try:
csvContent = fileContent.decode('utf-8')
base64Encoded = False
except UnicodeDecodeError:
# If not valid UTF-8, encode as base64
csvContent = base64.b64encode(fileContent).decode('utf-8')
base64Encoded = True
# Create CSV document
csvDoc = {
"name": fileName,
"ext": "csv",
"mimeType": "text/csv",
"data": csvContent,
"base64Encoded": base64Encoded,
"metadata": {
"isText": True,
"isCsv": True,
"format": "csv"
}
}
return [csvDoc]
except Exception as e:
logger.error(f"Error processing CSV file {fileName}: {str(e)}")
raise FileProcessingError(f"Error processing CSV file: {str(e)}")
def _processTextFile(self, fileContent: bytes, fileName: str, fileMetadata: Dict[str, Any] = None) -> List[Dict[str, Any]]:
"""Process a text file."""
try:
# Try to decode as text
try:
textContent = fileContent.decode('utf-8')
base64Encoded = False
except UnicodeDecodeError:
# If not valid UTF-8, encode as base64
textContent = base64.b64encode(fileContent).decode('utf-8')
base64Encoded = True
# Create text document
textDoc = {
"name": fileName,
"ext": "txt",
"mimeType": "text/plain",
"data": textContent,
"base64Encoded": base64Encoded,
"metadata": {
"isText": True
}
}
return [textDoc]
except Exception as e:
logger.error(f"Error processing text file {fileName}: {str(e)}")
raise FileProcessingError(f"Error processing text file: {str(e)}")

View file

@ -123,28 +123,51 @@ class GatewayInterface:
def _uam(self, table: str, recordset: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""
Unified user access management function that filters data based on user privileges
and adds access control attributes.
Args:
table: Name of the table
recordset: Recordset to filter based on access rules
Returns:
Filtered recordset with access control attributes
"""
userPrivilege = self.currentUser.get("privilege", "user")
filtered_records = []
# Apply filtering based on privilege
if userPrivilege == "sysadmin":
filtered_records = recordset # System admins see all records
elif userPrivilege == "admin":
# Admins see records in their mandate
return [r for r in recordset if r.get("mandateId") == self.mandateId]
filtered_records = [r for r in recordset if r.get("mandateId") == self.mandateId]
else: # Regular users
# Users only see records they own within their mandate
filtered_records = [r for r in recordset
if r.get("mandateId") == self.mandateId and r.get("userId") == self.userId]
# Add access control attributes to each record
for record in filtered_records:
record_id = record.get("id")
# Set access control flags based on user permissions
if table == "mandates":
record["_hideView"] = False # Everyone can view
record["_hideEdit"] = not self._canModify("mandates", record_id)
record["_hideDelete"] = not self._canModify("mandates", record_id)
elif table == "users":
record["_hideView"] = False # Everyone can view
record["_hideEdit"] = not self._canModify("users", record_id)
record["_hideDelete"] = not self._canModify("users", record_id)
else:
# Default access control for other tables
record["_hideView"] = False
record["_hideEdit"] = not self._canModify(table, record_id)
record["_hideDelete"] = not self._canModify(table, record_id)
return filtered_records
def _canModify(self, table: str, recordId: Optional[int] = None) -> bool:
"""
Checks if the current user can modify (create/update/delete) records in a table.
@ -393,7 +416,11 @@ class GatewayInterface:
def authenticateUser(self, username: str, password: str) -> Optional[Dict[str, Any]]:
"""Authenticates a user by username and password."""
# Instead of using UAM filtering, directly get user from database
# Clear the users table from cache and reload it
if "users" in self.db._tablesCache:
del self.db._tablesCache["users"]
# Get fresh user data
users = self.db.getRecordset("users")
user = next((u for u in users if u.get("username") == username), None)
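
To make the filtering rules above concrete, a small illustration with made-up records (ids, names, table name, and context values are hypothetical):

records = [
    {"id": 1, "mandateId": 1, "userId": 7, "name": "own record"},
    {"id": 2, "mandateId": 1, "userId": 9, "name": "colleague's record"},
    {"id": 3, "mandateId": 2, "userId": 7, "name": "other mandate"},
]
# With privilege "user", mandateId=1, userId=7, _uam("projects", records) keeps
# only record 1 and adds _hideView/_hideEdit/_hideDelete flags to it; an "admin"
# in mandate 1 keeps records 1 and 2; a "sysadmin" keeps all three.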

View file

@ -11,6 +11,7 @@ from typing import Dict, Any, List, Optional, Union
import importlib
import hashlib
import json
from modules.mimeUtils import isTextMimeType, determineContentEncoding
@ -161,35 +162,72 @@ class LucyDOMInterface:
def _uam(self, table: str, recordset: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""
Unified user access management function that filters data based on user privileges
and adds access control attributes.
Args:
table: Name of the table
recordset: Recordset to filter based on access rules
Returns:
Filtered recordset with access control attributes
"""
userPrivilege = self.currentUser.get("privilege", "user")
filtered_records = []
# Apply filtering based on privilege
if userPrivilege == "sysadmin":
filtered_records = recordset # System admins see all records
elif userPrivilege == "admin":
# Admins see records in their mandate
return [r for r in recordset if r.get("mandateId") == self.mandateId]
filtered_records = [r for r in recordset if r.get("mandateId") == self.mandateId]
else: # Regular users
# To see all prompts from mandate 0 and own
if table == "prompts":
filtered_records = [r for r in recordset if
(r.get("mandateId") == self.mandateId and r.get("userId") == self.userId)
or
(r.get("mandateId") == 0)
]
else:
# Users see only their records
filtered_records = [r for r in recordset
if r.get("mandateId") == self.mandateId and r.get("userId") == self.userId]
# Add access control attributes to each record
for record in filtered_records:
record_id = record.get("id")
# Set access control flags based on user permissions
if table == "prompts":
record["_hideView"] = False # Everyone can view
record["_hideEdit"] = not self._canModify("prompts", record_id)
record["_hideDelete"] = not self._canModify("prompts", record_id)
elif table == "files":
record["_hideView"] = False # Everyone can view
record["_hideEdit"] = not self._canModify("files", record_id)
record["_hideDelete"] = not self._canModify("files", record_id)
record["_hideDownload"] = not self._canModify("files", record_id)
elif table == "workflows":
record["_hideView"] = False # Everyone can view
record["_hideEdit"] = not self._canModify("workflows", record_id)
record["_hideDelete"] = not self._canModify("workflows", record_id)
elif table == "workflowMessages":
record["_hideView"] = False # Everyone can view
record["_hideEdit"] = not self._canModify("workflows", record.get("workflowId"))
record["_hideDelete"] = not self._canModify("workflows", record.get("workflowId"))
elif table == "workflowLogs":
record["_hideView"] = False # Everyone can view
record["_hideEdit"] = not self._canModify("workflows", record.get("workflowId"))
record["_hideDelete"] = not self._canModify("workflows", record.get("workflowId"))
else:
# Default access control for other tables
record["_hideView"] = False
record["_hideEdit"] = not self._canModify(table, record_id)
record["_hideDelete"] = not self._canModify(table, record_id)
return filtered_records
def _canModify(self, table: str, recordId: Optional[int] = None) -> bool:
"""
Checks if the current user can modify (create/update/delete) records in a table.
@ -357,11 +395,14 @@ class LucyDOMInterface:
return hashlib.sha256(fileContent).hexdigest()
def checkForDuplicateFile(self, fileHash: str) -> Optional[Dict[str, Any]]:
"""Checks if a file with the same hash already exists."""
files = self.db.getRecordset("files", recordFilter={"fileHash": fileHash})
filteredFiles = self._uam("files", files)
if filteredFiles:
return filteredFiles[0]
"""Checks if a file with the same hash already exists for the current user and mandate."""
files = self.db.getRecordset("files", recordFilter={
"fileHash": fileHash,
"mandateId": self.mandateId,
"userId": self.userId
})
if files:
return files[0]
return None
def getMimeType(self, filename: str) -> str:
@ -669,7 +710,7 @@ class LucyDOMInterface:
fileHash = self.calculateFileHash(fileContent)
logger.debug(f"Calculated file hash: {fileHash}")
# Check for duplicate within same user/mandate
existingFile = self.checkForDuplicateFile(fileHash)
if existingFile:
logger.info(f"Duplicate found for {fileName}: {existingFile['id']}")
@ -692,9 +733,6 @@ class LucyDOMInterface:
logger.info(f"Saving file content to database for file: {fileName}")
self.createFileData(dbFile["id"], fileContent)
logger.info(f"File upload process completed for: {fileName}")
return dbFile
@ -730,12 +768,6 @@ class LucyDOMInterface:
logger.error(f"Error downloading file {fileId}: {str(e)}")
raise FileError(f"Error downloading file: {str(e)}")
# Workflow methods
def getAllWorkflows(self) -> List[Dict[str, Any]]:
@ -1287,6 +1319,64 @@ class LucyDOMInterface:
logger.error(f"Error loading workflow state: {str(e)}")
return None
# Microsoft Login
def getMsftToken(self) -> Optional[Dict[str, Any]]:
"""Get Microsoft token data for the current user from database"""
try:
# Get token from database using current user's mandateId and userId
tokens = self.db.getRecordset("msftTokens", recordFilter={
"mandateId": self.mandateId,
"userId": self.userId
})
if tokens and len(tokens) > 0:
token_data = json.loads(tokens[0]["token_data"])
logger.info(f"Retrieved Microsoft token for user {self.userId}")
return token_data
else:
logger.info(f"No Microsoft token found for user {self.userId}")
return None
except Exception as e:
logger.error(f"Error retrieving Microsoft token: {str(e)}")
return None
def saveMsftToken(self, token_data: Dict[str, Any]) -> bool:
"""Save Microsoft token data for the current user to database"""
try:
# Check if token already exists
tokens = self.db.getRecordset("msftTokens", recordFilter={
"mandateId": self.mandateId,
"userId": self.userId
})
if tokens and len(tokens) > 0:
# Update existing token
token_id = tokens[0]["id"]
updated_data = {
"token_data": json.dumps(token_data),
"updated_at": datetime.now().isoformat()
}
self.db.recordModify("msftTokens", token_id, updated_data)
logger.info(f"Updated Microsoft token for user {self.userId}")
else:
# Create new token
new_token = {
"mandateId": self.mandateId,
"userId": self.userId,
"token_data": json.dumps(token_data),
"created_at": datetime.now().isoformat(),
"updated_at": datetime.now().isoformat()
}
self.db.recordCreate("msftTokens", new_token)
logger.info(f"Saved new Microsoft token for user {self.userId}")
return True
except Exception as e:
logger.error(f"Error saving Microsoft token: {str(e)}")
return False
# Singleton factory for LucyDOMInterface instances per context
_lucydomInterfaces = {}
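
A usage sketch for the token helpers above; the token payload shape is an assumption based on typical OAuth results, not taken from this commit:

# Given an initialized LucyDOMInterface instance `mydom` for the current context:
tokenData = {"access_token": "...", "refresh_token": "...", "expires_in": 3600}
if mydom.saveMsftToken(tokenData):
    stored = mydom.getMsftToken()  # parsed dict, or None if nothing is stored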

View file

@ -78,6 +78,31 @@ class FileData(BaseModel):
base64Encoded: bool = Field(description="Flag indicating whether the data is base64 encoded")
class MsftToken(BaseModel):
"""Data model for Microsoft authentication tokens"""
id: int = Field(description="Unique ID of the token")
mandateId: int = Field(description="ID of the associated mandate")
userId: int = Field(description="ID of the user")
token_data: str = Field(description="JSON string containing the token data")
created_at: str = Field(description="Timestamp when the token was created")
updated_at: str = Field(description="Timestamp when the token was last updated")
label: Label = Field(
default=Label(default="Microsoft Token", translations={"en": "Microsoft Token", "fr": "Jeton Microsoft"}),
description="Label for the class"
)
# Labels for attributes
fieldLabels: Dict[str, Label] = {
"id": Label(default="ID", translations={}),
"mandateId": Label(default="Mandate ID", translations={"en": "Mandate ID", "fr": "ID de mandat"}),
"userId": Label(default="User ID", translations={"en": "User ID", "fr": "ID d'utilisateur"}),
"token_data": Label(default="Token Data", translations={"en": "Token Data", "fr": "Données du jeton"}),
"created_at": Label(default="Created At", translations={"en": "Created At", "fr": "Créé le"}),
"updated_at": Label(default="Updated At", translations={"en": "Updated At", "fr": "Mis à jour le"})
}
# Workflow model classes
class DocumentContent(BaseModel):
@ -85,7 +110,7 @@ class DocumentContent(BaseModel):
sequenceNr: int = Field(1, description="Sequence number of the content in the source document")
name: str = Field(description="Designation")
ext: str = Field(description="Content extension for export: txt, csv, json, jpg, png")
contentType: str = Field(description="MIME type")
mimeType: str = Field(description="MIME type")
summary: str = Field(description="Summary of the file content")
data: str = Field(description="Actual content, text or base64 encoded based on base64Encoded flag")
base64Encoded: bool = Field(description="Flag indicating whether the data is base64 encoded")
@ -97,6 +122,7 @@ class Document(BaseModel):
name: str = Field(description="Name of the data object")
ext: str = Field(description="Extension of the data object")
fileId: int = Field(description="ID of the referenced file in the database")
mimeType: str = Field(description="MIME type")
data: str = Field(description="Content of the data as text or base64 encoded based on base64Encoded flag")
base64Encoded: bool = Field(description="Flag indicating whether the data is base64 encoded")
contents: List[DocumentContent] = Field(description="Document contents")
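
A sketch of instantiating the renamed content model; the field values are illustrative:

content = DocumentContent(
    sequenceNr=1,
    name="1_text",
    ext="txt",
    mimeType="text/plain",  # renamed from contentType in this commit
    summary="Plain-text body of the uploaded file",
    data="Hello, world",
    base64Encoded=False,
)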

View file

@ -32,6 +32,7 @@ class AgentBase:
self.description = "Basic agent functionality"
self.capabilities = []
self.mydom = None
self.workflowManager = None # Will be set by workflow manager
def setDependencies(self, mydom=None):
"""Set external dependencies for the agent."""
@ -58,11 +59,16 @@ class AgentBase:
Args:
task: A dictionary containing:
- taskId: Unique ID for this task
- workflowId: ID of the parent workflow
- prompt: The main instruction for the agent
- inputDocuments: List of document objects to process
- outputSpecifications: List of required output documents
- context: Additional contextual information including:
- workflow: The complete workflow object
- workflowRound: Current workflow round
- agentType: Type of agent
- timestamp: Task timestamp
- language: User language
Returns:
A dictionary containing:
@ -85,51 +91,45 @@ class AgentBase:
"""Wrapper for the utility function"""
return isTextMimeType(mimeType)
def formatAgentDocumentOutput(self, label: str, content: Any, mimeType: str = None) -> Dict[str, Any]:
"""
Format agent output as a document.
Args:
label: Label for the document
content: Content of the document
mimeType: Optional MIME type for the document
"""
# Create document structure
doc = {
"label": label,
"content": formatted_content,
"base64Encoded": should_base64_encode,
"metadata": {}
"id": str(uuid.uuid4()),
"name": label,
"ext": "txt", # Default extension
"data": content,
"base64Encoded": False,
"metadata": {
"isText": True
}
}
# Set MIME type if provided
if mimeType:
doc["mimeType"] = mimeType
# Update extension based on MIME type
if mimeType == "text/markdown":
doc["ext"] = "md"
elif mimeType == "text/html":
doc["ext"] = "html"
elif mimeType == "text/csv":
doc["ext"] = "csv"
elif mimeType == "application/json":
doc["ext"] = "json"
elif mimeType.startswith("image/"):
doc["ext"] = mimeType.split("/")[1]
doc["metadata"]["isText"] = False
elif mimeType == "application/pdf":
doc["ext"] = "pdf"
doc["metadata"]["isText"] = False
return doc
@ -214,6 +214,11 @@ class AgentRegistry:
self.mydom = mydom
self.updateAgentDependencies()
def setWorkflowManager(self, workflowManager):
"""Set the workflow manager reference for all agents."""
for agent in self.agents.values():
agent.workflowManager = workflowManager
def updateAgentDependencies(self):
"""Update dependencies for all registered agents."""
for agentId, agent in self.agents.items():
@ -245,8 +250,8 @@ class AgentRegistry:
if agentIdentifier in self.agents:
agent = self.agents[agentIdentifier]
# Ensure the agent has the AI service
if self.mydom:
agent.mydom = self.mydom
return agent
logger.error(f"Agent with identifier '{agentIdentifier}' not found")
return None
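
A usage sketch for the reworked helper; markdownText is a placeholder for real agent output:

# Inside an agent, after producing markdown output:
doc = self.formatAgentDocumentOutput("research_summary", markdownText, mimeType="text/markdown")
# Per the code above: doc["ext"] == "md", doc["base64Encoded"] is False,
# and doc["metadata"]["isText"] is True.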

View file

@ -10,8 +10,9 @@ import json
import re
import uuid
import base64
from datetime import datetime, timedelta
from typing import Dict, Any, List, Optional, Union, Tuple
import time
from modules.mimeUtils import isTextMimeType, determineContentEncoding
@ -58,6 +59,7 @@ class WorkflowManager:
self.mydom = domInterface(mandateId, userId)
self.agentRegistry = getAgentRegistry()
self.agentRegistry.setMydom(self.mydom)
self.agentRegistry.setWorkflowManager(self) # Set self as workflow manager for all agents
### Workflow State Machine Implementation
@ -132,6 +134,7 @@ class WorkflowManager:
Returns:
Updated workflow with processing results
"""
startTime = time.time()
try:
# State 3: User Message Processing
self.checkExitCriteria(workflow)
@ -161,8 +164,42 @@ class WorkflowManager:
}
self.messageAdd(workflow, responseMessage)
self.logAdd(workflow, f"Planned outputs: {len(objFinalDocuments)} documents", level="info", progress=20)
self.logAdd(workflow, f"Work plan created with {len(objWorkplan)} steps", level="info", progress=25)
# Add detailed log entry about the task plan
taskPlanLog = "Input: "
if objFinalDocuments:
taskPlanLog += ", ".join(objFinalDocuments) + "<br>"
else:
taskPlanLog += "No input files<br>"
# Work Plan Steps
for i, task in enumerate(objWorkplan, 1):
agentName = task.get("agent", "unknown")
taskPlanLog += f"{i}. Agent {agentName}<br>"
# Input Documents
inputDocs = task.get("inputDocuments", [])
if inputDocs:
inputLabels = [doc.get("label", "unknown") for doc in inputDocs]
taskPlanLog += f"- Input: {', '.join(inputLabels)}<br>"
# Task Prompt
prompt = task.get('prompt', 'No prompt')
taskPlanLog += f"- Task: {prompt}<br>"
# Output Documents
outputDocs = task.get("outputDocuments", [])
if outputDocs:
outputLabels = [doc.get("label", "unknown") for doc in outputDocs]
taskPlanLog += f"- Output: {', '.join(outputLabels)}<br>"
# Final Results
taskPlanLog += "Result: "
if objFinalDocuments:
taskPlanLog += ", ".join(objFinalDocuments)
else:
taskPlanLog += "No result files"
self.logAdd(workflow, taskPlanLog, level="info", progress=25)
# State 5: Agent Execution
objResults = []
@ -199,6 +236,10 @@ class WorkflowManager:
self.checkExitCriteria(workflow)
self.workflowFinish(workflow)
# Update processing time
endTime = time.time()
workflow["dataStats"]["processingTime"] = endTime - startTime
return workflow
except Exception as e:
@ -207,10 +248,15 @@ class WorkflowManager:
workflow["status"] = "failed"
workflow["lastActivity"] = datetime.now().isoformat()
# Update processing time even on error
endTime = time.time()
workflow["dataStats"]["processingTime"] = endTime - startTime
# Update in database
self.mydom.updateWorkflow(workflow["id"], {
"status": "failed",
"lastActivity": workflow["lastActivity"]
"lastActivity": workflow["lastActivity"],
"dataStats": workflow["dataStats"]
})
self.logAdd(workflow, f"Workflow failed: {str(e)}", level="error", progress=100)
@ -241,7 +287,12 @@ class WorkflowManager:
"messages": [], # Empty list - will be filled with references
"messageIds": [], # Initialize empty messageIds list
"logs": [],
"dataStats": {},
"dataStats": {
"bytesSent": 0,
"bytesReceived": 0,
"tokensUsed": 0,
"processingTime": 0.0
},
"currentRound": 1,
"status": "running",
"lastActivity": currentTime,
@ -287,11 +338,24 @@ class WorkflowManager:
else:
workflow["currentRound"] = 1
# Ensure dataStats exists with correct field names
if "dataStats" not in workflow:
workflow["dataStats"] = {
"bytesSent": 0,
"bytesReceived": 0,
"tokensUsed": 0,
"processingTime": 0.0
}
elif "tokenCount" in workflow["dataStats"]:
# Convert old tokenCount to tokensUsed if needed
workflow["dataStats"]["tokensUsed"] = workflow["dataStats"].pop("tokenCount", 0)
# Update in database - only the relevant workflow fields
workflowUpdate = {
"status": workflow["status"],
"lastActivity": workflow["lastActivity"],
"currentRound": workflow["currentRound"]
"currentRound": workflow["currentRound"],
"dataStats": workflow["dataStats"] # Include updated dataStats
}
self.mydom.updateWorkflow(workflowId, workflowUpdate)
@ -382,6 +446,7 @@ Please analyze the request and create:
3. Do not define document inputs that don't exist or haven't been generated beforehand.
4. Create a logical sequence - earlier agents can create documents that are later used as inputs.
5. If the user has provided documents but hasn't clearly stated what they want, try to act according to the context.
6. ALL documents provided by the user (where fileSource is "user") MUST be included in the work plan, even if they don't have content summaries or if content extraction failed.
Your answer must be strictly in the JSON_OUTPUT format, with no additions before or after the JSON object.
@ -415,6 +480,7 @@ JSON_OUTPUT = {{
## RULES for inputDocuments:
1. The user request refers to documents where "fileSource" in available documents is "user". Those documents are in the focus for input
2. In case of redundant label in available documents, use document with highest sequenceNr if not specified differently
3. ALL documents provided by the user MUST be included in the work plan, even if they don't have content summaries or if content extraction failed
## STRICT RULES FOR document "label":
1. Every document label MUST include a proper file extension that matches the content type.
@ -472,6 +538,9 @@ JSON_OUTPUT = {{
return []
agentLabel = agent.label
# Set workflow manager reference on the agent
agent.workflowManager = self
# Log the current step
outputLabels = []
for doc in task.get("outputDocuments", []):
@ -505,6 +574,7 @@ JSON_OUTPUT = {{
"inputDocuments": inputDocuments,
"outputSpecifications": outputSpecs,
"context": {
"workflow": workflow, # Add the complete workflow object
"workflowRound": workflow.get("currentRound", 1),
"agentType": agentName,
"timestamp": datetime.now().isoformat(),
@ -518,7 +588,47 @@ JSON_OUTPUT = {{
logger.debug("TASK: "+self.parseJson2text(agentTask))
logger.debug(f"Agent '{agentName}' AI service available: {agent.mydom is not None}")
# Calculate bytes sent before processing
bytesSent = len(json.dumps(agentTask).encode('utf-8'))
for doc in inputDocuments:
if doc.get('data'):
bytesSent += len(doc['data'].encode('utf-8'))
for content in doc.get('contents', []):
if content.get('data'):
bytesSent += len(content['data'].encode('utf-8'))
# Process the task
startTime = time.time()
agentResults = await agent.processTask(agentTask)
endTime = time.time()
# Calculate bytes received
bytesReceived = len(json.dumps(agentResults).encode('utf-8'))
for doc in agentResults.get('documents', []):
if doc.get('content'):
bytesReceived += len(doc['content'].encode('utf-8'))
# Calculate tokens used (now using bytes)
tokensUsed = bytesSent + bytesReceived
# Update workflow statistics
if 'dataStats' not in workflow:
workflow['dataStats'] = {
'bytesSent': 0,
'bytesReceived': 0,
'tokensUsed': 0,
'processingTime': 0
}
workflow['dataStats']['bytesSent'] += bytesSent
workflow['dataStats']['bytesReceived'] += bytesReceived
workflow['dataStats']['tokensUsed'] += tokensUsed
workflow['dataStats']['processingTime'] += (endTime - startTime)
# Update in database
self.mydom.updateWorkflow(workflow["id"], {
"dataStats": workflow['dataStats']
})
logger.debug(f"Agent '{agentName}' completed task. RESULT: {self.parseJson2text(agentResults)}")
@ -710,6 +820,38 @@ filesDelivered = {self.parseJson2text(matchingDocuments)}
messageObject = self.messageAdd(workflow, messageObject)
logger.debug(f"message_user = {self.parseJson2text(messageObject)}.")
# Update statistics for user input
if role == "user":
# Calculate bytes sent
bytesSent = len(messageContent.encode('utf-8'))
for doc in additionalFiles:
if doc.get('data'):
bytesSent += len(doc['data'].encode('utf-8'))
for content in doc.get('contents', []):
if content.get('data'):
bytesSent += len(content['data'].encode('utf-8'))
# Calculate tokens used (now using bytes)
tokensUsed = bytesSent
# Update workflow statistics
if 'dataStats' not in workflow:
workflow['dataStats'] = {
'bytesSent': 0,
'bytesReceived': 0,
'tokensUsed': 0,
'processingTime': 0
}
workflow['dataStats']['bytesSent'] += bytesSent
workflow['dataStats']['tokensUsed'] += tokensUsed
# Update in database
self.mydom.updateWorkflow(workflow["id"], {
"dataStats": workflow['dataStats']
})
return messageObject
async def processFileIds(self, fileIds: List[int]) -> List[Dict[str, Any]]:
@ -789,8 +931,13 @@ filesDelivered = {self.parseJson2text(matchingDocuments)}
"fileId": fileId,
"name": os.path.splitext(fileNameExt)[0] if os.path.splitext(fileNameExt)[0] else "noname",
"ext": os.path.splitext(fileNameExt)[1][1:] if os.path.splitext(fileNameExt)[1] else "bin",
"mimeType": mimeType,
"data": encodedData,
"base64Encoded": base64Encoded,
"metadata": {
"isText": isTextFormat,
"base64Encoded": base64Encoded # For backward compatibility
},
"contents": []
}
@ -799,7 +946,7 @@ filesDelivered = {self.parseJson2text(matchingDocuments)}
# Add summaries to each content item
for content in contents:
content["summary"] = await self.messageSummarizeContent(content)
content["summary"] = await self.getContentExtraction(content)
# Ensure base64Encoded flag is set
if "base64Encoded" not in content:
@ -861,97 +1008,93 @@ filesDelivered = {self.parseJson2text(matchingDocuments)}
return preparedInputs
async def messageSummarizeContent(self, content: Dict[str, Any]) -> str:
return await self.getContentExtraction(
content,
"Create a very concise summary (1-2 sentences, maximum 200 characters) about this content."
)
async def processDocumentForAgent(self, document: Dict[str, Any], docSpec: Dict[str, Any]) -> Dict[str, Any]:
"""
Processes a document for an agent based on the document specification.
Uses AI to extract relevant content from the document based on the specification.
"""
Processes a document for an agent based on the document specification.
Uses AI to extract relevant content from the document based on the specification.
Args:
document: The document to process
docSpec: The document specification from the project manager
Args:
document: The document to process
docSpec: The document specification from the project manager
Returns:
Processed document with AI-extracted content
"""
processedDoc = document.copy()
partSpec = docSpec.get("contentPart", "")
Returns:
Processed document with AI-extracted content
"""
processedDoc = document.copy()
partSpec = docSpec.get("contentPart", "")
# Process each content item in the document
if "contents" in processedDoc:
processedContents = []
# Process each content item in the document
if "contents" in processedDoc:
processedContents = []
for content in processedDoc["contents"]:
# Check if part required
if partSpec != "" and partSpec != content.get("name"):
continue
for content in processedDoc["contents"]:
# Check if part required
if partSpec != "" and partSpec != content.get("name"):
continue
# Get the prompt from the document specification
summary = docSpec.get("prompt", "Extract the relevant information from this document")
# Get the prompt from the document specification
summary = docSpec.get("prompt", "Extract the relevant information from this document")
# Process content using the shared helper function
processedContent = content.copy()
processedContent["dataExtracted"] = await self.getContentExtraction(content, summary)
processedContent["metadata"]["aiProcessed"] = True
# Process content using the shared helper function
processedContent = content.copy()
processedContent["dataExtracted"] = await self.getContentExtraction(content, summary)
processedContent["metadata"]["aiProcessed"] = True
processedContents.append(processedContent)
processedContents.append(processedContent)
processedDoc["contents"] = processedContents
processedDoc["contents"] = processedContents
return processedDoc
return processedDoc
async def getContentExtraction(self, content: Dict[str, Any], prompt: str = None) -> str:
"""
Helper function that extracts or summarizes content based on its encoding.
For base64 encoded content, uses callAi4Image. For non-base64 content, uses callAi.
Args:
content: Content item to analyze
prompt: Custom prompt for extraction (default prompts used if not provided)
Returns:
Extracted or summarized content as text
"""
try:
# Get content data and encoding status
data = content.get("data", "")
isBase64 = content.get("base64Encoded", False)
# Default prompts if none provided
if prompt is None:
textPrompt = "Create a very concise summary (1-2 sentences, maximum 200 characters) about this content."
imagePrompt = "Create a very concise summary (1-2 sentences, maximum 200 characters) about this image."
else:
textPrompt = prompt
imagePrompt = prompt
# Handle base64 encoded content
if isBase64:
try:
# Pass base64 encoded data directly to callAi4Image
return await self.mydom.callAi4Image(data, content.get("mimeType", "application/octet-stream"), imagePrompt)
except Exception as e:
logger.error(f"Error processing base64 content: {str(e)}")
return f"Error processing content: {str(e)}"
else:
# For non-base64 content, use callAi
return await self.mydom.callAi([
{"role": "system", "content": "You are a content analyzer. Extract relevant information from the provided content."},
{"role": "user", "content": f"{textPrompt}\n\nContent:\n{data}"}
], produceUserAnswer=True)
except Exception as e:
logger.error(f"Error processing content: {str(e)}")
return f"Error processing content: {str(e)}"
def messageAdd(self, workflow: Dict[str, Any], message: Dict[str, Any]) -> Dict[str, Any]:
"""
Adds a message to the workflow and updates lastActivity.
Saves the message in the database and updates the workflow with references.
Also updates statistics for the message.
Args:
workflow: Workflow object
@ -990,6 +1133,35 @@ filesDelivered = {self.parseJson2text(matchingDocuments)}
if "status" not in message:
message["status"] = "step"
# Calculate statistics for the message
bytesSent = len(message.get("content", "").encode('utf-8'))
for doc in message.get("documents", []):
if doc.get("data"):
bytesSent += len(doc["data"].encode('utf-8'))
for content in doc.get("contents", []):
if content.get("data"):
bytesSent += len(content["data"].encode('utf-8'))
# Calculate tokens used (now using bytes)
tokensUsed = bytesSent
# Update workflow statistics
if "dataStats" not in workflow:
workflow["dataStats"] = {
"bytesSent": 0,
"bytesReceived": 0,
"tokensUsed": 0,
"processingTime": 0
}
# Update statistics based on message role
if message["role"] == "user":
workflow["dataStats"]["bytesSent"] += bytesSent
workflow["dataStats"]["tokensUsed"] += tokensUsed
else: # assistant messages
workflow["dataStats"]["bytesReceived"] += bytesSent
workflow["dataStats"]["tokensUsed"] += tokensUsed
# Add message to workflow
workflow["messages"].append(message)
@ -1006,15 +1178,39 @@ filesDelivered = {self.parseJson2text(matchingDocuments)}
# Save to database - first the message itself
self.mydom.createWorkflowMessage(message)
# Then save the workflow with updated references and statistics
workflowUpdate = {
"lastActivity": currentTime,
"messageIds": workflow["messageIds"] # Update the messageIds field
"messageIds": workflow["messageIds"],
"dataStats": workflow["dataStats"] # Include updated statistics
}
self.mydom.updateWorkflow(workflow["id"], workflowUpdate)
return message
def _trimDataInJson(self, jsonObj: Any) -> Any:
"""
Trims the data attribute in JSON objects while preserving other content.
Args:
jsonObj: JSON object to process
Returns:
Processed JSON object with trimmed data attribute
"""
if isinstance(jsonObj, dict):
# Create a copy to avoid modifying the original
result = jsonObj.copy()
for key, value in result.items():
if key == 'data':
# Trim the data attribute; stringify non-string values first
asText = value if isinstance(value, str) else str(value)
result[key] = asText[:100] + '...' if len(asText) > 100 else asText
elif isinstance(value, (dict, list)):
# Recurse so nested documents and arrays get trimmed too
result[key] = self._trimDataInJson(value)
return result
if isinstance(jsonObj, list):
# Trim each element of JSON arrays
return [self._trimDataInJson(item) for item in jsonObj]
return jsonObj
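
Effect of the helper on a log payload, for illustration:

entry = {"name": "report.txt", "data": "x" * 5000, "contents": [{"data": "y" * 5000}]}
trimmed = self._trimDataInJson(entry)
# Both "data" values are cut to 100 characters plus "...", while "name"
# and all other keys pass through unchanged.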
def logAdd(self, workflow: Dict[str, Any], message: str, level: str = "info",
progress: Optional[int] = None) -> str:
"""
@ -1043,11 +1239,24 @@ filesDelivered = {self.parseJson2text(matchingDocuments)}
# Set agentName from global settings
agentName = GLOBAL_WORKFLOW_LABELS.get("systemName", "unknown")
# Process message if it contains JSON
processedMessage = message
try:
if isinstance(message, str) and ("{" in message or "[" in message):
# Try to parse as JSON
jsonObj = json.loads(message)
# Trim data attribute if present
processedJson = self._trimDataInJson(jsonObj)
processedMessage = json.dumps(processedJson)
except json.JSONDecodeError:
# If parsing fails, use original message
pass
# Create log entry
logEntry = {
"id": logId,
"workflowId": workflow["id"],
"message": message,
"message": processedMessage,
"type": level,
"timestamp": datetime.now().isoformat(),
"agentName": agentName,
@ -1066,11 +1275,11 @@ filesDelivered = {self.parseJson2text(matchingDocuments)}
# Also log in logger
if level == "info":
logger.info(f"Workflow {workflow['id']}: {message}")
logger.info(f"Workflow {workflow['id']}: {processedMessage}")
elif level == "warning":
logger.warning(f"Workflow {workflow['id']}: {message}")
logger.warning(f"Workflow {workflow['id']}: {processedMessage}")
elif level == "error":
logger.error(f"Workflow {workflow['id']}: {message}")
logger.error(f"Workflow {workflow['id']}: {processedMessage}")
return logId
@ -1086,56 +1295,69 @@ filesDelivered = {self.parseJson2text(matchingDocuments)}
List of file IDs for the saved documents
"""
fileIds = []
used_names = set() # Track used names to prevent duplicates
# Extract documents from agent results
documents = agentResults.get("documents", [])
for doc in documents:
try:
# Extract document data according to LucyDOM model
name = doc.get("name", "")
ext = doc.get("ext", "")
data = doc.get("data", "")
base64Encoded = doc.get("base64Encoded", False)
# Skip if no name or data
if not name or not data:
logger.warning(f"Skipping document with missing name or data. Name: {name}, Has data: {bool(data)}")
continue
# Ensure unique filename
base_name = name
counter = 1
while f"{base_name}.{ext}" in used_names:
base_name = f"{name}_{counter}"
counter += 1
used_names.add(f"{base_name}.{ext}")
# Convert content to bytes based on base64Encoded flag
if isinstance(data, str):
if base64Encoded:
# Decode base64 to bytes
try:
import base64
fileContent = base64.b64decode(data)
except Exception as e:
logger.warning(f"Failed to decode base64 content: {str(e)}")
fileContent = data.encode('utf-8')
base64Encoded = False
else:
# Convert text to bytes
fileContent = data.encode('utf-8')
else:
# Already bytes
fileContent = data
# Determine MIME type based on extension
mimeType = self.mydom.getMimeType(f"{base_name}.{ext}")
# Create file metadata
fileMeta = self.mydom.createFile(
name=base_name,
mimeType=mimeType,
size=len(fileContent)
)
if fileMeta and "id" in fileMeta:
fileId = fileMeta["id"]
fileIds.append(fileId)
logger.info(f"Saved document '{label}' with file ID: {fileId} (base64Encoded: {base64Encoded})")
# Save file content
if self.mydom.createFileData(fileMeta["id"], fileContent):
fileIds.append(fileMeta["id"])
logger.info(f"Saved document '{base_name}.{ext}' with file ID: {fileMeta['id']} (base64Encoded: {base64Encoded})")
else:
logger.warning(f"Failed to save content for document '{base_name}.{ext}'")
else:
logger.warning(f"Failed to save document '{label}'")
logger.warning(f"Failed to create file metadata for '{base_name}.{ext}'")
except Exception as e:
logger.error(f"Error saving document from agent results: {str(e)}")
@ -1174,11 +1396,19 @@ filesDelivered = {self.parseJson2text(matchingDocuments)}
# Extract summaries from all contents
contentSummaries = []
for content in doc.get("contents", []):
if "contents" in doc and doc["contents"]:
for content in doc["contents"]:
contentSummaries.append({
"contentPart": content.get("name", "noname"),
"metadata": content.get("metadata", ""),
"summary": content.get("summary", "No summary"),
})
else:
# Add a default content summary if no contents exist
contentSummaries.append({
"contentPart": content.get("name", "noname"),
"metadata": content.get("metadata", ""),
"summary": content.get("summary", "No summary"),
"contentPart": "1_undefined",
"metadata": "",
"summary": "No content extracted",
})
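# Example (illustrative): a document without contents yields a single
# placeholder entry:
#   [{"contentPart": "1_undefined", "metadata": "", "summary": "No content extracted"}]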
# Create document info
@ -1277,11 +1507,12 @@ filesDelivered = {self.parseJson2text(matchingDocuments)}
# Singleton factory for the WorkflowManager
_workflowManagers = {}
_workflowManagerLastAccess = {} # Track last access time for cleanup
def getWorkflowManager(mandateId: int = 0, userId: int = 0) -> WorkflowManager:
"""
Returns a WorkflowManager for the specified context.
Reuses existing instances.
Reuses existing instances but implements cleanup for inactive instances.
Args:
mandateId: ID of the mandate
@ -1291,6 +1522,32 @@ def getWorkflowManager(mandateId: int = 0, userId: int = 0) -> WorkflowManager:
WorkflowManager instance
"""
contextKey = f"{mandateId}_{userId}"
current_time = datetime.now()
# Update last access time
_workflowManagerLastAccess[contextKey] = current_time
# Cleanup old instances (older than 1 hour)
cleanup_threshold = current_time - timedelta(hours=1)
for key in list(_workflowManagers.keys()):
if _workflowManagerLastAccess.get(key, current_time) < cleanup_threshold:
del _workflowManagers[key]
del _workflowManagerLastAccess[key]
if contextKey not in _workflowManagers:
_workflowManagers[contextKey] = WorkflowManager(mandateId, userId)
return _workflowManagers[contextKey]
def cleanupWorkflowManager(mandateId: int, userId: int) -> None:
"""
Explicitly cleanup a WorkflowManager instance.
Args:
mandateId: ID of the mandate
userId: ID of the user
"""
contextKey = f"{mandateId}_{userId}"
if contextKey in _workflowManagers:
del _workflowManagers[contextKey]
if contextKey in _workflowManagerLastAccess:
del _workflowManagerLastAccess[contextKey]
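A short usage sketch of the factory lifecycle (the IDs are illustrative):

# Repeated calls within the one-hour window return the same instance
wm = getWorkflowManager(mandateId=1, userId=42)
assert wm is getWorkflowManager(mandateId=1, userId=42)
# Explicit teardown, e.g. on logout
cleanupWorkflowManager(mandateId=1, userId=42)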

View file

@ -1,28 +1,33 @@
....................... TASKS
agentDocumentation delivers a ".docx" file, but the content is a ".md" text markup file
Extract access management into separate modules "lucydomAccess.py" and "gatewayAccess.py": move the functions from "*Interface.py" that define which access each role has.
Check data extraction of tables in PDF
Check data extraction of types!
----------------------- OPEN
PRIO1:
CHECK: If pictures are not displayed, check the UTF-8 encoding in the base64 string!! General file writing and reading (example with SVG)
add connector to myoutlook
sharepoint connector with document search, content search, content extraction
PRIO2:
todo: an agent for "code writing and editing" connected to the codebase, working in loops over each document...
sharepoint connector with document search, content search, content extraction
Split big files into content-parts
Integrate NDA text as a modal form - data governance agreement at login with a checkbox
migrate frontend to React
frontend: no labels definition
PRIO3:
@ -30,7 +35,7 @@ PRIO3:
Tools to transfer, incl. funds:
- Google SERPAPI (shelly)
- Anthropic Claude (valueon + shelly)
-
- Cursor Pro
----------------------- DONE

View file

@ -1,12 +1,11 @@
from fastapi import APIRouter, HTTPException, Depends, Request, Response, status, Cookie
from fastapi.responses import HTMLResponse, RedirectResponse, JSONResponse
import msal
import os
import logging
import sys
import json
from typing import Dict, Any, Optional
from typing import Dict, Any, Optional, List
from datetime import datetime, timedelta
import secrets
from modules.auth import getCurrentActiveUser, getUserContext, createAccessToken, ACCESS_TOKEN_EXPIRE_MINUTES
from modules.configuration import APP_CONFIG
@ -45,26 +44,67 @@ app_config = {
"redirect_uri": REDIRECT_URI
}
# Create a simple file-based token storage
TOKEN_DIR = './token_storage'
if not os.path.exists(TOKEN_DIR):
os.makedirs(TOKEN_DIR)
logger.info(f"Created token storage directory: {TOKEN_DIR}")
async def save_token_to_file(token_data, currentUser: Dict[str, Any]):
"""Save token data to database using LucyDOMInterface"""
try:
# Get current user context
mandateId, userId = await getUserContext(currentUser)
if not mandateId or not userId:
logger.error("No user context available for token storage")
return False
def save_token_to_file(user_id: str, token_data: Dict[str, Any]):
"""Save token data to a file"""
filename = os.path.join(TOKEN_DIR, f"{user_id}.json")
with open(filename, 'w') as f:
json.dump(token_data, f)
logger.info(f"Token saved for user: {user_id}")
# Get LucyDOM interface for current user
mydom = getLucydomInterface(
mandateId=mandateId,
userId=userId
)
if not mydom:
logger.error("No LucyDOM interface available for token storage")
return False
def load_token_from_file(user_id: str) -> Optional[Dict[str, Any]]:
"""Load token data from a file"""
filename = os.path.join(TOKEN_DIR, f"{user_id}.json")
if os.path.exists(filename):
with open(filename, 'r') as f:
return json.load(f)
return None
# Save token to database
success = mydom.saveMsftToken(token_data)
if success:
logger.info("Token saved successfully to database")
return True
else:
logger.error("Failed to save token to database")
return False
except Exception as e:
logger.error(f"Error saving token: {str(e)}")
return False
async def load_token_from_file(currentUser: Dict[str, Any]):
"""Load token data from database using LucyDOMInterface"""
try:
# Get current user context
mandateId, userId = await getUserContext(currentUser)
if not mandateId or not userId:
logger.error("No user context available for token retrieval")
return None
# Get LucyDOM interface for current user
mydom = getLucydomInterface(
mandateId=mandateId,
userId=userId
)
if not mydom:
logger.error("No LucyDOM interface available for token retrieval")
return None
# Get token from database
token_data = mydom.getMsftToken()
if token_data:
logger.info("Token loaded successfully from database")
return token_data
else:
logger.info("No token found in database")
return None
except Exception as e:
logger.error(f"Error loading token: {str(e)}")
return None
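A minimal convenience sketch built on the two functions above (the helper name is an assumption, not part of the module):

async def ensure_msft_token(currentUser: Dict[str, Any]) -> Optional[Dict[str, Any]]:
    # Hypothetical wrapper: returns usable token data, or None when the user
    # needs to go through the /login flow again.
    token_data = await load_token_from_file(currentUser)
    if token_data and verify_token(token_data.get("access_token", "")):
        return token_data
    return None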
def get_user_info_from_token(access_token: str) -> Optional[Dict[str, Any]]:
"""Get user information using the access token"""
@ -112,9 +152,9 @@ def verify_token(token: str) -> bool:
logger.error(f"Exception verifying token: {str(e)}")
return False
def refresh_token(user_id: str) -> bool:
async def refresh_token(user_id: str, currentUser: Dict[str, Any]) -> bool:
"""Refresh the access token using the stored refresh token"""
token_data = load_token_from_file(user_id)
token_data = await load_token_from_file(currentUser)
if not token_data or not token_data.get("refresh_token"):
logger.warning("No refresh token available")
return False
@ -139,45 +179,13 @@ def refresh_token(user_id: str) -> bool:
if "refresh_token" in result:
token_data["refresh_token"] = result["refresh_token"]
save_token_to_file(user_id, token_data)
await save_token_to_file(token_data, currentUser)
logger.info("Access token refreshed successfully")
return True
def silent_login(user_id: str) -> bool:
"""Try to silently log in a user using their refresh token"""
token_data = load_token_from_file(user_id)
if not token_data or not token_data.get("refresh_token"):
logger.info(f"No refresh token found for user: {user_id}")
return False
# Try to refresh the token
msal_app = msal.ConfidentialClientApplication(
app_config["client_id"],
authority=app_config["authority"],
client_credential=app_config["client_credential"]
)
result = msal_app.acquire_token_by_refresh_token(
token_data["refresh_token"],
scopes=SCOPES
)
if "error" in result:
logger.error(f"Error refreshing token: {result.get('error')}")
return False
# Update tokens in storage
token_data["access_token"] = result["access_token"]
if "refresh_token" in result:
token_data["refresh_token"] = result["refresh_token"]
save_token_to_file(user_id, token_data)
return True
@router.get("/login")
async def login():
# Modified implementation without requiring current user
"""Initiate Microsoft login for the current user"""
try:
# Create a confidential client application
msal_app = msal.ConfidentialClientApplication(
@ -186,224 +194,292 @@ async def login():
client_credential=app_config["client_credential"]
)
# Build the auth URL
# Build the auth URL with a random state
state = secrets.token_urlsafe(32)
auth_url = msal_app.get_authorization_request_url(
SCOPES,
state="anonymous-user", # Use a general state since we don't have user context
state=state, # Use random state
redirect_uri=app_config["redirect_uri"]
)
logger.info(f"Redirecting to Microsoft login: {auth_url[:60]}...")
logger.info(f"Redirecting to Microsoft login")
return RedirectResponse(auth_url)
except Exception as e:
logger.error(f"Error initiating Microsoft login: {str(e)}")
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Error initiating Microsoft login: {str(e)}"
detail=f"Failed to initiate Microsoft login: {str(e)}"
)
@router.get("/auth/callback")
async def auth_callback(request: Request, code: str = None, state: str = None):
"""Handle callback from Microsoft login"""
async def auth_callback(code: str, state: str, request: Request):
"""Handle Microsoft OAuth callback"""
try:
# Log callback for debugging
logger.info("Received callback from Microsoft login")
if not code:
logger.error("No authorization code received in callback")
return JSONResponse(
status_code=status.HTTP_400_BAD_REQUEST,
content={"message": "No authorization code received"}
)
# Extract user and mandate info from state if available
user_id = None
mandate_id = None
if state and state != "anonymous-user":
try:
mandate_id, user_id = state.split(":")
logger.info(f"State contains mandate_id: {mandate_id}, user_id: {user_id}")
except ValueError:
logger.warning(f"Invalid state format: {state}")
# Generate a generic user ID if state is invalid
user_id = f"user_{datetime.now().strftime('%Y%m%d%H%M%S')}"
else:
# For anonymous authentication, create a generic user ID
logger.info("Anonymous authentication (no user context)")
user_id = f"user_{datetime.now().strftime('%Y%m%d%H%M%S')}"
# Create a confidential client application
msal_app = msal.ConfidentialClientApplication(
app_config["client_id"],
authority=app_config["authority"],
client_credential=app_config["client_credential"]
# Create MSAL app instance
app = msal.ConfidentialClientApplication(
client_id=CLIENT_ID,
client_credential=CLIENT_SECRET,
authority=AUTHORITY
)
# Get tokens using the authorization code
result = msal_app.acquire_token_by_authorization_code(
code,
# Exchange code for token
token_response = app.acquire_token_by_authorization_code(
code=code,
scopes=SCOPES,
redirect_uri=app_config["redirect_uri"]
redirect_uri=REDIRECT_URI
)
if "error" in result:
logger.error(f"Error acquiring token: {result.get('error')}")
return JSONResponse(
status_code=status.HTTP_400_BAD_REQUEST,
content={"message": f"Error acquiring token: {result.get('error_description', result.get('error'))}"}
if "error" in token_response:
logger.error(f"Token acquisition failed: {token_response['error']}")
return HTMLResponse(
content="""
<html>
<head>
<title>Authentication Failed</title>
<style>
body { font-family: Arial, sans-serif; text-align: center; margin-top: 50px; }
.error { color: red; }
</style>
</head>
<body>
<h1 class="error">Authentication Failed</h1>
<p>Please try again.</p>
<script>
setTimeout(() => window.close(), 3000);
</script>
</body>
</html>
""",
status_code=400
)
# Store user information
user_info = {}
if "id_token_claims" in result:
user_info = {
"name": result["id_token_claims"].get("name", ""),
"email": result["id_token_claims"].get("preferred_username", ""),
}
# Get user info from token
user_info = get_user_info_from_token(token_response["access_token"])
if not user_info:
logger.error("Failed to get user info from token")
return HTMLResponse(
content="""
<html>
<head>
<title>Authentication Failed</title>
<style>
body { font-family: Arial, sans-serif; text-align: center; margin-top: 50px; }
.error { color: red; }
</style>
</head>
<body>
<h1 class="error">Authentication Failed</h1>
<p>Could not retrieve user information.</p>
<script>
setTimeout(() => window.close(), 3000);
</script>
</body>
</html>
""",
status_code=400
)
# If we have user info from the token, use that for user_id
token_user_id = result["id_token_claims"].get("oid") or result["id_token_claims"].get("sub")
if token_user_id:
user_id = token_user_id
elif not user_id and user_info.get("email"):
# Fall back to email-based ID if no other ID is available
user_id = user_info.get("email", "user").replace("@", "_").replace(".", "_")
# Add user info to token data
token_response["user_info"] = user_info
# Save tokens to file
token_data = {
"access_token": result["access_token"],
"refresh_token": result.get("refresh_token", ""),
"user_info": user_info,
"timestamp": datetime.now().isoformat()
}
# Ensure token directory exists
if not os.path.exists(TOKEN_DIR):
os.makedirs(TOKEN_DIR)
# Save token to file
token_file = os.path.join(TOKEN_DIR, f"{user_id}.json")
with open(token_file, 'w') as f:
json.dump(token_data, f)
logger.info(f"User authenticated: {user_info.get('email', 'unknown')}")
# Create a success page
html_content = """
<!DOCTYPE html>
# Store tokens in session storage for the frontend to pick up
response = HTMLResponse(
content=f"""
<html>
<head>
<meta charset="UTF-8">
<title>Authentication Successful</title>
<style>
body { font-family: Arial, sans-serif; margin: 40px; text-align: center; }
.success-container { max-width: 600px; margin: 0 auto; }
h1 { color: #0078d4; }
.success-icon { font-size: 72px; color: #107c10; margin: 20px 0; }
.button { display: inline-block; background-color: #0078d4; color: white;
padding: 10px 20px; text-decoration: none; border-radius: 4px;
font-weight: bold; margin-top: 20px; }
</style>
</head>
<body>
<div class="success-container">
<h1>Authentication Successful</h1>
<div class="success-icon"></div>
<p>You have successfully authenticated with Microsoft.</p>
<p>You can now close this tab and return to the application.</p>
<p>Your email templates will now be able to create drafts in your mailbox.</p>
<a href="javascript:window.close()" class="button">Close Window</a>
</div>
<script>
// Attempt to notify the opener window that authentication is complete
if (window.opener && !window.opener.closed) {
try {
window.opener.postMessage({ type: 'msft_auth_complete', success: true }, '*');
} catch (e) {
console.error('Error notifying opener:', e);
}
}
</script>
</body>
<head>
<title>Authentication Successful</title>
<style>
body {{ font-family: Arial, sans-serif; text-align: center; margin-top: 50px; }}
.success {{ color: green; }}
</style>
</head>
<body>
<h1 class="success">Authentication Successful</h1>
<p>Welcome, {user_info.get('name', 'User')}!</p>
<p>This window will close automatically.</p>
<script>
// Store token data in session storage
sessionStorage.setItem('msft_token_data', JSON.stringify({json.dumps(token_response)}));
// Notify parent window of success
if (window.opener) {{
window.opener.postMessage({{
type: 'msft_auth_success',
user: {json.dumps(user_info)},
token_data: {json.dumps(token_response)}
}}, '*');
}}
// Close window after 3 seconds
setTimeout(() => window.close(), 3000);
</script>
</body>
</html>
"""
)
return HTMLResponse(content=html_content)
else:
logger.warning("No id_token_claims found in result")
return JSONResponse(
status_code=status.HTTP_400_BAD_REQUEST,
content={"message": "Failed to retrieve user information"}
)
return response
except Exception as e:
logger.error(f"Error in auth callback: {str(e)}", exc_info=True)
return JSONResponse(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
content={"message": f"Error in auth callback: {str(e)}"}
logger.error(f"Authentication failed: {str(e)}")
return HTMLResponse(
content="""
<html>
<head>
<title>Authentication Failed</title>
<style>
body { font-family: Arial, sans-serif; text-align: center; margin-top: 50px; }
.error { color: red; }
</style>
</head>
<body>
<h1 class="error">Authentication Failed</h1>
<p>An error occurred during authentication.</p>
<script>
setTimeout(() => window.close(), 3000);
</script>
</body>
</html>
""",
status_code=500
)
@router.get("/status")
async def auth_status(
msft_user_id: Optional[str] = Cookie(None),
currentUser: Dict[str, Any] = Depends(getCurrentActiveUser)
):
async def auth_status(currentUser: Dict[str, Any] = Depends(getCurrentActiveUser)):
"""Check Microsoft authentication status"""
try:
# Get user ID
if not msft_user_id:
mandateId, userId = await getUserContext(currentUser)
user_id = str(userId)
else:
user_id = msft_user_id
# Get current user context
mandateId, userId = await getUserContext(currentUser)
if not mandateId or not userId:
logger.info("No user context found")
return JSONResponse({
"authenticated": False,
"message": "Not authenticated with Microsoft"
})
# Check if we have a token for the current user
token_data = await load_token_from_file(currentUser)
# Check if user has a token
token_data = load_token_from_file(user_id)
if not token_data:
return JSONResponse(
content={"authenticated": False, "message": "Not authenticated with Microsoft"}
)
logger.info(f"No token data found for user {userId}")
return JSONResponse({
"authenticated": False,
"message": "Not authenticated with Microsoft"
})
# Check if token is valid
if not verify_token(token_data.get("access_token", "")):
# Try to refresh token
if refresh_token(user_id):
token_data = load_token_from_file(user_id)
user_info = token_data.get("user_info", {})
return JSONResponse(
content={
"authenticated": True,
"message": "Token refreshed successfully",
"user": user_info
}
)
else:
return JSONResponse(
content={
"authenticated": False,
"message": "Token expired and couldn't be refreshed"
}
)
# Verify token is still valid
if not verify_token(token_data["access_token"]):
logger.info("Token invalid, attempting refresh")
# Try to refresh the token
if not await refresh_token(userId, currentUser):
logger.info("Token refresh failed")
return JSONResponse({
"authenticated": False,
"message": "Token expired and refresh failed"
})
# Reload token data after refresh
token_data = await load_token_from_file(currentUser)
# Token is valid, return user info
user_info = token_data.get("user_info", {})
return JSONResponse(
content={
"authenticated": True,
"message": "Authenticated with Microsoft",
"user": user_info
}
)
# Get user info from token data
user_info = token_data.get("user_info")
if not user_info:
logger.info("No user info found in token data")
return JSONResponse({
"authenticated": False,
"message": "No user information available"
})
logger.info(f"User {user_info.get('name')} is authenticated")
return JSONResponse({
"authenticated": True,
"user": user_info
})
except Exception as e:
logger.error(f"Error checking auth status: {str(e)}")
return JSONResponse(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
content={"message": f"Error checking auth status: {str(e)}"}
logger.error(f"Error checking authentication status: {str(e)}")
return JSONResponse({
"authenticated": False,
"message": f"Error checking authentication status: {str(e)}"
})
@router.post("/logout")
async def logout(currentUser: Dict[str, Any] = Depends(getCurrentActiveUser)):
"""Logout from Microsoft"""
try:
# Get current user context
mandateId, userId = await getUserContext(currentUser)
if not mandateId or not userId:
return JSONResponse({
"message": "Not authenticated with Microsoft"
})
# Get LucyDOM interface for current user
mydom = getLucydomInterface(
mandateId=mandateId,
userId=userId
)
if not mydom:
return JSONResponse({
"message": "Not authenticated with Microsoft"
})
# Remove token from database
tokens = mydom.db.getRecordset("msftTokens", recordFilter={
"mandateId": mandateId,
"userId": userId
})
if tokens and len(tokens) > 0:
mydom.db.recordDelete("msftTokens", tokens[0]["id"])
logger.info(f"Removed Microsoft token for user {userId}")
return JSONResponse({
"message": "Successfully logged out from Microsoft"
})
except Exception as e:
logger.error(f"Error during logout: {str(e)}")
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Logout failed: {str(e)}"
)
@router.get("/token")
async def get_access_token(currentUser: Dict[str, Any] = Depends(getCurrentActiveUser)):
"""Get the current user's access token for Microsoft Graph API"""
try:
# Check if we have a token for the current user
token_data = await load_token_from_file(currentUser)
if not token_data:
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED,
detail="Not authenticated with Microsoft"
)
# Verify token is still valid
if not verify_token(token_data["access_token"]):
# Try to refresh the token
if not await refresh_token(currentUser["id"], currentUser):
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED,
detail="Token expired and refresh failed"
)
# Reload token data after refresh
token_data = await load_token_from_file(currentUser)
return JSONResponse({
"access_token": token_data["access_token"]
})
except Exception as e:
logger.error(f"Error getting access token: {str(e)}")
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Error getting access token: {str(e)}"
)
@router.post("/token")
async def get_backend_token(request: Request):
@ -467,3 +543,74 @@ async def get_backend_token(request: Request):
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Error processing MSAL token: {str(e)}"
)
@router.post("/save-token")
async def save_token(token_data: Dict[str, Any], currentUser: Dict[str, Any] = Depends(getCurrentActiveUser)):
"""Save Microsoft token data from frontend"""
try:
# Save token to database
success = await save_token_to_file(token_data, currentUser)
if success:
return JSONResponse({
"success": True,
"message": "Token saved successfully"
})
else:
return JSONResponse({
"success": False,
"message": "Failed to save token"
})
except Exception as e:
logger.error(f"Error saving token: {str(e)}")
return JSONResponse({
"success": False,
"message": f"Error saving token: {str(e)}"
})
async def generateFinalMessage(self, objUserResponse: str, objFinalDocuments: List[str], objResults: List[Dict[str, Any]]) -> Dict[str, Any]:
"""Generate the final message for the workflow"""
try:
# Get list of delivered documents
matchingDocuments = []
for result in objResults:
if "documents" in result:
for doc in result["documents"]:
if doc.get("label") in objFinalDocuments:
matchingDocuments.append(doc.get("label"))
# Use the mydom for language-aware AI calls
finalPrompt = await self.mydom.callAi([
{"role": "system", "content": "You are a project manager, who delivers results to a user."},
{"role": "user", "content": f"""
Give a brief summary of what has been accomplished, referencing the initial request (objUserResponse). List only the files that have been successfully delivered (filesDelivered). Keep the message concise and professional.
Here is the data:
objUserResponse = {self.parseJson2text(objUserResponse)}
filesDelivered = {self.parseJson2text(matchingDocuments)}
"""
}
], produceUserAnswer=True)
# Create basic message structure with proper fields
logger.debug(f"FINAL PROMPT = {self.parseJson2text(finalPrompt)}.")
finalMessage = {
"role": "assistant",
"agentName": "Project Manager",
"content": finalPrompt,
"documents": [] # DO NOT include the results documents, already with agents
}
logger.debug(f"FINAL MESSAGE = {self.parseJson2text(finalMessage)}.")
return finalMessage
except Exception as e:
logger.error(f"Error generating final message: {str(e)}")
return {
"role": "assistant",
"agentName": "Project Manager",
"content": "I apologize, but there was an error generating the final message. Please check the logs for more details.",
"documents": []
}
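A brief usage sketch for generateFinalMessage, assuming it is called from within the workflow manager (all inputs are illustrative):

# finalMessage = await self.generateFinalMessage(
#     objUserResponse="Create a prime number CSV",
#     objFinalDocuments=["prime_numbers.csv"],
#     objResults=[{"documents": [{"label": "prime_numbers.csv"}]}],
# )
# Expected shape: {"role": "assistant", "agentName": "Project Manager", "content": ..., "documents": []}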

View file

@ -1,42 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>Email Preview: Verschiebung des Meetings auf Freitag</title>
<style>
body { font-family: Arial, sans-serif; margin: 0; padding: 0; background-color: #f5f5f5; }
.email-container { max-width: 600px; margin: 20px auto; background-color: white; border: 1px solid #ddd; border-radius: 5px; overflow: hidden; }
.email-header { background-color: #f0f0f0; padding: 15px; border-bottom: 1px solid #ddd; }
.email-content { padding: 20px; }
.email-footer { background-color: #f0f0f0; padding: 15px; border-top: 1px solid #ddd; font-size: 12px; color: #666; }
.field { margin-bottom: 10px; }
.field-label { font-weight: bold; color: #555; }
.email-body { margin-top: 20px; padding-top: 20px; border-top: 1px solid #eee; }
</style>
</head>
<body>
<div class="email-container">
<div class="email-header">
<h2>Email Template Preview</h2>
</div>
<div class="email-content">
<div class="field">
<div class="field-label">To:</div>
<div>peter.muster@domain.com</div>
</div>
<div class="field">
<div class="field-label">Subject:</div>
<div>Verschiebung des Meetings auf Freitag</div>
</div>
<div class="email-body">
<p>Sehr geehrter Herr Muster,</p><p>ich hoffe, es geht Ihnen gut. Ich schreibe Ihnen, um unser geplantes Meeting von 10 Uhr auf Freitag zu verschieben. Bitte lassen Sie mich wissen, ob dieser neue Termin für Sie passt.</p><p>Vielen Dank für Ihr Verständnis.</p><p>Mit freundlichen Grüßen,<br>[Ihr Name]</p>
</div>
</div>
<div class="email-footer">
<p>This is a preview of the email template. The actual email may appear differently in various email clients.</p>
</div>
</div>
</body>
</html>

View file

@ -1,6 +0,0 @@
{
"recipient": "peter.muster@domain.com",
"subject": "Verschiebung des Meetings auf Freitag",
"plainBody": "Sehr geehrter Herr Muster,\n\nich hoffe, es geht Ihnen gut. Ich schreibe Ihnen, um unser geplantes Meeting von 10 Uhr auf Freitag zu verschieben. Bitte lassen Sie mich wissen, ob dieser neue Termin f\u00fcr Sie passt.\n\nVielen Dank f\u00fcr Ihr Verst\u00e4ndnis.\n\nMit freundlichen Gr\u00fc\u00dfen,\n\n[Ihr Name]",
"htmlBody": "<p>Sehr geehrter Herr Muster,</p><p>ich hoffe, es geht Ihnen gut. Ich schreibe Ihnen, um unser geplantes Meeting von 10 Uhr auf Freitag zu verschieben. Bitte lassen Sie mich wissen, ob dieser neue Termin f\u00fcr Sie passt.</p><p>Vielen Dank f\u00fcr Ihr Verst\u00e4ndnis.</p><p>Mit freundlichen Gr\u00fc\u00dfen,<br>[Ihr Name]</p>"
}

View file

@ -1,42 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>Email Preview: Anfrage zur Terminverschiebung</title>
<style>
body { font-family: Arial, sans-serif; margin: 0; padding: 0; background-color: #f5f5f5; }
.email-container { max-width: 600px; margin: 20px auto; background-color: white; border: 1px solid #ddd; border-radius: 5px; overflow: hidden; }
.email-header { background-color: #f0f0f0; padding: 15px; border-bottom: 1px solid #ddd; }
.email-content { padding: 20px; }
.email-footer { background-color: #f0f0f0; padding: 15px; border-top: 1px solid #ddd; font-size: 12px; color: #666; }
.field { margin-bottom: 10px; }
.field-label { font-weight: bold; color: #555; }
.email-body { margin-top: 20px; padding-top: 20px; border-top: 1px solid #eee; }
</style>
</head>
<body>
<div class="email-container">
<div class="email-header">
<h2>Email Template Preview</h2>
</div>
<div class="email-content">
<div class="field">
<div class="field-label">To:</div>
<div>peter.muster@domain.com</div>
</div>
<div class="field">
<div class="field-label">Subject:</div>
<div>Anfrage zur Terminverschiebung</div>
</div>
<div class="email-body">
<p>Sehr geehrter Herr Muster,</p><p>ich hoffe, diese Nachricht trifft Sie wohl. Ich schreibe Ihnen, um eine Verschiebung unseres Termins von 10 Uhr auf Freitag zu erbitten. Bitte lassen Sie mich wissen, ob dies für Sie möglich ist.</p><p>Vielen Dank im Voraus für Ihre Flexibilität.</p><p>Mit freundlichen Grüßen,<br>[Ihr Name]</p>
</div>
</div>
<div class="email-footer">
<p>This is a preview of the email template. The actual email may appear differently in various email clients.</p>
</div>
</div>
</body>
</html>

View file

@ -1,6 +0,0 @@
{
"recipient": "peter.muster@domain.com",
"subject": "Anfrage zur Terminverschiebung",
"plainBody": "Sehr geehrter Herr Muster,\n\nich hoffe, diese Nachricht trifft Sie wohl. Ich schreibe Ihnen, um eine Verschiebung unseres Termins von 10 Uhr auf Freitag zu erbitten. Bitte lassen Sie mich wissen, ob dies f\u00fcr Sie m\u00f6glich ist.\n\nVielen Dank im Voraus f\u00fcr Ihre Flexibilit\u00e4t.\n\nMit freundlichen Gr\u00fc\u00dfen,\n\n[Ihr Name]",
"htmlBody": "<p>Sehr geehrter Herr Muster,</p><p>ich hoffe, diese Nachricht trifft Sie wohl. Ich schreibe Ihnen, um eine Verschiebung unseres Termins von 10 Uhr auf Freitag zu erbitten. Bitte lassen Sie mich wissen, ob dies f\u00fcr Sie m\u00f6glich ist.</p><p>Vielen Dank im Voraus f\u00fcr Ihre Flexibilit\u00e4t.</p><p>Mit freundlichen Gr\u00fc\u00dfen,<br>[Ihr Name]</p>"
}

View file

@ -1,47 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>Microsoft Authentication Required</title>
<style>
body { font-family: Arial, sans-serif; margin: 0; padding: 20px; background-color: #f5f5f5; }
.container { max-width: 800px; margin: 0 auto; background-color: white; border-radius: 8px; box-shadow: 0 2px 10px rgba(0,0,0,0.1); padding: 30px; }
h1 { color: #0078d4; margin-top: 0; }
.step { margin-bottom: 20px; }
.step-number { display: inline-block; width: 30px; height: 30px; background-color: #0078d4; color: white; border-radius: 50%; text-align: center; line-height: 30px; margin-right: 10px; font-weight: bold; }
.auth-button { display: inline-block; background-color: #0078d4; color: white; padding: 12px 24px; text-decoration: none; border-radius: 4px; font-weight: bold; margin: 20px 0; }
.auth-button:hover { background-color: #106ebe; }
.note { background-color: #fff4e5; border-left: 4px solid #ff8c00; padding: 15px; margin: 20px 0; }
</style>
</head>
<body>
<div class="container">
<h1>Microsoft Authentication Required</h1>
<p>To create email templates and drafts, you need to authenticate with your Microsoft account. Follow these steps:</p>
<div class="step">
<span class="step-number">1</span>
<strong>Click the authentication link below</strong>
</div>
<a href="http://localhost:8080/api/msft/login" class="auth-button" target="_blank">Authenticate with Microsoft</a>
<div class="step">
<span class="step-number">2</span>
<strong>Sign in with your Microsoft account</strong> and grant the required permissions
</div>
<div class="step">
<span class="step-number">3</span>
<strong>Return to this application</strong> and run the email agent again after completing authentication
</div>
<div class="note">
<p><strong>Note:</strong> You only need to authenticate once. Your session will be remembered for future email operations.</p>
</div>
</div>
</body>
</html>

View file

@ -1,28 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>Microsoft Authentication Required</title>
<style>
body { font-family: Arial, sans-serif; margin: 0; padding: 20px; background-color: #f5f5f5; }
.container { max-width: 800px; margin: 0 auto; background-color: white; border-radius: 8px; box-shadow: 0 2px 10px rgba(0,0,0,0.1); padding: 30px; }
h1 { color: #0078d4; margin-top: 0; }
.note { background-color: #fff4e5; border-left: 4px solid #ff8c00; padding: 15px; margin: 20px 0; }
</style>
</head>
<body>
<div class="container">
<h1>Microsoft Authentication Required</h1>
<p>To create email templates and drafts, you need to authenticate with your Microsoft account.</p>
<p>The application will now initiate the Microsoft authentication process. Please follow the instructions in the authentication window.</p>
<div class="note">
<p><strong>Note:</strong> You only need to authenticate once. Your session will be remembered for future email operations.</p>
</div>
</div>
</body>
</html>

View file

@ -1,42 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>Email Preview: Verschiebung des Meetings auf Freitag</title>
<style>
body { font-family: Arial, sans-serif; margin: 0; padding: 0; background-color: #f5f5f5; }
.email-container { max-width: 600px; margin: 20px auto; background-color: white; border: 1px solid #ddd; border-radius: 5px; overflow: hidden; }
.email-header { background-color: #f0f0f0; padding: 15px; border-bottom: 1px solid #ddd; }
.email-content { padding: 20px; }
.email-footer { background-color: #f0f0f0; padding: 15px; border-top: 1px solid #ddd; font-size: 12px; color: #666; }
.field { margin-bottom: 10px; }
.field-label { font-weight: bold; color: #555; }
.email-body { margin-top: 20px; padding-top: 20px; border-top: 1px solid #eee; }
</style>
</head>
<body>
<div class="email-container">
<div class="email-header">
<h2>Email Template Preview</h2>
</div>
<div class="email-content">
<div class="field">
<div class="field-label">To:</div>
<div>peter.muster@domain.com</div>
</div>
<div class="field">
<div class="field-label">Subject:</div>
<div>Verschiebung des Meetings auf Freitag</div>
</div>
<div class="email-body">
<p>Sehr geehrter Herr Muster,</p><p>ich hoffe, es geht Ihnen gut. Ich schreibe Ihnen, um unser geplantes Meeting um 10 Uhr auf Freitag zu verschieben. Bitte lassen Sie mich wissen, ob dieser Termin für Sie passt.</p><p>Vielen Dank für Ihr Verständnis.</p><p>Mit freundlichen Grüßen,</p><p>[Ihr Name]</p>
</div>
</div>
<div class="email-footer">
<p>This is a preview of the email template. The actual email may appear differently in various email clients.</p>
</div>
</div>
</body>
</html>

View file

@ -1,6 +0,0 @@
{
"recipient": "peter.muster@domain.com",
"subject": "Verschiebung des Meetings auf Freitag",
"plainBody": "Sehr geehrter Herr Muster,\n\nich hoffe, es geht Ihnen gut. Ich schreibe Ihnen, um unser geplantes Meeting um 10 Uhr auf Freitag zu verschieben. Bitte lassen Sie mich wissen, ob dieser Termin f\u00fcr Sie passt.\n\nVielen Dank f\u00fcr Ihr Verst\u00e4ndnis.\n\nMit freundlichen Gr\u00fc\u00dfen,\n\n[Ihr Name]",
"htmlBody": "<p>Sehr geehrter Herr Muster,</p><p>ich hoffe, es geht Ihnen gut. Ich schreibe Ihnen, um unser geplantes Meeting um 10 Uhr auf Freitag zu verschieben. Bitte lassen Sie mich wissen, ob dieser Termin f\u00fcr Sie passt.</p><p>Vielen Dank f\u00fcr Ihr Verst\u00e4ndnis.</p><p>Mit freundlichen Gr\u00fc\u00dfen,</p><p>[Ihr Name]</p>"
}

View file

@ -1,48 +0,0 @@
inputFiles = [] # DO NOT CHANGE THIS LINE
# REQUIREMENTS:
import json
import csv
from io import StringIO
def is_prime(n):
if n <= 1:
return False
if n <= 3:
return True
if n % 2 == 0 or n % 3 == 0:
return False
i = 5
while i * i <= n:
if n % i == 0 or n % (i + 2) == 0:
return False
i += 6
return True
def generate_primes(limit):
primes = []
num = 2
while len(primes) < limit:
if is_prime(num):
primes.append(num)
num += 1
return primes
primes = generate_primes(1000)
output = StringIO()
csv_writer = csv.writer(output)
for prime in primes:
csv_writer.writerow([prime])
result = {
"prime_numbers.csv": {
"content": output.getvalue(),
"base64Encoded": False,
"contentType": "text/csv"
}
}
print(json.dumps(result))

File diff suppressed because one or more lines are too long

Binary file not shown.


File diff suppressed because it is too large

View file

@ -1,42 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>Email Preview: Prime Numbers CSV</title>
<style>
body { font-family: Arial, sans-serif; margin: 0; padding: 0; background-color: #f5f5f5; }
.email-container { max-width: 600px; margin: 20px auto; background-color: white; border: 1px solid #ddd; border-radius: 5px; overflow: hidden; }
.email-header { background-color: #f0f0f0; padding: 15px; border-bottom: 1px solid #ddd; }
.email-content { padding: 20px; }
.email-footer { background-color: #f0f0f0; padding: 15px; border-top: 1px solid #ddd; font-size: 12px; color: #666; }
.field { margin-bottom: 10px; }
.field-label { font-weight: bold; color: #555; }
.email-body { margin-top: 20px; padding-top: 20px; border-top: 1px solid #eee; }
</style>
</head>
<body>
<div class="email-container">
<div class="email-header">
<h2>Email Template Preview</h2>
</div>
<div class="email-content">
<div class="field">
<div class="field-label">To:</div>
<div>recipient@example.com</div>
</div>
<div class="field">
<div class="field-label">Subject:</div>
<div>Prime Numbers CSV</div>
</div>
<div class="email-body">
<p>Sehr geehrte Damen und Herren,</p><p>anbei finden Sie die Datei <strong>'prime_numbers.csv'</strong>, die die Liste der Primzahlen enthält.</p><p>Mit freundlichen Grüßen,<br>Ihr Team</p>
</div>
</div>
<div class="email-footer">
<p>This is a preview of the email template. The actual email may appear differently in various email clients.</p>
</div>
</div>
</body>
</html>

View file

@ -1,6 +0,0 @@
{
"recipient": "recipient@example.com",
"subject": "Prime Numbers CSV",
"plainBody": "Sehr geehrte Damen und Herren,\n\nanbei finden Sie die Datei 'prime_numbers.csv', die die Liste der Primzahlen enth\u00e4lt.\n\nMit freundlichen Gr\u00fc\u00dfen,\nIhr Team",
"htmlBody": "<p>Sehr geehrte Damen und Herren,</p><p>anbei finden Sie die Datei <strong>'prime_numbers.csv'</strong>, die die Liste der Primzahlen enth\u00e4lt.</p><p>Mit freundlichen Gr\u00fc\u00dfen,<br>Ihr Team</p>"
}

View file

@ -1,933 +0,0 @@
"""
Module for extracting content from various file formats.
Provides specialized functions for processing text, PDF, Office documents, images, etc.
"""
import logging
import os
import io
from typing import Dict, Any, List, Optional, Union, Tuple
import base64
# Configure logger
logger = logging.getLogger(__name__)
# Optional imports - only loaded when needed
pdfExtractorLoaded = False
officeExtractorLoaded = False
imageProcessorLoaded = False
def getDocumentContents(fileMetadata: Dict[str, Any], fileContent: bytes) -> List[Dict[str, Any]]:
"""
Main function for extracting content from a file based on its MIME type.
Delegates to specialized extraction functions.
Args:
fileMetadata: File metadata (Name, MIME type, etc.)
fileContent: Binary data of the file
Returns:
List of Document-Content objects with metadata and base64Encoded flag
"""
try:
mimeType = fileMetadata.get("mimeType", "application/octet-stream")
fileName = fileMetadata.get("name", "unknown")
logger.info(f"Extracting content from file '{fileName}' (MIME type: {mimeType})")
# Extract content based on MIME type
contents = []
# Text-based formats (excluding CSV which has its own handler)
if mimeType == "text/csv":
contents.extend(extractCsvContent(fileName, fileContent))
# Then handle other text-based formats
elif mimeType.startswith("text/") or mimeType in [
"application/json",
"application/xml",
"application/javascript",
"application/x-python"
]:
contents.extend(extractTextContent(fileName, fileContent, mimeType))
# SVG Files
elif mimeType == "image/svg+xml":
contents.extend(extractSvgContent(fileName, fileContent))
# Images
elif mimeType.startswith("image/"):
contents.extend(extractImageContent(fileName, fileContent, mimeType))
# PDF Documents
elif mimeType == "application/pdf":
contents.extend(extractPdfContent(fileName, fileContent))
# Word Documents
elif mimeType in [
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
"application/msword"
]:
contents.extend(extractWordContent(fileName, fileContent, mimeType))
# Excel Documents
elif mimeType in [
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
"application/vnd.ms-excel"
]:
contents.extend(extractExcelContent(fileName, fileContent, mimeType))
# PowerPoint Documents
elif mimeType in [
"application/vnd.openxmlformats-officedocument.presentationml.presentation",
"application/vnd.ms-powerpoint"
]:
contents.extend(extractPowerpointContent(fileName, fileContent, mimeType))
# Binary data as fallback for unknown formats
else:
contents.extend(extractBinaryContent(fileName, fileContent, mimeType))
# Fallback when no content could be extracted
if not contents:
logger.warning(f"No content extracted from file '{fileName}', using binary fallback")
# Convert binary content to base64
encoded_data = base64.b64encode(fileContent).decode('utf-8')
contents.append({
"sequenceNr": 1,
"name": '1_undefined',
"ext": os.path.splitext(fileName)[1][1:] if os.path.splitext(fileName)[1] else "bin",
"contentType": mimeType,
"data": encoded_data,
"base64Encoded": True,
"metadata": {
"isText": False
}
})
# Add generic attributes for all documents
for content in contents:
# Make sure all content items have the base64Encoded flag
if "base64Encoded" not in content:
if isinstance(content.get("data"), bytes):
# Convert bytes to base64
content["data"] = base64.b64encode(content["data"]).decode('utf-8')
content["base64Encoded"] = True
else:
# Assume text content if not explicitly marked
content["base64Encoded"] = False
# Maintain backward compatibility with old "base64Encoded" flag in metadata
if "metadata" not in content:
content["metadata"] = {}
# Set base64Encoded in metadata for backward compatibility
content["metadata"]["base64Encoded"] = content["base64Encoded"]
logger.info(f"Successfully extracted {len(contents)} content items from file '{fileName}'")
return contents
except Exception as e:
logger.error(f"Error during content extraction: {str(e)}")
# Fallback on error - return original data
return [{
"sequenceNr": 1,
"name": fileMetadata.get("name", "unknown"),
"ext": os.path.splitext(fileMetadata.get("name", ""))[1][1:] if os.path.splitext(fileMetadata.get("name", ""))[1] else "bin",
"contentType": fileMetadata.get("mimeType", "application/octet-stream"),
"data": base64.b64encode(fileContent).decode('utf-8'),
"base64Encoded": True,
"metadata": {
"isText": False,
"base64Encoded": True # For backward compatibility
}
}]
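A hypothetical debugging helper showing how getDocumentContents is typically driven (the function name and file path are assumptions):

def _printExtractedContents(filePath: str) -> None:
    # Runs the extractor on a local file and lists the content parts it produces.
    import mimetypes
    with open(filePath, "rb") as f:
        payload = f.read()
    mimeType, _ = mimetypes.guess_type(filePath)
    meta = {"name": os.path.basename(filePath), "mimeType": mimeType or "application/octet-stream"}
    for item in getDocumentContents(meta, payload):
        print(item["sequenceNr"], item["name"], item["contentType"], item["base64Encoded"])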
def _loadPdfExtractor():
"""Loads PDF extraction libraries when needed"""
global pdfExtractorLoaded
if not pdfExtractorLoaded:
try:
global PyPDF2, fitz
import PyPDF2
import fitz # PyMuPDF for more extensive PDF processing
pdfExtractorLoaded = True
logger.info("PDF extraction libraries successfully loaded")
except ImportError as e:
logger.warning(f"PDF extraction libraries could not be loaded: {e}")
def _loadOfficeExtractor():
"""Loads Office document extraction libraries when needed"""
global officeExtractorLoaded
if not officeExtractorLoaded:
try:
global docx, openpyxl
import docx # python-docx for Word documents
import openpyxl # for Excel files
officeExtractorLoaded = True
logger.info("Office extraction libraries successfully loaded")
except ImportError as e:
logger.warning(f"Office extraction libraries could not be loaded: {e}")
def _loadImageProcessor():
"""Loads image processing libraries when needed"""
global imageProcessorLoaded
if not imageProcessorLoaded:
try:
global PIL, Image
from PIL import Image
imageProcessorLoaded = True
logger.info("Image processing libraries successfully loaded")
except ImportError as e:
logger.warning(f"Image processing libraries could not be loaded: {e}")
def extractTextContent(fileName: str, fileContent: bytes, mimeType: str) -> List[Dict[str, Any]]:
"""
Extracts text from text files.
Args:
fileName: Name of the file
fileContent: Binary data of the file
mimeType: MIME type of the file
Returns:
List of Text-Content objects with base64Encoded = False
"""
try:
# Keep original file extension
fileExtension = os.path.splitext(fileName)[1][1:] if os.path.splitext(fileName)[1] else "txt"
# Extract text content
textContent = fileContent.decode('utf-8')
return [{
"sequenceNr": 1,
"name": "1_text", # Simplified naming
"ext": fileExtension,
"contentType": "text/plain",
"data": textContent,
"base64Encoded": False,
"metadata": {
"isText": True
}
}]
except UnicodeDecodeError:
logger.warning(f"Could not decode text from file '{fileName}' as UTF-8, trying alternative encodings")
try:
# Try alternative encodings
for encoding in ['latin-1', 'cp1252', 'iso-8859-1']:
try:
textContent = fileContent.decode(encoding)
logger.info(f"Text successfully decoded with encoding {encoding}")
return [{
"sequenceNr": 1,
"name": "1_text", # Simplified naming
"ext": fileExtension,
"contentType": "text/plain",
"data": textContent,
"base64Encoded": False,
"metadata": {
"isText": True,
"encoding": encoding
}
}]
except UnicodeDecodeError:
continue
# Fallback to binary data if no encoding works
logger.warning(f"Could not decode text, using binary data")
return [{
"sequenceNr": 1,
"name": "1_binary", # Simplified naming
"ext": fileExtension,
"contentType": mimeType,
"data": base64.b64encode(fileContent).decode('utf-8'),
"base64Encoded": True,
"metadata": {
"isText": False
}
}]
except Exception as e:
logger.error(f"Error in alternative text decoding: {str(e)}")
# Return binary data as fallback
return [{
"sequenceNr": 1,
"name": "1_binary", # Simplified naming
"ext": fileExtension,
"contentType": mimeType,
"data": base64.b64encode(fileContent).decode('utf-8'),
"base64Encoded": True,
"metadata": {
"isText": False
}
}]
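The UTF-8-then-fallback pattern above repeats in the CSV and SVG extractors below; a refactoring sketch of a shared helper (not part of the original module):

def _decodeWithFallback(fileContent: bytes, encodings=("utf-8", "latin-1", "cp1252", "iso-8859-1")):
    # Tries each encoding in turn; returns (text, encoding) on success,
    # or (None, None) if every attempt fails.
    for encoding in encodings:
        try:
            return fileContent.decode(encoding), encoding
        except UnicodeDecodeError:
            continue
    return None, None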
def extractCsvContent(fileName: str, fileContent: bytes) -> List[Dict[str, Any]]:
"""
Extracts content from CSV files.
Args:
fileName: Name of the file
fileContent: Binary data of the file
Returns:
List of CSV-Content objects with base64Encoded = False
"""
try:
# Extract text content
csvContent = fileContent.decode('utf-8')
return [{
"sequenceNr": 1,
"name": "1_csv", # Simplified naming
"ext": "csv",
"contentType": "text/csv",
"data": csvContent,
"base64Encoded": False,
"metadata": {
"isText": True,
"format": "csv"
}
}]
except UnicodeDecodeError:
logger.warning(f"Could not decode CSV from file '{fileName}' as UTF-8, trying alternative encodings")
try:
# Try alternative encodings for CSV
for encoding in ['latin-1', 'cp1252', 'iso-8859-1']:
try:
csvContent = fileContent.decode(encoding)
logger.info(f"CSV successfully decoded with encoding {encoding}")
return [{
"sequenceNr": 1,
"name": "1_csv", # Simplified naming
"ext": "csv",
"contentType": "text/csv",
"data": csvContent,
"base64Encoded": False,
"metadata": {
"isText": True,
"encoding": encoding,
"format": "csv"
}
}]
except UnicodeDecodeError:
continue
# Fallback to binary data
return [{
"sequenceNr": 1,
"name": "1_binary", # Simplified naming
"ext": "csv",
"contentType": "text/csv",
"data": base64.b64encode(fileContent).decode('utf-8'),
"base64Encoded": True,
"metadata": {
"isText": False
}
}]
except Exception as e:
logger.error(f"Error in alternative CSV decoding: {str(e)}")
return [{
"sequenceNr": 1,
"name": "1_binary", # Simplified naming
"ext": "csv",
"contentType": "text/csv",
"data": base64.b64encode(fileContent).decode('utf-8'),
"base64Encoded": True,
"metadata": {
"isText": False
}
}]
def extractSvgContent(fileName: str, fileContent: bytes) -> List[Dict[str, Any]]:
"""
Extracts content from SVG files.
Args:
fileName: Name of the file
fileContent: Binary data of the file
Returns:
List of SVG-Content objects with dual text/image metadata
"""
contents = []
try:
# Extract SVG as text content (XML)
svgText = fileContent.decode('utf-8')
# Check if it's actually SVG by looking for the SVG tag
if "<svg" in svgText.lower():
# SVG is both text (XML) and an image
contents.append({
"sequenceNr": 1,
"name": "1_svg", # Simplified naming
"ext": "svg",
"contentType": "image/svg+xml",
"data": svgText,
"base64Encoded": False,
"metadata": {
"isText": True, # SVG is text-based (XML)
"format": "svg",
"isImage": True # But also represents an image
}
})
else:
# Doesn't appear to be a valid SVG file
logger.warning(f"File '{fileName}' has SVG extension but does not contain SVG markup")
contents.append({
"sequenceNr": 1,
"name": "1_text",
"ext": "svg",
"contentType": "text/plain",
"data": svgText,
"base64Encoded": False,
"metadata": {
"isText": True,
"format": "text"
}
})
except UnicodeDecodeError:
logger.warning(f"Could not decode SVG from file '{fileName}' as UTF-8, trying alternative encodings")
try:
# Try alternative encodings
for encoding in ['latin-1', 'cp1252', 'iso-8859-1']:
try:
svgText = fileContent.decode(encoding)
if "<svg" in svgText.lower():
logger.info(f"SVG successfully decoded with encoding {encoding}")
contents.append({
"sequenceNr": 1,
"name": "1_svg", # Simplified naming
"ext": "svg",
"contentType": "image/svg+xml",
"data": svgText,
"base64Encoded": False,
"metadata": {
"isText": True,
"format": "svg",
"isImage": True,
"encoding": encoding
}
})
break
except UnicodeDecodeError:
continue
# Fallback to binary data if no encoding works
if not contents:
logger.warning(f"Could not decode SVG text, using binary data")
contents.append({
"sequenceNr": 1,
"name": "1_binary", # Simplified naming
"ext": "svg",
"contentType": "image/svg+xml",
"data": base64.b64encode(fileContent).decode('utf-8'),
"base64Encoded": True,
"metadata": {
"isText": False,
"format": "svg",
"isImage": True
}
})
except Exception as e:
logger.error(f"Error in alternative SVG decoding: {str(e)}")
# Return binary data as fallback
contents.append({
"sequenceNr": 1,
"name": "1_binary", # Simplified naming
"ext": "svg",
"contentType": "image/svg+xml",
"data": base64.b64encode(fileContent).decode('utf-8'),
"base64Encoded": True,
"metadata": {
"isText": False,
"format": "svg",
"isImage": True
}
})
return contents
def extractImageContent(fileName: str, fileContent: bytes, mimeType: str) -> List[Dict[str, Any]]:
"""
Extracts content from image files and optionally generates metadata descriptions.
Args:
fileName: Name of the file
fileContent: Binary data of the file
mimeType: MIME type of the file
Returns:
List of Image-Content objects with base64Encoded = True
"""
# Extract file extension from MIME type or filename
fileExtension = mimeType.split('/')[-1]
if fileExtension == "jpeg":
fileExtension = "jpg"
# If possible, analyze image and extract metadata
imageMetadata = {
"isText": False,
"format": "image"
}
imageDescription = None
try:
_loadImageProcessor()
if imageProcessorLoaded and fileContent and len(fileContent) > 0:
with io.BytesIO(fileContent) as imgStream:
try:
img = Image.open(imgStream)
# Check if the image was actually loaded
img.verify()
# To safely continue working, reload
imgStream.seek(0)
img = Image.open(imgStream)
imageMetadata.update({
"format": img.format,
"mode": img.mode,
"width": img.width,
"height": img.height
})
# Extract EXIF data if available
if hasattr(img, '_getexif') and callable(img._getexif):
exif = img._getexif()
if exif:
exifData = {}
for tagId, value in exif.items():
exifData[f"tag_{tagId}"] = str(value)
imageMetadata["exif"] = exifData
# Generate image description
imageDescription = f"Image ({img.width}x{img.height}, {img.format}, {img.mode})"
except Exception as innerE:
logger.warning(f"Error processing image: {str(innerE)}")
imageMetadata["error"] = str(innerE)
imageDescription = f"Image (unable to process: {str(innerE)})"
except Exception as e:
logger.warning(f"Could not extract image metadata: {str(e)}")
imageMetadata["error"] = str(e)
# Convert binary image to base64
encoded_data = base64.b64encode(fileContent).decode('utf-8')
# Return image content
contents = [{
"sequenceNr": 1,
"name": "1_image", # Simplified naming
"ext": fileExtension,
"contentType": mimeType,
"data": encoded_data,
"base64Encoded": True,
"metadata": imageMetadata
}]
# If image description available, add as additional text content
if imageDescription:
contents.append({
"sequenceNr": 2,
"name": "2_text_image_info", # Simplified naming with label
"ext": "txt",
"contentType": "text/plain",
"data": imageDescription,
"base64Encoded": False,
"metadata": {
"isText": True,
"imageDescription": True
}
})
return contents
def extractPdfContent(fileName: str, fileContent: bytes) -> List[Dict[str, Any]]:
"""
Extracts text and images from PDF files.
Args:
fileName: Name of the file
fileContent: Binary data of the file
Returns:
List of PDF-Content objects (text and images) with appropriate base64Encoded flags
"""
contents = []
extractedContentFound = False
try:
# Load PDF extraction libraries
_loadPdfExtractor()
if not pdfExtractorLoaded:
logger.warning("PDF extraction not possible: Libraries not available")
# Add original file as binary content
contents.append({
"sequenceNr": 1,
"name": "1_pdf", # Simplified naming
"ext": "pdf",
"contentType": "application/pdf",
"data": base64.b64encode(fileContent).decode('utf-8'),
"base64Encoded": True,
"metadata": {
"isText": False,
"format": "pdf"
}
})
return contents
# Extract text with PyPDF2
extractedText = ""
pdfMetadata = {}
with io.BytesIO(fileContent) as pdfStream:
pdfReader = PyPDF2.PdfReader(pdfStream)
# Extract metadata
pdfInfo = pdfReader.metadata or {}
for key, value in pdfInfo.items():
if key.startswith('/'):
pdfMetadata[key[1:]] = value
else:
pdfMetadata[key] = value
# Extract text from all pages
for pageNum in range(len(pdfReader.pages)):
page = pdfReader.pages[pageNum]
pageText = page.extract_text()
if pageText:
extractedText += f"--- Page {pageNum + 1} ---\n{pageText}\n\n"
# If text was found, add as separate content
if extractedText.strip():
extractedContentFound = True
contents.append({
"sequenceNr": len(contents) + 1,
"name": f"{len(contents) + 1}_text", # Simplified naming
"ext": "txt",
"contentType": "text/plain",
"data": extractedText,
"base64Encoded": False,
"metadata": {
"isText": True,
"source": "pdf",
"pages": len(pdfReader.pages),
"pdfMetadata": pdfMetadata
}
})
# Extract images with PyMuPDF (fitz)
try:
with io.BytesIO(fileContent) as pdfStream:
doc = fitz.open(stream=pdfStream, filetype="pdf")
imageCount = 0
for pageNum in range(len(doc)):
page = doc[pageNum]
imageList = page.get_images(full=True)
for imgIndex, imgInfo in enumerate(imageList):
try:
imageCount += 1
xref = imgInfo[0]
baseImage = doc.extract_image(xref)
imageBytes = baseImage["image"]
imageExt = baseImage["ext"]
# Add image as content - encode as base64
extractedContentFound = True
contents.append({
"sequenceNr": len(contents) + 1,
"name": f"{len(contents) + 1}_image_page{pageNum+1}_{imgIndex+1}", # Simplified naming with label
"ext": imageExt,
"contentType": f"image/{imageExt}",
"data": base64.b64encode(imageBytes).decode('utf-8'),
"base64Encoded": True,
"metadata": {
"isText": False,
"source": "pdf",
"page": pageNum + 1,
"index": imgIndex
}
})
except Exception as imgE:
logger.warning(f"Error extracting image {imgIndex} on page {pageNum + 1}: {str(imgE)}")
# Close document
doc.close()
except Exception as imgExtractE:
logger.warning(f"Error extracting images from PDF: {str(imgExtractE)}")
except Exception as e:
logger.error(f"Error in PDF extraction: {str(e)}")
# If no content was extracted, add the original PDF
if not extractedContentFound:
contents.append({
"sequenceNr": 1,
"name": "1_pdf", # Simplified naming
"ext": "pdf",
"contentType": "application/pdf",
"data": base64.b64encode(fileContent).decode('utf-8'),
"base64Encoded": True,
"metadata": {
"isText": False,
"format": "pdf"
}
})
return contents
def extractWordContent(fileName: str, fileContent: bytes, mimeType: str) -> List[Dict[str, Any]]:
"""
Extracts text and images from Word documents.
Args:
fileName: Name of the file
fileContent: Binary data of the file
mimeType: MIME type of the file
Returns:
List of Word-Content objects (text and possibly images) with appropriate base64Encoded flags
"""
contents = []
extractedContentFound = False
# Determine file extension
fileExtension = "docx" if mimeType == "application/vnd.openxmlformats-officedocument.wordprocessingml.document" else "doc"
try:
# Load Office extraction libraries
_loadOfficeExtractor()
if not officeExtractorLoaded:
logger.warning("Word extraction not possible: Libraries not available")
# Add original file as binary content
contents.append({
"sequenceNr": 1,
"name": "1_word", # Simplified naming
"ext": fileExtension,
"contentType": mimeType,
"data": base64.b64encode(fileContent).decode('utf-8'),
"base64Encoded": True,
"metadata": {
"isText": False,
"format": "word"
}
})
return contents
# Only supports DOCX (newer format)
if mimeType == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
with io.BytesIO(fileContent) as docxStream:
doc = docx.Document(docxStream)
# Extract text
fullText = []
for para in doc.paragraphs:
fullText.append(para.text)
# Extract tables
for table in doc.tables:
for row in table.rows:
rowText = []
for cell in row.cells:
rowText.append(cell.text)
fullText.append(" | ".join(rowText))
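# e.g. a table row with cells ("Name", "Qty", "42") is flattened to the text line "Name | Qty | 42"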
extractedText = "\n\n".join(fullText)
# Add extracted text as content
if extractedText.strip():
extractedContentFound = True
contents.append({
"sequenceNr": 1,
"name": "1_text", # Simplified naming
"ext": "txt",
"contentType": "text/plain",
"data": extractedText,
"base64Encoded": False,
"metadata": {
"isText": True,
"source": "docx",
"paragraphCount": len(doc.paragraphs),
"tableCount": len(doc.tables)
}
})
else:
logger.warning(f"Extraction from old Word format (DOC) not supported")
except Exception as e:
logger.error(f"Error in Word extraction: {str(e)}")
# If no content was extracted, add the original document
if not extractedContentFound:
contents.append({
"sequenceNr": 1,
"name": "1_word", # Simplified naming
"ext": fileExtension,
"contentType": mimeType,
"data": base64.b64encode(fileContent).decode('utf-8'),
"base64Encoded": True,
"metadata": {
"isText": False,
"format": "word"
}
})
return contents
def extractExcelContent(fileName: str, fileContent: bytes, mimeType: str) -> List[Dict[str, Any]]:
"""
Extracts table data from Excel files.
Args:
fileName: Name of the file
fileContent: Binary data of the file
mimeType: MIME type of the file
Returns:
List of Excel-Content objects with appropriate base64Encoded flags
"""
contents = []
extractedContentFound = False
# Determine file extension
fileExtension = "xlsx" if mimeType == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" else "xls"
try:
# Load Office extraction libraries
_loadOfficeExtractor()
if not officeExtractorLoaded:
logger.warning("Excel extraction not possible: Libraries not available")
# Add original file as binary content
contents.append({
"sequenceNr": 1,
"name": "1_excel", # Simplified naming
"ext": fileExtension,
"contentType": mimeType,
"data": base64.b64encode(fileContent).decode('utf-8'),
"base64Encoded": True,
"metadata": {
"isText": False,
"format": "excel"
}
})
return contents
# Only supports XLSX (newer format)
if mimeType == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
with io.BytesIO(fileContent) as xlsxStream:
workbook = openpyxl.load_workbook(xlsxStream, data_only=True)
# Extract each worksheet as separate CSV content
for sheetIndex, sheetName in enumerate(workbook.sheetnames):
sheet = workbook[sheetName]
# Format data as CSV
csvRows = []
for row in sheet.iter_rows():
csvRow = []
for cell in row:
value = cell.value
if value is None:
csvRow.append("")
else:
csvRow.append(str(value).replace('"', '""'))
csvRows.append(','.join(f'"{cell}"' for cell in csvRow))
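# Quote-doubling example: a cell value of He said "hi" is emitted as "He said ""hi""", so embedded quotes survive the CSV round-trip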
csvContent = "\n".join(csvRows)
# Add as CSV content
if csvContent.strip():
extractedContentFound = True
sheetSafeName = sheetName.replace(" ", "_").replace("/", "_").replace("\\", "_")
contents.append({
"sequenceNr": len(contents) + 1,
"name": f"{len(contents) + 1}_csv_{sheetSafeName}", # Simplified naming with sheet label
"ext": "csv",
"contentType": "text/csv",
"data": csvContent,
"base64Encoded": False,
"metadata": {
"isText": True,
"source": "xlsx",
"sheet": sheetName,
"format": "csv"
}
})
else:
logger.warning(f"Extraction from old Excel format (XLS) not supported")
except Exception as e:
logger.error(f"Error in Excel extraction: {str(e)}")
# If no content was extracted, add the original document
if not extractedContentFound:
contents.append({
"sequenceNr": 1,
"name": "1_excel", # Simplified naming
"ext": fileExtension,
"contentType": mimeType,
"data": base64.b64encode(fileContent).decode('utf-8'),
"base64Encoded": True,
"metadata": {
"isText": False,
"format": "excel"
}
})
return contents
def extractPowerpointContent(fileName: str, fileContent: bytes, mimeType: str) -> List[Dict[str, Any]]:
"""
Extracts content from PowerPoint presentations.
Args:
fileName: Name of the file
fileContent: Binary data of the file
mimeType: MIME type of the file
Returns:
List of PowerPoint-Content objects with base64Encoded = True
"""
# For PowerPoint, we currently only return the original binary file
# A complete extraction would require more specialized libraries
fileExtension = "pptx" if mimeType == "application/vnd.openxmlformats-officedocument.presentationml.presentation" else "ppt"
return [{
"sequenceNr": 1,
"name": "1_powerpoint", # Simplified naming
"ext": fileExtension,
"contentType": mimeType,
"data": base64.b64encode(fileContent).decode('utf-8'),
"base64Encoded": True,
"metadata": {
"isText": False,
"format": "powerpoint"
}
}]
def extractBinaryContent(fileName: str, fileContent: bytes, mimeType: str) -> List[Dict[str, Any]]:
"""
Fallback for binary files where no specific extraction is possible.
Args:
fileName: Name of the file
fileContent: Binary data of the file
mimeType: MIME type of the file
Returns:
List with a binary Content object with base64Encoded = True
"""
fileExtension = os.path.splitext(fileName)[1][1:] if os.path.splitext(fileName)[1] else "bin"
return [{
"sequenceNr": 1,
"name": "1_binary", # Simplified naming
"ext": fileExtension,
"contentType": mimeType,
"data": base64.b64encode(fileContent).decode('utf-8'),
"base64Encoded": True,
"metadata": {
"isText": False,
"format": "binary"
}
}]
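# A minimal dispatch sketch (assumed wiring; the actual caller lives elsewhere in
# this module, and extractPdfContent is an assumed name for the PDF function above):
#
# EXTRACTORS = {
#     "application/pdf": extractPdfContent,
#     "application/vnd.openxmlformats-officedocument.wordprocessingml.document": extractWordContent,
#     "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": extractExcelContent,
#     "application/vnd.openxmlformats-officedocument.presentationml.presentation": extractPowerpointContent,
# }
# contents = EXTRACTORS.get(mimeType, extractBinaryContent)(fileName, fileContent, mimeType)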

View file

@@ -1,123 +0,0 @@
from pydantic import BaseModel, Field
from typing import List, Dict, Any, Optional
# Define the model for attribute definitions
class AttributeDefinition(BaseModel):
name: str
label: str
type: str
required: bool = False
placeholder: Optional[str] = None
defaultValue: Optional[Any] = None
options: Optional[List[Dict[str, Any]]] = None
editable: bool = True
visible: bool = True
order: int = 0
validation: Optional[Dict[str, Any]] = None
helpText: Optional[str] = None
# Helper mappings for type conversion
typeMappings = {
"int": "number",
"str": "string",
"float": "number",
"bool": "boolean",
"List[int]": "array",
"List[str]": "array",
"Dict[str, Any]": "object",
"Optional[str]": "string",
"Optional[int]": "number",
"Optional[Dict[str, Any]]": "object"
}
# Special field types based on naming conventions
specialFieldTypes = {
"content": "textarea",
"description": "textarea",
"instructions": "textarea",
"password": "password",
"email": "email",
"workspaceId": "select",
"agentId": "select",
"type": "select"
}
# Function to convert a Pydantic model into attribute definitions
def getModelAttributes(modelClass, userLanguage="de"):
"""
Converts a Pydantic model into a list of AttributeDefinition objects
"""
attributes = []
# Go through all fields in the model
for i, (fieldName, field) in enumerate(modelClass.__fields__.items()):
# Skip internal fields
if fieldName.startswith('_') or fieldName in ["label", "fieldLabels"]:
continue
# Determine the field type
fieldType = typeMappings.get(str(field.type_), "string")
# Check for special field types
if fieldName in specialFieldTypes:
fieldType = specialFieldTypes[fieldName]
# Get the label (if available)
fieldLabel = fieldName.replace('_', ' ').capitalize()
if hasattr(modelClass, 'fieldLabels') and fieldName in modelClass.fieldLabels:
labelObj = modelClass.fieldLabels[fieldName]
fieldLabel = labelObj.getLabel(userLanguage)
# Determine default values and required status
required = field.required
defaultValue = field.default if not field.required else None
# Check for validation rules
validation = None
if field.validators:
validation = {"hasValidators": True}
# Placeholder text
placeholder = f"Please enter {fieldLabel}"
# Special options for Select fields
options = None
if fieldType == "select":
if fieldName == "type" and modelClass.__name__ == "Agent":
options = [
{"value": "Analysis", "label": "Analysis"},
{"value": "Transformation", "label": "Transformation"},
{"value": "Generation", "label": "Generation"},
{"value": "Classification", "label": "Classification"},
{"value": "Custom", "label": "Custom"}
]
# Extract description from Field object
description = None
# Try to get description from various possible sources
if hasattr(field, 'field_info') and hasattr(field.field_info, 'description'):
description = field.field_info.description
elif hasattr(field, 'description'):
description = field.description
elif hasattr(field, 'schema') and hasattr(field.schema, 'description'):
description = field.schema.description
# Create attribute definition
attrDef = AttributeDefinition(
name=fieldName,
label=fieldLabel,
type=fieldType,
required=required,
placeholder=placeholder,
defaultValue=defaultValue,
options=options,
editable=fieldName not in ["id", "mandateId", "userId", "createdAt", "uploadDate"],
visible=fieldName not in ["hashedPassword", "mandateId", "userId"],
order=i,
validation=validation,
helpText=description or "" # Set empty string as default value if no description found
)
attributes.append(attrDef)
return attributes
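# Hypothetical usage sketch (this helper targets the Pydantic v1 API via
# __fields__ / type_ / field_info; the Agent model below is illustrative only):
#
# class Agent(BaseModel):
#     name: str
#     type: str = "Custom"
#
# attrs = getModelAttributes(Agent, userLanguage="en")
# "type" is rendered as a select field via specialFieldTypes, and the
# Agent-specific options list above is attached to it.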

View file

@@ -1,42 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>Email Preview: Attached: documentProcessor.py and defAttributes.py</title>
<style>
body { font-family: Arial, sans-serif; margin: 0; padding: 0; background-color: #f5f5f5; }
.email-container { max-width: 600px; margin: 20px auto; background-color: white; border: 1px solid #ddd; border-radius: 5px; overflow: hidden; }
.email-header { background-color: #f0f0f0; padding: 15px; border-bottom: 1px solid #ddd; }
.email-content { padding: 20px; }
.email-footer { background-color: #f0f0f0; padding: 15px; border-top: 1px solid #ddd; font-size: 12px; color: #666; }
.field { margin-bottom: 10px; }
.field-label { font-weight: bold; color: #555; }
.email-body { margin-top: 20px; padding-top: 20px; border-top: 1px solid #eee; }
</style>
</head>
<body>
<div class="email-container">
<div class="email-header">
<h2>Email Template Preview</h2>
</div>
<div class="email-content">
<div class="field">
<div class="field-label">To:</div>
<div>recipient@example.com</div>
</div>
<div class="field">
<div class="field-label">Subject:</div>
<div>Attached: documentProcessor.py and defAttributes.py</div>
</div>
<div class="email-body">
<html><body><p>Sehr geehrte Damen und Herren,</p><p>anbei finden Sie die angeforderten Dokumente <strong>documentProcessor.py</strong> und <strong>defAttributes.py</strong>. Bitte zögern Sie nicht, sich bei Fragen oder weiteren Anliegen an uns zu wenden.</p><p>Mit freundlichen Grüßen,<br>Ihr Team</p></body></html>
</div>
</div>
<div class="email-footer">
<p>This is a preview of the email template. The actual email may appear differently in various email clients.</p>
</div>
</div>
</body>
</html>

View file

@@ -1,6 +0,0 @@
{
"recipient": "recipient@example.com",
"subject": "Attached: documentProcessor.py and defAttributes.py",
"plainBody": "Sehr geehrte Damen und Herren,\n\nanbei finden Sie die angeforderten Dokumente 'documentProcessor.py' und 'defAttributes.py'. Bitte z\u00f6gern Sie nicht, sich bei Fragen oder weiteren Anliegen an uns zu wenden.\n\nMit freundlichen Gr\u00fc\u00dfen,\n\nIhr Team",
"htmlBody": "<html><body><p>Sehr geehrte Damen und Herren,</p><p>anbei finden Sie die angeforderten Dokumente <strong>documentProcessor.py</strong> und <strong>defAttributes.py</strong>. Bitte z\u00f6gern Sie nicht, sich bei Fragen oder weiteren Anliegen an uns zu wenden.</p><p>Mit freundlichen Gr\u00fc\u00dfen,<br>Ihr Team</p></body></html>"
}

View file

@@ -1,42 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>Email Preview: Angehängt: documentProcessor.py und defAttributes.py</title>
<style>
body { font-family: Arial, sans-serif; margin: 0; padding: 0; background-color: #f5f5f5; }
.email-container { max-width: 600px; margin: 20px auto; background-color: white; border: 1px solid #ddd; border-radius: 5px; overflow: hidden; }
.email-header { background-color: #f0f0f0; padding: 15px; border-bottom: 1px solid #ddd; }
.email-content { padding: 20px; }
.email-footer { background-color: #f0f0f0; padding: 15px; border-top: 1px solid #ddd; font-size: 12px; color: #666; }
.field { margin-bottom: 10px; }
.field-label { font-weight: bold; color: #555; }
.email-body { margin-top: 20px; padding-top: 20px; border-top: 1px solid #eee; }
</style>
</head>
<body>
<div class="email-container">
<div class="email-header">
<h2>Email Template Preview</h2>
</div>
<div class="email-content">
<div class="field">
<div class="field-label">To:</div>
<div>team@example.com</div>
</div>
<div class="field">
<div class="field-label">Subject:</div>
<div>Angehängt: documentProcessor.py und defAttributes.py</div>
</div>
<div class="email-body">
<!DOCTYPE html><html><head><meta charset="UTF-8"><style>body { background-color: #f4f4f4; font-family: Arial, sans-serif; } .email-container { background-color: #ffffff; border: 1px solid #dddddd; border-radius: 5px; margin: 20px auto; padding: 20px; max-width: 600px; } .email-header { background-color: #eeeeee; padding: 10px; text-align: center; font-weight: bold; } .email-content { margin: 20px 0; } .email-footer { font-size: 12px; color: #888888; text-align: center; }</style></head><body><div class="email-container"><div class="email-header">E-Mail-Vorschau</div><div class="email-content"><p>Liebe Teammitglieder,</p><p>im Anhang finden Sie die Dateien <strong>documentProcessor.py</strong> und <strong>defAttributes.py</strong>. Bitte überprüfen Sie diese und geben Sie mir Ihr Feedback.</p><p>Mit freundlichen Grüßen,<br>[Ihr Name]</p></div><div class="email-footer">Dies ist eine Vorschau der E-Mail und kann in verschiedenen E-Mail-Clients unterschiedlich angezeigt werden.</div></div></body></html>
</div>
</div>
<div class="email-footer">
<p>This is a preview of the email template. The actual email may appear differently in various email clients.</p>
</div>
</div>
</body>
</html>

View file

@@ -1,6 +0,0 @@
{
"recipient": "team@example.com",
"subject": "Angeh\u00e4ngt: documentProcessor.py und defAttributes.py",
"plainBody": "Liebe Teammitglieder,\n\nim Anhang finden Sie die Dateien documentProcessor.py und defAttributes.py. Bitte \u00fcberpr\u00fcfen Sie diese und geben Sie mir Ihr Feedback.\n\nMit freundlichen Gr\u00fc\u00dfen,\n[Ihr Name]",
"htmlBody": "<!DOCTYPE html><html><head><meta charset=\"UTF-8\"><style>body { background-color: #f4f4f4; font-family: Arial, sans-serif; } .email-container { background-color: #ffffff; border: 1px solid #dddddd; border-radius: 5px; margin: 20px auto; padding: 20px; max-width: 600px; } .email-header { background-color: #eeeeee; padding: 10px; text-align: center; font-weight: bold; } .email-content { margin: 20px 0; } .email-footer { font-size: 12px; color: #888888; text-align: center; }</style></head><body><div class=\"email-container\"><div class=\"email-header\">E-Mail-Vorschau</div><div class=\"email-content\"><p>Liebe Teammitglieder,</p><p>im Anhang finden Sie die Dateien <strong>documentProcessor.py</strong> und <strong>defAttributes.py</strong>. Bitte \u00fcberpr\u00fcfen Sie diese und geben Sie mir Ihr Feedback.</p><p>Mit freundlichen Gr\u00fc\u00dfen,<br>[Ihr Name]</p></div><div class=\"email-footer\">Dies ist eine Vorschau der E-Mail und kann in verschiedenen E-Mail-Clients unterschiedlich angezeigt werden.</div></div></body></html>"
}

View file

@@ -1,42 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>Email Preview: Neuer Termin für unser Meeting</title>
<style>
body { font-family: Arial, sans-serif; margin: 0; padding: 0; background-color: #f5f5f5; }
.email-container { max-width: 600px; margin: 20px auto; background-color: white; border: 1px solid #ddd; border-radius: 5px; overflow: hidden; }
.email-header { background-color: #f0f0f0; padding: 15px; border-bottom: 1px solid #ddd; }
.email-content { padding: 20px; }
.email-footer { background-color: #f0f0f0; padding: 15px; border-top: 1px solid #ddd; font-size: 12px; color: #666; }
.field { margin-bottom: 10px; }
.field-label { font-weight: bold; color: #555; }
.email-body { margin-top: 20px; padding-top: 20px; border-top: 1px solid #eee; }
</style>
</head>
<body>
<div class="email-container">
<div class="email-header">
<h2>Email Template Preview</h2>
</div>
<div class="email-content">
<div class="field">
<div class="field-label">To:</div>
<div>peter.muster@domain.com</div>
</div>
<div class="field">
<div class="field-label">Subject:</div>
<div>Neuer Termin für unser Meeting</div>
</div>
<div class="email-body">
<p>Sehr geehrter Herr Muster,</p><p>ich hoffe, es geht Ihnen gut. Ich schreibe Ihnen, um unser geplantes Meeting von 10 Uhr auf Freitag zu verschieben. Bitte lassen Sie mich wissen, ob dieser neue Termin für Sie passt.</p><p>Vielen Dank für Ihr Verständnis.</p><p>Mit freundlichen Grüßen,<br>[Ihr Name]</p>
</div>
</div>
<div class="email-footer">
<p>This is a preview of the email template. The actual email may appear differently in various email clients.</p>
</div>
</div>
</body>
</html>

View file

@@ -1,295 +0,0 @@
Comprehensive Workflow and Team Roles in Product Development
============================================================
# Introduction to Comprehensive Workflow and Team Roles in Product Development
## Purpose and Scope
This guide, "Comprehensive Workflow and Team Roles in Product Development," is meticulously crafted to serve as an essential resource for professionals involved in the intricate process of product development. It aims to provide a detailed exploration of the workflows and team roles that are pivotal in transforming innovative ideas into successful products. By delving into the structured processes and collaborative dynamics that drive product development, this guide offers valuable insights into optimizing efficiency and effectiveness within technical teams.
## Context and Background
In today's fast-paced technological landscape, the development of a product from concept to market-ready status involves a complex interplay of various teams and tools. Each team, whether it be product management, engineering, quality assurance (QA), or operations, plays a critical role in ensuring that the product not only meets market demands but also adheres to high standards of quality and functionality. The integration of sophisticated tools for ticketing, roadmaps, and management dashboards further enhances the ability of these teams to coordinate and execute their tasks with precision.
## Document Outline
Readers of this guide will embark on a comprehensive journey through the product development lifecycle. The document is structured to provide:
1. **An Overview of Product Development Workflow**: A detailed examination of the stages involved in product development, from initial concept through to deployment and maintenance.
2. **Team Roles and Responsibilities**: Insight into the specific roles and responsibilities of the product, engineering, QA, and operations teams, highlighting how each contributes to the overall success of the product.
3. **Tool Integration**: An analysis of the tools that facilitate seamless workflow management, including ticketing systems, roadmap planning tools, and management dashboards, and how they integrate into the daily operations of development teams.
4. **Best Practices and Case Studies**: Practical examples and case studies that illustrate successful implementations of workflows and team collaborations in real-world scenarios.
## Tone and Audience
This guide is tailored for a technical audience, including product managers, engineers, QA specialists, and operations professionals. The tone is formal and professional, designed to engage readers who are seeking to deepen their understanding of product development processes and enhance their team's performance. By providing a structured and insightful examination of workflows and roles, this guide aims to empower technical teams to achieve greater synergy and success in their product development endeavors.
Introduction
------------
# Introduction
In the rapidly evolving landscape of product development, understanding the intricate workflow and the pivotal roles played by various teams is essential for achieving success. This guide, "Comprehensive Workflow and Team Roles in Product Development," aims to provide a detailed exploration of the processes and team dynamics that drive product innovation from conception to deployment. By delving into the workflow stages and the integration of essential tools, this guide serves as an invaluable resource for technical professionals seeking to optimize their product development strategies.
## Overview of the Product Development Workflow
The product development workflow is a structured sequence of stages that transforms initial ideas into market-ready products. This workflow is designed to ensure that each phase of development is meticulously planned and executed, minimizing risks and maximizing efficiency. The workflow typically begins with the **Input** stage, where ideas are sourced from customers, sales teams, and internal brainstorming sessions. These inputs serve as the foundation for the subsequent stages, guiding the product team in aligning development efforts with market needs and business objectives.
The workflow progresses through several critical stages, each involving specific teams and processes:
1. **Product Team:**
- **Discover:**
- **Collect:** The product team gathers a diverse array of ideas and inputs, ensuring a comprehensive understanding of potential opportunities.
- **Qualify:** Ideas are analyzed and matched against business goals, market trends, and feasibility, allowing the team to prioritize the most promising concepts.
2. **Engineering Team:**
- **Design and Develop:** The engineering team translates qualified ideas into technical specifications and begins the development process, focusing on creating robust and scalable solutions.
3. **Q&A Team:**
- **Test and Validate:** Quality assurance plays a crucial role in ensuring that the product meets the highest standards of quality and functionality. Rigorous testing and validation processes are employed to identify and rectify any issues before release.
4. **Operations:**
- **Deploy and Monitor:** The operations team is responsible for deploying the product to the market and continuously monitoring its performance. This stage involves the integration of feedback loops to facilitate ongoing improvements and adaptations.
## Importance of Team Roles and Tool Integration
The success of the product development process hinges on the effective collaboration and coordination of various teams, each contributing their unique expertise and perspectives. The **Product Team** is tasked with strategic planning and market alignment, while the **Engineering Team** focuses on technical execution. The **QA Team** ensures quality assurance, and the **Operations Team** manages deployment and performance monitoring.
In addition to clearly defined roles, the integration of specialized tools is crucial for streamlining workflows and enhancing productivity. Tools for **ticketing** facilitate efficient task management and communication across teams, ensuring that issues are promptly addressed and resolved. **Roadmaps** provide a visual representation of the product development timeline, helping teams stay aligned with project goals and deadlines. **Management dashboards** offer real-time insights into project progress and performance metrics, enabling informed decision-making and strategic adjustments.
By leveraging these tools, teams can enhance collaboration, improve transparency, and maintain a cohesive approach to product development. This integration not only optimizes the workflow but also empowers teams to deliver high-quality products that meet customer expectations and drive business success.
In conclusion, understanding the comprehensive workflow and the critical roles of each team in product development is essential for navigating the complexities of modern product innovation. This guide will delve deeper into each aspect, providing technical professionals with the knowledge and tools needed to excel in their roles and contribute to successful product outcomes.
Teams Involved
--------------
# Teams Involved
In the complex landscape of product development, multiple teams collaborate to ensure the successful delivery of a product from conception to deployment. Each team plays a critical role in the workflow, contributing their expertise to different stages of the process. This section provides a detailed overview of the roles and responsibilities of the key teams involved: the Product Team, the Engineering Team, the QA Team, and the Operations Team.
## Product Team
The Product Team is at the forefront of the product development process, responsible for setting the vision and direction of the product. Their roles include:
- **Discover:**
- **Collect:** The Product Team gathers ideas and inputs from various sources, including customers, sales teams, and internal stakeholders. This stage is crucial for understanding market needs and identifying potential opportunities.
- **Qualify:** Once ideas are collected, the team analyzes them to ensure alignment with business objectives and feasibility. This involves evaluating the potential impact and prioritizing ideas based on strategic goals.
- **Define:**
- **Roadmap Creation:** The Product Team develops a product roadmap that outlines the strategic direction and key milestones. This roadmap serves as a guiding document for all teams involved in the development process.
- **Requirements Specification:** Detailed product requirements are documented, providing clear guidance for the Engineering Team. This includes user stories, acceptance criteria, and any necessary technical specifications.
## Engineering Team
The Engineering Team is responsible for transforming the product vision into a tangible, functional product. Their roles encompass:
- **Design and Development:**
- **Architecture Design:** Engineers design the system architecture, ensuring scalability, reliability, and performance. This involves selecting appropriate technologies and frameworks.
- **Implementation:** The team writes code and develops features according to the specifications provided by the Product Team. They ensure that the product is built to meet the defined requirements.
- **Integration:**
- **Tool Integration:** Engineers integrate various tools for ticketing, roadmaps, and management dashboards to streamline the development process and enhance collaboration across teams.
- **Continuous Integration/Continuous Deployment (CI/CD):** The team implements CI/CD pipelines to automate testing and deployment, ensuring rapid and reliable delivery of new features and updates.
## QA Team
The QA (Quality Assurance) Team plays a pivotal role in maintaining the quality and reliability of the product. Their responsibilities include:
- **Testing:**
- **Test Planning:** The QA Team develops comprehensive test plans that cover all aspects of the product, including functionality, performance, and security.
- **Execution:** They conduct various types of testing, such as unit testing, integration testing, and user acceptance testing, to identify and resolve defects before the product reaches the end-users.
- **Quality Control:**
- **Defect Management:** The team tracks and manages defects using ticketing systems, ensuring that issues are addressed promptly and effectively.
- **Continuous Improvement:** QA professionals analyze testing outcomes to identify areas for improvement, contributing to the enhancement of product quality over time.
## Operations Team
The Operations Team ensures that the product is deployed smoothly and operates efficiently in the production environment. Their roles include:
- **Deployment:**
- **Release Management:** The Operations Team manages the release process, coordinating with other teams to ensure that deployments are executed without disruptions.
- **Environment Configuration:** They configure and maintain the production environment, ensuring that it meets the necessary requirements for optimal performance.
- **Monitoring and Support:**
- **System Monitoring:** The team implements monitoring tools to track system performance and detect issues in real-time. This proactive approach helps in maintaining high availability and reliability.
- **Incident Response:** In the event of system failures or performance issues, the Operations Team is responsible for incident management and resolution, minimizing downtime and impact on users.
Each team plays a vital role in the product development lifecycle, and their collaboration is essential for delivering high-quality products that meet customer needs and business objectives. By integrating tools and processes effectively, these teams ensure a seamless workflow from ideation to deployment.
Workflow Stages
---------------
# Workflow Stages
In the product development lifecycle, understanding the workflow stages is crucial for ensuring seamless collaboration among various teams and achieving successful product outcomes. This section provides a detailed overview of the workflow stages, focusing on the roles of the product, engineering, QA, and operations teams, and the integration of tools for ticketing, roadmaps, and management dashboards.
## Input
The initial stage of the workflow involves gathering inputs from various sources to fuel the product development process. These inputs are critical for identifying potential opportunities and challenges.
- **Sources:**
- **Customers:** Feedback and suggestions from end-users provide valuable insights into product improvements and new features.
- **Sales:** Information from the sales team highlights market demands and competitive landscape, guiding product prioritization.
- **Internal Ideas:** Contributions from team members across the organization can lead to innovative solutions and enhancements.
## Product Team Processes
The product team plays a pivotal role in transforming raw inputs into actionable plans. This stage is divided into several key processes:
- **Discover:**
- **Collect:** The product team gathers ideas and inputs from various sources, ensuring a comprehensive understanding of user needs and market trends.
- **Qualify:** Ideas are analyzed and matched against business objectives and feasibility to determine their potential impact and alignment with the company's vision.
- **Define:**
- **Prioritize:** The team prioritizes ideas based on strategic importance, resource availability, and potential ROI.
- **Plan:** Detailed plans are developed, outlining the scope, objectives, and timelines for each initiative.
- **Shape:**
- **Design:** The product team collaborates with designers to create wireframes and prototypes, ensuring the proposed solutions are user-friendly and effective.
- **Specification:** Detailed specifications are documented, providing clear guidance for the engineering team.
## Engineering Team Processes
Once the product team has defined and shaped the product, the engineering team takes over to assess and develop the technical aspects.
- **Assessment:**
- **Feasibility Study:** Engineers evaluate the technical feasibility of the proposed solutions, identifying potential challenges and resource requirements.
- **Technical Planning:** A detailed technical plan is created, outlining the architecture, technologies, and tools to be used.
- **Development:**
- **Implementation:** The engineering team begins coding and building the product, adhering to the specifications and timelines.
- **Integration:** New features and updates are integrated into the existing system, ensuring compatibility and performance.
## QA Team Processes
Quality assurance is a critical stage in the workflow, ensuring that the product meets the highest standards before deployment.
- **Testing:**
- **Unit Testing:** Individual components are tested to ensure they function correctly in isolation.
- **Integration Testing:** The product is tested as a whole to verify that all components work together seamlessly.
- **Validation:**
- **User Acceptance Testing (UAT):** The product is tested in real-world scenarios to validate its functionality and usability.
- **Bug Fixing:** Any issues identified during testing are addressed and resolved promptly.
## Operations Team Processes
The final stage involves deploying the product and monitoring its performance in the live environment.
- **Deployment:**
- **Release Management:** The operations team manages the release process, ensuring a smooth transition from development to production.
- **Configuration:** The product is configured for optimal performance and security in the live environment.
- **Monitoring:**
- **Performance Monitoring:** Continuous monitoring of the product's performance helps identify and address any issues proactively.
- **Feedback Loop:** Feedback from users and performance data are collected to inform future improvements and updates.
In conclusion, each stage of the workflow is integral to the success of product development. By clearly defining roles and processes, and leveraging tools for ticketing, roadmaps, and management dashboards, teams can collaborate effectively to deliver high-quality products that meet user needs and business objectives.
Tool Integration
----------------
# Tool Integration
In the realm of product development, the integration of various tools is crucial to streamline processes, enhance collaboration, and ensure efficient workflow management. This section delves into the essential tools used in product development, focusing on ticketing systems, roadmap tools, and management dashboards. Each tool plays a pivotal role in facilitating communication and coordination among the product, engineering, QA, and operations teams.
## Ticketing Systems
Ticketing systems are the backbone of issue tracking and task management within product development. These systems enable teams to log, prioritize, and track the progress of tasks and issues throughout the development lifecycle.
### Key Features:
- **Issue Tracking:** Allows teams to report bugs, feature requests, and other tasks, ensuring nothing falls through the cracks.
- **Prioritization:** Facilitates the organization of tasks based on urgency and importance, helping teams focus on high-impact work.
- **Collaboration:** Provides a platform for team members to discuss issues, share updates, and collaborate on solutions.
- **Integration:** Often integrates with other tools such as version control systems and CI/CD pipelines to provide a seamless workflow.
### Examples:
- **Jira:** Widely used for its robust features and flexibility, Jira supports agile methodologies and offers extensive customization options.
- **Zendesk:** Known for its customer support capabilities, Zendesk also provides ticketing solutions that integrate customer feedback directly into the development process.
## Roadmap Tools
Roadmap tools are essential for strategic planning and communication of product vision and progress. They help align the product team's efforts with business goals and provide a clear timeline for stakeholders.
### Key Features:
- **Visualization:** Offers visual representations of product timelines, milestones, and dependencies, making it easier to communicate plans.
- **Collaboration:** Enables cross-functional teams to contribute to and update the roadmap, ensuring alignment across departments.
- **Flexibility:** Allows for adjustments as priorities shift, ensuring the roadmap remains relevant and actionable.
### Examples:
- **Aha!:** A comprehensive tool that supports product strategy, planning, and roadmapping, with features for capturing ideas and aligning them with business objectives.
- **ProductPlan:** Known for its intuitive interface, ProductPlan allows teams to create and share roadmaps easily, facilitating stakeholder engagement.
## Management Dashboards
Management dashboards provide a high-level overview of project status, performance metrics, and key performance indicators (KPIs). They are crucial for decision-making and ensuring that projects stay on track.
### Key Features:
- **Real-Time Data:** Offers up-to-date insights into project progress, resource allocation, and team performance.
- **Customization:** Allows managers to tailor dashboards to display the most relevant data for their specific needs.
- **Integration:** Connects with various data sources to provide a comprehensive view of the project landscape.
### Examples:
- **Tableau:** A powerful analytics platform that enables the creation of interactive dashboards, providing deep insights into project data.
- **Power BI:** Microsoft's business analytics service that delivers robust data visualization and reporting capabilities, integrating seamlessly with other Microsoft tools.
In conclusion, the integration of ticketing systems, roadmap tools, and management dashboards is vital for the efficient operation of product development teams. These tools not only enhance communication and collaboration but also provide the necessary infrastructure to manage complex workflows and align team efforts with strategic objectives. By leveraging these tools, teams can ensure a more organized, transparent, and effective product development process.
Conclusion
----------
# Conclusion
In this guide, we have explored the intricate workflow and team roles that are essential in the product development process. By understanding these components, teams can enhance their efficiency and effectiveness in bringing products to market. This conclusion will summarize the key aspects of the workflow and team roles, as well as highlight the benefits of integrating tools that facilitate seamless collaboration and management.
## Summary of Workflow and Team Roles
The product development process is a collaborative effort that involves multiple teams, each with distinct roles and responsibilities. The workflow begins with the **Product Team**, which is responsible for the discovery phase. This phase involves collecting inputs from various sources such as customers, sales, and internal ideas, and qualifying these inputs against business objectives and market needs.
Once the product requirements are defined, the **Engineering Team** takes over to design and develop the product. This stage is critical as it transforms ideas into tangible solutions. The engineering team works closely with the product team to ensure that the technical specifications align with the product vision.
The **Quality Assurance (QA) Team** plays a pivotal role in maintaining the integrity of the product. Through rigorous testing and validation, the QA team ensures that the product meets the required standards and functions as intended. Their feedback is crucial for identifying and rectifying defects before the product reaches the market.
Finally, the **Operations Team** is responsible for deploying and maintaining the product. They ensure that the product is delivered efficiently and that any operational issues are promptly addressed. This team also monitors the product's performance and gathers data to inform future development cycles.
## Benefits of Integrated Tools
The integration of tools for ticketing, roadmaps, and management dashboards significantly enhances the product development process. These tools provide a centralized platform for tracking progress, managing tasks, and facilitating communication across teams.
- **Ticketing Systems**: These systems streamline issue tracking and resolution, allowing teams to prioritize tasks and allocate resources effectively. By maintaining a clear record of issues and resolutions, teams can improve their response times and reduce downtime.
- **Roadmaps**: Product roadmaps offer a strategic overview of the product's development trajectory. They help teams align their efforts with long-term goals and ensure that all stakeholders are informed of the product's progress and future direction.
- **Management Dashboards**: Dashboards provide real-time insights into the development process, enabling managers to make informed decisions. They offer visibility into key performance indicators (KPIs) and facilitate the identification of bottlenecks or areas for improvement.
In conclusion, a well-defined workflow and clear team roles are fundamental to successful product development. By leveraging integrated tools, teams can enhance their collaboration, streamline processes, and ultimately deliver high-quality products that meet market demands. As organizations continue to evolve, embracing these practices will be crucial for maintaining a competitive edge in the ever-changing landscape of product development.
Conclusion
----------
# Conclusion
In this guide, "Comprehensive Workflow and Team Roles in Product Development," we have explored the intricate processes and collaborative efforts that drive successful product development. By dissecting the workflow, we have highlighted the critical roles played by various teams, including product management, engineering, quality assurance (QA), and operations. Each team's responsibilities are pivotal in ensuring that the product development lifecycle is efficient, effective, and aligned with organizational goals.
Key Points Summary:
- **Product Development Workflow**: We detailed the sequential and iterative processes involved in product development, emphasizing the importance of clear communication and structured phases from ideation to deployment.
- **Team Roles**: The guide outlined the specific roles and responsibilities of the product, engineering, QA, and operations teams. Each team contributes uniquely to the development process, ensuring that products are not only built to specifications but also meet quality standards and operational requirements.
- **Tool Integration**: We discussed the integration of various tools that facilitate seamless workflow management. Tools for ticketing, roadmaps, and management dashboards play a crucial role in tracking progress, managing tasks, and ensuring transparency across teams.
Closure and Recommendations:
Understanding the workflow and team roles in product development is essential for any organization aiming to enhance its product delivery capabilities. By implementing structured processes and fostering collaboration across teams, organizations can improve efficiency, reduce time-to-market, and increase product quality. It is recommended that teams continuously evaluate and refine their workflows and tool integrations to adapt to evolving project needs and technological advancements.
Next Steps:
- Encourage cross-functional training to enhance team collaboration and understanding of each other's roles.
- Regularly review and update tool integrations to ensure they align with current project requirements and industry standards.
- Foster a culture of continuous improvement by soliciting feedback from all team members and stakeholders.
Significance:
This guide serves as a foundational resource for technical teams and project managers involved in product development. By providing a comprehensive overview of workflows and team roles, it equips readers with the knowledge to optimize their development processes and achieve successful product outcomes. As the landscape of product development continues to evolve, staying informed and adaptable will be key to maintaining a competitive edge.
Thank you for engaging with this guide. We hope it serves as a valuable asset in your product development endeavors.

View file

@@ -1,6 +0,0 @@
{
"recipient": "peter.muster@domain.com",
"subject": "Neuer Termin f\u00fcr unser Meeting",
"plainBody": "Sehr geehrter Herr Muster,\n\nich hoffe, es geht Ihnen gut. Ich schreibe Ihnen, um unser geplantes Meeting von 10 Uhr auf Freitag zu verschieben. Bitte lassen Sie mich wissen, ob dieser neue Termin f\u00fcr Sie passt.\n\nVielen Dank f\u00fcr Ihr Verst\u00e4ndnis.\n\nMit freundlichen Gr\u00fc\u00dfen,\n\n[Ihr Name]",
"htmlBody": "<p>Sehr geehrter Herr Muster,</p><p>ich hoffe, es geht Ihnen gut. Ich schreibe Ihnen, um unser geplantes Meeting von 10 Uhr auf Freitag zu verschieben. Bitte lassen Sie mich wissen, ob dieser neue Termin f\u00fcr Sie passt.</p><p>Vielen Dank f\u00fcr Ihr Verst\u00e4ndnis.</p><p>Mit freundlichen Gr\u00fc\u00dfen,<br>[Ihr Name]</p>"
}

View file

@@ -1,38 +0,0 @@
inputFiles = [] # DO NOT CHANGE THIS LINE
def is_prime(n):
if n <= 1:
return False
if n <= 3:
return True
if n % 2 == 0 or n % 3 == 0:
return False
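# Every prime > 3 has the form 6k ± 1, so it suffices to trial-divide by i and i + 2 for i = 5, 11, 17, ... up to sqrt(n)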
i = 5
while i * i <= n:
if n % i == 0 or n % (i + 2) == 0:
return False
i += 6
return True
def generate_primes(limit):
primes = []
num = 2
while len(primes) < limit:
if is_prime(num):
primes.append(num)
num += 1
return primes
primes = generate_primes(1000)
prime_numbers_content = "\n".join(map(str, primes))
result = {
"prime_numbers.txt": {
"content": prime_numbers_content,
"base64Encoded": False,
"contentType": "text/plain"
}
}
import json
print(json.dumps(result))

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large

View file

@@ -1,42 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>Email Preview: Verspätete Ankunft morgen</title>
<style>
body { font-family: Arial, sans-serif; margin: 0; padding: 0; background-color: #f5f5f5; }
.email-container { max-width: 600px; margin: 20px auto; background-color: white; border: 1px solid #ddd; border-radius: 5px; overflow: hidden; }
.email-header { background-color: #f0f0f0; padding: 15px; border-bottom: 1px solid #ddd; }
.email-content { padding: 20px; }
.email-footer { background-color: #f0f0f0; padding: 15px; border-top: 1px solid #ddd; font-size: 12px; color: #666; }
.field { margin-bottom: 10px; }
.field-label { font-weight: bold; color: #555; }
.email-body { margin-top: 20px; padding-top: 20px; border-top: 1px solid #eee; }
</style>
</head>
<body>
<div class="email-container">
<div class="email-header">
<h2>Email Template Preview</h2>
</div>
<div class="email-content">
<div class="field">
<div class="field-label">To:</div>
<div>i.dittrich@valueon.ch</div>
</div>
<div class="field">
<div class="field-label">Subject:</div>
<div>Verspätete Ankunft morgen</div>
</div>
<div class="email-body">
<p>Hallo Ida,</p><p>ich wollte dich nur kurz informieren, dass ich morgen etwas später ankommen werde. Ich hoffe, das ist in Ordnung.</p><p>Bis dann!</p><p>Viele Grüße</p>
</div>
</div>
<div class="email-footer">
<p>This is a preview of the email template. The actual email may appear differently in various email clients.</p>
</div>
</div>
</body>
</html>

View file

@@ -1,6 +0,0 @@
{
"recipient": "i.dittrich@valueon.ch",
"subject": "Versp\u00e4tete Ankunft morgen",
"plainBody": "Hallo Ida,\n\nich wollte dich nur kurz informieren, dass ich morgen etwas sp\u00e4ter ankommen werde. Ich hoffe, das ist in Ordnung.\n\nBis dann!\n\nViele Gr\u00fc\u00dfe",
"htmlBody": "<p>Hallo Ida,</p><p>ich wollte dich nur kurz informieren, dass ich morgen etwas sp\u00e4ter ankommen werde. Ich hoffe, das ist in Ordnung.</p><p>Bis dann!</p><p>Viele Gr\u00fc\u00dfe</p>"
}

View file

@@ -1,74 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>Email Preview: Verspätete Ankunft morgen</title>
<style>
body { font-family: Arial, sans-serif; margin: 0; padding: 0; background-color: #f5f5f5; }
.email-container { max-width: 600px; margin: 20px auto; background-color: white; border: 1px solid #ddd; border-radius: 5px; overflow: hidden; }
.email-header { background-color: #f0f0f0; padding: 15px; border-bottom: 1px solid #ddd; }
.email-content { padding: 20px; }
.email-footer { background-color: #f0f0f0; padding: 15px; border-top: 1px solid #ddd; font-size: 12px; color: #666; }
.field { margin-bottom: 10px; }
.field-label { font-weight: bold; color: #555; }
.email-body { margin-top: 20px; padding-top: 20px; border-top: 1px solid #eee; }
</style>
</head>
<body>
<div class="email-container">
<div class="email-header">
<h2>Email Template Preview</h2>
</div>
<div class="email-content">
<div class="field">
<div class="field-label">To:</div>
<div>i.dittrich@valueon.ch</div>
</div>
<div class="field">
<div class="field-label">Subject:</div>
<div>Verspätete Ankunft morgen</div>
</div>
<div class="email-body">
<html>
<head>
<meta charset="UTF-8">
<title>Email Preview: Verspätete Ankunft morgen</title>
<style>
body { font-family: Arial, sans-serif; margin: 0; padding: 0; background-color: #f4f4f4; }
.email-container { max-width: 600px; margin: 20px auto; background-color: #ffffff; border-radius: 8px; overflow: hidden; box-shadow: 0 0 10px rgba(0, 0, 0, 0.1); }
.email-header { background-color: #007BFF; color: #ffffff; padding: 10px; text-align: center; }
.email-content { padding: 20px; }
.email-footer { font-size: 12px; color: #777777; text-align: center; padding: 10px; }
</style>
</head>
<body>
<div class="email-container">
<div class="email-header">
<h1>Email Template Preview</h1>
</div>
<div class="email-content">
<p><strong>To:</strong> i.dittrich@valueon.ch</p>
<p><strong>Subject:</strong> Verspätete Ankunft morgen</p>
<div class="email-body">
<p>Hallo Ida,</p>
<p>ich wollte dich nur kurz informieren, dass ich morgen etwas später ankommen werde. Ich hoffe, das ist in Ordnung.</p>
<p>Bis dann!</p>
<p>Viele Grüße</p>
</div>
</div>
<div class="email-footer">
<p>Dies ist eine Vorschau des E-Mail-Templates.</p>
</div>
</div>
</body>
</html>
</div>
</div>
<div class="email-footer">
<p>This is a preview of the email template. The actual email may appear differently in various email clients.</p>
</div>
</div>
</body>
</html>

View file

@@ -1,6 +0,0 @@
{
"recipient": "i.dittrich@valueon.ch",
"subject": "Versp\u00e4tete Ankunft morgen",
"plainBody": "Hallo Ida,\n\nich wollte dich nur kurz informieren, dass ich morgen etwas sp\u00e4ter ankommen werde. Ich hoffe, das ist in Ordnung.\n\nBis dann!\n\nViele Gr\u00fc\u00dfe",
"htmlBody": "<html>\n<head>\n<meta charset=\"UTF-8\">\n<title>Email Preview: Versp\u00e4tete Ankunft morgen</title>\n<style>\n body { font-family: Arial, sans-serif; margin: 0; padding: 0; background-color: #f4f4f4; }\n .email-container { max-width: 600px; margin: 20px auto; background-color: #ffffff; border-radius: 8px; overflow: hidden; box-shadow: 0 0 10px rgba(0, 0, 0, 0.1); }\n .email-header { background-color: #007BFF; color: #ffffff; padding: 10px; text-align: center; }\n .email-content { padding: 20px; }\n .email-footer { font-size: 12px; color: #777777; text-align: center; padding: 10px; }\n</style>\n</head>\n<body>\n<div class=\"email-container\">\n <div class=\"email-header\">\n <h1>Email Template Preview</h1>\n </div>\n <div class=\"email-content\">\n <p><strong>To:</strong> i.dittrich@valueon.ch</p>\n <p><strong>Subject:</strong> Versp\u00e4tete Ankunft morgen</p>\n <div class=\"email-body\">\n <p>Hallo Ida,</p>\n <p>ich wollte dich nur kurz informieren, dass ich morgen etwas sp\u00e4ter ankommen werde. Ich hoffe, das ist in Ordnung.</p>\n <p>Bis dann!</p>\n <p>Viele Gr\u00fc\u00dfe</p>\n </div>\n </div>\n <div class=\"email-footer\">\n <p>Dies ist eine Vorschau des E-Mail-Templates.</p>\n </div>\n</div>\n</body>\n</html>"
}

View file

@@ -1 +0,0 @@
{"access_token": "eyJ0eXAiOiJKV1QiLCJub25jZSI6IlZXdlYzdDhtMTIyR19WNXF0ZU9sRGc0WjlBUkNBZkNCMHZER0hucmJWYlEiLCJhbGciOiJSUzI1NiIsIng1dCI6IkNOdjBPSTNSd3FsSEZFVm5hb01Bc2hDSDJYRSIsImtpZCI6IkNOdjBPSTNSd3FsSEZFVm5hb01Bc2hDSDJYRSJ9.eyJhdWQiOiIwMDAwMDAwMy0wMDAwLTAwMDAtYzAwMC0wMDAwMDAwMDAwMDAiLCJpc3MiOiJodHRwczovL3N0cy53aW5kb3dzLm5ldC82YTUxYWFlYi0yNDY3LTQxODYtOTUwNC0yYTA1YWVkYzU5MWYvIiwiaWF0IjoxNzQ2NTk5NTA5LCJuYmYiOjE3NDY1OTk1MDksImV4cCI6MTc0NjYwNTE1NywiYWNjdCI6MCwiYWNyIjoiMSIsImFjcnMiOlsicDEiXSwiYWlvIjoiQWFRQVcvOFpBQUFBa2NUUldzUVFTZVV3eVY0cS91Z0w0NDhndEQwb1prZ3paKzgxVDdaN1k0VWhDV1RwREF6OUwrdlYvN2V5SW9sMG5zVXQvY2N1U3NuMjhXenlMd2szWWpvZGM3ajZrb2dGeW5hU0owcE0vTTl1VXM1NDMrQ3k4cDBZRExhTzF4djFCcDRmRVdkMUd3bDRsZ0VROUtFNndjazFMY25raWRTQUM0djA3V3k1RUw3SDV4MUNKY3cyOXYrcU9Dc1VDdkNIYnN0a2JGbzdoZ3NvY0w3b0ZuTVh3Zz09IiwiYW1yIjpbInB3ZCIsInJzYSIsIm1mYSJdLCJhcHBfZGlzcGxheW5hbWUiOiJQTSBUZXN0IC0gRW1haWwgRHJhZnQiLCJhcHBpZCI6ImM3ZTcxMTJkLTYxZGMtNGYzYS04Y2QzLTA4Y2M0Y2Q3NTA0YyIsImFwcGlkYWNyIjoiMSIsImRldmljZWlkIjoiOWE0YTM2OWEtNjBhOS00NjdlLWFjNTktODdkZGQyMDUxZGU5IiwiZmFtaWx5X25hbWUiOiJNb3RzY2giLCJnaXZlbl9uYW1lIjoiUGF0cmljayIsImlkdHlwIjoidXNlciIsImlwYWRkciI6IjE3OC4xOTcuMjIyLjE0OCIsIm5hbWUiOiJQYXRyaWNrIE1vdHNjaCIsIm9pZCI6IjdkMDhhYWI5LWExNzAtNDk3NS04ODk4LWJjN2UwYTk1NDg4ZSIsInBsYXRmIjoiMyIsInB1aWQiOiIxMDAzN0ZGRThDREQ2QTgyIiwicmgiOiIxLkFRc0E2NnBSYW1ja2hrR1ZCQ29GcnR4Wkh3TUFBQUFBQUFBQXdBQUFBQUFBQUFDRUFEQUxBQS4iLCJzY3AiOiJNYWlsLlJlYWRXcml0ZSBvcGVuaWQgcHJvZmlsZSBVc2VyLlJlYWQgZW1haWwiLCJzaWQiOiIyOTI0ZTgxMS0xMTM1LTQ0ZTItOGUxYi1kMmU2YmVhZmI3ZTUiLCJzaWduaW5fc3RhdGUiOlsia21zaSJdLCJzdWIiOiJJZzBpcDN4YWRiTGl1S3piRmd3VmhOSU1fRHpHMHdweGlFRmIySll1Y240IiwidGVuYW50X3JlZ2lvbl9zY29wZSI6IkVVIiwidGlkIjoiNmE1MWFhZWItMjQ2Ny00MTg2LTk1MDQtMmEwNWFlZGM1OTFmIiwidW5pcXVlX25hbWUiOiJwLm1vdHNjaEB2YWx1ZW9uLmNoIiwidXBuIjoicC5tb3RzY2hAdmFsdWVvbi5jaCIsInV0aSI6ImYzXy1ha2NKblVlQXhuM3o3NmdOQUEiLCJ2ZXIiOiIxLjAiLCJ3aWRzIjpbIjE1OGMwNDdhLWM5MDctNDU1Ni1iN2VmLTQ0NjU1MWE2YjVmNyIsIjliODk1ZDkyLTJjZDMtNDRjNy05ZDAyLWE2YWMyZDVlYTVjMyIsImNmMWMzOGU1LTM2MjEtNDAwNC1hN2NiLTg3OTYyNGRjZWQ3YyIsIjlmMDYyMDRkLTczYzEtNGQ0Yy04ODBhLTZlZGI5MDYwNmZkOCIsIjg5MmM1ODQyLWE5YTYtNDYzYS04MDQxLTcyYWEwOGNhM2NmNiIsImI3OWZiZjRkLTNlZjktNDY4OS04MTQzLTc2YjE5NGU4NTUwOSJdLCJ4bXNfZnRkIjoiMlN2YmlORzVSbGVucGhwdmM2SDdEU1R5WFF5UnpPTmJYOUtOQzFzZmRKSUJaWFZ5YjNCbGQyVnpkQzFrYzIxeiIsInhtc19pZHJlbCI6IjEgMTIiLCJ4bXNfc3QiOnsic3ViIjoiUjJ2RDBHMW1tYVlSQzdKWVdjSVNaVzJLRFBnTkJqQkxGbDZlTEFCX1BVTSJ9LCJ4bXNfdGNkdCI6MTQxODIxNDUwMSwieG1zX3RkYnIiOiJFVSJ9.dg3yuHyt--1GJh5mhnLy1mPkopsVhUTlPv3GpbRT9QcUcMgFnHqGqsU3Ht_hCG5XATy0fe1-cojzBTpYBuyIOBjEZtYpJb5fGcfd-lfuBxCKcYT-ApV5sfQgOEv-r5ki7OTI13MktZKrC4d63uTXmEAOOdoRsIG0UN-ZpM0iGwbWeRZdJV_2F-skZCCLpeOK63jItZkQ7spa8KH9VaU5070vSwDQEXVBuMmvDq70ql6Sw-oqlSzh-ea-pQAn0SoKVg23xMXWTvHgCFxjeveq6Q3vCsmThIXmWdQcQtWUIFYbRajW7ZMM_c1xsTOWyFMQEbEcOwmAqf93bAL_gF_DQw", "refresh_token": 
"1.AQsA66pRamckhkGVBCoFrtxZHy0R58fcYTpPjNMIzEzXUEyEADALAA.AgABAwEAAABVrSpeuWamRam2jAF1XRQEAwDs_wUA9P950c99CUJxojzQN3haIYdKnZObsofQW2RZsTO0E9apCt7LtcrCIp0xFEJkIYipSHIN1bAG6Jhhm4QiYb3XxIH7VtgmZSnZrZdf3QgZILRwjUKyFnFdjjmq0S0BO7InylLaZIJ2ZzOOPE8cY4xSXedyKEe3DC06Ejh7Zp9EhC6BlWdgGUCyyFNloDKv3xhUfqJ8GjQ91bo1OErWAFkH8N7CiD_f7XJQ55EV4dx7w7qHemN4aDDeG4uNjioMMuPDspIHcFZFZwzcgphHZO9uelRrlQMNEwQ8zNDNQk0f1Q_m09xGifHoMCszwoX2Z-ffaYtkcQjwGnEp4DsEQbCv_-03wHc5KlmJnTmOGgtCLSpfxl06qcyBqRVeA2cGCwhzmmF_Q81AkkAqM0MCFalT4z3b6dK3uxjs1Umu_wUa_lEKtEePKzQaeTDf1wCDWXo1UZ1oTeZESV6yGrnBnCiG6z4wRifqCdJpf6WWItYk_EyKV5Reh6kPMIret11Cacha2elopHxLTFFmEExvb2Mfu1z9NHZ_qBnXA-F05fDmvKickYspWu4CuQr2DhwJ74CD4IZ1dKFRiwHuYlw9HuTuBQjOdMy-FhAyGdhjTKHhkf4rh8GVjeza0DvCl5NJr04ubacnd2-_vGVoVNbsqUeqDWF9gKdT5Qnz5Aba9vFs4VYKjtnfrVEBEWZHZJsX5JzWVIzqfSVKmcE3ij2v9KGLw8kdcD16hfTN6wCCHYRdtMx5CVRhyBuj8KMBRRUtdEBFgG_jraeQTGoj6BnsdKPclM_TkPHhR8p-0KjfErJud-MFavGT9Y1cvsdr9TdsX_8o9y2LTcW8nXl3Vljnzq3RlZ6N4PoeQSzZNmri8MRpLuUFwJfAwxwuGemN_OIph7Npo3IQ1Tw9WeENGczplZWgbf2FdPITisdPylACrsWflH8mlfy1fEatstQb_2E2k-1vqCFYX8hYiSRbOS0kYWzYUBJ_yvRF2EUsu8yTbzw4SlJYjeNdAdmLcxjbdWXS0OS-aKv2QUvTi_htK0UKKm7V0Yj1I_bE", "user_info": {"name": "Patrick Motsch", "email": "p.motsch@valueon.ch"}, "timestamp": "2025-05-07T01:45:00.286453"}

View file

@@ -1,432 +0,0 @@
#!/usr/bin/env python3
"""
Simplified Test Runner for Workflow State Machine
This script provides a clean and simple test runner for the workflow state machine
tests that properly handles async test methods.
Usage:
python tool_testBackendSingle.py [test_name]
Examples:
python tool_testBackendSingle.py # Run all tests
python tool_testBackendSingle.py test_state_1 # Run tests starting with test_state_1
"""
import os
import sys
import asyncio
import time
import traceback
import importlib
import inspect
from unittest.mock import patch, MagicMock, AsyncMock
# Try to import colorama, install if not available
try:
from colorama import init, Fore, Back, Style
init() # Initialize colorama
except ImportError:
print("Installing required package: colorama")
import subprocess
subprocess.call([sys.executable, "-m", "pip", "install", "colorama"])
from colorama import init, Fore, Back, Style
init() # Initialize colorama
class AsyncTestRunner:
"""Simple test runner that supports async test methods"""
def __init__(self):
"""Initialize the test runner"""
self.success_count = 0
self.failure_count = 0
self.results = []
self.total_time = 0
def print_header(self, test_case_name):
"""Print a header for the test suite"""
print("\n" + "=" * 80)
print(f"{Fore.CYAN}{Style.BRIGHT}{test_case_name}{Style.RESET_ALL}")
print("=" * 80)
def print_result(self, test_name, success, duration, error=None):
"""Print a test result with appropriate formatting"""
clean_name = test_name.replace('test_', '').replace('_', ' ').title()
if success:
status = f"{Fore.GREEN}[PASS]{Style.RESET_ALL}"
self.success_count += 1
else:
status = f"{Fore.RED}[FAIL]{Style.RESET_ALL}"
self.failure_count += 1
# Print result line
print(f"{status} {clean_name} - {duration:.2f}s")
# Print error if any
if error:
print(f" {Fore.RED}{error}{Style.RESET_ALL}")
if isinstance(error, Exception):
traceback.print_exception(type(error), error, error.__traceback__)
# Store result
self.results.append({
'name': clean_name,
'success': success,
'duration': duration,
'error': error
})
def print_summary(self):
"""Print a summary of test results"""
print("\n" + "=" * 80)
print(f"{Fore.CYAN}{Style.BRIGHT}TEST SUMMARY{Style.RESET_ALL}")
print("-" * 80)
# Print timing
print(f"Total execution time: {self.total_time:.2f}s")
# Print counts
total = self.success_count + self.failure_count
print(f"Tests: {total}, Passed: {Fore.GREEN}{self.success_count}{Style.RESET_ALL}, Failed: {Fore.RED}{self.failure_count}{Style.RESET_ALL}")
# Print overall status
if self.failure_count == 0:
print(f"\n{Fore.GREEN}{Style.BRIGHT}✓ ALL TESTS PASSED{Style.RESET_ALL}")
else:
print(f"\n{Fore.RED}{Style.BRIGHT}✗ TESTS FAILED{Style.RESET_ALL}")
# Print failed test names (only when there are failures)
if self.failure_count > 0:
print(f"\n{Fore.RED}Failed tests:{Style.RESET_ALL}")
for result in self.results:
if not result['success']:
print(f" - {result['name']}")
print("=" * 80)
async def run_test(self, test_instance, test_method):
"""Run a single test method (sync or async)"""
# Prepare test
test_name = test_method.__name__
clean_name = test_name.replace('test_', '').replace('_', ' ').title()
# Print start
print(f"\n{Fore.BLUE}[RUNNING]{Style.RESET_ALL} {clean_name}...")
# Run setUp
if hasattr(test_instance, 'setUp'):
await self.run_method_with_instance(test_instance, test_instance.setUp)
# Time the test execution
start_time = time.time()
success = True
error = None
try:
# Run the test - ensure bound method gets called with instance
if hasattr(test_method, '__self__') and test_method.__self__ is None:
# This is an unbound method, bind it to the instance
bound_method = getattr(test_instance, test_method.__name__)
await self.run_method_with_instance(test_instance, bound_method)
else:
# This is already a bound method
await self.run_method_with_instance(test_instance, test_method)
except Exception as e:
success = False
error = e
# Calculate duration
duration = time.time() - start_time
# Run tearDown
if hasattr(test_instance, 'tearDown'):
await self.run_method_with_instance(test_instance, test_instance.tearDown)
# Record and print result
self.print_result(test_name, success, duration, error)
return success
async def run_method_with_instance(self, instance, method):
"""Run a method ensuring it has the correct instance"""
method_name = method.__name__
bound_method = getattr(instance, method_name)
if asyncio.iscoroutinefunction(bound_method):
return await bound_method()
else:
return bound_method()
async def run_method(self, method):
"""Run a method that might be async or regular"""
# Check if this is an unbound method that needs self
if hasattr(method, '__self__') and method.__self__ is None:
# This suggests it's an unbound method that needs an instance
raise TypeError(f"Method {method.__name__} appears to be unbound and needs 'self'")
if asyncio.iscoroutinefunction(method):
return await method()
else:
return method()
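# NOTE: run_method above appears unused by this runner (run_test goes through
# run_method_with_instance), and _reset_mocks below references attributes
# (mydom_mock, registry_mock, getDocumentContents_mock, _setup_mocks) that
# AsyncTestRunner never sets, so it presumably assumes being mixed into a
# test case that defines them; calling it on a bare runner would raise
# AttributeError.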
def _reset_mocks(self):
"""Reset all mocks for a fresh test"""
# Only reset if the objects have reset_mock method
if hasattr(self.mydom_mock, 'reset_mock'):
self.mydom_mock.reset_mock()
else:
# Recreate the mock objects
self._setup_mocks()
if hasattr(self.registry_mock, 'reset_mock'):
self.registry_mock.reset_mock()
if hasattr(self.getDocumentContents_mock, 'reset_mock'):
self.getDocumentContents_mock.reset_mock()
async def run_test_case(self, test_case_class, filter_pattern=None):
"""Run all test methods in a test case class"""
# Initialize timing
start_time = time.time()
# Print header
self.print_header(test_case_class.__name__)
# Get all test methods
test_methods = sorted([
getattr(test_case_class, name) for name in dir(test_case_class)
if name.startswith('test_') and callable(getattr(test_case_class, name))
], key=lambda x: x.__name__)
# Filter tests if pattern provided
if filter_pattern:
test_methods = [
method for method in test_methods
if filter_pattern in method.__name__
]
if not test_methods:
print(f"{Fore.YELLOW}No tests found{Style.RESET_ALL}")
return
print(f"Running {len(test_methods)} tests...\n")
# Run each test
for test_method in test_methods:
# Create a fresh instance for each test
test_instance = test_case_class()
await self.run_test(test_instance, test_method)
# Record total time
self.total_time = time.time() - start_time
# Print summary
self.print_summary()
return self.failure_count == 0
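# Minimal usage sketch (the test class below is hypothetical, not part of
# this file):
#
#   class TestStateMachine:
#       def setUp(self):
#           self.value = 1
#       async def test_state_1_init(self):
#           assert self.value == 1
#
#   runner = AsyncTestRunner()
#   asyncio.run(runner.run_test_case(TestStateMachine, filter_pattern="state_1"))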
def setup_module_paths():
"""Set up module paths to make imports work"""
# Add current directory and parent directory to path
current_dir = os.path.dirname(os.path.abspath(__file__))
parent_dir = os.path.dirname(current_dir)
if current_dir not in sys.path:
sys.path.insert(0, current_dir)
if parent_dir not in sys.path:
sys.path.insert(0, parent_dir)
# Also add any module directories that might exist
modules_dir = os.path.join(parent_dir, 'modules')
if os.path.exists(modules_dir) and modules_dir not in sys.path:
sys.path.insert(0, modules_dir)
gateway_dir = os.path.join(parent_dir, 'gateway')
if os.path.exists(gateway_dir) and gateway_dir not in sys.path:
sys.path.insert(0, gateway_dir)
print(f"{Fore.CYAN}Python path set to:{Style.RESET_ALL}")
for path in sys.path[:5]: # Print first 5 paths
print(f" - {path}")
def find_test_files():
"""Find test files in the current directory"""
# Look for test files in priority order
test_files = []
# First priority: test_workflow_state_machine.py
if os.path.exists('./test_workflow_state_machine.py'):
test_files.append('test_workflow_state_machine.py')
# Second priority: any tool_test*.py files
tool_test_files = [f for f in os.listdir('.') if f.startswith('tool_test') and f.endswith('.py') and f != 'tool_testBackendSingle.py']
test_files.extend(tool_test_files)
# Last priority: any test_*.py files
other_test_files = [f for f in os.listdir('.') if f.startswith('test_') and f.endswith('.py') and f not in test_files]
test_files.extend(other_test_files)
return test_files
async def run_tests(test_file=None, test_filter=None):
"""Run all tests"""
# Set up paths
setup_module_paths()
# Find test files if not specified
if not test_file:
test_files = find_test_files()
if not test_files:
print(f"{Fore.RED}No test files found{Style.RESET_ALL}")
return False
test_file = test_files[0]
print(f"{Fore.YELLOW}Found test files: {', '.join(test_files)}{Style.RESET_ALL}")
print(f"{Fore.YELLOW}Using: {test_file}{Style.RESET_ALL}")
# Remove .py extension for import
module_name = test_file[:-3] if test_file.endswith('.py') else test_file
try:
# First try a normal import
print(f"{Fore.YELLOW}Attempting to import module: {module_name}{Style.RESET_ALL}")
test_module = importlib.import_module(module_name)
print(f"{Fore.GREEN}Successfully imported test module: {module_name}{Style.RESET_ALL}")
except ImportError as e:
print(f"{Fore.RED}Error importing module {module_name}: {e}{Style.RESET_ALL}")
# Try different import approaches
try:
# Try to load as a relative module
print(f"{Fore.YELLOW}Trying relative import...{Style.RESET_ALL}")
test_module = importlib.import_module('.' + module_name, package=__package__)
print(f"{Fore.GREEN}Imported test module via relative import: {module_name}{Style.RESET_ALL}")
except (ImportError, TypeError) as e:  # import_module raises TypeError for a relative import when __package__ is unset (script run directly)
print(f"{Fore.RED}Relative import failed: {e}{Style.RESET_ALL}")
# Fall back to exec (not recommended but sometimes necessary)
print(f"{Fore.YELLOW}Attempting to load using exec: {test_file}{Style.RESET_ALL}")
try:
with open(test_file, 'r') as f:
module_content = f.read()
# Create a new module namespace
module_namespace = {}
# Execute the module code in the namespace
exec(module_content, module_namespace)
# Create a mock module
class MockModule:
pass
test_module = MockModule()
# Copy the relevant attributes to the mock module
for key, value in module_namespace.items():
setattr(test_module, key, value)
print(f"{Fore.GREEN}Loaded test module using exec: {test_file}{Style.RESET_ALL}")
except Exception as e:
print(f"{Fore.RED}Failed to load module using exec: {e}{Style.RESET_ALL}")
traceback.print_exc()
return False
# Find test case class
test_case_class = None
print(f"{Fore.YELLOW}Looking for test case class in module...{Style.RESET_ALL}")
for item_name in dir(test_module):
item = getattr(test_module, item_name)
if inspect.isclass(item) and 'Test' in item_name:  # startswith('Test') is subsumed by the substring check
print(f"{Fore.GREEN}Found test case class: {item_name}{Style.RESET_ALL}")
test_case_class = item
break
if not test_case_class:
print(f"{Fore.RED}No test case class found in {test_file}{Style.RESET_ALL}")
return False
# Try to check for required imports
try:
print(f"{Fore.YELLOW}Checking for agent registry...{Style.RESET_ALL}")
try:
# First try direct import
from modules.workflowAgentsRegistry import getAgentRegistry
print(f"{Fore.GREEN}Successfully imported getAgentRegistry{Style.RESET_ALL}")
except ImportError:
try:
# Try the bare module name instead; setup_module_paths() adds the
# modules directory to sys.path, so this is the likely intended fallback
from workflowAgentsRegistry import getAgentRegistry
print(f"{Fore.GREEN}Successfully imported getAgentRegistry via bare module name{Style.RESET_ALL}")
except ImportError:
print(f"{Fore.YELLOW}Agent registry import not found - may cause issues{Style.RESET_ALL}")
except Exception as e:
print(f"{Fore.YELLOW}Error checking agent registry: {e}{Style.RESET_ALL}")
# Run the tests
print(f"{Fore.CYAN}Starting test execution{Style.RESET_ALL}")
runner = AsyncTestRunner()
return await runner.run_test_case(test_case_class, test_filter)
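# Example invocation (assumes a test_workflow_state_machine.py in the working
# directory):
#   asyncio.run(run_tests('test_workflow_state_machine.py', 'test_state_1'))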
if __name__ == "__main__":
# Get test filter from command line
test_file = None
test_filter = None
if len(sys.argv) > 1:
# Check if first arg is a file
if os.path.exists(sys.argv[1]) or sys.argv[1].endswith('.py'):
test_file = sys.argv[1]
if len(sys.argv) > 2:
test_filter = sys.argv[2]
else:
test_filter = sys.argv[1]
# Run tests
asyncio.run(run_tests(test_file, test_filter))
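# The mock classes below are module-level helpers, apparently meant to be
# imported by test modules. Because they are defined after the __main__ block
# above, they only come into existence after asyncio.run() returns when this
# file is executed directly; modules that import them are unaffected, since
# the __main__ guard is skipped on import.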
class MockDomInterface:
def __init__(self, *args, **kwargs):
self.getWorkflow = MagicMock(return_value=None)
self.loadWorkflowState = MagicMock(return_value=None)
self.createWorkflow = MagicMock()
self.updateWorkflow = MagicMock()
self.createWorkflowLog = MagicMock()
self.createWorkflowMessage = MagicMock()
self.getFile = MagicMock()
self.getFileData = MagicMock()
self.saveUploadedFile = MagicMock()
self.userLanguage = "en"
self.callAi = AsyncMock()
self.setUserLanguage = MagicMock()
def reset_mock(self):
"""Reset all mocks in this interface"""
for attr_name in dir(self):
attr = getattr(self, attr_name)
if hasattr(attr, 'reset_mock'):
attr.reset_mock()
class MockAgentRegistry:
def __init__(self):
self.getAgent = MagicMock()
self.getAgentInfos = MagicMock(return_value=[
{"name": "test_agent", "description": "Test agent", "capabilities": ["text_processing"]}
])
self.setMydom = MagicMock()
def reset_mock(self):
"""Reset all mocks in this registry"""
for attr_name in dir(self):
attr = getattr(self, attr_name)
if hasattr(attr, 'reset_mock'):
attr.reset_mock()
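# Hypothetical wiring of these mocks into a test (the patch targets are
# assumptions based on the imports attempted earlier, not confirmed paths):
#
#   from unittest.mock import patch
#   mydom = MockDomInterface()
#   registry = MockAgentRegistry()
#   with patch('modules.lucydomInterface.getLucydomInterface', return_value=mydom), \
#        patch('modules.workflowAgentsRegistry.getAgentRegistry', return_value=registry):
#       pass  # exercise the workflow state machine against the mocks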

File diff suppressed because it is too large

View file

@@ -1,244 +0,0 @@
#!/usr/bin/env python3
"""
Direct Interface Workflow Test Script
This script bypasses the API layer and works directly with the interface classes
to simulate a user uploading two files and then sending a chat request with these files.
It follows the state machine as defined in the backend documentation.
"""
import os
import sys
import json
import asyncio
import uuid
from datetime import datetime
# Adjust import paths
current_dir = os.path.dirname(os.path.abspath(__file__))
parent_dir = os.path.dirname(current_dir)
if parent_dir not in sys.path:
sys.path.insert(0, parent_dir)
# Try to import the required modules
try:
from modules.workflowManager import getWorkflowManager
from modules.lucydomInterface import getLucydomInterface
except ImportError:
print("Error: Required modules not found. Attempting alternative imports...")
try:
from gateway.modules.workflowManager import getWorkflowManager
from gateway.modules.lucydomInterface import getLucydomInterface
except ImportError:
print("Error: Could not import required modules. Make sure the script is run from the correct directory.")
sys.exit(1)
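# The two import attempts above cover a top-level modules/ package and a
# gateway/modules/ layout; which one succeeds depends on where this script
# sits relative to the repository root.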
# Constants
MANDATE_ID = 1
USER_ID = 1
#USER_PROMPT = "Please analyze these sales figures and the chart to identify key trends and opportunities."
#USER_PROMPT = "Please make me a svg file with forecast for Apr-Jun."
USER_PROMPT = "Please make me a jpg file with forecast for Apr-Jun."
# Sample files to upload
SAMPLE_SVG = """
<svg width="400" height="300" xmlns="http://www.w3.org/2000/svg">
<title>Sales Q1 Bar Chart</title>
<rect width="100%" height="100%" fill="#f9f9f9"/>
<g transform="translate(50, 20)">
<!-- Axes -->
<line x1="0" y1="230" x2="320" y2="230" stroke="black" />
<line x1="0" y1="0" x2="0" y2="230" stroke="black" />
<!-- Y-axis title -->
<text x="-30" y="120" transform="rotate(-90, -30, 120)">Sales ($)</text>
<!-- X-axis title -->
<text x="160" y="270">Month</text>
<!-- January -->
<rect x="40" y="80" width="60" height="150" fill="#4285F4" />
<text x="70" y="250">Jan</text>
<text x="70" y="70">$150K</text>
<!-- February -->
<rect x="130" y="50" width="60" height="180" fill="#EA4335" />
<text x="160" y="250">Feb</text>
<text x="160" y="40">$165K</text>
<!-- March -->
<rect x="220" y="20" width="60" height="210" fill="#FBBC05" />
<text x="250" y="250">Mar</text>
<text x="250" y="10">$180K</text>
</g>
</svg>
"""
SAMPLE_DATA = """
# Sales Data - Q1 2023
Month,Revenue,Growth,Units Sold
January,150000,5.2%,1250
February,165000,10.0%,1380
March,180000,9.1%,1490
## Regional Breakdown
- North: 35% of total sales
- South: 25% of total sales
- East: 20% of total sales
- West: 20% of total sales
## Top Products
1. Product A: 40% of revenue
2. Product B: 30% of revenue
3. Product C: 20% of revenue
4. Others: 10% of revenue
"""
async def create_test_files(mydom):
"""Create two test files and return their IDs"""
print("\n--- Uploading Test Files (State 0: File Upload) ---")
# Create SVG chart file
print("Uploading SVG chart file...")
chart_meta = mydom.saveUploadedFile(SAMPLE_SVG.encode('utf-8'), "q1_sales_chart.svg")
chart_id = chart_meta['id']
print(f"Created SVG chart file with ID: {chart_id}")
# Create data text file
print("Uploading markdown data file...")
data_meta = mydom.saveUploadedFile(SAMPLE_DATA.encode('utf-8'), "q1_sales_data.md")
data_id = data_meta['id']
print(f"Created markdown data file with ID: {data_id}")
return chart_id, data_id
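# saveUploadedFile is assumed to return a metadata dict containing at least
# an 'id' key; that is the only field this script relies on.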
async def monitor_workflow(mydom, workflow_id, timeout=300, interval=2):
"""Monitor the workflow until it completes or times out"""
print("\n--- Monitoring Workflow ---")
start_time = datetime.now()
elapsed = 0
while elapsed < timeout:
# Get current workflow state
workflow = mydom.loadWorkflowState(workflow_id)
if not workflow:
print("Error: Workflow not found")
return None
status = workflow.get("status", "unknown")
# Show progress
logs = workflow.get("logs", [])
latest_log = logs[-1] if logs else None
if latest_log:
progress = latest_log.get("progress", 0)
message = latest_log.get("message", "No message")
print(f"Status: {status} | Progress: {progress}% | {message}")
# Check if workflow is done
if status in ["completed", "failed", "stopped"]:
if status == "completed":
print("\nWorkflow completed successfully!")
elif status == "failed":
print("\nWorkflow failed!")
else:
print("\nWorkflow was stopped!")
return workflow
# Wait before checking again
await asyncio.sleep(interval)
elapsed = (datetime.now() - start_time).total_seconds()
print(f"Monitoring timed out after {timeout} seconds")
return mydom.loadWorkflowState(workflow_id)
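# With the defaults above the workflow is polled every 2 seconds for up to
# 5 minutes, e.g.:
#   final = await monitor_workflow(mydom, workflow_id, timeout=300, interval=2)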
async def run_test():
"""Main test function that follows the state machine workflow"""
print("\n=== Direct Interface Workflow Test ===\n")
# Initialize the interfaces
print("Initializing system...")
mydom = getLucydomInterface(MANDATE_ID, USER_ID)
manager = getWorkflowManager(MANDATE_ID, USER_ID)
# Upload test files (State 0: File Upload)
chart_id, data_id = await create_test_files(mydom)
# Prepare the user input
user_input = {
"prompt": USER_PROMPT,
"listFileId": [chart_id, data_id]
}
# Start workflow (State 1: Workflow Initialization)
print(f"\n--- Starting Workflow (State 1: Workflow Initialization) ---")
print(f"Sending user prompt: '{USER_PROMPT}'")
print(f"With files: SVG chart (ID: {chart_id}) and sales data (ID: {data_id})")
# Start the workflow with the user input
workflow = await manager.workflowStart(user_input)
workflow_id = workflow["id"]
print(f"Workflow initiated with ID: {workflow_id}")
print(f"Initial status: {workflow['status']}")
# Monitor the workflow progress
# This will monitor states 2-7 of the state machine
await monitor_workflow(mydom, workflow_id, timeout=120)
# Get final workflow state
final_workflow = mydom.loadWorkflowState(workflow_id)
# Print the results
print("\n--- Final Workflow Results ---")
if final_workflow:
# Print status information
print(f"Workflow Status: {final_workflow.get('status', 'unknown')}")
print(f"Current Round: {final_workflow.get('currentRound', 0)}")
# Print messages
print("\n=== Messages ===")
for msg in final_workflow.get("messages", []):
role = msg.get("role", "unknown")
agent = msg.get("agentName", "")
# Get a preview of the content
content = msg.get("content", "")
if len(content) > 100:
content_preview = content[:100] + "..."
else:
content_preview = content
# Format based on role
if role == "assistant" and agent:
print(f"\n[{role} - {agent}]: {content_preview}")
else:
print(f"\n[{role}]: {content_preview}")
# Print document info
docs = msg.get("documents", [])
if docs:
print(f" Documents ({len(docs)}):")
for doc in docs:
name = doc.get("name", "unnamed")
ext = doc.get("ext", "")
file_id = doc.get("fileId", "unknown")
print(f" - {name}.{ext} (ID: {file_id})")
# Print the final log
logs = final_workflow.get("logs", [])
if logs:
final_log = logs[-1]
print(f"\nFinal Log: {final_log.get('message', 'No message')}")
else:
print("Error: Could not retrieve final workflow state")
print("\n=== Test Complete ===")
return workflow_id
if __name__ == "__main__":
workflow_id = asyncio.run(run_test())
print(f"Completed workflow ID: {workflow_id}")