Cleaned up code, removed session handling (JWT is used instead), and aligned function and parameter naming

This commit is contained in:
ValueOn AG 2025-08-24 17:03:39 +02:00
parent 2a02bb1c01
commit bd0d964e93
35 changed files with 2067 additions and 2738 deletions

4
app.py
View file

@ -109,9 +109,9 @@ def initLogging():
) )
# Silence noisy third-party libraries - use the same level as the root logger # Silence noisy third-party libraries - use the same level as the root logger
noisyLoggers = ["httpx", "httpcore", "urllib3", "asyncio", "fastapi.security.oauth2"] noisyLoggers = ["httpx", "httpcore", "urllib3", "asyncio", "fastapi.security.oauth2", "msal"]
for loggerName in noisyLoggers: for loggerName in noisyLoggers:
logging.getLogger(loggerName).setLevel(logLevel) logging.getLogger(loggerName).setLevel(logging.WARNING)
# Log the current logging configuration # Log the current logging configuration
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)

View file

@ -159,13 +159,13 @@ class DocumentExtraction:
"svg": 40000 # SVG content "svg": 40000 # SVG content
} }
def _robustTextDecode(self, fileData: bytes, filename: str = "unknown") -> str: def _robustTextDecode(self, fileData: bytes, fileName: str = "unknown") -> str:
""" """
Robustly decode text data with multiple encoding fallbacks. Robustly decode text data with multiple encoding fallbacks.
Args: Args:
fileData: Raw bytes to decode fileData: Raw bytes to decode
filename: Filename for logging purposes fileName: fileName for logging purposes
Returns: Returns:
Decoded text string Decoded text string
@ -207,16 +207,16 @@ class DocumentExtraction:
else: else:
# Last resort: decode with replacement characters # Last resort: decode with replacement characters
content = fileData.decode('utf-8', errors='replace') content = fileData.decode('utf-8', errors='replace')
logger.warning(f"{filename}: decoded with UTF-8 and replacement characters due to low encoding confidence") logger.warning(f"{fileName}: decoded with UTF-8 and replacement characters due to low encoding confidence")
return content return content
except ImportError: except ImportError:
# chardet not available, use replacement characters # chardet not available, use replacement characters
content = fileData.decode('utf-8', errors='replace') content = fileData.decode('utf-8', errors='replace')
logger.warning(f"{filename}: decoded with UTF-8 and replacement characters (chardet not available)") logger.warning(f"{fileName}: decoded with UTF-8 and replacement characters (chardet not available)")
return content return content
# This should never be reached, but just in case # This should never be reached, but just in case
raise FileProcessingError(f"Failed to decode {filename} with any encoding") raise FileProcessingError(f"Failed to decode {fileName} with any encoding")
def initialize(self) -> None: def initialize(self) -> None:
"""Initialize the document processor.""" """Initialize the document processor."""
@ -262,13 +262,13 @@ class DocumentExtraction:
async def processFileData(self, fileData: bytes, filename: str, mimeType: str, base64Encoded: bool = False, prompt: str = None, documentId: str = None, enableAI: bool = True) -> ExtractedContent: async def processFileData(self, fileData: bytes, fileName: str, mimeType: str, base64Encoded: bool = False, prompt: str = None, documentId: str = None, enableAI: bool = True) -> ExtractedContent:
""" """
Process file data directly and extract its contents with optional AI processing. Process file data directly and extract its contents with optional AI processing.
Args: Args:
fileData: Raw file data as bytes fileData: Raw file data as bytes
filename: Name of the file fileName: Name of the file
mimeType: MIME type of the file mimeType: MIME type of the file
base64Encoded: Whether the data is base64 encoded base64Encoded: Whether the data is base64 encoded
prompt: Prompt for AI content extraction prompt: Prompt for AI content extraction
@ -287,13 +287,13 @@ class DocumentExtraction:
fileData = base64.b64decode(fileData) fileData = base64.b64decode(fileData)
# Use documentUtility for mime type detection # Use documentUtility for mime type detection
if mimeType == "application/octet-stream": if mimeType == "application/octet-stream":
mimeType = detectMimeTypeFromData(fileData, filename, self._serviceCenter) mimeType = detectMimeTypeFromData(fileData, fileName, self._serviceCenter)
# Process document based on type # Process document based on type
if mimeType not in self.supportedTypes: if mimeType not in self.supportedTypes:
contentItems = await self._processBinary(fileData, filename, mimeType) contentItems = await self._processBinary(fileData, fileName, mimeType)
else: else:
processor = self.supportedTypes[mimeType] processor = self.supportedTypes[mimeType]
contentItems = await processor(fileData, filename, mimeType) contentItems = await processor(fileData, fileName, mimeType)
# Process with AI if prompt provided and AI is enabled # Process with AI if prompt provided and AI is enabled
if enableAI and prompt and contentItems: if enableAI and prompt and contentItems:
@ -304,7 +304,7 @@ class DocumentExtraction:
except Exception as e: except Exception as e:
logger.error(f"Error processing content with AI: {str(e)}") logger.error(f"Error processing content with AI: {str(e)}")
elif not enableAI: elif not enableAI:
logger.debug(f"AI processing disabled for {filename}, returning raw extracted content") logger.debug(f"AI processing disabled for {fileName}, returning raw extracted content")
return ExtractedContent( return ExtractedContent(
id=documentId if documentId else str(uuid.uuid4()), id=documentId if documentId else str(uuid.uuid4()),
@ -317,14 +317,14 @@ class DocumentExtraction:
async def _processText(self, fileData: bytes, filename: str, mimeType: str) -> List[ContentItem]: async def _processText(self, fileData: bytes, fileName: str, mimeType: str) -> List[ContentItem]:
"""Process text document with robust encoding detection and complete content extraction""" """Process text document with robust encoding detection and complete content extraction"""
try: try:
content = self._robustTextDecode(fileData, filename) content = self._robustTextDecode(fileData, fileName)
# Validate that we got the complete content # Validate that we got the complete content
if not content or len(content.strip()) == 0: if not content or len(content.strip()) == 0:
logger.warning(f"Empty content extracted from {filename}") logger.warning(f"Empty content extracted from {fileName}")
return [ContentItem( return [ContentItem(
label="empty", label="empty",
data="[Empty file or no readable content]", data="[Empty file or no readable content]",
@ -341,7 +341,7 @@ class DocumentExtraction:
# Use documentUtility for mime type # Use documentUtility for mime type
mime_type = getMimeTypeFromExtension(getFileExtension(filename), self._serviceCenter) mime_type = getMimeTypeFromExtension(getFileExtension(fileName), self._serviceCenter)
return [ContentItem( return [ContentItem(
label="main", label="main",
data=content, data=content,
@ -356,11 +356,11 @@ class DocumentExtraction:
logger.error(f"Error processing text document: {str(e)}") logger.error(f"Error processing text document: {str(e)}")
raise FileProcessingError(f"Failed to process text document: {str(e)}") raise FileProcessingError(f"Failed to process text document: {str(e)}")
async def _processCsv(self, fileData: bytes, filename: str, mimeType: str) -> List[ContentItem]: async def _processCsv(self, fileData: bytes, fileName: str, mimeType: str) -> List[ContentItem]:
"""Process CSV document with robust encoding detection""" """Process CSV document with robust encoding detection"""
try: try:
content = self._robustTextDecode(fileData, filename) content = self._robustTextDecode(fileData, fileName)
mime_type = getMimeTypeFromExtension(getFileExtension(filename), self._serviceCenter) mime_type = getMimeTypeFromExtension(getFileExtension(fileName), self._serviceCenter)
return [ContentItem( return [ContentItem(
label="main", label="main",
data=content, data=content,
@ -375,12 +375,12 @@ class DocumentExtraction:
logger.error(f"Error processing CSV document: {str(e)}") logger.error(f"Error processing CSV document: {str(e)}")
raise FileProcessingError(f"Failed to process CSV document: {str(e)}") raise FileProcessingError(f"Failed to process CSV document: {str(e)}")
async def _processJson(self, fileData: bytes, filename: str, mimeType: str) -> List[ContentItem]: async def _processJson(self, fileData: bytes, fileName: str, mimeType: str) -> List[ContentItem]:
"""Process JSON document with robust encoding detection""" """Process JSON document with robust encoding detection"""
try: try:
content = self._robustTextDecode(fileData, filename) content = self._robustTextDecode(fileData, fileName)
jsonData = json.loads(content) jsonData = json.loads(content)
mime_type = getMimeTypeFromExtension(getFileExtension(filename), self._serviceCenter) mime_type = getMimeTypeFromExtension(getFileExtension(fileName), self._serviceCenter)
return [ContentItem( return [ContentItem(
label="main", label="main",
data=content, data=content,
@ -395,11 +395,11 @@ class DocumentExtraction:
logger.error(f"Error processing JSON document: {str(e)}") logger.error(f"Error processing JSON document: {str(e)}")
raise FileProcessingError(f"Failed to process JSON document: {str(e)}") raise FileProcessingError(f"Failed to process JSON document: {str(e)}")
async def _processXml(self, fileData: bytes, filename: str, mimeType: str) -> List[ContentItem]: async def _processXml(self, fileData: bytes, fileName: str, mimeType: str) -> List[ContentItem]:
"""Process XML document with robust encoding detection""" """Process XML document with robust encoding detection"""
try: try:
content = self._robustTextDecode(fileData, filename) content = self._robustTextDecode(fileData, fileName)
mime_type = getMimeTypeFromExtension(getFileExtension(filename), self._serviceCenter) mime_type = getMimeTypeFromExtension(getFileExtension(fileName), self._serviceCenter)
return [ContentItem( return [ContentItem(
label="main", label="main",
data=content, data=content,
@ -414,11 +414,11 @@ class DocumentExtraction:
logger.error(f"Error processing XML document: {str(e)}") logger.error(f"Error processing XML document: {str(e)}")
raise FileProcessingError(f"Failed to process XML document: {str(e)}") raise FileProcessingError(f"Failed to process XML document: {str(e)}")
async def _processHtml(self, fileData: bytes, filename: str, mimeType: str) -> List[ContentItem]: async def _processHtml(self, fileData: bytes, fileName: str, mimeType: str) -> List[ContentItem]:
"""Process HTML document with robust encoding detection""" """Process HTML document with robust encoding detection"""
try: try:
content = self._robustTextDecode(fileData, filename) content = self._robustTextDecode(fileData, fileName)
mime_type = getMimeTypeFromExtension(getFileExtension(filename), self._serviceCenter) mime_type = getMimeTypeFromExtension(getFileExtension(fileName), self._serviceCenter)
return [ContentItem( return [ContentItem(
label="main", label="main",
data=content, data=content,
@ -433,10 +433,10 @@ class DocumentExtraction:
logger.error(f"Error processing HTML document: {str(e)}") logger.error(f"Error processing HTML document: {str(e)}")
raise FileProcessingError(f"Failed to process HTML document: {str(e)}") raise FileProcessingError(f"Failed to process HTML document: {str(e)}")
async def _processSvg(self, fileData: bytes, filename: str, mimeType: str) -> List[ContentItem]: async def _processSvg(self, fileData: bytes, fileName: str, mimeType: str) -> List[ContentItem]:
"""Process SVG document with robust encoding detection and meaningful content extraction""" """Process SVG document with robust encoding detection and meaningful content extraction"""
try: try:
content = self._robustTextDecode(fileData, filename) content = self._robustTextDecode(fileData, fileName)
# Check if it's actually SVG content # Check if it's actually SVG content
if "<svg" not in content.lower(): if "<svg" not in content.lower():
@ -512,7 +512,7 @@ class DocumentExtraction:
# Combine all meaningful content # Combine all meaningful content
final_content = "\n".join(meaningful_content) final_content = "\n".join(meaningful_content)
mime_type = getMimeTypeFromExtension(getFileExtension(filename), self._serviceCenter) mime_type = getMimeTypeFromExtension(getFileExtension(fileName), self._serviceCenter)
return [ContentItem( return [ContentItem(
label="svg_content", label="svg_content",
data=final_content, data=final_content,
@ -526,7 +526,7 @@ class DocumentExtraction:
logger.error(f"Error processing SVG document: {str(e)}") logger.error(f"Error processing SVG document: {str(e)}")
raise FileProcessingError(f"Failed to process SVG document: {str(e)}") raise FileProcessingError(f"Failed to process SVG document: {str(e)}")
async def _processImage(self, fileData: bytes, filename: str, mimeType: str) -> List[ContentItem]: async def _processImage(self, fileData: bytes, fileName: str, mimeType: str) -> List[ContentItem]:
"""Process image document""" """Process image document"""
try: try:
self._loadImageProcessor() self._loadImageProcessor()
@ -592,7 +592,7 @@ class DocumentExtraction:
logger.error(f"Error processing image document: {str(e)}") logger.error(f"Error processing image document: {str(e)}")
raise FileProcessingError(f"Failed to process image document: {str(e)}") raise FileProcessingError(f"Failed to process image document: {str(e)}")
async def _processPdf(self, fileData: bytes, filename: str, mimeType: str) -> List[ContentItem]: async def _processPdf(self, fileData: bytes, fileName: str, mimeType: str) -> List[ContentItem]:
"""Process PDF document""" """Process PDF document"""
try: try:
self._loadPdfExtractor() self._loadPdfExtractor()
@ -661,7 +661,7 @@ class DocumentExtraction:
logger.error(f"Error processing PDF document: {str(e)}") logger.error(f"Error processing PDF document: {str(e)}")
raise FileProcessingError(f"Failed to process PDF document: {str(e)}") raise FileProcessingError(f"Failed to process PDF document: {str(e)}")
async def _processDocx(self, fileData: bytes, filename: str, mimeType: str) -> List[ContentItem]: async def _processDocx(self, fileData: bytes, fileName: str, mimeType: str) -> List[ContentItem]:
"""Process Word document with enhanced formatting preservation""" """Process Word document with enhanced formatting preservation"""
try: try:
self._loadOfficeExtractor() self._loadOfficeExtractor()
@ -853,7 +853,7 @@ class DocumentExtraction:
logger.error(f"Error processing Word document: {str(e)}") logger.error(f"Error processing Word document: {str(e)}")
raise FileProcessingError(f"Failed to process Word document: {str(e)}") raise FileProcessingError(f"Failed to process Word document: {str(e)}")
async def _processXlsx(self, fileData: bytes, filename: str, mimeType: str) -> List[ContentItem]: async def _processXlsx(self, fileData: bytes, fileName: str, mimeType: str) -> List[ContentItem]:
"""Process Excel document with enhanced table extraction and metadata""" """Process Excel document with enhanced table extraction and metadata"""
try: try:
self._loadOfficeExtractor() self._loadOfficeExtractor()
@ -867,7 +867,7 @@ class DocumentExtraction:
workbook = openpyxl.load_workbook(xlsxStream, data_only=True) workbook = openpyxl.load_workbook(xlsxStream, data_only=True)
except Exception as load_error: except Exception as load_error:
logger.error(f"Failed to load Excel workbook {filename}: {str(load_error)}") logger.error(f"Failed to load Excel workbook {fileName}: {str(load_error)}")
raise FileProcessingError(f"Failed to load Excel workbook: {str(load_error)}") raise FileProcessingError(f"Failed to load Excel workbook: {str(load_error)}")
# Extract workbook properties safely # Extract workbook properties safely
@ -1122,7 +1122,7 @@ class DocumentExtraction:
logger.error(f"Error processing Excel document: {str(e)}") logger.error(f"Error processing Excel document: {str(e)}")
raise FileProcessingError(f"Failed to process Excel document: {str(e)}") raise FileProcessingError(f"Failed to process Excel document: {str(e)}")
async def _processLegacyDoc(self, fileData: bytes, filename: str, mimeType: str) -> List[ContentItem]: async def _processLegacyDoc(self, fileData: bytes, fileName: str, mimeType: str) -> List[ContentItem]:
"""Process legacy Word .doc document""" """Process legacy Word .doc document"""
try: try:
# Try to use antiword or similar tools for .doc files # Try to use antiword or similar tools for .doc files
@ -1130,7 +1130,7 @@ class DocumentExtraction:
contentItems = [] contentItems = []
# Create a basic content item explaining the limitation # Create a basic content item explaining the limitation
info_content = f"""Legacy Word Document (.doc) - {filename} info_content = f"""Legacy Word Document (.doc) - {fileName}
Note: This is a legacy .doc format file. For better content extraction, Note: This is a legacy .doc format file. For better content extraction,
consider converting to .docx format. consider converting to .docx format.
@ -1173,7 +1173,7 @@ The raw binary content is available but not human-readable."""
logger.error(f"Error processing legacy Word document: {str(e)}") logger.error(f"Error processing legacy Word document: {str(e)}")
raise FileProcessingError(f"Failed to process legacy Word document: {str(e)}") raise FileProcessingError(f"Failed to process legacy Word document: {str(e)}")
async def _processLegacyXls(self, fileData: bytes, filename: str, mimeType: str) -> List[ContentItem]: async def _processLegacyXls(self, fileData: bytes, fileName: str, mimeType: str) -> List[ContentItem]:
"""Process legacy Excel .xls document""" """Process legacy Excel .xls document"""
try: try:
# Try to use xlrd or similar tools for .xls files # Try to use xlrd or similar tools for .xls files
@ -1181,7 +1181,7 @@ The raw binary content is available but not human-readable."""
contentItems = [] contentItems = []
# Create a basic content item explaining the limitation # Create a basic content item explaining the limitation
info_content = f"""Legacy Excel Document (.xls) - {filename} info_content = f"""Legacy Excel Document (.xls) - {fileName}
Note: This is a legacy .xls format file. For better content extraction, Note: This is a legacy .xls format file. For better content extraction,
consider converting to .xlsx format. consider converting to .xlsx format.
@ -1224,7 +1224,7 @@ The raw binary content is available but not human-readable."""
logger.error(f"Error processing legacy Excel document: {str(e)}") logger.error(f"Error processing legacy Excel document: {str(e)}")
raise FileProcessingError(f"Failed to process legacy Excel document: {str(e)}") raise FileProcessingError(f"Failed to process legacy Excel document: {str(e)}")
async def _processLegacyPpt(self, fileData: bytes, filename: str, mimeType: str) -> List[ContentItem]: async def _processLegacyPpt(self, fileData: bytes, fileName: str, mimeType: str) -> List[ContentItem]:
"""Process legacy PowerPoint .ppt document""" """Process legacy PowerPoint .ppt document"""
try: try:
# Try to use python-pptx or similar tools for .ppt files # Try to use python-pptx or similar tools for .ppt files
@ -1232,7 +1232,7 @@ The raw binary content is available but not human-readable."""
contentItems = [] contentItems = []
# Create a basic content item explaining the limitation # Create a basic content item explaining the limitation
info_content = f"""Legacy PowerPoint Document (.ppt) - {filename} info_content = f"""Legacy PowerPoint Document (.ppt) - {fileName}
Note: This is a legacy .ppt format file. For better content extraction, Note: This is a legacy .ppt format file. For better content extraction,
consider converting to .pptx format. consider converting to .pptx format.
@ -1275,7 +1275,7 @@ The raw binary content is available but not human-readable."""
logger.error(f"Error processing legacy PowerPoint document: {str(e)}") logger.error(f"Error processing legacy PowerPoint document: {str(e)}")
raise FileProcessingError(f"Failed to process legacy PowerPoint document: {str(e)}") raise FileProcessingError(f"Failed to process legacy PowerPoint document: {str(e)}")
async def _processPptx(self, fileData: bytes, filename: str, mimeType: str) -> List[ContentItem]: async def _processPptx(self, fileData: bytes, fileName: str, mimeType: str) -> List[ContentItem]:
"""Process PowerPoint document""" """Process PowerPoint document"""
try: try:
self._loadOfficeExtractor() self._loadOfficeExtractor()
@ -1351,7 +1351,7 @@ The raw binary content is available but not human-readable."""
logger.error(f"Error processing PowerPoint document: {str(e)}") logger.error(f"Error processing PowerPoint document: {str(e)}")
raise FileProcessingError(f"Failed to process PowerPoint document: {str(e)}") raise FileProcessingError(f"Failed to process PowerPoint document: {str(e)}")
async def _processBinary(self, fileData: bytes, filename: str, mimeType: str) -> List[ContentItem]: async def _processBinary(self, fileData: bytes, fileName: str, mimeType: str) -> List[ContentItem]:
"""Process binary document""" """Process binary document"""
try: try:
return [ContentItem( return [ContentItem(

View file

@ -23,7 +23,7 @@ class DocumentGenerator:
""" """
try: try:
# Read documents from the standard documents field (not data.documents) # Read documents from the standard documents field (not data.documents)
documents = action_result.documents if hasattr(action_result, 'documents') else [] documents = action_result.documents if action_result and hasattr(action_result, 'documents') else []
if not documents: if not documents:
logger.info(f"No documents found in action_result.documents for {action.execMethod}.{action.execAction}") logger.info(f"No documents found in action_result.documents for {action.execMethod}.{action.execAction}")
@ -56,152 +56,21 @@ class DocumentGenerator:
return [] return []
def processSingleDocument(self, doc: Any, action) -> Optional[Dict[str, Any]]: def processSingleDocument(self, doc: Any, action) -> Optional[Dict[str, Any]]:
"""Process a single document from action result""" """Process a single document from action result with simplified logic"""
try: try:
if hasattr(doc, 'filename') and doc.filename: # ActionDocument objects have documentName, documentData, and mimeType
# Document object with filename attribute mime_type = doc.mimeType
mime_type = getattr(doc, 'mimeType', 'application/octet-stream') if mime_type == "application/octet-stream":
if mime_type == "application/octet-stream": content = doc.documentData
content = getattr(doc, 'content', '') mime_type = detectMimeTypeFromContent(content, doc.documentName, self.service)
mime_type = detectMimeTypeFromContent(content, doc.filename, self.service)
return {
# Add result label to filename for document objects too 'fileName': doc.documentName,
base_filename = doc.filename 'fileSize': len(str(doc.documentData)),
if hasattr(action, 'execResultLabel') and action.execResultLabel: 'mimeType': mime_type,
result_label = action.execResultLabel.strip() 'content': doc.documentData,
if result_label: 'document': doc
# Check if filename already starts with resultLabel to avoid duplication }
if not base_filename.startswith(f"{result_label}-"):
base_filename = f"{result_label}-{base_filename}"
logger.info(f"Added resultLabel '{result_label}' as prefix to document object filename: {base_filename}")
else:
logger.info(f"Document object filename already has resultLabel prefix: {base_filename}")
return {
'filename': base_filename,
'fileSize': getattr(doc, 'fileSize', 0),
'mimeType': mime_type,
'content': getattr(doc, 'content', ''),
'document': doc
}
elif hasattr(doc, 'documentName') and doc.documentName:
# ActionDocument object with documentName attribute
base_filename = doc.documentName
mime_type = getattr(doc, 'mimeType', 'application/octet-stream')
content = getattr(doc, 'documentData', '')
# Add result label to filename for ActionDocument objects
if hasattr(action, 'execResultLabel') and action.execResultLabel:
result_label = action.execResultLabel.strip()
if result_label:
# Check if filename already starts with resultLabel to avoid duplication
if not base_filename.startswith(f"{result_label}-"):
base_filename = f"{result_label}-{base_filename}"
logger.info(f"Added resultLabel '{result_label}' as prefix to ActionDocument filename: {base_filename}")
else:
logger.info(f"ActionDocument filename already has resultLabel prefix: {base_filename}")
# Calculate file size from actual content
fileSize = len(str(content)) if content else 0
logger.info(f"Processed ActionDocument: {base_filename}, content length: {len(str(content))}, mimeType: {mime_type}")
return {
'filename': base_filename,
'fileSize': fileSize,
'mimeType': mime_type,
'content': content,
'document': doc
}
elif isinstance(doc, dict):
# Dictionary format document - handle both 'documentName' and 'filename' keys
base_filename = doc.get('documentName', doc.get('filename', ''))
# Debug logging for resultLabel
if hasattr(action, 'execResultLabel'):
logger.info(f"Action {action.execMethod}.{action.execAction} has execResultLabel: '{action.execResultLabel}' (type: {type(action.execResultLabel)})")
else:
logger.info(f"Action {action.execMethod}.{action.execAction} has NO execResultLabel attribute")
# If no filename provided, generate one with action info
if not base_filename:
timestamp = int(get_utc_timestamp())
base_filename = f"{action.execMethod}_{action.execAction}_{timestamp}"
# ALWAYS add result label to filename for better document selection
# This ensures consistent naming regardless of whether filename was provided or generated
if hasattr(action, 'execResultLabel') and action.execResultLabel:
result_label = action.execResultLabel.strip()
if result_label:
# Check if filename already starts with resultLabel to avoid duplication
if not base_filename.startswith(f"{result_label}-"):
base_filename = f"{result_label}-{base_filename}"
logger.info(f"Added resultLabel '{result_label}' as prefix to filename: {base_filename}")
else:
logger.info(f"Filename already has resultLabel prefix: {base_filename}")
else:
logger.info(f"No resultLabel available for action {action.execMethod}.{action.execAction}")
filename = base_filename
mimeType = doc.get('mimeType', 'application/octet-stream')
# Handle documentData structure - it might be a dict with 'content' key or direct content
document_data = doc.get('documentData', '')
if isinstance(document_data, dict) and 'content' in document_data:
# This is the structure returned by extract action: documentData.content
content = document_data['content']
# Also check for other potential content fields
if not content and 'data' in document_data:
content = document_data['data']
else:
# Direct content (fallback)
content = document_data
# Calculate file size from actual content
fileSize = len(str(content)) if content else 0
# Detect mime type if not specified
if mimeType == "application/octet-stream":
mimeType = detectMimeTypeFromContent(content, filename, self.service)
logger.info(f"Processed document: {filename}, content length: {len(str(content))}, mimeType: {mimeType}")
return {
'filename': filename,
'fileSize': fileSize,
'mimeType': mimeType,
'content': content,
'document': doc
}
else:
# Unknown document type
logger.warning(f"Unknown document type for action {action.execMethod}.{action.execAction}: {type(doc)}")
timestamp = int(get_utc_timestamp())
base_filename = f"{action.execMethod}_{action.execAction}_{timestamp}"
# ALWAYS add result label to filename for better document selection
# This ensures consistent naming regardless of document type
if hasattr(action, 'execResultLabel') and action.execResultLabel:
result_label = action.execResultLabel.strip()
if result_label:
# Check if filename already starts with resultLabel to avoid duplication
if not base_filename.startswith(f"{result_label}-"):
base_filename = f"{result_label}-{base_filename}"
logger.info(f"Added resultLabel '{result_label}' as prefix to fallback filename: {base_filename}")
else:
logger.info(f"Fallback filename already has resultLabel prefix: {base_filename}")
else:
logger.info(f"No resultLabel available for action {action.execMethod}.{action.execAction}")
filename = base_filename
mimeType = detectMimeTypeFromContent(doc, filename, self.service)
return {
'filename': filename,
'fileSize': 0,
'mimeType': mimeType,
'content': str(doc),
'document': doc
}
except Exception as e: except Exception as e:
logger.error(f"Error processing single document: {str(e)}") logger.error(f"Error processing single document: {str(e)}")
return None return None
@ -209,7 +78,7 @@ class DocumentGenerator:
def createDocumentsFromActionResult(self, action_result, action, workflow) -> List[Any]: def createDocumentsFromActionResult(self, action_result, action, workflow) -> List[Any]:
""" """
Create actual document objects from action result and store them in the system. Create actual document objects from action result and store them in the system.
Returns a list of created document objects. Returns a list of created document objects with proper workflow context.
""" """
try: try:
logger.info(f"Creating documents from action result for {action.execMethod}.{action.execAction}") logger.info(f"Creating documents from action result for {action.execMethod}.{action.execAction}")
@ -221,7 +90,7 @@ class DocumentGenerator:
created_documents = [] created_documents = []
for i, doc_data in enumerate(processed_docs): for i, doc_data in enumerate(processed_docs):
try: try:
document_name = doc_data['filename'] document_name = doc_data['fileName']
document_data = doc_data['content'] document_data = doc_data['content']
mime_type = doc_data['mimeType'] mime_type = doc_data['mimeType']
@ -260,12 +129,14 @@ class DocumentGenerator:
existing_file_id=file_id existing_file_id=file_id
) )
if document: if document:
# Set workflow context on the document if possible
self._setDocumentWorkflowContext(document, action, workflow)
created_documents.append(document) created_documents.append(document)
logger.info(f"Successfully created ChatDocument: {document_name} (ID: {getattr(document, 'id', 'N/A')}, fileId: {getattr(document, 'fileId', 'N/A')})") logger.info(f"Successfully created ChatDocument: {document_name} (ID: {document.id if hasattr(document, 'id') else 'N/A'}, fileId: {document.fileId if hasattr(document, 'fileId') else 'N/A'})")
else: else:
logger.error(f"Failed to create ChatDocument object for {document_name}") logger.error(f"Failed to create ChatDocument object for {document_name}")
except Exception as e: except Exception as e:
logger.error(f"Error creating document {doc_data.get('filename', 'unknown')}: {str(e)}") logger.error(f"Error creating document {doc_data.get('fileName', 'unknown')}: {str(e)}")
continue continue
logger.info(f"Successfully created {len(created_documents)} documents") logger.info(f"Successfully created {len(created_documents)} documents")
@ -273,3 +144,36 @@ class DocumentGenerator:
except Exception as e: except Exception as e:
logger.error(f"Error creating documents from action result: {str(e)}") logger.error(f"Error creating documents from action result: {str(e)}")
return [] return []
def _setDocumentWorkflowContext(self, document, action, workflow):
    """Stamp the current workflow position and metadata onto a document.

    This is best effort: only attributes the document already exposes
    (per ``hasattr``) are assigned, and any exception raised along the way
    is logged as a warning instead of propagating to the caller.

    Args:
        document: Object to annotate in place.
        action: Action that produced the document; its ``id`` is copied to
            ``document.actionId`` when present, otherwise ``None`` is used.
        workflow: Fallback source for ``id`` and ``status`` when the
            workflow stats do not contain ``workflowId``/``workflowStatus``.
    """
    try:
        # Both lookups are served by the service object held on self.
        context = self.service.getWorkflowContext()
        stats = self.service.getWorkflowStats()

        round_no = context.get('currentRound', 1)
        task_no = context.get('currentTask', 0)
        action_no = context.get('currentAction', 0)

        # Position counters: written only where the document has the field.
        position_fields = (
            ('roundNumber', round_no),
            ('taskNumber', task_no),
            ('actionNumber', action_no),
        )
        for field_name, field_value in position_fields:
            if hasattr(document, field_name):
                setattr(document, field_name, field_value)

        if hasattr(document, 'actionId'):
            action_id = None
            if hasattr(action, 'id'):
                action_id = action.id
            document.actionId = action_id

        # Workflow metadata: stats win, the workflow object is the fallback.
        if hasattr(document, 'workflowId'):
            fallback_id = workflow.id if hasattr(workflow, 'id') else None
            document.workflowId = stats.get('workflowId', fallback_id)

        if hasattr(document, 'workflowStatus'):
            fallback_status = workflow.status if hasattr(workflow, 'status') else 'unknown'
            document.workflowStatus = stats.get('workflowStatus', fallback_status)

        logger.debug(f"Set workflow context on document: Round {round_no}, Task {task_no}, Action {action_no}")

        shown_id = document.workflowId if hasattr(document, 'workflowId') else 'N/A'
        shown_status = document.workflowStatus if hasattr(document, 'workflowStatus') else 'N/A'
        logger.debug(f"Document workflow metadata: ID={shown_id}, Status={shown_status}")

    except Exception as err:
        # Deliberately non-fatal: a document without context is still usable.
        logger.warning(f"Could not set workflow context on document: {str(err)}")

View file

@ -4,10 +4,10 @@ from typing import Any, Dict
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def getFileExtension(fileName: str) -> str:
    """Return the lower-cased extension of ``fileName`` without the leading dot.

    Only the final path component is inspected, so a dot inside a directory
    name is not mistaken for an extension. Both forward and backward slashes
    are treated as path separators.

    Args:
        fileName: A file name or path. Empty or ``None`` yields ``''``.

    Returns:
        The text after the last dot of the final component, lower-cased, or
        ``''`` when that component contains no dot.
    """
    if not fileName:
        return ''
    # Normalise separators, then keep only the last component.
    base_name = fileName.replace('\\', '/').rsplit('/', 1)[-1]
    if '.' not in base_name:
        return ''
    return base_name.rsplit('.', 1)[-1].lower()
def getMimeTypeFromExtension(extension: str, service=None) -> str: def getMimeTypeFromExtension(extension: str, service=None) -> str:
@ -36,22 +36,22 @@ def getMimeTypeFromExtension(extension: str, service=None) -> str:
} }
return mapping.get(extension.lower(), 'application/octet-stream') return mapping.get(extension.lower(), 'application/octet-stream')
def detectMimeTypeFromData(file_bytes: bytes, fileName: str, service=None) -> str:
    """Resolve a MIME type for raw bytes, preferring the service's own detection.

    When ``service`` is given, its ``detectContentTypeFromData`` result is used
    unless it is empty or the generic ``'application/octet-stream'``. Otherwise
    the type is looked up from the extension of ``fileName``. Any exception is
    logged as a warning and ``'application/octet-stream'`` is returned.
    """
    try:
        sniffed = service.detectContentTypeFromData(file_bytes, fileName) if service else None
        if sniffed and sniffed != 'application/octet-stream':
            return sniffed
        # Nothing specific detected: fall back to the extension mapping.
        return getMimeTypeFromExtension(getFileExtension(fileName), service)
    except Exception as e:
        logger.warning(f"Error in MIME type detection for {fileName}: {str(e)}")
        return 'application/octet-stream'
def detectMimeTypeFromContent(content: Any, filename: str, service=None) -> str: def detectMimeTypeFromContent(content: Any, fileName: str, service=None) -> str:
"""Detect MIME type from content and filename using a service if provided.""" """Detect MIME type from content and fileName using a service if provided."""
try: try:
if isinstance(content, str): if isinstance(content, str):
file_bytes = content.encode('utf-8') file_bytes = content.encode('utf-8')
@ -59,9 +59,9 @@ def detectMimeTypeFromContent(content: Any, filename: str, service=None) -> str:
file_bytes = json.dumps(content, ensure_ascii=False).encode('utf-8') file_bytes = json.dumps(content, ensure_ascii=False).encode('utf-8')
else: else:
file_bytes = str(content).encode('utf-8') file_bytes = str(content).encode('utf-8')
return detectMimeTypeFromData(file_bytes, filename, service) return detectMimeTypeFromData(file_bytes, fileName, service)
except Exception as e: except Exception as e:
logger.warning(f"Error in MIME type detection for {filename}: {str(e)}") logger.warning(f"Error in MIME type detection for {fileName}: {str(e)}")
return 'application/octet-stream' return 'application/octet-stream'
def convertDocumentDataToString(document_data: Any, file_extension: str) -> str: def convertDocumentDataToString(document_data: Any, file_extension: str) -> str:

View file

@ -29,29 +29,6 @@ class TaskExecutionState:
self.failed_actions.append(action_result) self.failed_actions.append(action_result)
self.current_action_index += 1 self.current_action_index += 1
def getAvailableResults(self) -> list:
"""Get available results from successful actions"""
results = []
for action in self.successful_actions:
if action.documents:
# Extract text content from documents
for doc in action.documents:
if hasattr(doc, 'documentData'):
if isinstance(doc.documentData, dict):
result_text = doc.documentData.get("result", "")
elif isinstance(doc.documentData, str):
result_text = doc.documentData
else:
result_text = str(doc.documentData)
if result_text and result_text.strip():
results.append(result_text)
return results
def shouldRetryTask(self) -> bool:
"""Determine if task should be retried based on failure patterns"""
return len(self.failed_actions) > 0 and self.canRetry()
def canRetry(self) -> bool:
    """Report whether fewer retries have been used than ``max_retries`` allows."""
    attempts_used, attempts_allowed = self.retry_count, self.max_retries
    return attempts_used < attempts_allowed

View file

@ -12,8 +12,13 @@ from modules.interfaces.interfaceChatModel import (
) )
from modules.shared.timezoneUtils import get_utc_timestamp from modules.shared.timezoneUtils import get_utc_timestamp
from .executionState import TaskExecutionState from .executionState import TaskExecutionState
from .promptFactory import createTaskPlanningPrompt, createActionDefinitionPrompt, createResultReviewPrompt from .promptFactory import (
createTaskPlanningPrompt,
createActionDefinitionPrompt,
createResultReviewPrompt
)
from modules.chat.documents.documentGeneration import DocumentGenerator from modules.chat.documents.documentGeneration import DocumentGenerator
import uuid
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -58,16 +63,43 @@ class HandlingTasks:
logger.info(f"Generating task plan for workflow {workflow.id}") logger.info(f"Generating task plan for workflow {workflow.id}")
available_docs = self.service.getAvailableDocuments(workflow) available_docs = self.service.getAvailableDocuments(workflow)
# Set initial workflow context
self.service.setWorkflowContext(round_number=1, task_number=0, action_number=0)
# Check workflow status before calling AI service # Check workflow status before calling AI service
self._checkWorkflowStopped() self._checkWorkflowStopped()
# Create proper context object for task planning
# For task planning, we need to create a minimal TaskStep since TaskContext requires it
from modules.interfaces.interfaceChatModel import TaskStep
planning_task_step = TaskStep(
id="planning",
objective=userInput,
dependencies=[],
success_criteria=[],
estimated_complexity="medium"
)
task_planning_context = TaskContext(
task_step=planning_task_step,
workflow=workflow,
workflow_id=workflow.id,
available_documents=available_docs,
available_connections=[],
previous_results=[],
previous_handover=None,
improvements=[],
retry_count=0,
previous_action_results=[],
previous_review_result=None,
is_regeneration=False,
failure_patterns=[],
failed_actions=[],
successful_actions=[]
)
prompt = await self.service.callAiTextAdvanced( prompt = await self.service.callAiTextAdvanced(
createTaskPlanningPrompt({ createTaskPlanningPrompt(task_planning_context, self.service)
'user_request': userInput,
'available_documents': available_docs,
'workflow_id': workflow.id
})
) )
# Inline _parseTaskPlanResponse logic # Inline _parseTaskPlanResponse logic
try: try:
@ -100,12 +132,16 @@ class HandlingTasks:
tasks=tasks tasks=tasks
) )
# Set workflow totals for progress tracking
total_tasks = len(tasks)
self.service.setWorkflowTotals(total_tasks=total_tasks)
logger.info(f"Task plan generated successfully with {len(tasks)} tasks") logger.info(f"Task plan generated successfully with {len(tasks)} tasks")
# Log the generated tasks # Log the generated tasks
for i, task in enumerate(tasks): for i, task in enumerate(tasks):
logger.info(f" Task {i+1}: {task.objective}") logger.info(f" Task {i+1}: {task.objective}")
if hasattr(task, 'success_criteria') and task.success_criteria: if task.success_criteria:
logger.info(f" Success criteria: {task.success_criteria}") logger.info(f" Success criteria: {task.success_criteria}")
# Log the complete task plan # Log the complete task plan
@ -118,11 +154,220 @@ class HandlingTasks:
logger.info(f"AI Response with task plan: {prompt}") logger.info(f"AI Response with task plan: {prompt}")
logger.info("=== END RAW AI TASK PLAN JSON ===") logger.info("=== END RAW AI TASK PLAN JSON ===")
# PHASE 3: Create chat message containing the task plan
await self.createTaskPlanMessage(task_plan, workflow)
return task_plan return task_plan
except Exception as e: except Exception as e:
logger.error(f"Error in generateTaskPlan: {str(e)}") logger.error(f"Error in generateTaskPlan: {str(e)}")
raise raise
async def createTaskPlanMessage(self, task_plan: TaskPlan, workflow):
    """Publish the generated task plan to the workflow chat as an assistant message.

    The message lists the plan overview, the number of tasks, and for every
    task its objective plus optional user message and success criteria. It is
    created through ``self.chatInterface.createWorkflowMessage`` and, when that
    succeeds, appended to ``workflow.messages``. Errors are logged, not raised.

    Args:
        task_plan: Plan whose ``overview``, ``tasks`` and ``userMessage`` are rendered.
        workflow: Workflow supplying ``id`` and the ``messages`` list.
    """
    try:
        planned_steps = task_plan.tasks

        # Collect fragments and join once instead of growing a string.
        fragments = [
            "📋 **Task Plan Generated**\n\n",
            f"**Overview:** {task_plan.overview}\n\n",
            f"**Total Tasks:** {len(planned_steps)}\n\n",
        ]
        for position, step in enumerate(planned_steps, start=1):
            fragments.append(f"**Task {position}:** {step.objective}\n")
            if step.userMessage:
                fragments.append(f" 💬 {step.userMessage}\n")
            if step.success_criteria:
                joined_criteria = ', '.join(step.success_criteria)
                fragments.append(f" ✅ Success Criteria: {joined_criteria}\n")
            fragments.append("\n")

        plan_note = task_plan.userMessage
        if plan_note:
            fragments.append(f"**Plan Summary:** {plan_note}\n\n")

        payload = {
            "workflowId": workflow.id,
            "role": "assistant",
            "message": ''.join(fragments),
            "status": "step",
            "sequenceNr": len(workflow.messages) + 1,
            "publishedAt": get_utc_timestamp(),
            "documentsLabel": "task_plan",
            "documents": [],
            # The plan precedes every task: round 1, task 0, action 0.
            "roundNumber": 1,
            "taskNumber": 0,
            "actionNumber": 0
        }

        created = self.chatInterface.createWorkflowMessage(payload)
        if not created:
            logger.error("Failed to create task plan message")
            return
        workflow.messages.append(created)
        logger.info(f"Task plan message created with {len(task_plan.tasks)} tasks")
    except Exception as e:
        logger.error(f"Error creating task plan message: {str(e)}")
async def createDocumentContextMessage(self, documents: List, workflow):
    """Post a chat message describing the available documents and workflow position.

    A prompt built by ``createDocumentContextPrompt`` is sent to
    ``self.service.callAiTextAdvanced``; the reply is expected to contain a JSON
    object with ``documentSummary``, ``workflowProgress``, ``overallUserMessage``
    and ``documentDetails``. When no JSON object can be parsed from the reply, a
    fallback message carrying only the document count and workflow position is
    used instead. The message is created through
    ``self.chatInterface.createWorkflowMessage`` and appended to
    ``workflow.messages``. Errors are logged, not raised.

    Args:
        documents: Documents to describe; also attached to the message.
        workflow: Workflow supplying ``id`` and the ``messages`` list.
    """
    try:
        from .promptFactory import createDocumentContextPrompt

        # Get user language from service
        user_language = self.service.user.language if self.service and self.service.user else 'en'

        # Current workflow position and totals, read once and reused below.
        workflow_context = self.service.getWorkflowContext()
        workflow_stats = self.service.getWorkflowStats()
        current_round = workflow_context.get('currentRound', 1)
        current_task = workflow_context.get('currentTask', 0)
        current_action = workflow_context.get('currentAction', 0)
        total_tasks = workflow_stats.get('totalTasks', 0)
        total_actions = workflow_stats.get('totalActions', 0)
        workflow_status = workflow_stats.get('workflowStatus', 'unknown')

        context = {
            'documents': documents,
            'workflow_context': {
                'currentRound': current_round,
                'totalTasks': total_tasks,
                'currentTask': current_task,
                'totalActions': total_actions,
                'currentAction': current_action,
                'workflowStatus': workflow_status,
                'workflowId': workflow_stats.get('workflowId', 'unknown')
            },
            'user_language': user_language
        }

        # The workflow-position block is identical in the AI-driven and the
        # fallback message, so render it a single time. Totals are shown only
        # when known (> 0); a None total is treated as unknown.
        task_progress = f"{current_task}/{total_tasks}" if (total_tasks or 0) > 0 else f"{current_task}"
        action_progress = f"{current_action}/{total_actions}" if (total_actions or 0) > 0 else f"{current_action}"
        workflow_block = (
            "**Workflow Context:**\n"
            f"- Round: {current_round}\n"
            f"- Task: {task_progress}\n"
            f"- Action: {action_progress}\n"
            f"- Status: {workflow_status}\n\n"
        )
        fallback_text = (
            "📄 **Document Context**\n\n"
            f"**Total Documents:** {len(documents)}\n\n"
            + workflow_block
            + "Document context information is available for processing."
        )

        # Generate enhanced document context using AI
        prompt = createDocumentContextPrompt(context)
        response = await self.service.callAiTextAdvanced(prompt)

        message_text = fallback_text
        try:
            json_start = response.find('{')
            if json_start != -1:
                # raw_decode parses one complete JSON value starting at json_start
                # and ignores trailing text. Slicing up to the first '}' would
                # truncate any reply that contains nested objects.
                doc_context, _ = json.JSONDecoder().raw_decode(response, json_start)

                sections = [
                    "📄 **Document Context**\n\n",
                    f"**Summary:** {doc_context.get('documentSummary', 'No summary available')}\n\n",
                    f"**Workflow Progress:** {doc_context.get('workflowProgress', 'No progress info')}\n\n",
                    workflow_block,
                ]

                overall_message = doc_context.get('overallUserMessage')
                if overall_message:
                    sections.append(f"💬 {overall_message}\n\n")

                document_details = doc_context.get('documentDetails', [])
                if document_details:
                    sections.append("**Document Details:**\n")
                    for doc_detail in document_details:
                        sections.append(
                            f"- {doc_detail.get('workflowLabel', 'Unknown')}: {doc_detail.get('fileName', 'Unknown file')}\n"
                        )
                        user_msg = doc_detail.get('userMessage')
                        if user_msg:
                            sections.append(f" 💬 {user_msg}\n")
                    sections.append("\n")

                message_text = ''.join(sections)
        except Exception as e:
            # Unparseable or unexpectedly shaped reply: keep the fallback text.
            logger.error(f"Error parsing document context AI response: {str(e)}")
            message_text = fallback_text

        # Create workflow message
        message_data = {
            "workflowId": workflow.id,
            "role": "assistant",
            "message": message_text,
            "status": "step",
            "sequenceNr": len(workflow.messages) + 1,
            "publishedAt": get_utc_timestamp(),
            "documentsLabel": "document_context",
            "documents": documents,
            # Add workflow context fields
            "roundNumber": current_round,
            "taskNumber": current_task,
            "actionNumber": current_action
        }

        message = self.chatInterface.createWorkflowMessage(message_data)
        if message:
            workflow.messages.append(message)
            logger.info(f"Document context message created with {len(documents)} documents")
        else:
            logger.error("Failed to create document context message")
    except Exception as e:
        logger.error(f"Error creating document context message: {str(e)}")
async def generateTaskActions(self, task_step, workflow, previous_results=None, enhanced_context=None) -> List[TaskAction]: async def generateTaskActions(self, task_step, workflow, previous_results=None, enhanced_context=None) -> List[TaskAction]:
"""Generate actions for a given task step.""" """Generate actions for a given task step."""
try: try:
@ -134,27 +379,51 @@ class HandlingTasks:
available_docs = self.service.getAvailableDocuments(workflow) available_docs = self.service.getAvailableDocuments(workflow)
available_connections = self.service.getConnectionReferenceList() available_connections = self.service.getConnectionReferenceList()
# Create proper context object for action definition
if enhanced_context and isinstance(enhanced_context, TaskContext):
# Use existing TaskContext if provided
action_context = TaskContext(
task_step=enhanced_context.task_step,
workflow=enhanced_context.workflow,
workflow_id=enhanced_context.workflow_id,
available_documents=enhanced_context.available_documents or available_docs,
available_connections=enhanced_context.available_connections or available_connections,
previous_results=enhanced_context.previous_results or previous_results or [],
previous_handover=enhanced_context.previous_handover,
improvements=enhanced_context.improvements or [],
retry_count=enhanced_context.retry_count or 0,
previous_action_results=enhanced_context.previous_action_results or [],
previous_review_result=enhanced_context.previous_review_result,
is_regeneration=enhanced_context.is_regeneration or False,
failure_patterns=enhanced_context.failure_patterns or [],
failed_actions=enhanced_context.failed_actions or [],
successful_actions=enhanced_context.successful_actions or []
)
else:
# Create new context from scratch
action_context = TaskContext(
task_step=task_step,
workflow=workflow,
workflow_id=workflow.id,
available_documents=available_docs,
available_connections=available_connections,
previous_results=previous_results or [],
previous_handover=None,
improvements=[],
retry_count=0,
previous_action_results=[],
previous_review_result=None,
is_regeneration=False,
failure_patterns=[],
failed_actions=[],
successful_actions=[]
)
context = enhanced_context or TaskContext(
task_step=task_step,
workflow=workflow,
workflow_id=workflow.id,
available_documents=available_docs,
previous_results=previous_results or [],
improvements=[],
retry_count=0,
previous_action_results=[],
previous_review_result=None,
is_regeneration=False,
failure_patterns=[],
failed_actions=[],
successful_actions=[]
)
# Check workflow status before calling AI service # Check workflow status before calling AI service
self._checkWorkflowStopped() self._checkWorkflowStopped()
prompt = await self.service.callAiTextAdvanced( prompt = await self.service.callAiTextAdvanced(
await createActionDefinitionPrompt(context, self.service) await createActionDefinitionPrompt(action_context, self.service)
) )
# Inline parseActionResponse logic here # Inline parseActionResponse logic here
json_start = prompt.find('{') json_start = prompt.find('{')
@ -170,18 +439,20 @@ class HandlingTasks:
if 'actions' not in action_data: if 'actions' not in action_data:
raise ValueError("Action response missing 'actions' field") raise ValueError("Action response missing 'actions' field")
actions = action_data['actions'] actions = action_data['actions']
if not self._validateActions(actions, context): if not self._validateActions(actions, action_context):
logger.error("Generated actions failed validation") logger.error("Generated actions failed validation")
raise Exception("AI-generated actions failed validation - AI is required for action generation") raise Exception("AI-generated actions failed validation - AI is required for action generation")
# Convert to TaskAction objects # Convert to TaskAction objects
task_actions = [self.chatInterface.createTaskAction({ task_actions = [self.createTaskAction({
"execMethod": a.get('method', 'unknown'), "execMethod": a.get('method', 'unknown'),
"execAction": a.get('action', 'unknown'), "execAction": a.get('action', 'unknown'),
"execParameters": a.get('parameters', {}), "execParameters": a.get('parameters', {}),
"execResultLabel": a.get('resultLabel', ''), "execResultLabel": a.get('resultLabel', ''),
"expectedDocumentFormats": a.get('expectedDocumentFormats', None), "expectedDocumentFormats": a.get('expectedDocumentFormats', None),
"status": TaskStatus.PENDING "status": TaskStatus.PENDING,
# Extract user-friendly message if available
"userMessage": a.get('userMessage', None)
}) for a in actions] }) for a in actions]
valid_actions = [ta for ta in task_actions if ta] valid_actions = [ta for ta in task_actions if ta]
@ -214,6 +485,11 @@ class HandlingTasks:
"""Execute all actions for a task step, with state management and retries.""" """Execute all actions for a task step, with state management and retries."""
logger.info(f"=== STARTING TASK {task_index or '?'}: {task_step.objective} ===") logger.info(f"=== STARTING TASK {task_index or '?'}: {task_step.objective} ===")
# Update workflow context for this task
if task_index is not None:
self.service.setWorkflowContext(task_number=task_index)
self.service.incrementWorkflowContext('task')
# Create database log entry for task start in format expected by frontend # Create database log entry for task start in format expected by frontend
if task_index is not None: if task_index is not None:
if total_tasks is not None: if total_tasks is not None:
@ -239,9 +515,17 @@ class HandlingTasks:
"sequenceNr": len(workflow.messages) + 1, "sequenceNr": len(workflow.messages) + 1,
"publishedAt": get_utc_timestamp(), "publishedAt": get_utc_timestamp(),
"documentsLabel": f"task_{task_index}_start", "documentsLabel": f"task_{task_index}_start",
"documents": [] "documents": [],
# Add workflow context fields
"roundNumber": 1, # Task start is always round 1
"taskNumber": task_index,
"actionNumber": 0
} }
# Add user-friendly message if available
if task_step.userMessage:
task_start_message["message"] += f"\n\n💬 {task_step.userMessage}"
message = self.chatInterface.createWorkflowMessage(task_start_message) message = self.chatInterface.createWorkflowMessage(task_start_message)
if message: if message:
workflow.messages.append(message) workflow.messages.append(message)
@ -256,6 +540,10 @@ class HandlingTasks:
# Check workflow status before starting task execution # Check workflow status before starting task execution
self._checkWorkflowStopped() self._checkWorkflowStopped()
# Update retry context with current attempt information
if retry_context:
retry_context.retry_count = attempt + 1
actions = await self.generateTaskActions(task_step, workflow, previous_results=retry_context.previous_results, enhanced_context=retry_context) actions = await self.generateTaskActions(task_step, workflow, previous_results=retry_context.previous_results, enhanced_context=retry_context)
if not actions: if not actions:
logger.error("No actions defined for task step, aborting task execution") logger.error("No actions defined for task step, aborting task execution")
@ -265,13 +553,25 @@ class HandlingTasks:
total_actions = len(actions) total_actions = len(actions)
logger.info(f"Task {task_index or '?'} has {total_actions} actions") logger.info(f"Task {task_index or '?'} has {total_actions} actions")
# Set workflow action total for this task
self.service.setWorkflowTotals(total_actions=total_actions)
# Create document context message if documents are available
available_docs = self.service.getAvailableDocuments(workflow)
if available_docs:
await self.createDocumentContextMessage(available_docs, workflow)
action_results = [] action_results = []
for action_idx, action in enumerate(actions): for action_idx, action in enumerate(actions):
# Check workflow status before each action execution # Check workflow status before each action execution
self._checkWorkflowStopped() self._checkWorkflowStopped()
# Log action start in format expected by frontend # Update workflow context for this action
action_number = action_idx + 1 action_number = action_idx + 1
self.service.setWorkflowContext(action_number=action_number)
self.service.incrementWorkflowContext('action')
# Log action start in format expected by frontend
logger.info(f"Task {task_index} - Starting action {action_number}/{total_actions}") logger.info(f"Task {task_index} - Starting action {action_number}/{total_actions}")
# Create database log entry for action start # Create database log entry for action start
@ -293,6 +593,17 @@ class HandlingTasks:
"documents": [] "documents": []
} }
# Add user-friendly message if available
if action.userMessage:
action_start_message["message"] += f"\n\n💬 {action.userMessage}"
# Add workflow context fields
action_start_message.update({
"roundNumber": 1, # Action start is always round 1
"taskNumber": task_index,
"actionNumber": action_number
})
message = self.chatInterface.createWorkflowMessage(action_start_message) message = self.chatInterface.createWorkflowMessage(action_start_message)
if message: if message:
workflow.messages.append(message) workflow.messages.append(message)
@ -340,9 +651,17 @@ class HandlingTasks:
"sequenceNr": len(workflow.messages) + 1, "sequenceNr": len(workflow.messages) + 1,
"publishedAt": get_utc_timestamp(), "publishedAt": get_utc_timestamp(),
"documentsLabel": f"task_{task_index}_completion", "documentsLabel": f"task_{task_index}_completion",
"documents": [] "documents": [],
# Add workflow context fields
"roundNumber": 1, # Task completion is always round 1
"taskNumber": task_index,
"actionNumber": 0
} }
# Add user-friendly message if available
if task_step.userMessage:
task_completion_message["message"] += f"\n\n💬 {task_step.userMessage}"
message = self.chatInterface.createWorkflowMessage(task_completion_message) message = self.chatInterface.createWorkflowMessage(task_completion_message)
if message: if message:
workflow.messages.append(message) workflow.messages.append(message)
@ -358,14 +677,18 @@ class HandlingTasks:
elif review_result.status == 'retry' and state.canRetry(): elif review_result.status == 'retry' and state.canRetry():
logger.warning(f"Task step '{task_step.objective}' requires retry: {review_result.improvements}") logger.warning(f"Task step '{task_step.objective}' requires retry: {review_result.improvements}")
state.incrementRetryCount() state.incrementRetryCount()
retry_context.retry_count = state.retry_count
retry_context.improvements = review_result.improvements # Update retry context with retry information
retry_context.previous_action_results = action_results if retry_context:
retry_context.previous_review_result = review_result retry_context.retry_count = state.retry_count
retry_context.is_regeneration = True retry_context.improvements = review_result.improvements
retry_context.failure_patterns = state.getFailurePatterns() retry_context.previous_action_results = action_results
retry_context.failed_actions = state.failed_actions retry_context.previous_review_result = review_result
retry_context.successful_actions = state.successful_actions retry_context.is_regeneration = True
retry_context.failure_patterns = state.getFailurePatterns()
retry_context.failed_actions = state.failed_actions
retry_context.successful_actions = state.successful_actions
continue continue
else: else:
logger.error(f"=== TASK {task_index or '?'} FAILED: {task_step.objective} after {attempt+1} attempts ===") logger.error(f"=== TASK {task_index or '?'} FAILED: {task_step.objective} after {attempt+1} attempts ===")
@ -395,7 +718,11 @@ class HandlingTasks:
"actionMethod": "task", "actionMethod": "task",
"actionName": "task_retry", "actionName": "task_retry",
"documentsLabel": None, "documentsLabel": None,
"documents": [] "documents": [],
# Add workflow context fields
"roundNumber": 1, # Task retry is always round 1
"taskNumber": task_index,
"actionNumber": 0
} }
try: try:
@ -442,7 +769,11 @@ class HandlingTasks:
"actionMethod": "task", "actionMethod": "task",
"actionName": "task_failure", "actionName": "task_failure",
"documentsLabel": None, "documentsLabel": None,
"documents": [] "documents": [],
# Add workflow context fields
"roundNumber": 1, # Task failure is always round 1
"taskNumber": task_index,
"actionNumber": 0
} }
try: try:
@ -468,10 +799,11 @@ class HandlingTasks:
# Check workflow status before reviewing task completion # Check workflow status before reviewing task completion
self._checkWorkflowStopped() self._checkWorkflowStopped()
# Create proper context object for result review
review_context = ReviewContext( review_context = ReviewContext(
task_step=task_step, task_step=task_step,
task_actions=task_actions,
action_results=action_results, action_results=action_results,
workflow=workflow,
step_result={ step_result={
'successful_actions': sum(1 for result in action_results if result.success), 'successful_actions': sum(1 for result in action_results if result.success),
'total_actions': len(action_results), 'total_actions': len(action_results),
@ -480,18 +812,21 @@ class HandlingTasks:
'documents': [ 'documents': [
{ {
'action_index': i, 'action_index': i,
'documents_count': len(result.documents) if hasattr(result, 'documents') and result.documents else 0, 'documents_count': len(result.documents) if result.documents else 0,
'documents': result.documents if hasattr(result, 'documents') and result.documents else [] 'documents': result.documents if result.documents else []
} }
for i, result in enumerate(action_results) for i, result in enumerate(action_results)
] ]
} },
workflow_id=workflow.id,
previous_results=[]
) )
# Check workflow status before calling AI service # Check workflow status before calling AI service
self._checkWorkflowStopped() self._checkWorkflowStopped()
# Use promptFactory for review prompt # Use promptFactory for review prompt
prompt = await createResultReviewPrompt(review_context) prompt = createResultReviewPrompt(review_context, self.service)
response = await self.service.callAiTextAdvanced(prompt) response = await self.service.callAiTextAdvanced(prompt)
# Inline parseReviewResponse logic here # Inline parseReviewResponse logic here
json_start = response.find('{') json_start = response.find('{')
@ -535,7 +870,9 @@ class HandlingTasks:
missing_outputs=[], missing_outputs=[],
met_criteria=met_criteria, met_criteria=met_criteria,
unmet_criteria=unmet_criteria, unmet_criteria=unmet_criteria,
confidence=review.get('confidence', 0.5) confidence=review.get('confidence', 0.5),
# Extract user-friendly message if available
userMessage=review.get('userMessage', None)
) )
# Enhanced validation logging # Enhanced validation logging
@ -566,20 +903,14 @@ class HandlingTasks:
self._checkWorkflowStopped() self._checkWorkflowStopped()
# Log handover status summary # Log handover status summary
if hasattr(review_result, 'status'): status = review_result.status if review_result else 'unknown'
status = review_result.status met = review_result.met_criteria if review_result and review_result.met_criteria else []
if hasattr(review_result, 'met_criteria'):
met = review_result.met_criteria
else:
met = []
handover_data = { handover_data = {
'task_id': task_step.id, 'task_id': task_step.id,
'task_description': task_step.objective, 'task_description': task_step.objective,
'actions': [action.to_dict() for action in task_actions], 'actions': [action.to_dict() for action in task_actions],
'review_result': review_result.to_dict() if hasattr(review_result, 'to_dict') else review_result, 'review_result': review_result.to_dict(),
'workflow_id': workflow.id, 'workflow_id': workflow.id,
'handover_time': get_utc_timestamp() 'handover_time': get_utc_timestamp()
} }
@ -589,6 +920,53 @@ class HandlingTasks:
logger.error(f"Error in prepareTaskHandover: {str(e)}") logger.error(f"Error in prepareTaskHandover: {str(e)}")
return {'error': str(e)} return {'error': str(e)}
def createTaskAction(self, actionData: Dict[str, Any]) -> 'Optional[TaskAction]':
    """Persist a new task action and return it as a TaskAction model.

    Args:
        actionData: Field values for the action. ``execMethod`` and
            ``execAction`` must be present. On a valid call the dict is
            completed in place: a missing/empty ``id`` is generated, and
            ``status`` / ``execParameters`` are defaulted when absent.

    Returns:
        The created TaskAction, or None when a required field is missing
        or when record creation / model construction raises.
    """
    try:
        # Validate required fields first so a rejected call leaves the
        # caller's dict untouched instead of half-populated.
        if "execMethod" not in actionData:
            logger.error("execMethod is required for task action")
            return None
        if "execAction" not in actionData:
            logger.error("execAction is required for task action")
            return None

        # Fill in defaults (in place, as before).
        if not actionData.get("id"):
            actionData["id"] = f"action_{uuid.uuid4()}"
        actionData.setdefault("status", TaskStatus.PENDING)
        actionData.setdefault("execParameters", {})

        # Create action in database
        createdAction = self.chatInterface.db.recordCreate("taskActions", actionData)

        # A record may carry an explicit null timestamp; float(None) would
        # raise after the record was already created, so fall back to "now"
        # for both the missing-key and the null-value case.
        timestamp = createdAction.get("timestamp")
        if timestamp is None:
            timestamp = get_utc_timestamp()

        # Convert to TaskAction model
        return TaskAction(
            id=createdAction["id"],
            execMethod=createdAction["execMethod"],
            execAction=createdAction["execAction"],
            execParameters=createdAction.get("execParameters", {}),
            execResultLabel=createdAction.get("execResultLabel"),
            expectedDocumentFormats=createdAction.get("expectedDocumentFormats"),
            status=createdAction.get("status", TaskStatus.PENDING),
            error=createdAction.get("error"),
            retryCount=createdAction.get("retryCount", 0),
            retryMax=createdAction.get("retryMax", 3),
            processingTime=createdAction.get("processingTime"),
            timestamp=float(timestamp),
            result=createdAction.get("result"),
            resultDocuments=createdAction.get("resultDocuments", []),
        )
    except Exception as e:
        # Best-effort contract: callers receive None on any failure. Keep
        # the traceback in the log so the cause is still diagnosable.
        logger.error(f"Error creating task action: {str(e)}", exc_info=True)
        return None
# --- Helper action handling methods --- # --- Helper action handling methods ---
async def executeSingleAction(self, action, workflow, task_step, task_index=None, action_index=None, total_actions=None): async def executeSingleAction(self, action, workflow, task_step, task_index=None, action_index=None, total_actions=None):
@ -638,9 +1016,9 @@ class HandlingTasks:
if result.documents and len(result.documents) > 0: if result.documents and len(result.documents) > 0:
# Try to get text content from the first document # Try to get text content from the first document
first_doc = result.documents[0] first_doc = result.documents[0]
if hasattr(first_doc, 'documentData') and isinstance(first_doc.documentData, dict): if isinstance(first_doc.documentData, dict):
action.result = first_doc.documentData.get("result", "") action.result = first_doc.documentData.get("result", "")
elif hasattr(first_doc, 'documentData') and isinstance(first_doc.documentData, str): elif isinstance(first_doc.documentData, str):
action.result = first_doc.documentData action.result = first_doc.documentData
# Preserve the action's execResultLabel for document routing # Preserve the action's execResultLabel for document routing
# Action methods should NOT return resultLabel - this is managed by the action handler # Action methods should NOT return resultLabel - this is managed by the action handler
@ -670,24 +1048,14 @@ class HandlingTasks:
if created_documents: if created_documents:
logger.info(f"Output documents ({len(created_documents)}):") logger.info(f"Output documents ({len(created_documents)}):")
for i, doc in enumerate(created_documents): for i, doc in enumerate(created_documents):
if hasattr(doc, 'filename'): logger.info(f" {i+1}. {doc.fileName}")
logger.info(f" {i+1}. {doc.filename}")
elif isinstance(doc, dict) and 'filename' in doc:
logger.info(f" {i+1}. {doc['filename']}")
else:
logger.info(f" {i+1}. {type(doc).__name__}")
# Log document details for debugging # Log document details for debugging
logger.info("Document details:") logger.info("Document details:")
for i, doc in enumerate(created_documents): for i, doc in enumerate(created_documents):
if hasattr(doc, 'filename'): logger.info(f" Doc {i+1}: fileName={doc.fileName}, type={type(doc)}")
logger.info(f" Doc {i+1}: filename={doc.filename}, type={type(doc)}") logger.info(f" ID: {doc.id}")
if hasattr(doc, 'id'): logger.info(f" File ID: {doc.fileId}")
logger.info(f" ID: {doc.id}")
if hasattr(doc, 'fileId'):
logger.info(f" File ID: {doc.fileId}")
elif isinstance(doc, dict):
logger.info(f" Doc {i+1}: dict with keys: {list(doc.keys())}")
else: else:
logger.info("Output: No documents created") logger.info("Output: No documents created")
else: else:
@ -716,7 +1084,7 @@ class HandlingTasks:
# Preserve the original documents field from the method result # Preserve the original documents field from the method result
# This ensures the standard document format is maintained # This ensures the standard document format is maintained
original_documents = result.documents if hasattr(result, 'documents') else [] original_documents = result.documents
# Extract result text from documents if available # Extract result text from documents if available
result_text = self._extractResultText(result) result_text = self._extractResultText(result)
@ -756,23 +1124,95 @@ class HandlingTasks:
else: else:
logger.info(f"Result label: {result_label} - No documents") logger.info(f"Result label: {result_label} - No documents")
# Get current workflow context and stats
workflow_context = self.service.getWorkflowContext()
workflow_stats = self.service.getWorkflowStats()
# Create a more meaningful message that includes task context # Create a more meaningful message that includes task context
task_objective = task_step.objective if task_step else 'Unknown task' task_objective = task_step.objective if task_step else 'Unknown task'
# Build a user-friendly message based on success/failure # Build a user-friendly message based on success/failure
if result.success: if result.success:
if created_documents and len(created_documents) > 0: if created_documents and len(created_documents) > 0:
doc_names = [doc.filename if hasattr(doc, 'filename') else str(doc) for doc in created_documents[:3]] doc_names = [doc.fileName for doc in created_documents[:3]]
if len(created_documents) > 3: if len(created_documents) > 3:
doc_names.append(f"... and {len(created_documents) - 3} more") doc_names.append(f"... and {len(created_documents) - 3} more")
message_text = f"✅ Task {task_index or '?'} - Action {action.execMethod}.{action.execAction} completed\n\nObjective: {task_objective}\n\nGenerated {len(created_documents)} document(s): {', '.join(doc_names)}" # Enhanced message with workflow context
message_text = f"✅ **Task {task_index or '?'} - Action {action.execMethod}.{action.execAction} Completed**\n\n"
message_text += f"**Objective:** {task_objective}\n\n"
message_text += f"**Generated {len(created_documents)} document(s):** {', '.join(doc_names)}\n\n"
message_text += f"**Result Label:** {result_label}\n"
# Add comprehensive workflow context
current_round = workflow_context.get('currentRound', 1)
current_task = workflow_context.get('currentTask', 0)
total_tasks = workflow_stats.get('totalTasks', 0)
current_action = workflow_context.get('currentAction', 0)
total_actions = workflow_stats.get('totalActions', 0)
message_text += f"**Workflow Context:**\n"
message_text += f"- Round: {current_round}\n"
if total_tasks > 0:
message_text += f"- Task: {current_task}/{total_tasks}\n"
else:
message_text += f"- Task: {current_task}\n"
if total_actions > 0:
message_text += f"- Action: {current_action}/{total_actions}\n"
else:
message_text += f"- Action: {current_action}\n"
message_text += f"- Status: {workflow_stats.get('workflowStatus', 'unknown')}"
else: else:
message_text = f"✅ Task {task_index or '?'} - Action {action.execMethod}.{action.execAction} completed\n\nObjective: {task_objective}\n\nAction executed successfully" message_text = f"✅ **Task {task_index or '?'} - Action {action.execMethod}.{action.execAction} Completed**\n\n"
message_text += f"**Objective:** {task_objective}\n\n"
message_text += "**Action executed successfully**\n\n"
message_text += f"**Result Label:** {result_label}\n"
# Add comprehensive workflow context
current_round = workflow_context.get('currentRound', 1)
current_task = workflow_context.get('currentTask', 0)
total_tasks = workflow_stats.get('totalTasks', 0)
current_action = workflow_context.get('currentAction', 0)
total_actions = workflow_stats.get('totalActions', 0)
message_text += f"**Workflow Context:**\n"
message_text += f"- Round: {current_round}\n"
if total_tasks > 0:
message_text += f"- Task: {current_task}/{total_tasks}\n"
else:
message_text += f"- Task: {current_task}\n"
if total_actions > 0:
message_text += f"- Action: {current_action}/{total_actions}\n"
else:
message_text += f"- Action: {current_action}\n"
message_text += f"- Status: {workflow_stats.get('workflowStatus', 'unknown')}"
else: else:
# ⚠️ FAILURE MESSAGE - Show error details to user # ⚠️ FAILURE MESSAGE - Show error details to user
error_details = result.error if result.error else "Unknown error occurred" error_details = result.error if result.error else "Unknown error occurred"
message_text = f"❌ Task {task_index or '?'} - Action {action.execMethod}.{action.execAction} failed\n\nObjective: {task_objective}\n\nError: {error_details}\n\nPlease check the connection and try again." message_text = f"❌ **Task {task_index or '?'} - Action {action.execMethod}.{action.execAction} Failed**\n\n"
message_text += f"**Objective:** {task_objective}\n\n"
message_text += f"**Error:** {error_details}\n\n"
message_text += f"**Result Label:** {result_label}\n"
# Add comprehensive workflow context
current_round = workflow_context.get('currentRound', 1)
current_task = workflow_context.get('currentTask', 0)
total_tasks = workflow_stats.get('totalTasks', 0)
current_action = workflow_context.get('currentAction', 0)
total_actions = workflow_stats.get('totalActions', 0)
message_text += f"**Workflow Context:**\n"
message_text += f"- Round: {current_round}\n"
if total_tasks > 0:
message_text += f"- Task: {current_task}/{total_tasks}\n"
else:
message_text += f"- Task: {current_task}\n"
if total_actions > 0:
message_text += f"- Action: {current_action}/{total_actions}\n"
else:
message_text += f"- Action: {current_action}\n"
message_text += f"- Status: {workflow_stats.get('workflowStatus', 'unknown')}\n\n"
message_text += "Please check the connection and try again."
message_data = { message_data = {
"workflowId": workflow.id, "workflowId": workflow.id,
@ -785,9 +1225,20 @@ class HandlingTasks:
"actionMethod": action.execMethod, "actionMethod": action.execMethod,
"actionName": action.execAction, "actionName": action.execAction,
"documentsLabel": result_label, "documentsLabel": result_label,
"documents": created_documents "documents": created_documents,
# Add workflow context fields
"roundNumber": workflow_context.get('currentRound', 1),
"taskNumber": task_index,
"actionNumber": workflow_context.get('currentAction', 0)
} }
# Add user-friendly message if available
if action.userMessage:
if result.success:
message_data["message"] += f"\n\n💬 {action.userMessage}"
else:
message_data["message"] += f"\n\n💬 Action was intended to: {action.userMessage}"
# Add debugging for error messages # Add debugging for error messages
if not result.success: if not result.success:
logger.info(f"Creating ERROR message: {message_text}") logger.info(f"Creating ERROR message: {message_text}")
@ -884,8 +1335,8 @@ class HandlingTasks:
logger.error(f"Action {i} missing required fields: {missing_fields}") logger.error(f"Action {i} missing required fields: {missing_fields}")
return False return False
result_label = action.get('resultLabel', '') result_label = action.get('resultLabel', '')
if not result_label.startswith('task'): if not result_label.startswith('round'):
logger.error(f"Action {i} result label must start with 'task': {result_label}") logger.error(f"Action {i} result label must start with 'round': {result_label}")
return False return False
parameters = action.get('parameters', {}) parameters = action.get('parameters', {})
if not isinstance(parameters, dict): if not isinstance(parameters, dict):
@ -904,9 +1355,8 @@ class HandlingTasks:
# Try to get text content from the first document # Try to get text content from the first document
first_doc = result.documents[0] first_doc = result.documents[0]
if hasattr(first_doc, 'documentData') and isinstance(first_doc.documentData, dict): if isinstance(first_doc.documentData, dict):
return first_doc.documentData.get("result", "") return first_doc.documentData.get("result", "")
elif hasattr(first_doc, 'documentData') and isinstance(first_doc.documentData, str): elif isinstance(first_doc.documentData, str):
return first_doc.documentData return first_doc.documentData
else: return ""
return ""

View file

@ -4,19 +4,29 @@
import json import json
import logging import logging
from typing import Any, Dict from typing import Any, Dict
from modules.interfaces.interfaceChatModel import TaskContext, ReviewContext
# Set up logger # Set up logger
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# Prompt creation helpers extracted from managerChat.py # Prompt creation helpers extracted from managerChat.py
def createTaskPlanningPrompt(context: Dict[str, Any]) -> str: def createTaskPlanningPrompt(context: TaskContext, service) -> str:
"""Create prompt for task planning""" """Create enhanced prompt for task planning with user-friendly message generation"""
return f"""You are a task planning AI that analyzes user requests and creates structured task plans. # Get user language directly from service.user.language
user_language = service.user.language if service and service.user else 'en'
# Extract user request from context - use Pydantic model directly
user_request = context.task_step.objective if context.task_step else 'No request specified'
# Extract available documents from context - use Pydantic model directly
available_documents = context.available_documents or []
return f"""You are a task planning AI that analyzes user requests and creates structured task plans with user-friendly feedback messages.
USER REQUEST: {context['user_request']} USER REQUEST: {user_request}
AVAILABLE DOCUMENTS: {', '.join(context['available_documents'])} AVAILABLE DOCUMENTS: {', '.join(available_documents)}
INSTRUCTIONS: INSTRUCTIONS:
1. Analyze the user request and available documents 1. Analyze the user request and available documents
@ -24,7 +34,8 @@ INSTRUCTIONS:
3. Focus on business outcomes, not technical operations 3. Focus on business outcomes, not technical operations
4. Each task should produce meaningful, usable outputs 4. Each task should produce meaningful, usable outputs
5. Ensure proper handover between tasks using result labels 5. Ensure proper handover between tasks using result labels
6. Return a JSON object with the exact structure shown below 6. Generate user-friendly messages for each task in the user's language ({user_language})
7. Return a JSON object with the exact structure shown below
TASK PLANNING PRINCIPLES: TASK PLANNING PRINCIPLES:
- Break down complex requests into logical, sequential steps - Break down complex requests into logical, sequential steps
@ -32,51 +43,45 @@ TASK PLANNING PRINCIPLES:
- Keep tasks at a meaningful level of abstraction - Keep tasks at a meaningful level of abstraction
- Each task should produce results that can be used by subsequent tasks - Each task should produce results that can be used by subsequent tasks
- Ensure clear dependencies and handovers between tasks - Ensure clear dependencies and handovers between tasks
- Provide clear, actionable user messages in the user's language ({user_language})
REQUIRED JSON STRUCTURE: REQUIRED JSON STRUCTURE:
{{ {{
\"overview\": \"Brief description of the overall plan\", "overview": "Brief description of the overall plan",
\"tasks\": [ "userMessage": "User-friendly message explaining the task plan in {user_language}",
"tasks": [
{{ {{
\"id\": \"task_1\", "id": "task_1",
\"objective\": \"Clear business objective this task accomplishes\", "objective": "Clear business objective this task accomplishes",
\"dependencies\": [\"task_0\"], // IDs of tasks that must complete first "dependencies": ["task_0"], // IDs of tasks that must complete first
\"success_criteria\": [\"criteria1\", \"criteria2\"], "success_criteria": ["criteria1", "criteria2"],
\"estimated_complexity\": \"low|medium|high\" "estimated_complexity": "low|medium|high",
"userMessage": "User-friendly message explaining what this task will accomplish in {user_language}"
}} }}
] ]
}} }}
EXAMPLES OF GOOD TASK OBJECTIVES: EXAMPLES OF GOOD TASK OBJECTIVES:
- \"Analyze documents and extract key insights for business communication\" - "Analyze documents and extract key insights for business communication"
- \"Create professional business communication incorporating analyzed information\" - "Create professional business communication incorporating analyzed information"
- \"Execute business communication using specified channels\" - "Execute business communication using specified channels"
- \"Document and store all business communication outcomes\" - "Document and store all business communication outcomes"
EXAMPLES OF GOOD SUCCESS CRITERIA: EXAMPLES OF GOOD SUCCESS CRITERIA:
- \"Key insights extracted and ready for business use\" - "Key insights extracted and ready for business use"
- \"Professional communication created with clear business value\" - "Professional communication created with clear business value"
- \"Business communication successfully delivered\" - "Business communication successfully delivered"
- \"All outcomes properly documented and accessible\" - "All outcomes properly documented and accessible"
EXAMPLES OF BAD TASK OBJECTIVES: EXAMPLES OF BAD TASK OBJECTIVES:
- \"Read the PDF file\" (too granular - should be \"Analyze document content\") - "Read the PDF file" (too granular - should be "Analyze document content")
- \"Convert data to CSV\" (implementation detail - should be \"Structure data for analysis\") - "Convert data to CSV" (implementation detail - should be "Structure data for analysis")
- \"Send email\" (too specific - should be \"Deliver business communication\") - "Send email" (too specific - should be "Deliver business communication")
NOTE: Respond with ONLY the JSON object. Do not include any explanatory text.""" NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
async def createActionDefinitionPrompt(context, service) -> str: async def createActionDefinitionPrompt(context: TaskContext, service) -> str:
"""Create prompt for action generation with enhanced document extraction guidance and retry context""" """Create enhanced prompt for action generation with user-friendly messages and enhanced document context"""
task_step = context.task_step
workflow = context.workflow
available_docs = context.available_documents or []
previous_results = context.previous_results or []
improvements = context.improvements or []
retry_count = context.retry_count or 0
previous_action_results = context.previous_action_results or []
previous_review_result = context.previous_review_result
previous_handover = getattr(context, 'previous_handover', None)
methodList = service.getMethodsList() methodList = service.getMethodsList()
method_actions = {} method_actions = {}
for sig in methodList: for sig in methodList:
@ -84,9 +89,12 @@ async def createActionDefinitionPrompt(context, service) -> str:
method, rest = sig.split('.', 1) method, rest = sig.split('.', 1)
action = rest.split('(')[0] action = rest.split('(')[0]
method_actions.setdefault(method, []).append((action, sig)) method_actions.setdefault(method, []).append((action, sig))
messageSummary = await service.summarizeChat(workflow.messages)
# Get ALL documents from the entire workflow, not just current round messageSummary = await service.summarizeChat(context.workflow.messages) if context.workflow else ""
docRefs = service.getDocumentReferenceList()
# Get enhanced document context using the new method
available_documents_str = service.getEnhancedDocumentContext()
connRefs = service.getConnectionReferenceList() connRefs = service.getConnectionReferenceList()
# Debug logging for connections # Debug logging for connections
@ -94,82 +102,60 @@ async def createActionDefinitionPrompt(context, service) -> str:
logging.debug(f"Connection references type: {type(connRefs)}") logging.debug(f"Connection references type: {type(connRefs)}")
logging.debug(f"Connection references length: {len(connRefs) if connRefs else 0}") logging.debug(f"Connection references length: {len(connRefs) if connRefs else 0}")
# Get documents from current round (chat) and entire workflow history
current_round_docs = docRefs.get('chat', [])
workflow_history_docs = docRefs.get('history', [])
# Combine all documents, prioritizing current round first, then workflow history
all_doc_refs = current_round_docs + workflow_history_docs
# Log document availability for debugging # Log document availability for debugging
logging.debug(f"Document references - Current round: {len(current_round_docs)}, Workflow history: {len(workflow_history_docs)}, Total: {len(all_doc_refs)}") logging.debug(f"Enhanced document context length: {len(available_documents_str)}")
available_methods_str = '' available_methods_str = ''
for method, actions in method_actions.items(): for method, actions in method_actions.items():
available_methods_str += f"- {method}:\n" available_methods_str += f"- {method}:\n"
for action, sig in actions: for action, sig in actions:
available_methods_str += f" - {action}: {sig}\n" available_methods_str += f" - {action}: {sig}\n"
retry_context = "" retry_context = ""
if retry_count > 0: if context.retry_count and context.retry_count > 0:
retry_context = f""" retry_context = f"""
RETRY CONTEXT (Attempt {retry_count}): RETRY CONTEXT (Attempt {context.retry_count}):
Previous action results that failed or were incomplete: Previous action results that failed or were incomplete:
""" """
for i, result in enumerate(previous_action_results): for i, result in enumerate(context.previous_action_results or []):
retry_context += f"- Action {i+1}: ActionResult\n" retry_context += f"- Action {i+1}: ActionResult\n"
retry_context += f" Status: {result.success and 'success' or 'failed'}\n" retry_context += f" Status: {result.success and 'success' or 'failed'}\n"
retry_context += f" Error: {result.error or 'None'}\n" retry_context += f" Error: {result.error or 'None'}\n"
# Check if result has documents and show document info # Check if result has documents and show document info
if hasattr(result, 'documents') and result.documents: if result.documents:
doc_info = f"Documents: {len(result.documents)} document(s)" doc_info = f"Documents: {len(result.documents)} document(s)"
if result.documents[0].documentName: if result.documents[0].documentName:
doc_info += f" - {result.documents[0].documentName}" doc_info += f" - {result.documents[0].documentName}"
retry_context += f" {doc_info}\n" retry_context += f" {doc_info}\n"
else: else:
retry_context += f" Documents: None\n" retry_context += f" Documents: None\n"
if previous_review_result:
if context.previous_review_result:
retry_context += f""" retry_context += f"""
Previous review feedback: Previous review feedback:
- Status: {previous_review_result.status or 'unknown'} - Status: {context.previous_review_result.get('status', 'unknown') or 'unknown'}
- Reason: {previous_review_result.reason or 'No reason provided'} - Reason: {context.previous_review_result.get('reason', 'No reason provided') or 'No reason provided'}
- Quality Score: {previous_review_result.quality_score or 0}/10 - Quality Score: {context.previous_review_result.get('quality_score', 0) or 0}/10
- Unmet Criteria: {', '.join(previous_review_result.unmet_criteria or [])} - Unmet Criteria: {', '.join(context.previous_review_result.get('unmet_criteria', []) or [])}
""" """
success_criteria_str = ', '.join(task_step.success_criteria or [])
previous_results_str = ', '.join(previous_results) if previous_results else 'None'
improvements_str = str(improvements) if improvements else 'None'
available_connections_str = '\n'.join(f"- {conn}" for conn in connRefs)
# Build comprehensive document list showing both current round and workflow history
if all_doc_refs:
available_documents_str = "CURRENT ROUND DOCUMENTS:\n"
if current_round_docs:
for doc in current_round_docs:
available_documents_str += f"- {doc.documentsLabel} contains {', '.join(doc.documents)}\n"
else:
available_documents_str += "- No documents in current round\n"
available_documents_str += "\nWORKFLOW HISTORY DOCUMENTS:\n"
if workflow_history_docs:
for doc in workflow_history_docs:
available_documents_str += f"- {doc.documentsLabel} contains {', '.join(doc.documents)}\n"
else:
available_documents_str += "- No documents in workflow history\n"
else:
available_documents_str = "NO DOCUMENTS AVAILABLE - This workflow has no documents to process."
# Debug logging for document availability # Use Pydantic model directly - no need for getattr
logging.debug(f"Available documents string length: {len(available_documents_str)}") success_criteria_str = ', '.join(context.task_step.success_criteria) if context.task_step and context.task_step.success_criteria else 'No criteria specified'
logging.debug(f"Current round docs count: {len(current_round_docs)}") previous_results_str = ', '.join(context.previous_results) if context.previous_results else 'None'
logging.debug(f"Workflow history docs count: {len(workflow_history_docs)}") improvements_str = str(context.improvements) if context.improvements else 'None'
logging.debug(f"Total doc refs: {len(all_doc_refs)}") available_connections_str = '\n'.join(f"- {conn}" for conn in connRefs)
# Get user language from service - this is the correct way
user_language = service.user.language if service and service.user else 'en'
prompt = f""" prompt = f"""
You are an action generation AI that creates specific actions to accomplish a task step. You are an action generation AI that creates specific actions to accomplish a task step with user-friendly messages.
DOCUMENT REFERENCE TYPES: DOCUMENT REFERENCE TYPES:
- docItem: Reference to a single document. Format: "docItem:<id>:<filename>" - docItem: Reference to a single document. Format: "docItem:<id>:<label>"
- docList: Reference to a group of documents under a label. Format: <label> (e.g., "task1_action2_results"). - docList: Reference to a group of documents under a label. Format: <label> (e.g., "round1_task2_action3_results").
- Each docList label maps to a list of docItem references (see AVAILABLE DOCUMENTS). - Each docList label maps to a list of docItem references (see AVAILABLE DOCUMENTS).
- A label like "task1_action2_results" refers to the output of action 2 in task 1. - A label like "round1_task2_action3_results" refers to the output of action 3 in task 2 of round 1.
CRITICAL DOCUMENT REFERENCE RULES: CRITICAL DOCUMENT REFERENCE RULES:
- ONLY use the exact labels listed in AVAILABLE DOCUMENTS below - ONLY use the exact labels listed in AVAILABLE DOCUMENTS below
@ -179,7 +165,8 @@ CRITICAL DOCUMENT REFERENCE RULES:
- When generating multiple actions, you may only use as input documents those that are already present in AVAILABLE DOCUMENTS or produced by actions that come earlier in the list. Do NOT use as input any document label that will be produced by a later action. - When generating multiple actions, you may only use as input documents those that are already present in AVAILABLE DOCUMENTS or produced by actions that come earlier in the list. Do NOT use as input any document label that will be produced by a later action.
- If AVAILABLE DOCUMENTS shows "NO DOCUMENTS AVAILABLE", you CANNOT create document extraction actions. Instead, create actions that generate new content or inform the user that documents are needed. - If AVAILABLE DOCUMENTS shows "NO DOCUMENTS AVAILABLE", you CANNOT create document extraction actions. Instead, create actions that generate new content or inform the user that documents are needed.
TASK STEP: {task_step.objective} (ID: {task_step.id}) TASK STEP: {context.task_step.objective if context.task_step else 'No task step specified'} (ID: {context.task_step.id if context.task_step else 'unknown'})
SUCCESS CRITERIA: {success_criteria_str} SUCCESS CRITERIA: {success_criteria_str}
CONTEXT - Chat History: CONTEXT - Chat History:
@ -206,10 +193,10 @@ AVAILABLE DOCUMENTS:
DOCUMENT REFERENCE EXAMPLES: DOCUMENT REFERENCE EXAMPLES:
CORRECT: Use exact labels from AVAILABLE DOCUMENTS above CORRECT: Use exact labels from AVAILABLE DOCUMENTS above
- "task2_action1_personnel_search" - "round1_task2_action3_results"
- "task2_action3_personnel_analysis" - "round1_task1_action1_input"
- "docItem:doc_abc:file1.txt" - "docItem:doc_abc:round1_task2_action3_webpage_content.html"
- "docList:msg123:user_uploads" (supported format, but use actual labels instead) - "docList:msg123:round1_task2_action3_results" (supported format, but use actual labels instead)
INCORRECT: These will cause errors INCORRECT: These will cause errors
- "msg_xxx:documents" (invalid format - missing docList/docItem prefix) - "msg_xxx:documents" (invalid format - missing docList/docItem prefix)
@ -220,7 +207,7 @@ PREVIOUS RESULTS: {previous_results_str}
IMPROVEMENTS NEEDED: {improvements_str} IMPROVEMENTS NEEDED: {improvements_str}
PREVIOUS TASK HANDOVER CONTEXT: PREVIOUS TASK HANDOVER CONTEXT:
{previous_handover.workflowSummary if previous_handover and previous_handover.workflowSummary else 'No previous task handover available'} {context.previous_handover.workflowSummary if context.previous_handover and context.previous_handover.workflowSummary else 'No previous task handover available'}
{retry_context} {retry_context}
@ -236,10 +223,13 @@ ACTION GENERATION PRINCIPLES:
- If this is a retry, learn from previous failures and improve the approach - If this is a retry, learn from previous failures and improve the approach
- Address specific issues mentioned in previous review feedback - Address specific issues mentioned in previous review feedback
- When specifying expectedDocumentFormats, ensure AI prompts explicitly request pure data without markdown formatting - When specifying expectedDocumentFormats, ensure AI prompts explicitly request pure data without markdown formatting
- Generate user-friendly messages for each action in the user's language ({user_language})
USER LANGUAGE: {user_language} - All user messages must be generated in this language.
DOCUMENT ROUTING GUIDANCE: DOCUMENT ROUTING GUIDANCE:
- Each action should produce documents with a clear resultLabel for routing - Each action should produce documents with a clear resultLabel for routing
- Use consistent naming: "task{{task_id}}_action{{action_number}}_{{descriptive_label}}" - Use consistent naming: "round{{round_number}}_task{{task_id}}_action{{action_number}}_{{descriptive_label}}"
- Ensure document flow: Action A produces documents that Action B can consume - Ensure document flow: Action A produces documents that Action B can consume
- Document labels should be descriptive of content, not just "results" or "output" - Document labels should be descriptive of content, not just "results" or "output"
- Consider what subsequent actions will need and structure outputs accordingly - Consider what subsequent actions will need and structure outputs accordingly
@ -249,11 +239,12 @@ INSTRUCTIONS:
- Use docItem for single documents and docList labels for groups of documents as shown in AVAILABLE DOCUMENTS - Use docItem for single documents and docList labels for groups of documents as shown in AVAILABLE DOCUMENTS
- If AVAILABLE DOCUMENTS shows "NO DOCUMENTS AVAILABLE", you cannot create document extraction actions. Instead, create actions that generate new content or inform the user that documents are needed. - If AVAILABLE DOCUMENTS shows "NO DOCUMENTS AVAILABLE", you cannot create document extraction actions. Instead, create actions that generate new content or inform the user that documents are needed.
- Always pass documentList as a LIST of references (docItem and/or docList) - this list CANNOT be empty for document extraction actions - Always pass documentList as a LIST of references (docItem and/or docList) - this list CANNOT be empty for document extraction actions
- For resultLabel, use the format: "task{{task_id}}_action{{action_number}}_{{short_label}}" where: - For resultLabel, use the format: "round{{round_number}}_task{{task_id}}_action{{action_number}}_{{short_label}}" where:
- {{round_number}} = the current round number (e.g., 1)
- {{task_id}} = the current task's id (e.g., 1) - {{task_id}} = the current task's id (e.g., 1)
- {{action_number}} = the sequence number of the action within the task (e.g., 2) - {{action_number}} = the sequence number of the action within the task (e.g., 2)
- {{short_label}} = a short, descriptive label for the output (e.g., "analysis_results") - {{short_label}} = a short, descriptive label for the output (e.g., "analysis_results")
Example: "task1_action2_analysis_results" Example: "round1_task1_action2_analysis_results"
- If this is a retry, ensure the new actions address the specific issues from previous attempts - If this is a retry, ensure the new actions address the specific issues from previous attempts
- Follow the JSON structure below. All fields are required. - Follow the JSON structure below. All fields are required.
@ -264,10 +255,10 @@ REQUIRED JSON STRUCTURE:
"method": "method_name", // Use only the method name (e.g., "document") "method": "method_name", // Use only the method name (e.g., "document")
"action": "action_name", // Use only the action name (e.g., "extract") "action": "action_name", // Use only the action name (e.g., "extract")
"parameters": {{ "parameters": {{
"documentList": ["docItem:doc_abc:file1.txt", "task1_action2_results"], "documentList": ["docItem:doc_abc:round1_task1_action2_results", "round1_task1_action1_input"],
"aiPrompt": "Comprehensive AI prompt describing what to accomplish" "aiPrompt": "Comprehensive AI prompt describing what to accomplish"
}}, }},
"resultLabel": "task1_action3_analysis_results", "resultLabel": "round1_task1_action3_analysis_results",
"expectedDocumentFormats": [ // OPTIONAL: Specify expected document formats when needed "expectedDocumentFormats": [ // OPTIONAL: Specify expected document formats when needed
{{ {{
"extension": ".txt", "extension": ".txt",
@ -275,7 +266,8 @@ REQUIRED JSON STRUCTURE:
"description": "Structured data output" "description": "Structured data output"
}} }}
], ],
"description": "What this action accomplishes (business outcome)" "description": "What this action accomplishes (business outcome)",
"userMessage": "User-friendly message explaining what this action will do in the user's language"
}} }}
] ]
}} }}
@ -284,25 +276,26 @@ FIELD REQUIREMENTS:
- "method": Must be from AVAILABLE METHODS - "method": Must be from AVAILABLE METHODS
- "action": Must be valid for the method - "action": Must be valid for the method
- "parameters": Method-specific, must include documentList as a list if required by the signature - "parameters": Method-specific, must include documentList as a list if required by the signature
- "resultLabel": Must follow the format above (e.g., "task1_action3_analysis_results") - "resultLabel": Must follow the format above (e.g., "round1_task1_action3_analysis_results")
- "expectedDocumentFormats": OPTIONAL - Only specify when you need to control output format - "expectedDocumentFormats": OPTIONAL - Only specify when you need to control output format
- Use when you need specific file types (e.g., CSV for data, JSON for structured output) - Use when you need specific file types (e.g., CSV for data, JSON for structured output)
- Omit when format is flexible (e.g., folder queries with mixed file types) - Omit when format is flexible (e.g., folder queries with mixed file types)
- Each format should specify: extension, mimeType, description - Each format should specify: extension, mimeType, description
- When using expectedDocumentFormats, ensure the aiPrompt explicitly requests pure data without markdown formatting - When using expectedDocumentFormats, ensure the aiPrompt explicitly requests pure data without markdown formatting
- "description": Clear summary of the business outcome - "description": Clear summary of the business outcome
- "userMessage": User-friendly message explaining what the action will accomplish in the user's language
EXAMPLES OF GOOD ACTIONS: EXAMPLES OF GOOD ACTIONS:
1. Document analysis with specific output format (use expectedDocumentFormats): 1. Document analysis with specific output format and user message:
{{ {{
"method": "document", "method": "document",
"action": "extract", "action": "extract",
"parameters": {{ "parameters": {{
"documentList": ["docItem:doc_57520394-6b6d-41c2-b641-bab3fc6d7f4b:candidate_1_profile.txt"], "documentList": ["docItem:doc_57520394-6b6d-41c2-b641-bab3fc6d7f4b:round1_task1_action1_candidate_profile.txt"],
"aiPrompt": "Extract and analyze the candidate's qualifications, experience, skills, and suitability for the product designer position. Identify key strengths, relevant experience, technical skills, and any areas of concern. Provide a comprehensive assessment that can be used for evaluation." "aiPrompt": "Extract and analyze the candidate's qualifications, experience, skills, and suitability for the product designer position. Identify key strengths, relevant experience, technical skills, and any areas of concern. Provide a comprehensive assessment that can be used for evaluation."
}}, }},
"resultLabel": "task1_action1_candidate_analysis", "resultLabel": "round1_task1_action2_candidate_analysis",
"expectedDocumentFormats": [ "expectedDocumentFormats": [
{{ {{
"extension": ".json", "extension": ".json",
@ -310,30 +303,32 @@ EXAMPLES OF GOOD ACTIONS:
"description": "Structured candidate analysis data" "description": "Structured candidate analysis data"
}} }}
], ],
"description": "Comprehensive analysis of candidate profile for evaluation" "description": "Comprehensive analysis of candidate profile for evaluation",
"userMessage": "Ich analysiere das Kandidatenprofil und extrahiere alle wichtigen Informationen für die Bewertung."
}} }}
2. Multi-document processing with flexible output (omit expectedDocumentFormats): 2. Multi-document processing with user message:
{{ {{
"method": "document", "method": "document",
"action": "extract", "action": "extract",
"parameters": {{ "parameters": {{
"documentList": ["task1_action1_candidate_analysis", "task1_action2_candidate_analysis", "task1_action3_candidate_analysis"], "documentList": ["round1_task1_action2_candidate_analysis", "round1_task1_action3_candidate_analysis", "round1_task1_action4_candidate_analysis"],
"aiPrompt": "Compare all three candidate profiles and create an evaluation matrix. Rate each candidate on technical skills, experience level, cultural fit, portfolio quality, and communication skills. Provide clear rankings and recommendations for the product designer position." "aiPrompt": "Compare all three candidate profiles and create an evaluation matrix. Rate each candidate on technical skills, experience level, cultural fit, portfolio quality, and communication skills. Provide clear rankings and recommendations for the product designer position."
}}, }},
"resultLabel": "task1_action4_evaluation_matrix", "resultLabel": "round1_task1_action5_evaluation_matrix",
"description": "Create comprehensive evaluation matrix comparing all candidates" "description": "Create comprehensive evaluation matrix comparing all candidates",
"userMessage": "Ich vergleiche alle drei Kandidatenprofile und erstelle eine umfassende Bewertungsmatrix mit klaren Empfehlungen."
}} }}
3. Data extraction with specific CSV format: 3. Data extraction with specific CSV format and user message:
{{ {{
"method": "document", "method": "document",
"action": "extract", "action": "extract",
"parameters": {{ "parameters": {{
"documentList": ["docItem:doc_abc:table_data.pdf"], "documentList": ["docItem:doc_abc:round1_task1_action1_table_data.pdf"],
"aiPrompt": "Extract all table data and convert to structured CSV format with proper headers and data types. IMPORTANT: Deliver pure CSV data without any markdown formatting, code blocks, or additional text. Output only the CSV content with proper headers and data rows." "aiPrompt": "Extract all table data and convert to structured CSV format with proper headers and data types. IMPORTANT: Deliver pure CSV data without any markdown formatting, code blocks, or additional text. Output only the CSV content with proper headers and data rows."
}}, }},
"resultLabel": "task1_action2_structured_data", "resultLabel": "round1_task1_action2_structured_data",
"expectedDocumentFormats": [ "expectedDocumentFormats": [
{{ {{
"extension": ".csv", "extension": ".csv",
@ -341,19 +336,21 @@ EXAMPLES OF GOOD ACTIONS:
"description": "Structured table data in CSV format" "description": "Structured table data in CSV format"
}} }}
], ],
"description": "Extract and structure table data for analysis" "description": "Extract and structure table data for analysis",
"userMessage": "Ich extrahiere alle Tabellendaten und konvertiere sie in ein strukturiertes CSV-Format für die weitere Analyse."
}} }}
4. Comprehensive summary report from multiple documents (use generateReport): 4. Comprehensive summary report with user message:
{{ {{
"method": "document", "method": "document",
"action": "generateReport", "action": "generateReport",
"parameters": {{ "parameters": {{
"documentList": ["task1_action1_candidate_analysis", "task1_action2_candidate_analysis", "task1_action3_candidate_analysis"], "documentList": ["round1_task1_action2_candidate_analysis", "round1_task1_action3_candidate_analysis", "round1_task1_action4_candidate_analysis"],
"title": "Comprehensive Candidate Evaluation Report" "title": "Comprehensive Candidate Evaluation Report"
}}, }},
"resultLabel": "task1_action5_summary_report", "resultLabel": "round1_task1_action6_summary_report",
"description": "Generate a comprehensive, professional HTML report consolidating all candidate analyses and findings" "description": "Generate a comprehensive, professional HTML report consolidating all candidate analyses and findings",
"userMessage": "Ich erstelle einen umfassenden, professionellen Bericht, der alle Kandidatenanalysen und Erkenntnisse zusammenfasst."
}} }}
5. Correct chaining of actions within a task: 5. Correct chaining of actions within a task:
@ -363,21 +360,23 @@ EXAMPLES OF GOOD ACTIONS:
"method": "document", "method": "document",
"action": "extract", "action": "extract",
"parameters": {{ "parameters": {{
"documentList": ["docItem:doc_abc:file1.txt"], "documentList": ["docItem:doc_abc:round1_task1_action1_file1.txt"],
"aiPrompt": "Extract data from file1." "aiPrompt": "Extract data from file1."
}}, }},
"resultLabel": "task1_action1_extracted_data", "resultLabel": "round1_task1_action1_extracted_data",
"description": "Extract data from file1." "description": "Extract data from file1.",
"userMessage": "Ich extrahiere die Daten aus der Datei."
}}, }},
{{ {{
"method": "document", "method": "document",
"action": "generateReport", "action": "generateReport",
"parameters": {{ "parameters": {{
"documentList": ["task1_action1_extracted_data"], "documentList": ["round1_task1_action1_extracted_data"],
"title": "Report" "title": "Report"
}}, }},
"resultLabel": "task1_action2_report", "resultLabel": "round1_task1_action2_report",
"description": "Generate report from extracted data." "description": "Generate report from extracted data.",
"userMessage": "Ich erstelle einen Bericht basierend auf den extrahierten Daten."
}} }}
] ]
}} }}
@ -390,132 +389,136 @@ EXAMPLES OF GOOD ACTIONS:
"documentList": [], "documentList": [],
"title": "Workflow Status Report" "title": "Workflow Status Report"
}}, }},
"resultLabel": "task1_action1_status_report", "resultLabel": "round1_task1_action1_status_report",
"description": "Generate a status report informing the user that no documents are available for processing and requesting document upload or alternative input." "description": "Generate a status report informing the user that no documents are available for processing and requesting document upload or alternative input.",
"userMessage": "Ich erstelle einen Statusbericht, der Sie darüber informiert, dass keine Dokumente zur Verarbeitung verfügbar sind und um Dokumente oder alternative Eingaben bittet."
}} }}
IMPORTANT NOTES: IMPORTANT NOTES:
- Respond with ONLY the JSON object. Do not include any explanatory text. - Respond with ONLY the JSON object. Do not include any explanatory text.
- Before creating any document extraction action, verify that AVAILABLE DOCUMENTS contains actual document references. - Before creating any document extraction action, verify that AVAILABLE DOCUMENTS contains actual document references.
- If AVAILABLE DOCUMENTS shows "NO DOCUMENTS AVAILABLE", use example 6 above to create a status report action instead of document extraction.""" - If AVAILABLE DOCUMENTS shows "NO DOCUMENTS AVAILABLE", use example 6 above to create a status report action instead of document extraction.
logging.debug(f"[ACTION PLAN PROMPT] Available Documents Section:\n{available_documents_str}\nUser Connections Section:\n{available_connections_str}\nAvailable Methods (detailed):\n{available_methods_str}") - Always include a user-friendly userMessage for each action in the user's language ({user_language}).
- The examples above show German user messages as reference - adapt the language to match the USER LANGUAGE specified above."""
logging.debug(f"[ACTION PLAN PROMPT] Enhanced Document Context:\n{available_documents_str}\nUser Connections Section:\n{available_connections_str}\nAvailable Methods (detailed):\n{available_methods_str}")
return prompt return prompt
async def createResultReviewPrompt(review_context) -> str: def createResultReviewPrompt(context: ReviewContext, service) -> str:
"""Create prompt for result review""" """Create enhanced prompt for result review with user-friendly messages and document context"""
task_step = review_context.task_step # Build comprehensive action and result summary
step_result = review_context.step_result or {} action_summary = ""
step_result_serializable = { for i, action in enumerate(context.task_actions or []):
'task_step': { action_summary += f"\nACTION {i+1}: {action.execMethod}.{action.execAction}\n"
'id': task_step.id, action_summary += f" Status: {action.status}\n"
'objective': task_step.objective, if action.error:
'success_criteria': task_step.success_criteria or [] action_summary += f" Error: {action.error}\n"
}, if action.resultDocuments:
'action_results': [], action_summary += f" Documents: {len(action.resultDocuments)} document(s)\n"
'successful_actions': step_result.get('successful_actions', 0), for doc in action.resultDocuments:
'total_actions': step_result.get('total_actions', 0), # Use Pydantic model properties directly
'results_count': len(step_result.get('results', [])), fileName = doc.fileName
'errors_count': len(step_result.get('errors', [])) fileSize = doc.fileSize
} mimeType = doc.mimeType
for action_result in (review_context.action_results or []):
documents_metadata = [] action_summary += f" - {fileName} ({fileSize} bytes, {mimeType})\n"
# Get document information from step_result.documents
action_index = len(step_result_serializable['action_results'])
step_documents = step_result.get('documents', [])
logger.debug(f"Processing action {action_index}: step_documents count = {len(step_documents)}")
if action_index < len(step_documents):
# Use the document information from step_result
step_doc_info = step_documents[action_index]
documents_count = step_doc_info.get('documents_count', 0)
documents_list = step_doc_info.get('documents', [])
logger.debug(f"Action {action_index}: documents_count = {documents_count}, documents_list length = {len(documents_list)}")
# Process the actual documents
for doc in documents_list:
# These are ActionDocument objects from ActionResult.documents
documents_metadata.append({
'filename': doc.documentName or 'unknown',
'fileSize': len(str(doc.documentData or '')),
'mimeType': getattr(doc, 'mimeType', 'unknown')
})
else: else:
logger.warning(f"Action {action_index}: No step_documents info found - this should not happen with the new architecture") action_summary += f" Documents: None\n"
# No fallback - if step_result.documents is missing, we have a bug
# Build result summary
serializable_action_result = { result_summary = ""
'status': 'completed' if action_result.success else 'failed', if context.action_results:
'result_summary': action_result.resultLabel or 'Action completed successfully', for i, result in enumerate(context.action_results):
'error': action_result.error, result_summary += f"\nRESULT {i+1}:\n"
'resultLabel': action_result.resultLabel or '', result_summary += f" Success: {result.success}\n"
'documents_count': len(documents_metadata), if result.error:
'documents_metadata': documents_metadata, result_summary += f" Error: {result.error}\n"
'success_indicator': ( if result.documents:
'documents' if len(documents_metadata) > 0 else 'none' result_summary += f" Documents: {len(result.documents)} document(s)\n"
) for doc in result.documents:
} result_summary += f" - {doc.documentName} ({doc.mimeType})\n"
else:
logger.debug(f"Action {action_index}: Final documents_count = {len(documents_metadata)}") result_summary += f" Documents: None\n"
step_result_serializable['action_results'].append(serializable_action_result) # Get enhanced document context using the new method
step_result_json = json.dumps(step_result_serializable, indent=2, ensure_ascii=False) document_context = service.getEnhancedDocumentContext()
success_criteria_str = ', '.join(task_step.success_criteria or [])
return f"""You are a result review AI that evaluates task step completion with BASIC validation. # Get user language from service
user_language = service.user.language if service and service.user else 'en'
prompt = f"""
You are a result review AI that evaluates task execution results and provides feedback with user-friendly messages.
TASK STEP: {task_step.objective} TASK OBJECTIVE: {context.task_step.objective if context.task_step else 'No task objective specified'}
SUCCESS CRITERIA: {success_criteria_str} SUCCESS CRITERIA: {', '.join(context.task_step.success_criteria) if context.task_step and context.task_step.success_criteria else 'No success criteria specified'}
STEP RESULT: {step_result_json} EXECUTION SUMMARY:
{action_summary}
BASIC VALIDATION RULES: RESULT SUMMARY:
1. SUCCESS if: Action completed AND (documents were produced OR meaningful text output exists) {result_summary}
2. RETRY if: Action failed due to technical issues that can be fixed
3. FAILED if: Action completely failed with no recoverable output
VALIDATION PRINCIPLES: DOCUMENT CONTEXT:
- Be GENEROUS with success - if the action achieved its basic purpose, mark as success {document_context}
- Focus on FUNCTIONALITY, not perfection
- Document outputs are PRIMARY indicators of success
- Text outputs are SECONDARY indicators
- Only retry for CLEAR technical issues, not minor imperfections
- Don't be picky about formatting or minor details
- Check if ANY documents were produced (documents_count > 0)
- If documents were produced, consider it a SUCCESS
EXAMPLES OF SUCCESS: PREVIOUS RESULTS: {', '.join(context.previous_results) if context.previous_results else 'None'}
- Document extraction produced a file (even if imperfect)
- Text analysis provided meaningful insights
- Data processing completed with results
- Any action that produced documents (documents_count > 0)
EXAMPLES OF RETRY: REVIEW INSTRUCTIONS:
- Technical errors (API failures, timeouts) 1. Evaluate if the task step was completed successfully
- Missing required inputs 2. Check if all success criteria were met
- Clear implementation bugs 3. Assess the quality and completeness of outputs
4. Identify any missing or incomplete results
EXAMPLES OF FAILED: 5. Provide specific improvement suggestions
- Complete system failures 6. Generate user-friendly messages explaining the results
- No output whatsoever 7. Return a JSON object with the exact structure shown below
- Unrecoverable errors
- Actions with documents_count = 0 AND no meaningful text output
REQUIRED JSON STRUCTURE: REQUIRED JSON STRUCTURE:
{{ {{
"status": "success|retry|failed", "status": "success|partial|failed",
"reason": "Brief explanation", "reason": "Brief explanation of the status",
"improvements": ["specific technical fixes only"], "improvements": ["improvement1", "improvement2"],
"quality_score": 1-10, "quality_score": 8, // 1-10 scale
"met_criteria": ["basic functionality achieved"], "missing_outputs": ["missing_output1", "missing_output2"],
"unmet_criteria": [] "met_criteria": ["criteria1", "criteria2"],
"unmet_criteria": ["criteria3", "criteria4"],
"confidence": 0.85, // 0.0-1.0 scale
"userMessage": "User-friendly message explaining the review results in the user's language"
}} }}
VALIDATION LOGIC: FIELD REQUIREMENTS:
- If ANY action has documents_count > 0, mark as SUCCESS - "status": Overall task completion status
- If ALL actions have documents_count = 0 AND no meaningful text output, mark as FAILED - "success": All criteria met, high-quality outputs
- Only mark as RETRY for clear technical issues that can be fixed - "partial": Some criteria met, outputs need improvement
- Focus on actual document production and functionality, not specific output names - "failed": Most criteria unmet, significant issues
- "reason": Clear explanation of why this status was assigned
- "improvements": List of specific, actionable improvements
- "quality_score": 1-10 rating of output quality
- "missing_outputs": List of expected outputs that were not produced
- "met_criteria": List of success criteria that were fully met
- "unmet_criteria": List of success criteria that were not met
- "confidence": 0.0-1.0 confidence level in this assessment
- "userMessage": User-friendly explanation of results in the user's language
NOTE: Respond with ONLY the JSON object. Be GENEROUS with success ratings.""" EXAMPLES OF GOOD IMPROVEMENTS:
- "Increase AI prompt specificity for better data extraction"
- "Add validation steps to ensure output completeness"
- "Improve error handling for failed document processing"
- "Enhance document format specifications for better output quality"
EXAMPLES OF GOOD MISSING OUTPUTS:
- "Structured analysis report in JSON format"
- "Comparison matrix of candidate profiles"
- "Data validation summary with quality metrics"
- "Professional business communication document"
QUALITY SCORE GUIDELINES:
- 9-10: Exceptional quality, exceeds expectations
- 7-8: Good quality, meets all requirements
- 5-6: Acceptable quality, minor issues
- 3-4: Poor quality, significant issues
- 1-2: Very poor quality, major problems
USER LANGUAGE: {user_language} - All user messages must be generated in this language.
NOTE: Respond with ONLY the JSON object. Do not include any explanatory text."""
return prompt

View file

@ -49,14 +49,25 @@ class ChatManager:
logger.info(f"Task {idx+1}/{total_tasks}: {task_step.objective}") logger.info(f"Task {idx+1}/{total_tasks}: {task_step.objective}")
# Create task context for this task # Create proper context object for this task
task_context = TaskContext( task_context = TaskContext(
task_step=task_step, task_step=task_step,
workflow=workflow, workflow=workflow,
workflow_id=workflow.id, workflow_id=workflow.id,
available_documents=self.service.getAvailableDocuments(workflow), available_documents=self.service.getAvailableDocuments(workflow),
previous_results=previous_results available_connections=self.service.getConnectionReferenceList(),
previous_results=previous_results,
previous_handover=None,
improvements=[],
retry_count=0,
previous_action_results=[],
previous_review_result=None,
is_regeneration=False,
failure_patterns=[],
failed_actions=[],
successful_actions=[]
) )
# Execute task (this handles action generation, execution, and review internally) # Execute task (this handles action generation, execution, and review internally)
task_result = await self.handlingTasks.executeTask(task_step, workflow, task_context, current_task_index, total_tasks) task_result = await self.handlingTasks.executeTask(task_step, workflow, task_context, current_task_index, total_tasks)
# Handover # Handover
@ -97,5 +108,6 @@ class ChatManager:
completed_tasks=0, completed_tasks=0,
total_tasks=0, total_tasks=0,
execution_time=0.0, execution_time=0.0,
final_results_count=0 final_results_count=0,
error=str(e)
) )

View file

@ -111,21 +111,21 @@ class ServiceCenter:
except Exception as e: except Exception as e:
logger.error(f"Error discovering methods: {str(e)}") logger.error(f"Error discovering methods: {str(e)}")
def detectContentTypeFromData(self, fileData: bytes, filename: str) -> str: def detectContentTypeFromData(self, fileData: bytes, fileName: str) -> str:
""" """
Detect content type from file data and filename. Detect content type from file data and fileName.
This method makes the MIME type detection function accessible through the service center. This method makes the MIME type detection function accessible through the service center.
Args: Args:
fileData: Raw file data as bytes fileData: Raw file data as bytes
filename: Name of the file fileName: Name of the file
Returns: Returns:
str: Detected MIME type str: Detected MIME type
""" """
try: try:
# Check file extension first # Check file extension first
ext = os.path.splitext(filename)[1].lower() ext = os.path.splitext(fileName)[1].lower()
if ext: if ext:
# Map common extensions to MIME types # Map common extensions to MIME types
extToMime = { extToMime = {
@ -246,53 +246,31 @@ class ServiceCenter:
} }
return mime_types.get(extension.lower(), 'application/octet-stream') return mime_types.get(extension.lower(), 'application/octet-stream')
def getFileExtension(self, fileName: str) -> str:
    """
    Extract the file extension from fileName (without dot, lowercased).

    This method used to be defined twice in a row in the same class body.
    The later definition rebinds the name, so the earlier one (which fell
    back to "txt") never ran. Only the effective behaviour is kept here:
    a name without a dot yields an empty string.

    Args:
        fileName: Name of the file

    Returns:
        str: Text after the last dot, lowercased; empty string if
            fileName contains no dot (or ends with a dot)
    """
    if '.' in fileName:
        # rsplit with maxsplit=1 only inspects the last dot ("a.tar.gz" -> "gz")
        return fileName.rsplit('.', 1)[-1].lower()
    return ''
# ===== Functions ===== # ===== Functions =====
def extractContent(self, prompt: str, document: ChatDocument) -> ExtractedContent:
"""Extract content from document using prompt"""
return self.extractContentFromDocument(prompt, document)
def getMethodsCatalog(self) -> Dict[str, Any]:
"""Get catalog of available methods and their actions"""
catalog = {}
for methodName, method in self.methods.items():
catalog[methodName] = {
'description': method['description'],
'actions': {
actionName: {
'description': action['description'],
'parameters': action['parameters']
}
for actionName, action in method['actions'].items()
}
}
return catalog
def getMethodsList(self) -> List[str]: def getMethodsList(self) -> List[str]:
"""Get list of available methods with their signatures in the required format""" """Get list of available methods with their signatures in the required format"""
methodList = [] methodList = []
@ -306,35 +284,98 @@ class ServiceCenter:
return methodList return methodList
def generateDocumentLabel(self, document: ChatDocument, message: ChatMessage) -> str:
    """
    Generate a document label of the form round{R}_task{T}_action{A}_{fileName}.

    The file name is used as-is (including its extension), so the label is the
    same whether or not the name has an extension.

    Args:
        document: Document whose fileName (and id, for error reporting) is read
        message: Message supplying roundNumber / taskNumber / actionNumber;
            attributes missing on the message default to 1 / 0 / 0

    Returns:
        str: The constructed label, e.g. "round1_task2_action3_report.pdf"

    Raises:
        RuntimeError: If the document's fileName cannot be read and
            recoverDocumentAccess does not make it readable
        Exception: Any other error is logged and re-raised unchanged so the
            workflow does not continue with invalid data
    """
    try:
        # Workflow position comes from the message; fall back when absent
        round_num = getattr(message, 'roundNumber', 1)
        task_num = getattr(message, 'taskNumber', 0)
        action_num = getattr(message, 'actionNumber', 0)

        try:
            filename = document.fileName
            # Result is not needed for the label; the call is kept so a
            # fileName that cannot be parsed still triggers the recovery path
            self.getFileExtension(filename)
        except Exception:
            # Try to diagnose and recover the issue
            diagnosis = self.diagnoseDocumentAccess(document)
            logger.error(f"Critical error: Cannot access document fileName for document {document.id}. Diagnosis: {diagnosis}")

            if not self.recoverDocumentAccess(document):
                # Recovery failed - don't continue with invalid data
                raise RuntimeError(f"Document {document.id} is inaccessible and recovery failed. Diagnosis: {diagnosis}")

            try:
                filename = document.fileName
                self.getFileExtension(filename)
                logger.info(f"Document access recovered for {document.id}")
            except Exception as recovery_error:
                logger.error(f"Recovery failed for document {document.id}: {str(recovery_error)}")
                raise RuntimeError(f"Document {document.id} is permanently inaccessible after recovery attempt: {str(recovery_error)}") from recovery_error

        # Construct label: round1_task2_action3_filename.ext
        return f"round{round_num}_task{task_num}_action{action_num}_{filename}"

    except Exception as e:
        logger.error(f"Critical error generating document label for document {document.id}: {str(e)}")
        # Re-raise the error to prevent workflow from continuing with invalid data
        raise
def getDocumentReferenceList(self) -> Dict[str, List[DocumentExchange]]: def getDocumentReferenceList(self) -> Dict[str, List[DocumentExchange]]:
"""Get list of document exchanges sorted by datetime, categorized by chat round""" """Get list of document exchanges with new labeling format, sorted by recency"""
# Collect all documents first and refresh their attributes
all_documents = []
for message in self.workflow.messages:
if message.documents:
all_documents.extend(message.documents)
# Refresh file attributes for all documents
if all_documents:
self.refreshDocumentFileAttributes(all_documents)
chat_exchanges = [] chat_exchanges = []
history_exchanges = [] history_exchanges = []
# Process messages in reverse order; "first" marks boundary: include up to and including # Process messages in reverse order; "first" marks boundary
# the first "first" message in the chat container, older messages in the history container
in_current_round = True in_current_round = True
for message in reversed(self.workflow.messages): for message in reversed(self.workflow.messages):
is_first = getattr(message, "status", None) == "first" is_first = message.status == "first" if hasattr(message, 'status') else False
# Build a DocumentExchange if message has documents # Build a DocumentExchange if message has documents
doc_exchange = None doc_exchange = None
if message.documents: if message.documents:
if message.actionId and message.documentsLabel: if message.actionId and message.documentsLabel:
doc_ref = self.getDocumentReferenceFromMessage(message) # Use new document label format
if doc_ref:
doc_exchange = DocumentExchange(
documentsLabel=message.documentsLabel,
documents=[doc_ref]
)
else:
doc_refs = [] doc_refs = []
for doc in message.documents: for doc in message.documents:
doc_ref = self.getDocumentReferenceFromChatDocument(doc) doc_ref = self.getDocumentReferenceFromChatDocument(doc, message)
doc_refs.append(doc_ref) doc_refs.append(doc_ref)
if doc_refs:
doc_exchange = DocumentExchange( doc_exchange = DocumentExchange(
documentsLabel=f"{message.id}:documents", documentsLabel=message.documentsLabel,
documents=doc_refs
)
else:
# Generate new labels for documents without explicit labels
doc_refs = []
for doc in message.documents:
doc_ref = self.getDocumentReferenceFromChatDocument(doc, message)
doc_refs.append(doc_ref)
if doc_refs:
# Create a label based on message context
round_num = message.roundNumber if hasattr(message, 'roundNumber') else 1
task_num = message.taskNumber if hasattr(message, 'taskNumber') else 0
action_num = message.actionNumber if hasattr(message, 'actionNumber') else 0
context_label = f"round{round_num}_task{task_num}_action{action_num}_context"
doc_exchange = DocumentExchange(
documentsLabel=context_label,
documents=doc_refs documents=doc_refs
) )
@ -349,59 +390,150 @@ class ServiceCenter:
if in_current_round and is_first: if in_current_round and is_first:
in_current_round = False in_current_round = False
# Sort both lists by datetime in descending order # Sort by recency: most recent first, then current round, then earlier rounds
chat_exchanges.sort(key=lambda x: x.documentsLabel, reverse=True) # Sort chat exchanges by message sequence number (most recent first)
history_exchanges.sort(key=lambda x: x.documentsLabel, reverse=True) chat_exchanges.sort(key=lambda x: self._getMessageSequenceForExchange(x), reverse=True)
# Sort history exchanges by message sequence number (most recent first)
history_exchanges.sort(key=lambda x: self._getMessageSequenceForExchange(x), reverse=True)
return { return {
"chat": chat_exchanges, "chat": chat_exchanges,
"history": history_exchanges "history": history_exchanges
} }
def getDocumentReferenceFromChatDocument(self, document: ChatDocument) -> str: def _getMessageSequenceForExchange(self, exchange: DocumentExchange) -> int:
"""Get document reference from ChatDocument""" """Get message sequence number for sorting exchanges by recency"""
return f"docItem:{document.id}:{document.filename}" try:
# Extract message ID from the first document reference
def getDocumentReferenceFromMessage(self, message: ChatMessage) -> str: if exchange.documents and len(exchange.documents) > 0:
"""Get document reference from ChatMessage""" first_doc_ref = exchange.documents[0]
# If documentsLabel already contains the full reference format, return it if first_doc_ref.startswith("docItem:"):
if message.documentsLabel.startswith("docList:"): # docItem:<id>:<label> - extract ID
return message.documentsLabel parts = first_doc_ref.split(':')
if len(parts) >= 2:
# Otherwise construct the reference using the message ID and documents label doc_id = parts[1]
return f"docList:{message.id}:{message.documentsLabel}" # Find the message containing this document
for message in self.workflow.messages:
def resolveDocumentReference(self, intent_label: str) -> str: if message.documents:
"""Resolve an intent label (e.g., 'task1_extract_results') to a docList reference with message ID.""" for doc in message.documents:
for message in self.workflow.messages: if doc.id == doc_id:
if message.documentsLabel == intent_label and message.documents: return message.sequenceNr if hasattr(message, 'sequenceNr') else 0
return f"docList:{message.id}:{intent_label}" elif first_doc_ref.startswith("docList:"):
return None # docList:<message_id>:<label> - extract message ID
parts = first_doc_ref.split(':')
def getChatDocumentsFromDocumentList(self, documentList: List[str]) -> List[ChatDocument]: if len(parts) >= 2:
"""Get ChatDocuments from a list of document references (intent or resolved).""" message_id = parts[1]
# Find the message by ID
for message in self.workflow.messages:
if str(message.id) == message_id:
return message.sequenceNr if hasattr(message, 'sequenceNr') else 0
return 0
except Exception as e:
logger.error(f"Error getting message sequence for exchange: {str(e)}")
return 0
def getEnhancedDocumentContext(self) -> str:
    """Render the document reference list as a text block for action planning prompts.

    Returns:
        A string starting with ``AVAILABLE DOCUMENTS:`` followed by one
        section per non-empty category ("chat" = current round, "history" =
        previous rounds), or a "no documents" line when both are empty. If
        anything raises, the error is logged and a fixed error text is
        returned instead.
    """
    # Section headings, in output order, keyed by the reference-list category
    sections = (
        ("chat", "CURRENT ROUND DOCUMENTS:\n"),
        ("history", "WORKFLOW HISTORY DOCUMENTS:\n"),
    )
    try:
        references = self.getDocumentReferenceList()
        pieces = ["AVAILABLE DOCUMENTS:\n\n"]

        for category, heading in sections:
            exchanges = references[category]
            if not exchanges:
                continue
            pieces.append(heading)
            for exchange in exchanges:
                joined_refs = ', '.join(exchange.documents)
                pieces.append(f"- {exchange.documentsLabel} contains {joined_refs}\n")
            pieces.append("\n")

        if not (references["chat"] or references["history"]):
            pieces.append("NO DOCUMENTS AVAILABLE - This workflow has no documents to process.\n")

        return "".join(pieces)
    except Exception as e:
        logger.error(f"Error generating enhanced document context: {str(e)}")
        return "NO DOCUMENTS AVAILABLE - Error generating document context."
def _extractDocumentInfoFromReference(self, doc_ref: str) -> Dict[str, str]:
    """Split a document reference into its file name and a readable context.

    Accepted forms are ``docItem:<id>:<label>`` and
    ``docList:<message_id>:<label>``. A label shaped like
    ``round1_task2_action3_name.ext`` yields the trailing name plus
    ``"Round 1, Task 2, Action 3"``; any other label is returned unchanged
    together with a fixed context text for the reference type.

    Args:
        doc_ref: Reference string to parse.

    Returns:
        Dict with the keys ``'filename'`` and ``'context'``, or ``None`` when
        the reference has an unknown prefix, has fewer than three
        ``:``-separated parts, or parsing raises (the error is logged).
    """
    # Context text used when the label is not in the round/task/action format
    fallback_context = {"docItem": 'Unknown context', "docList": 'Message context'}
    try:
        # maxsplit=2 keeps a label that itself contains ':' in one piece
        parts = doc_ref.split(':', 2)
        if len(parts) < 3 or parts[0] not in fallback_context:
            return None
        ref_type, label = parts[0], parts[2]

        # Parse label: round1_task2_action3_filename.ext
        if label.startswith("round"):
            label_parts = label.split('_', 3)
            if len(label_parts) >= 4:
                round_num = label_parts[0].replace('round', '')
                task_num = label_parts[1].replace('task', '')
                action_num = label_parts[2].replace('action', '')
                return {
                    'filename': label_parts[3],
                    'context': f"Round {round_num}, Task {task_num}, Action {action_num}"
                }

        # Fallback for non-standard labels
        return {
            'filename': label,
            'context': fallback_context[ref_type]
        }
    except Exception as e:
        logger.error(f"Error extracting document info from reference: {str(e)}")
        return None
def getDocumentReferenceFromChatDocument(self, document: ChatDocument, message: ChatMessage) -> str:
    """Return the ``docItem:<document id>:<label>`` reference for a document.

    The label comes from ``generateDocumentLabel(document, message)``.

    Raises:
        Exception: Any failure is logged and re-raised so the workflow does
            not continue with invalid data.
    """
    try:
        generated_label = self.generateDocumentLabel(document, message)
        reference = f"docItem:{document.id}:{generated_label}"
    except Exception as e:
        logger.error(f"Critical error creating document reference for document {document.id}: {str(e)}")
        raise
    return reference
def getChatDocumentsFromDocumentList(self, documentList: List[str]) -> List[ChatDocument]:
"""Get ChatDocuments from a list of document references using new label format."""
try: try:
# ADDED LOGGING: Print workflow id, message count, and all message labels and document counts
all_documents = [] all_documents = []
for doc_ref in documentList: for doc_ref in documentList:
# Parse reference format # Parse reference format
parts = doc_ref.split(':', 2) # Split into max 3 parts parts = doc_ref.split(':', 2) # Split into max 3 parts
# Handle simple label format (e.g., "task1_action2_webpage_content")
if len(parts) == 1:
# Simple label - try to find documents by label
label = parts[0]
found = False
for message in self.workflow.messages:
if message.documentsLabel == label and message.documents:
all_documents.extend(message.documents)
found = True
break
if not found:
logger.debug(f"No documents found for label: {label}")
continue
# Handle structured reference format
if len(parts) < 3: if len(parts) < 3:
logger.debug(f"Invalid document reference format: {doc_ref}") logger.debug(f"Invalid document reference format: {doc_ref}")
continue continue
@ -411,7 +543,7 @@ class ServiceCenter:
ref_label = parts[2] ref_label = parts[2]
if ref_type == "docItem": if ref_type == "docItem":
# Handle ChatDocument reference: docItem:<id>:<filename> # Handle ChatDocument reference: docItem:<id>:<new_label>
for message in self.workflow.messages: for message in self.workflow.messages:
if message.documents: if message.documents:
for doc in message.documents: for doc in message.documents:
@ -421,19 +553,49 @@ class ServiceCenter:
if any(doc.id == ref_id for doc in message.documents): if any(doc.id == ref_id for doc in message.documents):
break break
elif ref_type == "docList": elif ref_type == "docList":
# If ref_id is not a message ID (i.e., not all digits or not found), treat as intent label # Handle document list reference: docList:<message_id>:<new_label>
found = False try:
for message in self.workflow.messages: message_id = ref_id
if message.documentsLabel == ref_label and message.documents: label = ref_label
all_documents.extend(message.documents)
found = True # Find message by ID
break target_message = None
if not found: for message in self.workflow.messages:
# Try to resolve intent label to message ID if str(message.id) == message_id:
resolved_ref = self.resolveDocumentReference(ref_label) target_message = message
if resolved_ref: break
# Recursively resolve the resolved reference
all_documents.extend(self.getChatDocumentsFromDocumentList([resolved_ref])) if target_message and target_message.documents:
# Parse new label format: round1_task2_action3_filename.ext
if label.startswith("round"):
# New format - extract context and find matching documents
label_parts = label.split('_', 3)
if len(label_parts) >= 4:
round_num = int(label_parts[0].replace('round', ''))
task_num = int(label_parts[1].replace('task', ''))
action_num = int(label_parts[2].replace('action', ''))
filename = label_parts[3]
# Check if message context matches
msg_round = target_message.roundNumber if hasattr(target_message, 'roundNumber') else 1
msg_task = target_message.taskNumber if hasattr(target_message, 'taskNumber') else 0
msg_action = target_message.actionNumber if hasattr(target_message, 'actionNumber') else 0
if (msg_round == round_num and
msg_task == task_num and
msg_action == action_num):
# Add documents that match the filename
for doc in target_message.documents:
if doc.fileName == filename:
all_documents.append(doc)
else:
logger.debug(f"Label does not follow new format: {label}")
continue
except Exception as e:
logger.error(f"Error processing docList reference {doc_ref}: {str(e)}")
continue
return all_documents return all_documents
except Exception as e: except Exception as e:
logger.error(f"Error getting documents from document list: {str(e)}") logger.error(f"Error getting documents from document list: {str(e)}")
@ -561,39 +723,6 @@ Please provide a comprehensive summary of this conversation."""
logger.error(f"Error summarizing chat: {str(e)}") logger.error(f"Error summarizing chat: {str(e)}")
return f"Error summarizing chat: {str(e)}" return f"Error summarizing chat: {str(e)}"
async def summarizeMessage(self, message: ChatMessage) -> str:
    """
    Ask the basic AI text call for a summary of one chat message.

    Args:
        message: Chat message whose ``message`` text is summarized

    Returns:
        str: The AI response; on failure the error is logged and a string
        starting with "Error summarizing message:" is returned instead
    """
    try:
        # The prompt asks for a reply in the user's configured language
        summary_prompt = f"""You are an AI assistant providing a summary of a chat message.
Please respond in '{self.user.language}' language.
Message:
{message.message}
Instructions:
1. Summarize the key points of this message
2. Be concise but informative
3. Use a professional but friendly tone
4. Focus on important information and any actions needed
Please provide a clear summary of this message."""
        summary = await self.callAiTextBasic(summary_prompt)
        return summary
    except Exception as e:
        logger.error(f"Error summarizing message: {str(e)}")
        return f"Error summarizing message: {str(e)}"
async def callAiTextAdvanced(self, prompt: str, context: str = None) -> str: async def callAiTextAdvanced(self, prompt: str, context: str = None) -> str:
"""Advanced text processing using Anthropic, with fallback to OpenAI basic if advanced fails.""" """Advanced text processing using Anthropic, with fallback to OpenAI basic if advanced fails."""
max_retries = 3 max_retries = 3
@ -692,7 +821,7 @@ Please provide a clear summary of this message."""
if file_item: if file_item:
return { return {
"id": file_item.id, "id": file_item.id,
"filename": file_item.filename, "fileName": file_item.fileName,
"size": file_item.fileSize, "size": file_item.fileSize,
"mimeType": file_item.mimeType, "mimeType": file_item.mimeType,
"fileHash": file_item.fileHash, "fileHash": file_item.fileHash,
@ -718,14 +847,32 @@ Please provide a clear summary of this message."""
logger.error(f"No file data found for fileId: {document.fileId}") logger.error(f"No file data found for fileId: {document.fileId}")
raise ValueError("No file data found for document") raise ValueError("No file data found for document")
# Get filename and mime type from document # Get fileName and mime type from document properties
filename = document.filename if hasattr(document, 'filename') else "document" try:
mimeType = document.mimeType if hasattr(document, 'mimeType') else "application/octet-stream" fileName = document.fileName
mimeType = document.mimeType
except Exception as e:
# Try to diagnose and recover the issue
diagnosis = self.diagnoseDocumentAccess(document)
logger.error(f"Critical error: Cannot access document properties for document {document.id}. Diagnosis: {diagnosis}")
# Attempt recovery
if self.recoverDocumentAccess(document):
try:
fileName = document.fileName
mimeType = document.mimeType
logger.info(f"Document access recovered for {document.id} - proceeding with AI extraction")
except Exception as recovery_error:
logger.error(f"Recovery failed for document {document.id}: {str(recovery_error)}")
raise RuntimeError(f"Document {document.id} properties are permanently inaccessible after recovery attempt - cannot proceed with AI extraction: {str(recovery_error)}")
else:
# Recovery failed - don't continue with invalid data
raise RuntimeError(f"Document {document.id} properties are inaccessible and recovery failed. Diagnosis: {diagnosis}")
# Process with document processor directly # Process with document processor directly
extractedContent = await self.documentProcessor.processFileData( extractedContent = await self.documentProcessor.processFileData(
fileData=fileData, fileData=fileData,
filename=filename, fileName=fileName,
mimeType=mimeType, mimeType=mimeType,
base64Encoded=False, base64Encoded=False,
prompt=prompt, prompt=prompt,
@ -741,21 +888,6 @@ Please provide a clear summary of this message."""
logger.error(f"Error extracting from document: {str(e)}") logger.error(f"Error extracting from document: {str(e)}")
raise raise
async def extractContentFromFileData(self, prompt: str, fileData: bytes, filename: str, mimeType: str, base64Encoded: bool = False, documentId: str = None) -> ExtractedContent:
    """Forward raw file data and a prompt to ``documentProcessor.processFileData``.

    All arguments are passed through unchanged as keyword arguments.

    Raises:
        Exception: Whatever the processor raises; it is logged and re-raised.
    """
    processor_arguments = {
        "fileData": fileData,
        "filename": filename,
        "mimeType": mimeType,
        "base64Encoded": base64Encoded,
        "prompt": prompt,
        "documentId": documentId,
    }
    try:
        extracted = await self.documentProcessor.processFileData(**processor_arguments)
        return extracted
    except Exception as e:
        logger.error(f"Error extracting from file data: {str(e)}")
        raise
def createFile(self, fileName: str, mimeType: str, content: str, base64encoded: bool = False) -> str: def createFile(self, fileName: str, mimeType: str, content: str, base64encoded: bool = False) -> str:
"""Create new file and return its ID""" """Create new file and return its ID"""
# Convert content to bytes based on base64 flag # Convert content to bytes based on base64 flag
@ -778,7 +910,7 @@ Please provide a clear summary of this message."""
return file_item.id return file_item.id
def createDocument(self, fileName: str, mimeType: str, content: str, base64encoded: bool = True, existing_file_id: str = None) -> ChatDocument: def createDocument(self, fileName: str, mimeType: str, content: str, base64encoded: bool = True, existing_file_id: str = None) -> ChatDocument:
"""Create document from file data object created by AI call""" """Create document AND file from file data object created by AI call"""
# Use existing file ID if provided, otherwise create new file # Use existing file ID if provided, otherwise create new file
if existing_file_id: if existing_file_id:
file_id = existing_file_id file_id = existing_file_id
@ -786,17 +918,22 @@ Please provide a clear summary of this message."""
# First create the file and get its ID # First create the file and get its ID
file_id = self.createFile(fileName, mimeType, content, base64encoded) file_id = self.createFile(fileName, mimeType, content, base64encoded)
# Get file info for metadata # Get file info to copy attributes
file_info = self.interfaceComponent.getFile(file_id) file_info = self.getFileInfo(file_id)
if not file_info:
logger.error(f"Could not get file info for fileId: {file_id}")
raise ValueError(f"File info not found for fileId: {file_id}")
# Create document with file reference (ChatDocument is just a reference, not a data container) # Create document with all file attributes copied
return ChatDocument( document = ChatDocument(
id=str(uuid.uuid4()), id=str(uuid.uuid4()),
fileId=file_id, fileId=file_id,
filename=fileName, fileName=file_info.get("fileName", fileName),
fileSize=file_info.fileSize, fileSize=file_info.get("size", 0),
mimeType=mimeType mimeType=file_info.get("mimeType", mimeType)
) )
return document
def updateWorkflowStats(self, eventLabel: str = None, bytesSent: int = 0, bytesReceived: int = 0, tokenCount: int = 0) -> None: def updateWorkflowStats(self, eventLabel: str = None, bytesSent: int = 0, bytesReceived: int = 0, tokenCount: int = 0) -> None:
""" """
@ -847,50 +984,22 @@ Please provide a clear summary of this message."""
logger.error(f"Error calculating object size: {str(e)}") logger.error(f"Error calculating object size: {str(e)}")
return 0 return 0
def calculateUserInputSize(self, userInput: Any) -> int:
    """
    Add up the size of the user's prompt and of every attached file.

    Args:
        userInput: Object that may carry ``prompt`` and ``listFileId``

    Returns:
        int: Prompt size (via ``calculateObjectSize``) plus the ``size``
        entry of each file info found; 0 if anything raises
    """
    try:
        size_sum = 0

        # Prompt contribution, only when the attribute exists
        if hasattr(userInput, 'prompt'):
            size_sum += self.calculateObjectSize(userInput.prompt)

        # File contribution; ids without file info are skipped
        attached_ids = userInput.listFileId if hasattr(userInput, 'listFileId') else None
        for attached_id in attached_ids or ():
            info = self.getFileInfo(attached_id)
            if not info:
                continue
            size_sum += info.get('size', 0)

        return size_sum
    except Exception as e:
        logger.error(f"Error calculating user input size: {str(e)}")
        return 0
def getAvailableDocuments(self, workflow) -> List[str]: def getAvailableDocuments(self, workflow) -> List[str]:
""" """
Get list of available document filenames from workflow. Get list of available document fileNames from workflow with new labeling format.
Args: Args:
workflow: ChatWorkflow object workflow: ChatWorkflow object
Returns: Returns:
List[str]: List of document filenames List[str]: List of document labels in new format
""" """
documents = [] documents = []
for message in workflow.messages: for message in workflow.messages:
for doc in message.documents: for doc in message.documents:
documents.append(doc.filename) # Generate new label format
label = self.generateDocumentLabel(doc, message)
documents.append(label)
return documents return documents
async def executeAction(self, methodName: str, actionName: str, parameters: Dict[str, Any]) -> ActionResult: async def executeAction(self, methodName: str, actionName: str, parameters: Dict[str, Any]) -> ActionResult:
@ -913,24 +1022,23 @@ Please provide a clear summary of this message."""
raise raise
async def processFileIds(self, fileIds: List[str]) -> List[ChatDocument]: async def processFileIds(self, fileIds: List[str]) -> List[ChatDocument]:
"""Process file IDs and return ChatDocument objects""" """Process file IDs from existing files and return ChatDocument objects"""
documents = [] documents = []
for fileId in fileIds: for fileId in fileIds:
try: try:
# Get file info from service # Get file info from service
fileInfo = self.getFileInfo(fileId) fileInfo = self.getFileInfo(fileId)
if fileInfo: if fileInfo:
# Create document using interface # Create document directly with all file attributes
documentData = { document = ChatDocument(
"fileId": fileId, id=str(uuid.uuid4()),
"filename": fileInfo.get("filename", "unknown"), fileId=fileId,
"fileSize": fileInfo.get("size", 0), fileName=fileInfo.get("fileName", "unknown"),
"mimeType": fileInfo.get("mimeType", "application/octet-stream") fileSize=fileInfo.get("size", 0),
} mimeType=fileInfo.get("mimeType", "application/octet-stream")
document = self.interfaceChat.createChatDocument(documentData) )
if document: documents.append(document)
documents.append(document) logger.info(f"Processed file ID {fileId} -> {document.fileName}")
logger.info(f"Processed file ID {fileId} -> {document.filename}")
else: else:
logger.warning(f"No file info found for file ID {fileId}") logger.warning(f"No file info found for file ID {fileId}")
except Exception as e: except Exception as e:
@ -941,12 +1049,176 @@ Please provide a clear summary of this message."""
"""Set user language for the service center""" """Set user language for the service center"""
self.user.language = language self.user.language = language
def setWorkflowContext(self, round_number: int = None, task_number: int = None, action_number: int = None):
    """Store the given round/task/action numbers on the workflow.

    Only arguments that are not ``None`` are written (to ``currentRound``,
    ``currentTask`` and ``currentAction``). The resulting context is logged
    at debug level; errors are logged and swallowed.
    """
    try:
        requested = (
            ('currentRound', round_number),
            ('currentTask', task_number),
            ('currentAction', action_number),
        )
        for attribute, value in requested:
            if value is not None:
                setattr(self.workflow, attribute, value)

        # Values for the log line; 'N/A' stands in for attributes never set
        shown_round, shown_task, shown_action = (
            getattr(self.workflow, attribute, 'N/A') for attribute, _ in requested
        )
        logger.debug(f"Updated workflow context: Round {shown_round}, Task {shown_task}, Action {shown_action}")
    except Exception as e:
        logger.error(f"Error setting workflow context: {str(e)}")
def getWorkflowContext(self) -> Dict[str, int]:
    """Return the workflow's current round, task and action numbers.

    Returns:
        Dict with ``currentRound``, ``currentTask`` and ``currentAction``.
        Attributes missing on the workflow default to 1, 0 and 0; the same
        defaults are returned when reading the workflow raises.
    """
    fallbacks = (('currentRound', 1), ('currentTask', 0), ('currentAction', 0))
    try:
        context = {}
        for attribute, fallback in fallbacks:
            context[attribute] = getattr(self.workflow, attribute, fallback)
        return context
    except Exception as e:
        logger.error(f"Error getting workflow context: {str(e)}")
        return {'currentRound': 1, 'currentTask': 0, 'currentAction': 0}
def incrementWorkflowContext(self, context_type: str):
    """Advance one workflow counter and reset the counters below it.

    Args:
        context_type: ``'round'`` (also resets task and action to 0),
            ``'task'`` (also resets action to 0) or ``'action'``. Any other
            value only produces a warning.

    A missing counter starts from 1 for the round and 0 for task/action.
    Errors are logged and swallowed.
    """
    try:
        workflow = self.workflow

        if context_type == 'round':
            workflow.currentRound = getattr(workflow, 'currentRound', 1) + 1
            # Reset task and action when round changes
            workflow.currentTask = 0
            workflow.currentAction = 0
            logger.info(f"Incremented workflow round to {workflow.currentRound}")
            return

        if context_type == 'task':
            workflow.currentTask = getattr(workflow, 'currentTask', 0) + 1
            # Reset action when task changes
            workflow.currentAction = 0
            logger.info(f"Incremented workflow task to {workflow.currentTask}")
            return

        if context_type == 'action':
            workflow.currentAction = getattr(workflow, 'currentAction', 0) + 1
            logger.info(f"Incremented workflow action to {workflow.currentAction}")
            return

        logger.warning(f"Unknown context type for increment: {context_type}")
    except Exception as e:
        logger.error(f"Error incrementing workflow context: {str(e)}")
def getWorkflowStats(self) -> Dict[str, Any]:
    """Combine the current workflow context with totals, status and id.

    Returns:
        Dict with ``currentRound``, ``currentTask``, ``currentAction`` (from
        ``getWorkflowContext``), ``totalTasks``, ``totalActions``,
        ``workflowStatus`` and ``workflowId``. Attributes missing on the
        workflow default to 0 or ``'unknown'``; if anything raises, a dict
        made only of defaults is returned.
    """
    try:
        context = self.getWorkflowContext()
        stats = {}
        for key in ('currentRound', 'currentTask', 'currentAction'):
            stats[key] = context[key]

        workflow = self.workflow
        stats['totalTasks'] = getattr(workflow, 'totalTasks', 0)
        stats['totalActions'] = getattr(workflow, 'totalActions', 0)
        stats['workflowStatus'] = getattr(workflow, 'status', 'unknown')
        stats['workflowId'] = getattr(workflow, 'id', 'unknown')
        return stats
    except Exception as e:
        logger.error(f"Error getting workflow stats: {str(e)}")
        return {
            'currentRound': 1,
            'currentTask': 0,
            'currentAction': 0,
            'totalTasks': 0,
            'totalActions': 0,
            'workflowStatus': 'unknown',
            'workflowId': 'unknown'
        }
def refreshDocumentFileAttributes(self, documents: List[ChatDocument]) -> None:
    """Copy fileName, fileSize and mimeType from the stored file onto each document.

    The file is looked up with ``interfaceComponent.getFile(doc.fileId)``.
    A missing file is logged as a warning and an exception as an error; in
    both cases processing continues with the next document.
    """
    for document in documents:
        try:
            stored_file = self.interfaceComponent.getFile(document.fileId)
            if not stored_file:
                logger.warning(f"File not found for document {document.id}, fileId: {document.fileId}")
                continue
            document.fileName = stored_file.fileName
            document.fileSize = stored_file.fileSize
            document.mimeType = stored_file.mimeType
        except Exception as error:
            logger.error(f"Error refreshing file attributes for document {document.id}: {error}")
def setWorkflowTotals(self, total_tasks: int = None, total_actions: int = None):
    """Store total task/action counts on the workflow for progress tracking.

    Only arguments that are not ``None`` are written (to ``totalTasks`` and
    ``totalActions``). The resulting totals are logged at debug level;
    errors are logged and swallowed.
    """
    try:
        for attribute, value in (('totalTasks', total_tasks), ('totalActions', total_actions)):
            if value is not None:
                setattr(self.workflow, attribute, value)

        # 'N/A' stands in for totals that were never set on the workflow
        shown_tasks = getattr(self.workflow, 'totalTasks', 'N/A')
        shown_actions = getattr(self.workflow, 'totalActions', 'N/A')
        logger.debug(f"Updated workflow totals: Tasks {shown_tasks}, Actions {shown_actions}")
    except Exception as e:
        logger.error(f"Error setting workflow totals: {str(e)}")
def diagnoseDocumentAccess(self, document: ChatDocument) -> Dict[str, Any]:
    """
    Collect information about why a document's properties cannot be read.

    Checks whether the document has a component interface set and whether
    ``interfaceComponent.getFile(document.fileId)`` returns a file.

    Returns:
        Dict with ``document_id``, ``file_id``, ``has_component_interface``,
        ``component_interface_type``, ``file_exists``, ``file_info`` and
        ``error_details``. If the diagnosis itself raises, a reduced dict with
        ``document_id``, ``file_id`` and ``error_details`` is returned.
    """
    try:
        report = {
            'document_id': document.id,
            'file_id': document.fileId,
            'has_component_interface': document._componentInterface is not None,
            'component_interface_type': None,
            'file_exists': False,
            'file_info': None,
            'error_details': None
        }
        if document._componentInterface:
            report['component_interface_type'] = type(document._componentInterface).__name__

        # Without a component interface there is nothing further to check
        if not document._componentInterface:
            report['error_details'] = "Component interface not set - document cannot access file system"
            return report

        # Try to access the file directly
        try:
            stored_file = self.interfaceComponent.getFile(document.fileId)
            if not stored_file:
                report['error_details'] = f"File with ID {document.fileId} not found in component interface"
            else:
                report['file_exists'] = True
                report['file_info'] = {
                    'fileName': getattr(stored_file, 'fileName', 'N/A'),
                    'fileSize': getattr(stored_file, 'fileSize', 'N/A'),
                    'mimeType': getattr(stored_file, 'mimeType', 'N/A')
                }
        except Exception as e:
            report['error_details'] = f"Error accessing file {document.fileId}: {str(e)}"

        return report
    except Exception as e:
        return {
            'document_id': getattr(document, 'id', 'unknown'),
            'file_id': getattr(document, 'fileId', 'unknown'),
            'error_details': f"Error during diagnosis: {str(e)}"
        }
def recoverDocumentAccess(self, document: ChatDocument) -> bool:
    """
    Try to make a document readable again by re-attaching the component interface.

    Calls ``document.setComponentInterface(self.interfaceComponent)`` and
    then reads ``document.fileName`` as a probe.

    Returns:
        True if the probe read succeeds, False if it or the re-attachment raises.
    """
    try:
        logger.info(f"Attempting to recover document access for document {document.id}")

        # Re-attach the interface the document uses to reach its file
        document.setComponentInterface(self.interfaceComponent)

        # Probe: reading fileName is what failed for the caller
        try:
            probed_name = document.fileName
            logger.info(f"Document access recovered for {document.id} -> {probed_name}")
            return True
        except Exception as probe_error:
            logger.error(f"Document access recovery failed for {document.id}: {str(probe_error)}")
            return False

    except Exception as e:
        logger.error(f"Error during document access recovery for {document.id}: {str(e)}")
        return False
# Create singleton instance # Create singleton instance
serviceObject = None serviceObject = None
def initializeServiceCenter(currentUser: User, workflow: ChatWorkflow) -> ServiceCenter:
    """Return the module-level ServiceCenter, creating it on the first call.

    Once ``serviceObject`` is set, later calls return that same instance and
    their ``currentUser`` / ``workflow`` arguments are not used.
    """
    global serviceObject
    if serviceObject is not None:
        return serviceObject
    serviceObject = ServiceCenter(currentUser, workflow)
    return serviceObject

View file

@ -127,9 +127,9 @@ class DatabaseConnector:
try: try:
if os.path.exists(tablePath): if os.path.exists(tablePath):
for filename in os.listdir(tablePath): for fileName in os.listdir(tablePath):
if filename.endswith('.json'): if fileName.endswith('.json'):
recordId = filename[:-5] # Remove .json extension recordId = fileName[:-5] # Remove .json extension
metadata["recordIds"].append(recordId) metadata["recordIds"].append(recordId)
metadata["recordIds"].sort() metadata["recordIds"].sort()
@ -392,12 +392,6 @@ class DatabaseConnector:
del self._tableMetadataCache[table] del self._tableMetadataCache[table]
logger.debug(f"Cleared metadata cache for table: {table}") logger.debug(f"Cleared metadata cache for table: {table}")
def clearAllCache(self) -> None:
    """Empty both the tables cache and the table metadata cache."""
    # Cleared in this order: table data first, then table metadata
    for cache_attribute in ("_tablesCache", "_tableMetadataCache"):
        getattr(self, cache_attribute).clear()
    logger.debug("Cleared all database cache")
# Public API # Public API
def getTables(self) -> List[str]: def getTables(self) -> List[str]:
@ -550,19 +544,10 @@ class DatabaseConnector:
return False return False
def hasInitialId(self, table: str) -> bool:
    """Tell whether the system table holds an initial ID entry for ``table``."""
    return table in self._loadSystemTable()
def getInitialId(self, table: str) -> Optional[str]:
    """
    Look up the initial ID stored in the system table for a table.

    Args:
        table: Name of the table to look up

    Returns:
        The stored value for the table, or None when the system table
        has no entry for it
    """
    initialId = self._loadSystemTable().get(table)
    logger.debug(f"Initial ID for table '{table}': {initialId}")
    return initialId
def getAllInitialIds(self) -> Dict[str, str]:
    """
    Return every entry of the system table.

    Returns:
        A shallow copy of the loaded system table, so callers cannot
        modify the loaded object through the returned value
    """
    return self._loadSystemTable().copy()

View file

@ -5,7 +5,7 @@ Access control for the Application.
import logging import logging
from typing import Dict, Any, List, Optional from typing import Dict, Any, List, Optional
from datetime import datetime from datetime import datetime
from modules.interfaces.interfaceAppModel import UserPrivilege, Session, User from modules.interfaces.interfaceAppModel import UserPrivilege, User
from modules.shared.timezoneUtils import get_utc_now from modules.shared.timezoneUtils import get_utc_now
# Configure logger # Configure logger
@ -124,14 +124,7 @@ class AppAccess:
else: else:
record["_hideEdit"] = record.get("userId") != self.userId record["_hideEdit"] = record.get("userId") != self.userId
record["_hideDelete"] = record.get("userId") != self.userId record["_hideDelete"] = record.get("userId") != self.userId
elif table == "sessions":
# Only show sessions for the current user or if admin
if self.privilege in [UserPrivilege.SYSADMIN, UserPrivilege.ADMIN]:
record["_hideView"] = False
else:
record["_hideView"] = record.get("userId") != self.userId
record["_hideEdit"] = True # Sessions can't be edited
record["_hideDelete"] = not self.canModify("sessions", record_id)
elif table == "auth_events": elif table == "auth_events":
# Only show auth events for the current user or if admin # Only show auth events for the current user or if admin
if self.privilege in [UserPrivilege.SYSADMIN, UserPrivilege.ADMIN]: if self.privilege in [UserPrivilege.SYSADMIN, UserPrivilege.ADMIN]:
@ -204,60 +197,3 @@ class AppAccess:
# Regular users can create most entities # Regular users can create most entities
return True return True
def validateSession(self, sessionId: str) -> bool:
    """
    Check a session record and, when it is acceptable, refresh its activity stamp.

    The session is rejected when no record with this ID exists, when the current
    time is past its "expiresAt" value, or when it belongs to another user and
    the caller is neither SYSADMIN nor ADMIN. On success "lastActivity" is
    rewritten and the "sessions" table cache is cleared.

    Args:
        sessionId: ID of the session to validate

    Returns:
        True when the session passed all checks; False otherwise, including
        when any exception is raised along the way (the error is logged)
    """
    try:
        matches: List[Dict[str, Any]] = self.db.getRecordset("sessions", recordFilter={"id": sessionId})
        if not matches:
            return False

        record = matches[0]

        # NOTE(review): the Session model declares expiresAt/lastActivity as float
        # UTC timestamps; confirm get_utc_now() returns a value comparable to that.
        if get_utc_now() > record["expiresAt"]:
            return False

        # Foreign sessions are only acceptable for administrators.
        belongsToOther = record["userId"] != self.userId
        if belongsToOther and self.privilege not in [UserPrivilege.SYSADMIN, UserPrivilege.ADMIN]:
            return False

        # Stamp the activity time, then drop the cached table so readers see it.
        activityUpdate = {
            "lastActivity": get_utc_now()
        }
        self.db.recordModify("sessions", sessionId, activityUpdate)
        self.db.clearTableCache("sessions")

        return True

    except Exception as e:
        logger.error(f"Error validating session: {str(e)}")
        return False
def canAccessAuthEvents(self, userId: str) -> bool:
    """
    Decide whether the current user may read another user's auth events.

    Args:
        userId: ID of the user whose auth events are requested

    Returns:
        True for SYSADMIN and ADMIN callers; for everyone else, whether
        userId equals the caller's own user ID
    """
    isAdmin = self.privilege in [UserPrivilege.SYSADMIN, UserPrivilege.ADMIN]
    # Non-admins are limited to their own events.
    return True if isAdmin else userId == self.userId

View file

@ -175,55 +175,7 @@ register_model_labels(
} }
) )
class Session(BaseModel, ModelMixin):
    """Data model for user sessions"""
    # Session key; a UUID4 string is generated when no value is supplied.
    id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Unique session ID")
    # Owner of the session (no default declared).
    userId: str = Field(description="ID of the user")
    # Token this session is tied to (no default declared).
    tokenId: str = Field(description="ID of the associated token")
    # Defaults to the value returned by get_utc_timestamp at creation time.
    lastActivity: float = Field(default_factory=get_utc_timestamp, description="Last activity timestamp (UTC timestamp in seconds)")
    # Expiry time as a float (no default declared).
    expiresAt: float = Field(description="When the session expires (UTC timestamp in seconds)")
    # Optional client details; both default to None.
    ipAddress: Optional[str] = Field(None, description="IP address of the session")
    userAgent: Optional[str] = Field(None, description="User agent of the session")
# Register labels for Session: the model name, a model-level label, and one
# label per field, each given in English ("en") and French ("fr").
register_model_labels(
    "Session",
    {"en": "Session", "fr": "Session"},
    {
        "id": {"en": "ID", "fr": "ID"},
        "userId": {"en": "User ID", "fr": "ID utilisateur"},
        "tokenId": {"en": "Token ID", "fr": "ID du token"},
        "lastActivity": {"en": "Last Activity", "fr": "Dernière activité"},
        "expiresAt": {"en": "Expires At", "fr": "Expire le"},
        "ipAddress": {"en": "IP Address", "fr": "Adresse IP"},
        "userAgent": {"en": "User Agent", "fr": "User Agent"}
    }
)
class AuthEvent(BaseModel, ModelMixin):
    """Data model for authentication events"""
    # Event key; a UUID4 string is generated when no value is supplied.
    id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Unique event ID")
    # User the event belongs to (no default declared).
    userId: str = Field(description="ID of the user")
    # Free-form event type string (no default declared).
    eventType: str = Field(description="Type of event (login, logout, etc.)")
    # Event payload as a string-keyed dict (no default declared).
    details: Dict[str, Any] = Field(description="Event details")
    # Defaults to the value returned by get_utc_timestamp at creation time.
    timestamp: float = Field(default_factory=get_utc_timestamp, description="When the event occurred (UTC timestamp in seconds)")
    # Optional client details; both default to None.
    ipAddress: Optional[str] = Field(None, description="IP address of the event")
    userAgent: Optional[str] = Field(None, description="User agent of the event")
# Register labels for AuthEvent: the model name, a model-level label, and one
# label per field, each given in English ("en") and French ("fr").
register_model_labels(
    "AuthEvent",
    {"en": "Auth Event", "fr": "Événement d'authentification"},
    {
        "id": {"en": "ID", "fr": "ID"},
        "userId": {"en": "User ID", "fr": "ID utilisateur"},
        "eventType": {"en": "Event Type", "fr": "Type d'événement"},
        "details": {"en": "Details", "fr": "Détails"},
        "timestamp": {"en": "Timestamp", "fr": "Horodatage"},
        "ipAddress": {"en": "IP Address", "fr": "Adresse IP"},
        "userAgent": {"en": "User Agent", "fr": "User Agent"}
    }
)
class User(BaseModel, ModelMixin): class User(BaseModel, ModelMixin):
"""Data model for a user""" """Data model for a user"""

View file

@ -18,10 +18,9 @@ from modules.shared.timezoneUtils import get_utc_now, get_utc_timestamp
from modules.interfaces.interfaceAppAccess import AppAccess from modules.interfaces.interfaceAppAccess import AppAccess
from modules.interfaces.interfaceAppModel import ( from modules.interfaces.interfaceAppModel import (
User, Mandate, UserInDB, UserConnection, User, Mandate, UserInDB, UserConnection,
Session, AuthEvent, AuthAuthority, UserPrivilege, AuthAuthority, UserPrivilege,
ConnectionStatus, Token, LocalToken, GoogleToken, MsftToken ConnectionStatus, Token
) )
from modules.shared.attributeUtils import ModelMixin
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -79,8 +78,6 @@ class AppObjects:
# Update database context # Update database context
self.db.updateContext(self.userId) self.db.updateContext(self.userId)
logger.debug(f"User context set: userId={self.userId}, mandateId={self.mandateId}")
def _initializeDatabase(self): def _initializeDatabase(self):
"""Initializes the database connection.""" """Initializes the database connection."""
@ -214,14 +211,6 @@ class AppObjects:
# User methods # User methods
def getAllUsers(self) -> List[User]:
    """
    Load all user records and return those that pass the access filter.

    Returns:
        User models built from the records that self._uam("users", ...) returns
    """
    visibleRecords = self._uam("users", self.db.getRecordset("users"))

    # Build one User model per remaining record.
    users = []
    for record in visibleRecords:
        users.append(User.from_dict(record))
    return users
def getUsersByMandate(self, mandateId: str) -> List[User]: def getUsersByMandate(self, mandateId: str) -> List[User]:
"""Returns users for a specific mandate if user has access.""" """Returns users for a specific mandate if user has access."""
# Get users for this mandate # Get users for this mandate
@ -277,21 +266,13 @@ class AppObjects:
def getUserConnections(self, userId: str) -> List[UserConnection]: def getUserConnections(self, userId: str) -> List[UserConnection]:
"""Returns all connections for a user.""" """Returns all connections for a user."""
try: try:
logger.debug(f"getUserConnections: Looking for connections for user ID: {userId}")
logger.debug(f"getUserConnections: Current database context userId: {self.db.userId}")
logger.debug(f"getUserConnections: Current interface userId: {self.userId}")
# Get connections for this user # Get connections for this user
connections = self.db.getRecordset("connections", recordFilter={"userId": userId}) connections = self.db.getRecordset("connections", recordFilter={"userId": userId})
logger.debug(f"getUserConnections: Raw database connections: {connections}")
logger.debug(f"getUserConnections: Database connections type: {type(connections)}")
logger.debug(f"getUserConnections: Database connections length: {len(connections) if connections else 0}")
# Convert to UserConnection objects # Convert to UserConnection objects
result = [] result = []
for conn_dict in connections: for conn_dict in connections:
try: try:
logger.debug(f"getUserConnections: Processing connection dict: {conn_dict}")
# Create UserConnection object # Create UserConnection object
connection = UserConnection( connection = UserConnection(
id=conn_dict["id"], id=conn_dict["id"],
@ -305,13 +286,10 @@ class AppObjects:
lastChecked=conn_dict.get("lastChecked"), lastChecked=conn_dict.get("lastChecked"),
expiresAt=conn_dict.get("expiresAt") expiresAt=conn_dict.get("expiresAt")
) )
logger.debug(f"getUserConnections: Created UserConnection object: {connection}")
result.append(connection) result.append(connection)
except Exception as e: except Exception as e:
logger.error(f"Error converting connection dict to object: {str(e)}") logger.error(f"Error converting connection dict to object: {str(e)}")
continue continue
logger.debug(f"getUserConnections: Final result: {result}")
return result return result
except Exception as e: except Exception as e:
@ -527,17 +505,12 @@ class AppObjects:
def _deleteUserReferencedData(self, userId: str) -> None: def _deleteUserReferencedData(self, userId: str) -> None:
"""Deletes all data associated with a user.""" """Deletes all data associated with a user."""
try: try:
# Delete user sessions
sessions = self.db.getRecordset("sessions", recordFilter={"userId": userId})
for session in sessions:
self.db.recordDelete("sessions", session["id"])
logger.debug(f"Deleted session {session['id']} for user {userId}")
# Delete user auth events # Delete user auth events
events = self.db.getRecordset("auth_events", recordFilter={"userId": userId}) events = self.db.getRecordset("auth_events", recordFilter={"userId": userId})
for event in events: for event in events:
self.db.recordDelete("auth_events", event["id"]) self.db.recordDelete("auth_events", event["id"])
logger.debug(f"Deleted auth event {event['id']} for user {userId}")
# Delete user tokens # Delete user tokens
tokens = self.db.getRecordset("tokens", recordFilter={"userId": userId}) tokens = self.db.getRecordset("tokens", recordFilter={"userId": userId})
@ -549,7 +522,6 @@ class AppObjects:
connections = self.db.getRecordset("connections", recordFilter={"userId": userId}) connections = self.db.getRecordset("connections", recordFilter={"userId": userId})
for conn in connections: for conn in connections:
self.db.recordDelete("connections", conn["id"]) self.db.recordDelete("connections", conn["id"])
logger.debug(f"Deleted connection {conn['id']} for user {userId}")
logger.info(f"All referenced data for user {userId} has been deleted") logger.info(f"All referenced data for user {userId} has been deleted")
@ -630,22 +602,18 @@ class AppObjects:
def updateMandate(self, mandateId: str, updateData: Dict[str, Any]) -> Mandate: def updateMandate(self, mandateId: str, updateData: Dict[str, Any]) -> Mandate:
"""Updates a mandate if user has access.""" """Updates a mandate if user has access."""
try: try:
logger.debug(f"Updating mandate {mandateId} with data: {updateData}")
# First check if user has permission to modify mandates # First check if user has permission to modify mandates
if not self._canModify("mandates", mandateId): if not self._canModify("mandates", mandateId):
raise PermissionError(f"No permission to update mandate {mandateId}") raise PermissionError(f"No permission to update mandate {mandateId}")
# Get mandate with access control # Get mandate with access control
mandate = self.getMandate(mandateId) mandate = self.getMandate(mandateId)
logger.debug(f"Retrieved mandate: {mandate}")
if not mandate: if not mandate:
raise ValueError(f"Mandate {mandateId} not found") raise ValueError(f"Mandate {mandateId} not found")
# Update mandate data using model # Update mandate data using model
updatedData = mandate.to_dict() updatedData = mandate.to_dict()
updatedData.update(updateData) updatedData.update(updateData)
logger.debug(f"Updated data: {updatedData}")
updatedMandate = Mandate.from_dict(updatedData) updatedMandate = Mandate.from_dict(updatedData)
# Update mandate record # Update mandate record
@ -862,34 +830,23 @@ class AppObjects:
def getConnectionToken(self, connectionId: str, auto_refresh: bool = True) -> Optional[Token]: def getConnectionToken(self, connectionId: str, auto_refresh: bool = True) -> Optional[Token]:
"""Get the connection token for a specific connectionId, optionally auto-refresh if expired""" """Get the connection token for a specific connectionId, optionally auto-refresh if expired"""
try: try:
logger.debug(f"Getting connection token for connectionId: {connectionId}")
# Validate connectionId # Validate connectionId
if not connectionId: if not connectionId:
raise ValueError("connectionId is required for getConnectionToken") raise ValueError("connectionId is required for getConnectionToken")
# Get token for this specific connection # Get token for this specific connection
logger.debug(f"Querying tokens table with connectionId: {connectionId}")
# Query for specific connection # Query for specific connection
tokens = self.db.getRecordset("tokens", recordFilter={ tokens = self.db.getRecordset("tokens", recordFilter={
"connectionId": connectionId "connectionId": connectionId
}) })
logger.debug(f"Raw tokens from database for connectionId {connectionId}: {tokens}")
logger.debug(f"Tokens count: {len(tokens) if tokens else 0}")
if not tokens: if not tokens:
logger.warning(f"No connection token found for connectionId: {connectionId}") logger.warning(f"No connection token found for connectionId: {connectionId}")
return None return None
# Sort by expiration date and get the latest (most recent expiration) # Sort by expiration date and get the latest (most recent expiration)
logger.debug(f"Sorting tokens by expiresAt, current tokens: {tokens}")
tokens.sort(key=lambda x: x.get("expiresAt", 0), reverse=True) tokens.sort(key=lambda x: x.get("expiresAt", 0), reverse=True)
latest_token = Token(**tokens[0]) latest_token = Token(**tokens[0])
logger.debug(f"Latest connection token: {latest_token}")
logger.debug(f"Token expiresAt: {latest_token.expiresAt}, type: {type(latest_token.expiresAt)}")
logger.debug(f"Current UTC timestamp: {get_utc_timestamp()}, type: {type(get_utc_timestamp())}")
# Check if token is expired # Check if token is expired
if latest_token.expiresAt and latest_token.expiresAt < get_utc_timestamp(): if latest_token.expiresAt and latest_token.expiresAt < get_utc_timestamp():
@ -913,7 +870,6 @@ class AppObjects:
logger.warning(f"Connection token for connectionId {connectionId} is expired (expiresAt: {latest_token.expiresAt})") logger.warning(f"Connection token for connectionId {connectionId} is expired (expiresAt: {latest_token.expiresAt})")
return None return None
logger.debug(f"Returning valid connection token: {latest_token}")
return latest_token return latest_token
except Exception as e: except Exception as e:
@ -968,12 +924,6 @@ class AppObjects:
logger.error(f"Error deleting connection token for connectionId {connectionId}: {str(e)}") logger.error(f"Error deleting connection token for connectionId {connectionId}: {str(e)}")
raise raise
# Backward compatibility method
def getTokenForConnection(self, connectionId: str, auto_refresh: bool = True) -> Optional[Token]:
    """
    Deprecated alias for getConnectionToken.

    Logs a deprecation warning on every call and forwards both arguments
    unchanged to getConnectionToken.

    Args:
        connectionId: ID of the connection whose token is requested
        auto_refresh: Passed through to getConnectionToken

    Returns:
        Whatever getConnectionToken returns for the same arguments
    """
    logger.warning("getTokenForConnection is deprecated, use getConnectionToken instead")
    token = self.getConnectionToken(connectionId, auto_refresh)
    return token
def cleanupExpiredTokens(self) -> int: def cleanupExpiredTokens(self) -> int:
"""Clean up expired tokens for all connections, returns count of cleaned tokens""" """Clean up expired tokens for all connections, returns count of cleaned tokens"""
try: try:
@ -990,7 +940,6 @@ class AppObjects:
# Token is expired, delete it # Token is expired, delete it
self.db.recordDelete("tokens", token_data["id"]) self.db.recordDelete("tokens", token_data["id"])
cleaned_count += 1 cleaned_count += 1
logger.debug(f"Cleaned up expired token {token_data['id']} for connection {token_data.get('connectionId')}")
# Clear cache to ensure fresh data # Clear cache to ensure fresh data
if cleaned_count > 0: if cleaned_count > 0:

View file

@ -175,9 +175,21 @@ class ChatDocument(BaseModel, ModelMixin):
"""Data model for a chat document""" """Data model for a chat document"""
id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Primary key") id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Primary key")
fileId: str = Field(description="Foreign key to file") fileId: str = Field(description="Foreign key to file")
filename: str = Field(description="Name of the file")
# Direct file attributes (copied from file object)
fileName: str = Field(description="Name of the file")
fileSize: int = Field(description="Size of the file") fileSize: int = Field(description="Size of the file")
mimeType: str = Field(description="MIME type of the file") mimeType: str = Field(description="MIME type of the file")
# Workflow context fields
roundNumber: Optional[int] = Field(None, description="Round number in workflow")
taskNumber: Optional[int] = Field(None, description="Task number within round")
actionNumber: Optional[int] = Field(None, description="Action number within task")
# Reference to action that created this document
actionId: Optional[str] = Field(None, description="ID of the action that created this document")
# Register labels for ChatDocument # Register labels for ChatDocument
register_model_labels( register_model_labels(
@ -186,9 +198,10 @@ register_model_labels(
{ {
"id": {"en": "ID", "fr": "ID"}, "id": {"en": "ID", "fr": "ID"},
"fileId": {"en": "File ID", "fr": "ID du fichier"}, "fileId": {"en": "File ID", "fr": "ID du fichier"},
"filename": {"en": "Filename", "fr": "Nom de fichier"}, "roundNumber": {"en": "Round Number", "fr": "Numéro de tour"},
"fileSize": {"en": "File Size", "fr": "Taille du fichier"}, "taskNumber": {"en": "Task Number", "fr": "Numéro de tâche"},
"mimeType": {"en": "MIME Type", "fr": "Type MIME"} "actionNumber": {"en": "Action Number", "fr": "Numéro d'action"},
"actionId": {"en": "Action ID", "fr": "ID de l'action"}
} }
) )
@ -234,6 +247,9 @@ class TaskAction(BaseModel, ModelMixin):
execResultLabel: Optional[str] = Field(None, description="Label for the set of result documents") execResultLabel: Optional[str] = Field(None, description="Label for the set of result documents")
# NEW: Optional document format specification # NEW: Optional document format specification
expectedDocumentFormats: Optional[List[Dict[str, str]]] = Field(None, description="Expected document formats (optional)") expectedDocumentFormats: Optional[List[Dict[str, str]]] = Field(None, description="Expected document formats (optional)")
# User message in user's language
userMessage: Optional[str] = Field(None, description="User-friendly message in user's language")
status: TaskStatus = Field(default=TaskStatus.PENDING, description="Action status") status: TaskStatus = Field(default=TaskStatus.PENDING, description="Action status")
error: Optional[str] = Field(None, description="Error message if action failed") error: Optional[str] = Field(None, description="Error message if action failed")
@ -276,6 +292,8 @@ register_model_labels(
"execAction": {"en": "Action", "fr": "Action"}, "execAction": {"en": "Action", "fr": "Action"},
"execParameters": {"en": "Parameters", "fr": "Paramètres"}, "execParameters": {"en": "Parameters", "fr": "Paramètres"},
"execResultLabel": {"en": "Result Label", "fr": "Label du résultat"}, "execResultLabel": {"en": "Result Label", "fr": "Label du résultat"},
"expectedDocumentFormats": {"en": "Expected Document Formats", "fr": "Formats de documents attendus"},
"userMessage": {"en": "User Message", "fr": "Message utilisateur"},
"status": {"en": "Status", "fr": "Statut"}, "status": {"en": "Status", "fr": "Statut"},
"error": {"en": "Error", "fr": "Erreur"}, "error": {"en": "Error", "fr": "Erreur"},
"retryCount": {"en": "Retry Count", "fr": "Nombre de tentatives"}, "retryCount": {"en": "Retry Count", "fr": "Nombre de tentatives"},
@ -388,6 +406,13 @@ class ChatStat(BaseModel, ModelMixin):
bytesReceived: Optional[int] = Field(None, description="Number of bytes received") bytesReceived: Optional[int] = Field(None, description="Number of bytes received")
successRate: Optional[float] = Field(None, description="Success rate of operations") successRate: Optional[float] = Field(None, description="Success rate of operations")
errorCount: Optional[int] = Field(None, description="Number of errors encountered") errorCount: Optional[int] = Field(None, description="Number of errors encountered")
# Enhanced workflow progress tracking
currentRound: Optional[int] = Field(None, description="Current round number in workflow")
totalTasks: Optional[int] = Field(None, description="Total number of tasks in current round")
currentTask: Optional[int] = Field(None, description="Current task number within round")
totalActions: Optional[int] = Field(None, description="Total number of actions in current task")
currentAction: Optional[int] = Field(None, description="Current action number within task")
# Register labels for ChatStat # Register labels for ChatStat
register_model_labels( register_model_labels(
@ -400,7 +425,12 @@ register_model_labels(
"bytesSent": {"en": "Bytes Sent", "fr": "Octets envoyés"}, "bytesSent": {"en": "Bytes Sent", "fr": "Octets envoyés"},
"bytesReceived": {"en": "Bytes Received", "fr": "Octets reçus"}, "bytesReceived": {"en": "Bytes Received", "fr": "Octets reçus"},
"successRate": {"en": "Success Rate", "fr": "Taux de succès"}, "successRate": {"en": "Success Rate", "fr": "Taux de succès"},
"errorCount": {"en": "Error Count", "fr": "Nombre d'erreurs"} "errorCount": {"en": "Error Count", "fr": "Nombre d'erreurs"},
"currentRound": {"en": "Current Round", "fr": "Tour actuel"},
"totalTasks": {"en": "Total Tasks", "fr": "Tâches totales"},
"currentTask": {"en": "Current Task", "fr": "Tâche actuelle"},
"totalActions": {"en": "Total Actions", "fr": "Actions totales"},
"currentAction": {"en": "Current Action", "fr": "Action actuelle"}
} }
) )
@ -448,6 +478,11 @@ class ChatMessage(BaseModel, ModelMixin):
actionId: Optional[str] = Field(None, description="ID of the action that produced this message") actionId: Optional[str] = Field(None, description="ID of the action that produced this message")
actionMethod: Optional[str] = Field(None, description="Method of the action that produced this message") actionMethod: Optional[str] = Field(None, description="Method of the action that produced this message")
actionName: Optional[str] = Field(None, description="Name of the action that produced this message") actionName: Optional[str] = Field(None, description="Name of the action that produced this message")
# New workflow context fields:
roundNumber: Optional[int] = Field(None, description="Round number in workflow")
taskNumber: Optional[int] = Field(None, description="Task number within round")
actionNumber: Optional[int] = Field(None, description="Action number within task")
# Register labels for ChatMessage # Register labels for ChatMessage
register_model_labels( register_model_labels(
@ -468,7 +503,10 @@ register_model_labels(
"success": {"en": "Success", "fr": "Succès"}, "success": {"en": "Success", "fr": "Succès"},
"actionId": {"en": "Action ID", "fr": "ID de l'action"}, "actionId": {"en": "Action ID", "fr": "ID de l'action"},
"actionMethod": {"en": "Action Method", "fr": "Méthode de l'action"}, "actionMethod": {"en": "Action Method", "fr": "Méthode de l'action"},
"actionName": {"en": "Action Name", "fr": "Nom de l'action"} "actionName": {"en": "Action Name", "fr": "Nom de l'action"},
"roundNumber": {"en": "Round Number", "fr": "Numéro de tour"},
"taskNumber": {"en": "Task Number", "fr": "Numéro de tâche"},
"actionNumber": {"en": "Action Number", "fr": "Numéro d'action"}
} }
) )
@ -512,6 +550,34 @@ class ChatWorkflow(BaseModel, ModelMixin):
frontend_readonly=True, frontend_readonly=True,
frontend_required=False frontend_required=False
) )
currentTask: int = Field(
default=0,
description="Current task number",
frontend_type="integer",
frontend_readonly=True,
frontend_required=False
)
currentAction: int = Field(
default=0,
description="Current action number",
frontend_type="integer",
frontend_readonly=True,
frontend_required=False
)
totalTasks: int = Field(
default=0,
description="Total number of tasks in the workflow",
frontend_type="integer",
frontend_readonly=True,
frontend_required=False
)
totalActions: int = Field(
default=0,
description="Total number of actions in the workflow",
frontend_type="integer",
frontend_readonly=True,
frontend_required=False
)
lastActivity: float = Field( lastActivity: float = Field(
default_factory=get_utc_timestamp, default_factory=get_utc_timestamp,
description="Timestamp of last activity (UTC timestamp in seconds)", description="Timestamp of last activity (UTC timestamp in seconds)",
@ -565,6 +631,10 @@ register_model_labels(
"status": {"en": "Status", "fr": "Statut"}, "status": {"en": "Status", "fr": "Statut"},
"name": {"en": "Name", "fr": "Nom"}, "name": {"en": "Name", "fr": "Nom"},
"currentRound": {"en": "Current Round", "fr": "Tour actuel"}, "currentRound": {"en": "Current Round", "fr": "Tour actuel"},
"currentTask": {"en": "Current Task", "fr": "Tâche actuelle"},
"currentAction": {"en": "Current Action", "fr": "Action actuelle"},
"totalTasks": {"en": "Total Tasks", "fr": "Total des tâches"},
"totalActions": {"en": "Total Actions", "fr": "Total des actions"},
"lastActivity": {"en": "Last Activity", "fr": "Dernière activité"}, "lastActivity": {"en": "Last Activity", "fr": "Dernière activité"},
"startedAt": {"en": "Started At", "fr": "Démarré le"}, "startedAt": {"en": "Started At", "fr": "Démarré le"},
"logs": {"en": "Logs", "fr": "Journaux"}, "logs": {"en": "Logs", "fr": "Journaux"},
@ -582,6 +652,21 @@ class TaskStep(BaseModel, ModelMixin):
dependencies: Optional[list[str]] = [] dependencies: Optional[list[str]] = []
success_criteria: Optional[list[str]] = [] success_criteria: Optional[list[str]] = []
estimated_complexity: Optional[str] = None estimated_complexity: Optional[str] = None
userMessage: Optional[str] = Field(None, description="User-friendly message in user's language")
# Register labels for TaskStep: the model name, a model-level label, and one
# label per listed field, each given in English ("en") and French ("fr").
register_model_labels(
    "TaskStep",
    {"en": "Task Step", "fr": "Étape de tâche"},
    {
        "id": {"en": "ID", "fr": "ID"},
        "objective": {"en": "Objective", "fr": "Objectif"},
        "dependencies": {"en": "Dependencies", "fr": "Dépendances"},
        "success_criteria": {"en": "Success Criteria", "fr": "Critères de succès"},
        "estimated_complexity": {"en": "Estimated Complexity", "fr": "Complexité estimée"},
        "userMessage": {"en": "User Message", "fr": "Message utilisateur"}
    }
)
class TaskHandover(BaseModel, ModelMixin): class TaskHandover(BaseModel, ModelMixin):
"""Structured handover between workflow phases and tasks""" """Structured handover between workflow phases and tasks"""
@ -697,10 +782,40 @@ class ReviewResult(BaseModel, ModelMixin):
met_criteria: Optional[list[str]] = [] met_criteria: Optional[list[str]] = []
unmet_criteria: Optional[list[str]] = [] unmet_criteria: Optional[list[str]] = []
confidence: Optional[float] = 0.5 confidence: Optional[float] = 0.5
userMessage: Optional[str] = Field(None, description="User-friendly message in user's language")
# Register labels for ReviewResult: the model name, a model-level label, and one
# label per listed field, each given in English ("en") and French ("fr").
register_model_labels(
    "ReviewResult",
    {"en": "Review Result", "fr": "Résultat de l'évaluation"},
    {
        "status": {"en": "Status", "fr": "Statut"},
        "reason": {"en": "Reason", "fr": "Raison"},
        "improvements": {"en": "Improvements", "fr": "Améliorations"},
        "quality_score": {"en": "Quality Score", "fr": "Score de qualité"},
        "missing_outputs": {"en": "Missing Outputs", "fr": "Sorties manquantes"},
        "met_criteria": {"en": "Met Criteria", "fr": "Critères respectés"},
        "unmet_criteria": {"en": "Unmet Criteria", "fr": "Critères non respectés"},
        "confidence": {"en": "Confidence", "fr": "Confiance"},
        "userMessage": {"en": "User Message", "fr": "Message utilisateur"}
    }
)
# Task plan model: an overview text, the list of task steps, and an optional
# plan-level message for the user.
class TaskPlan(BaseModel, ModelMixin):
    # Overview text of the plan (no default declared).
    overview: str
    # Steps of the plan as TaskStep models (no default declared).
    tasks: list[TaskStep]
    # Optional plan-level user message; defaults to None.
    userMessage: Optional[str] = Field(None, description="Overall user-friendly message for the task plan")
# Register labels for TaskPlan: the model name, a model-level label, and one
# label per field, each given in English ("en") and French ("fr").
register_model_labels(
    "TaskPlan",
    {"en": "Task Plan", "fr": "Plan de tâches"},
    {
        "overview": {"en": "Overview", "fr": "Aperçu"},
        "tasks": {"en": "Tasks", "fr": "Tâches"},
        "userMessage": {"en": "User Message", "fr": "Message utilisateur"}
    }
)
class WorkflowResult(BaseModel, ModelMixin): class WorkflowResult(BaseModel, ModelMixin):
status: str status: str

View file

@ -140,14 +140,6 @@ class ChatObjects:
allWorkflows = self.db.getRecordset("workflows") allWorkflows = self.db.getRecordset("workflows")
return self._uam("workflows", allWorkflows) return self._uam("workflows", allWorkflows)
def getWorkflowsByUser(self, userId: str) -> List[Dict[str, Any]]:
    """
    Fetch the workflows created by one user, filtered by access control.

    Args:
        userId: Value matched against the "_createdBy" field of workflow records

    Returns:
        The matching workflow records after self._uam("workflows", ...) is applied
    """
    createdByUser = {"_createdBy": userId}
    candidates = self.db.getRecordset("workflows", recordFilter=createdByUser)
    return self._uam("workflows", candidates)
def getWorkflow(self, workflowId: str) -> Optional[ChatWorkflow]: def getWorkflow(self, workflowId: str) -> Optional[ChatWorkflow]:
"""Returns a workflow by ID if user has access.""" """Returns a workflow by ID if user has access."""
workflows = self.db.getRecordset("workflows", recordFilter={"id": workflowId}) workflows = self.db.getRecordset("workflows", recordFilter={"id": workflowId})
@ -739,107 +731,9 @@ class ChatObjects:
# Return validated ChatLog instance # Return validated ChatLog instance
return ChatLog(**createdLog) return ChatLog(**createdLog)
# Workflow Management
def saveWorkflowState(self, workflow: "ChatWorkflow", saveMessages: bool = True, saveLogs: bool = True) -> bool:
    """Persist a workflow and, optionally, its changed messages and its logs.

    The workflow record is updated when it already exists and created otherwise.
    Messages are only ever updated: a message that is not in the database yet is
    reported with a warning and skipped. Logs are created or updated by ID.

    Args:
        workflow: Workflow to persist; it must carry a non-empty ``id``.
        saveMessages: When True, write changed messages back to the database.
        saveLogs: When True, create or update the workflow's log entries.

    Returns:
        True when all writes went through; False when the workflow has no ID,
        the current user lacks permission, or any step raised an exception.
    """
    def readField(item, key, default=None):
        # Entries may be plain dicts or model objects; read both the same way.
        if isinstance(item, dict):
            return item.get(key, default)
        return getattr(item, key, default)

    def hasField(item, key):
        return key in item if isinstance(item, dict) else hasattr(item, key)

    try:
        workflowId = workflow.id
        if not workflowId:
            return False

        # Check workflow access: creating needs table-level permission,
        # updating needs permission on this specific record.
        existingWorkflow = self.getWorkflow(workflowId)
        if not existingWorkflow and not self._canModify("workflows"):
            logger.warning(f"No permission to create workflow {workflowId}")
            return False
        if existingWorkflow and not self._canModify("workflows", workflowId):
            logger.warning(f"No permission to update workflow {workflowId}")
            return False

        # Extract only the database-relevant workflow fields
        workflowDbData = {
            "id": workflowId,
            "mandateId": workflow.mandateId,
            "name": workflow.name,
            "status": workflow.status,
            "startedAt": workflow.startedAt,
            "lastActivity": workflow.lastActivity,
            "dataStats": workflow.stats.dict() if workflow.stats else {}
        }
        if existingWorkflow:
            self.updateWorkflow(workflowId, workflowDbData)
        else:
            self.createWorkflow(workflowDbData)

        # The workflow is used as an object throughout this method (workflow.id,
        # workflow.name, ...), so messages and logs are read as attributes too
        # instead of relying on mapping-style `in` / `[]` access.
        messages = readField(workflow, "messages") if saveMessages else None
        if messages:
            # Load the stored messages once and index them by ID; the first
            # record wins if an ID occurs more than once.
            storedMessages = {}
            for stored in self.getWorkflowMessages(workflowId):
                storedMessages.setdefault(readField(stored, "id"), stored)

            comparedKeys = ("role", "agentName", "content", "status", "documents")
            for message in messages:
                messageId = readField(message, "id")
                if not messageId:
                    continue

                existingMessage = storedMessages.get(messageId)
                if not existingMessage:
                    # Message doesn't exist in database yet
                    logger.warning(f"Message {messageId} in workflow {workflowId} not found in database")
                    continue

                hasChanges = any(
                    hasField(message, key) and readField(message, key) != readField(existingMessage, key)
                    for key in comparedKeys
                )
                if not hasChanges:
                    continue

                # Extract only relevant data for the database
                messageData = {
                    "role": readField(message, "role", readField(existingMessage, "role", "unknown")),
                    "content": readField(message, "content", readField(existingMessage, "content", "")),
                    "agentName": readField(message, "agentName", readField(existingMessage, "agentName", "")),
                    "status": readField(message, "status", readField(existingMessage, "status", "completed")),
                    "documents": readField(message, "documents", readField(existingMessage, "documents", []))
                }
                self.updateWorkflowMessage(messageId, messageData)

        logs = readField(workflow, "logs") if saveLogs else None
        if logs:
            existingLogIds = {readField(entry, "id") for entry in self.getWorkflowLogs(workflowId)}
            for log in logs:
                logId = readField(log, "id")
                if not logId:
                    continue

                # Only ask for a fresh timestamp when the log carries none.
                timestamp = readField(log, "timestamp")
                if timestamp is None:
                    timestamp = get_utc_timestamp()

                # Extract only relevant data for the database
                logData = {
                    "id": logId,
                    "workflowId": workflowId,
                    "message": readField(log, "message", ""),
                    "type": readField(log, "type", "info"),
                    "timestamp": timestamp,
                    "agentName": readField(log, "agentName", "(undefined)"),
                    "status": readField(log, "status", "running"),
                    "progress": readField(log, "progress", 50)
                }

                # Create or update log
                if logId in existingLogIds:
                    self.db.recordModify("workflowLogs", logId, logData)
                else:
                    self.db.recordCreate("workflowLogs", logData)

        return True
    except Exception as e:
        logger.error(f"Error saving workflow state: {str(e)}")
        return False
def loadWorkflowState(self, workflowId: str) -> Optional[ChatWorkflow]: def loadWorkflowState(self, workflowId: str) -> Optional[ChatWorkflow]:
"""Loads workflow state if user has access.""" """Loads workflow state if user has access."""
@ -1028,356 +922,6 @@ class ChatObjects:
logger.error(f"Error stopping workflow: {str(e)}") logger.error(f"Error stopping workflow: {str(e)}")
raise raise
# Task Management
def getTask(self, taskId: str) -> Optional["TaskItem"]:
    """Return the task with the given ID, or None.

    Args:
        taskId: ID of the task record to load.

    Returns:
        A TaskItem built from the stored record; None when no record exists,
        the access filter (``self._uam``) removes it, or the record cannot be
        turned into a TaskItem.
    """
    tasks = self.db.getRecordset("tasks", recordFilter={"id": taskId})
    if not tasks:
        return None

    filteredTasks = self._uam("tasks", tasks)
    if not filteredTasks:
        return None

    task = filteredTasks[0]
    try:
        # List fields use `or []` so that a stored None is treated like a
        # missing key instead of failing the whole conversion.
        return TaskItem(
            id=task["id"],
            workflowId=task["workflowId"],
            userInput=task.get("userInput", ""),
            status=task.get("status", TaskStatus.PENDING),
            error=task.get("error"),
            startedAt=task.get("startedAt"),
            finishedAt=task.get("finishedAt"),
            actionList=[TaskAction(**action) for action in task.get("actionList") or []],
            documentsOutput=task.get("documentsOutput") or [],
            retryCount=task.get("retryCount", 0),
            retryMax=task.get("retryMax", 3),
            rollbackOnFailure=task.get("rollbackOnFailure", True),
            dependencies=task.get("dependencies") or [],
            feedback=task.get("feedback")
        )
    except Exception as e:
        logger.error(f"Error validating task data: {str(e)}")
        return None
def getWorkflowTasks(self, workflowId: str) -> List["TaskItem"]:
    """Return the tasks of a workflow.

    Args:
        workflowId: ID of the workflow whose tasks are requested.

    Returns:
        One TaskItem per task record that passes ``self._uam``; an empty list
        when ``self.getWorkflow`` yields nothing for the workflow.
    """
    # Check workflow access first: no workflow, no tasks.
    if not self.getWorkflow(workflowId):
        return []

    taskRecords = self.db.getRecordset("tasks", recordFilter={"workflowId": workflowId})
    accessibleRecords = self._uam("tasks", taskRecords)
    return [TaskItem(**record) for record in accessibleRecords]
def createTask(self, taskData: Dict[str, Any]) -> Optional["TaskItem"]:
    """Create a task for a workflow the current user may modify.

    Args:
        taskData: Field values for the new task. ``workflowId`` is required;
            ``id``, ``status`` and ``startedAt`` are filled in when missing.
            The dict itself is left untouched.

    Returns:
        The created TaskItem, or None when the workflow ID is missing, the
        workflow is not accessible or modifiable, or any step raised.
    """
    try:
        # Work on a copy so the caller's dict is not modified.
        record = dict(taskData)

        # Ensure ID is present
        if not record.get("id"):
            record["id"] = f"task_{uuid.uuid4()}"

        # Check workflow access
        workflowId = record.get("workflowId")
        if not workflowId:
            logger.error("No workflowId provided for createTask")
            return None

        workflow = self.getWorkflow(workflowId)
        if not workflow:
            logger.warning(f"No access to workflow {workflowId}")
            return None

        if not self._canModify("workflows", workflowId):
            logger.warning(f"No permission to modify workflow {workflowId}")
            return None

        # Ensure required fields
        if "status" not in record:
            record["status"] = TaskStatus.PENDING
        if "startedAt" not in record:
            record["startedAt"] = get_utc_timestamp()

        # Create task in database
        createdTask = self.db.recordCreate("tasks", record)

        # Clear cache to ensure fresh data
        self._clearTableCache("tasks")

        # Convert to TaskItem model
        task = TaskItem(
            id=createdTask["id"],
            workflowId=createdTask["workflowId"],
            userInput=createdTask.get("userInput", ""),
            status=createdTask.get("status", TaskStatus.PENDING),
            error=createdTask.get("error"),
            startedAt=createdTask.get("startedAt"),
            finishedAt=createdTask.get("finishedAt"),
            actionList=[TaskAction(**action) for action in createdTask.get("actionList") or []],
            documentsOutput=createdTask.get("documentsOutput") or [],
            retryCount=createdTask.get("retryCount", 0),
            retryMax=createdTask.get("retryMax", 3),
            rollbackOnFailure=createdTask.get("rollbackOnFailure", True),
            dependencies=createdTask.get("dependencies") or [],
            feedback=createdTask.get("feedback")
        )

        # Update workflow's task list. A copy is extended so the loaded
        # workflow object is not changed; a missing or None list counts as empty.
        workflowTasks = list(getattr(workflow, "tasks", None) or [])
        if task.id not in workflowTasks:
            workflowTasks.append(task.id)
            self.updateWorkflow(workflowId, {"tasks": workflowTasks})

        return task

    except Exception as e:
        logger.error(f"Error creating task: {str(e)}")
        return None
def updateTask(self, taskId: str, taskData: Dict[str, Any]) -> Optional["TaskItem"]:
    """Update a task whose workflow the current user may modify.

    Args:
        taskId: ID of the task to update.
        taskData: Field values handed to ``self.db.recordModify``.

    Returns:
        The updated TaskItem; fields absent from the modified record fall back
        to the values of the task as it was loaded. None when the task or its
        workflow is not accessible or modifiable, or any step raised.
    """
    try:
        # Get existing task
        task = self.getTask(taskId)
        if not task:
            logger.warning(f"Task {taskId} not found")
            return None

        # Check workflow access
        workflow = self.getWorkflow(task.workflowId)
        if not workflow:
            logger.warning(f"No access to workflow {task.workflowId}")
            return None

        if not self._canModify("workflows", task.workflowId):
            logger.warning(f"No permission to modify workflow {task.workflowId}")
            return None

        # Update task in database
        updatedTask = self.db.recordModify("tasks", taskId, taskData)

        # Clear cache to ensure fresh data
        self._clearTableCache("tasks")

        # The fallback task.actionList already holds TaskAction instances, which
        # cannot be unpacked with **; only raw records are converted.
        actions = updatedTask.get("actionList", task.actionList) or []
        actionList = [
            action if isinstance(action, TaskAction) else TaskAction(**action)
            for action in actions
        ]

        # Convert to TaskItem model
        return TaskItem(
            id=updatedTask["id"],
            workflowId=updatedTask["workflowId"],
            userInput=updatedTask.get("userInput", task.userInput),
            status=updatedTask.get("status", task.status),
            error=updatedTask.get("error", task.error),
            startedAt=updatedTask.get("startedAt", task.startedAt),
            finishedAt=updatedTask.get("finishedAt", task.finishedAt),
            actionList=actionList,
            documentsOutput=updatedTask.get("documentsOutput", task.documentsOutput),
            retryCount=updatedTask.get("retryCount", task.retryCount),
            retryMax=updatedTask.get("retryMax", task.retryMax),
            rollbackOnFailure=updatedTask.get("rollbackOnFailure", task.rollbackOnFailure),
            dependencies=updatedTask.get("dependencies", task.dependencies),
            feedback=updatedTask.get("feedback", task.feedback)
        )

    except Exception as e:
        logger.error(f"Error updating task: {str(e)}")
        return None
def deleteTask(self, taskId: str) -> bool:
    """Delete a task whose workflow the current user may modify.

    Args:
        taskId: ID of the task to delete.

    Returns:
        True when the task record was deleted; False when the task or its
        workflow is not accessible or modifiable, the delete failed, or any
        step raised.
    """
    try:
        # Get existing task
        task = self.getTask(taskId)
        if not task:
            logger.warning(f"Task {taskId} not found")
            return False

        # Check workflow access
        workflow = self.getWorkflow(task.workflowId)
        if not workflow:
            logger.warning(f"No access to workflow {task.workflowId}")
            return False

        if not self._canModify("workflows", task.workflowId):
            logger.warning(f"No permission to modify workflow {task.workflowId}")
            return False

        if not self.db.recordDelete("tasks", taskId):
            return False

        # Update workflow's task list. Build a new list instead of editing
        # workflow.tasks in place; a missing or None list counts as empty.
        workflowTasks = list(getattr(workflow, "tasks", None) or [])
        if taskId in workflowTasks:
            workflowTasks.remove(taskId)
            self.updateWorkflow(task.workflowId, {"tasks": workflowTasks})

        # Clear cache to ensure fresh data
        self._clearTableCache("tasks")
        return True

    except Exception as e:
        logger.error(f"Error deleting task: {str(e)}")
        return False
# Task Result Management
def createTaskResult(self, resultData: Dict[str, Any]) -> Optional['TaskResult']:
    """Store a task result and return it as a TaskResult.

    Args:
        resultData: Field values for the result. ``id``, ``status`` and
            ``success`` are filled in when missing. The dict itself is left
            untouched.

    Returns:
        The TaskResult built from the stored record, or None when the related
        workflow is not accessible or modifiable, or any step raised.
    """
    try:
        # Work on a copy so the caller's dict is not modified.
        record = dict(resultData)

        # Ensure ID is present
        if not record.get("id"):
            record["id"] = f"result_{uuid.uuid4()}"

        # Check workflow access if taskId is provided
        taskId = record.get("taskId")
        if taskId:
            task = self.getTask(taskId)
            # NOTE(review): when the task cannot be loaded, no access check runs
            # and the result is still stored - confirm this is intended.
            if task:
                workflow = self.getWorkflow(task.workflowId)
                if not workflow:
                    logger.warning(f"No access to workflow {task.workflowId}")
                    return None

                if not self._canModify("workflows", task.workflowId):
                    logger.warning(f"No permission to modify workflow {task.workflowId}")
                    return None

        # Ensure required fields
        if "status" not in record:
            record["status"] = TaskStatus.PENDING
        if "success" not in record:
            record["success"] = False

        # Create result in database
        createdResult = self.db.recordCreate("taskResults", record)

        # Convert to TaskResult model
        return TaskResult(
            taskId=createdResult.get("taskId", ""),
            status=createdResult.get("status", TaskStatus.PENDING),
            success=createdResult.get("success", False),
            feedback=createdResult.get("feedback"),
            error=createdResult.get("error")
        )

    except Exception as e:
        logger.error(f"Error creating task result: {str(e)}")
        return None
def createActionResult(self, resultData: Dict[str, Any]) -> Optional['ActionResult']:
    """Store an action result and return it as an ActionResult.

    Args:
        resultData: Field values for the result. ``id``, ``success`` and
            ``data`` are filled in when missing. The dict itself is left
            untouched.

    Returns:
        The ActionResult built from the stored record, or None when any step
        raised.
    """
    try:
        # Work on a copy so the caller's dict is not modified.
        record = dict(resultData)

        # Ensure ID is present
        if not record.get("id"):
            record["id"] = f"action_result_{uuid.uuid4()}"

        # Ensure required fields
        record.setdefault("success", False)
        record.setdefault("data", {})

        # Create result in database
        createdResult = self.db.recordCreate("actionResults", record)

        # Convert to ActionResult model
        return ActionResult(
            success=createdResult.get("success", False),
            data=createdResult.get("data", {}),
            metadata=createdResult.get("metadata", {}),
            validation=createdResult.get("validation", {}),
            error=createdResult.get("error")
        )

    except Exception as e:
        logger.error(f"Error creating action result: {str(e)}")
        return None
def createTaskAction(self, actionData: Dict[str, Any]) -> Optional["TaskAction"]:
    """Store a task action and return it as a TaskAction.

    Args:
        actionData: Field values for the action. ``execMethod`` and
            ``execAction`` are required; ``id``, ``status`` and
            ``execParameters`` are filled in when missing. The dict itself is
            left untouched.

    Returns:
        The TaskAction built from the stored record, or None when a required
        field is missing or any step raised.
    """
    try:
        # Validate before touching anything, so a rejected call has no effect.
        if "execMethod" not in actionData:
            logger.error("execMethod is required for task action")
            return None
        if "execAction" not in actionData:
            logger.error("execAction is required for task action")
            return None

        # Work on a copy so the caller's dict is not modified.
        record = dict(actionData)

        # Ensure ID is present
        if not record.get("id"):
            record["id"] = f"action_{uuid.uuid4()}"

        # Ensure required fields
        if "status" not in record:
            record["status"] = TaskStatus.PENDING
        if "execParameters" not in record:
            record["execParameters"] = {}

        # Create action in database
        createdAction = self.db.recordCreate("taskActions", record)

        # A stored None timestamp is treated like a missing one; float(None)
        # would otherwise fail the conversion.
        timestamp = createdAction.get("timestamp")
        if timestamp is None:
            timestamp = get_utc_timestamp()

        # Convert to TaskAction model
        return TaskAction(
            id=createdAction["id"],
            execMethod=createdAction["execMethod"],
            execAction=createdAction["execAction"],
            execParameters=createdAction.get("execParameters", {}),
            execResultLabel=createdAction.get("execResultLabel"),
            expectedDocumentFormats=createdAction.get("expectedDocumentFormats"),
            status=createdAction.get("status", TaskStatus.PENDING),
            error=createdAction.get("error"),
            retryCount=createdAction.get("retryCount", 0),
            retryMax=createdAction.get("retryMax", 3),
            processingTime=createdAction.get("processingTime"),
            timestamp=float(timestamp),
            result=createdAction.get("result"),
            resultDocuments=createdAction.get("resultDocuments", [])
        )

    except Exception as e:
        logger.error(f"Error creating task action: {str(e)}")
        return None
def createChatDocument(self, documentData: Dict[str, Any]) -> Optional["ChatDocument"]:
    """Build a ChatDocument from a dict, generating an ID when none is given.

    Args:
        documentData: Field values for the document. ``fileId`` is required;
            ``id``, ``filename``, ``fileSize`` and ``mimeType`` get defaults
            when missing. The dict itself is left untouched.

    Returns:
        The ChatDocument, or None when ``fileId`` is missing or construction
        raised.
    """
    try:
        # Validate before touching anything, so a rejected call has no effect.
        if "fileId" not in documentData:
            logger.error("fileId is required for ChatDocument")
            return None

        # Ensure ID is present
        documentId = documentData.get("id") or f"doc_{uuid.uuid4()}"

        # Create ChatDocument using the model, defaulting the optional fields
        return ChatDocument(
            id=documentId,
            fileId=documentData["fileId"],
            filename=documentData.get("filename", "unknown"),
            fileSize=documentData.get("fileSize", 0),
            mimeType=documentData.get("mimeType", "application/octet-stream")
        )

    except Exception as e:
        logger.error(f"Error creating ChatDocument: {str(e)}")
        return None
def getInterface(currentUser: Optional[User] = None) -> 'ChatObjects': def getInterface(currentUser: Optional[User] = None) -> 'ChatObjects':
""" """
Returns a ChatObjects instance for the current user. Returns a ChatObjects instance for the current user.

View file

@ -29,7 +29,7 @@ class FileItem(BaseModel, ModelMixin):
frontend_readonly=True, frontend_readonly=True,
frontend_required=False frontend_required=False
) )
filename: str = Field( fileName: str = Field(
description="Name of the file", description="Name of the file",
frontend_type="text", frontend_type="text",
frontend_readonly=False, frontend_readonly=False,
@ -72,7 +72,7 @@ register_model_labels(
{ {
"id": {"en": "ID", "fr": "ID"}, "id": {"en": "ID", "fr": "ID"},
"mandateId": {"en": "Mandate ID", "fr": "ID du mandat"}, "mandateId": {"en": "Mandate ID", "fr": "ID du mandat"},
"filename": {"en": "Filename", "fr": "Nom de fichier"}, "fileName": {"en": "fileName", "fr": "Nom de fichier"},
"mimeType": {"en": "MIME Type", "fr": "Type MIME"}, "mimeType": {"en": "MIME Type", "fr": "Type MIME"},
"fileHash": {"en": "File Hash", "fr": "Hash du fichier"}, "fileHash": {"en": "File Hash", "fr": "Hash du fichier"},
"fileSize": {"en": "File Size", "fr": "Taille du fichier"}, "fileSize": {"en": "File Size", "fr": "Taille du fichier"},
@ -84,7 +84,7 @@ class FilePreview(BaseModel, ModelMixin):
"""Data model for file preview""" """Data model for file preview"""
content: Union[str, bytes] = Field(description="File content (text or binary)") content: Union[str, bytes] = Field(description="File content (text or binary)")
mimeType: str = Field(description="MIME type of the file") mimeType: str = Field(description="MIME type of the file")
filename: str = Field(description="Original filename") fileName: str = Field(description="Original fileName")
isText: bool = Field(description="Whether the content is text (True) or binary (False)") isText: bool = Field(description="Whether the content is text (True) or binary (False)")
encoding: Optional[str] = Field(None, description="Text encoding if content is text") encoding: Optional[str] = Field(None, description="Text encoding if content is text")
size: int = Field(description="Size of the content in bytes") size: int = Field(description="Size of the content in bytes")
@ -105,7 +105,7 @@ register_model_labels(
{ {
"content": {"en": "Content", "fr": "Contenu"}, "content": {"en": "Content", "fr": "Contenu"},
"mimeType": {"en": "MIME Type", "fr": "Type MIME"}, "mimeType": {"en": "MIME Type", "fr": "Type MIME"},
"filename": {"en": "Filename", "fr": "Nom de fichier"}, "fileName": {"en": "fileName", "fr": "Nom de fichier"},
"isText": {"en": "Is Text", "fr": "Est du texte"}, "isText": {"en": "Is Text", "fr": "Est du texte"},
"encoding": {"en": "Encoding", "fr": "Encodage"}, "encoding": {"en": "Encoding", "fr": "Encodage"},
"size": {"en": "Size", "fr": "Taille"} "size": {"en": "Size", "fr": "Taille"}

View file

@ -332,33 +332,50 @@ class ComponentObjects:
# File Utilities # File Utilities
def calculateFileHash(self, fileContent: bytes) -> str:
    """Calculates a SHA-256 hash for the file content"""
    import hashlib
    return hashlib.sha256(fileContent).hexdigest()

def checkForDuplicateFile(self, fileHash: str, fileName: Optional[str] = None) -> Optional["FileItem"]:
    """Look for an already stored file with the given content hash.

    Only files that pass the access filter (``self._uam``) are considered.

    Args:
        fileHash: Content hash to look up.
        fileName: When given, only a file with this exact name AND the hash
            counts as a match. When omitted, any file with the hash matches.

    Returns:
        The matching FileItem, or None when there is no match.
    """
    def toFileItem(record):
        return FileItem(
            id=record["id"],
            mandateId=record["mandateId"],
            fileName=record["fileName"],
            mimeType=record["mimeType"],
            fileHash=record["fileHash"],
            fileSize=record["fileSize"],
            creationDate=record["creationDate"]
        )

    # First get all files with the hash
    allFilesWithHash = self.db.getRecordset("files", recordFilter={
        "fileHash": fileHash
    })

    # Filter by user access using UAM
    accessibleFiles = self._uam("files", allFilesWithHash)
    if not accessibleFiles:
        return None

    if fileName:
        # Exact name+hash match only. Falling back to a hash-only match here
        # would make callers treat a differently named file as an exact duplicate.
        for file in accessibleFiles:
            if file["fileName"] == fileName:
                return toFileItem(file)
        return None

    # Return first file with matching hash (for general duplicate detection)
    return toFileItem(accessibleFiles[0])
def getMimeType(self, filename: str) -> str: def getMimeType(self, fileName: str) -> str:
"""Determines the MIME type based on the file extension.""" """Determines the MIME type based on the file extension."""
import os import os
ext = os.path.splitext(filename)[1].lower()[1:] ext = os.path.splitext(fileName)[1].lower()[1:]
extensionToMime = { extensionToMime = {
"pdf": "application/pdf", "pdf": "application/pdf",
"docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
@ -440,7 +457,7 @@ class ComponentObjects:
fileItem = FileItem( fileItem = FileItem(
id=file.get("id"), id=file.get("id"),
mandateId=file.get("mandateId"), mandateId=file.get("mandateId"),
filename=file.get("filename"), fileName=file.get("fileName"),
mimeType=file.get("mimeType"), mimeType=file.get("mimeType"),
workflowId=file.get("workflowId"), workflowId=file.get("workflowId"),
fileHash=file.get("fileHash"), fileHash=file.get("fileHash"),
@ -474,7 +491,7 @@ class ComponentObjects:
return FileItem( return FileItem(
id=file.get("id"), id=file.get("id"),
mandateId=file.get("mandateId"), mandateId=file.get("mandateId"),
filename=file.get("filename"), fileName=file.get("fileName"),
mimeType=file.get("mimeType"), mimeType=file.get("mimeType"),
workflowId=file.get("workflowId"), workflowId=file.get("workflowId"),
fileHash=file.get("fileHash"), fileHash=file.get("fileHash"),
@ -485,33 +502,33 @@ class ComponentObjects:
logger.error(f"Error converting file record: {str(e)}") logger.error(f"Error converting file record: {str(e)}")
return None return None
def _isFilenameUnique(self, filename: str, excludeFileId: Optional[str] = None) -> bool: def _isfileNameUnique(self, fileName: str, excludeFileId: Optional[str] = None) -> bool:
"""Checks if a filename is unique for the current user.""" """Checks if a fileName is unique for the current user."""
# Get all files for current user # Get all files for current user
files = self.db.getRecordset("files", recordFilter={ files = self.db.getRecordset("files", recordFilter={
"_createdBy": self.currentUser.id "_createdBy": self.currentUser.id
}) })
# Check if filename exists (excluding the current file if updating) # Check if fileName exists (excluding the current file if updating)
for file in files: for file in files:
if file["filename"] == filename and (excludeFileId is None or file["id"] != excludeFileId): if file["fileName"] == fileName and (excludeFileId is None or file["id"] != excludeFileId):
return False return False
return True return True
def _generateUniqueFilename(self, filename: str, excludeFileId: Optional[str] = None) -> str: def _generateUniquefileName(self, fileName: str, excludeFileId: Optional[str] = None) -> str:
"""Generates a unique filename by adding a number if necessary.""" """Generates a unique fileName by adding a number if necessary."""
if self._isFilenameUnique(filename, excludeFileId): if self._isfileNameUnique(fileName, excludeFileId):
return filename return fileName
# Split filename into name and extension # Split fileName into name and extension
name, ext = os.path.splitext(filename) name, ext = os.path.splitext(fileName)
counter = 1 counter = 1
# Try filenames with increasing numbers until we find a unique one # Try fileNames with increasing numbers until we find a unique one
while True: while True:
newFilename = f"{name}_{counter}{ext}" newfileName = f"{name}_{counter}{ext}"
if self._isFilenameUnique(newFilename, excludeFileId): if self._isfileNameUnique(newfileName, excludeFileId):
return newFilename return newfileName
counter += 1 counter += 1
def createFile(self, name: str, mimeType: str, content: bytes) -> FileItem: def createFile(self, name: str, mimeType: str, content: bytes) -> FileItem:
@ -520,8 +537,8 @@ class ComponentObjects:
if not self._canModify("files"): if not self._canModify("files"):
raise PermissionError("No permission to create files") raise PermissionError("No permission to create files")
# Ensure filename is unique # Ensure fileName is unique
uniqueName = self._generateUniqueFilename(name) uniqueName = self._generateUniquefileName(name)
# Compute file size and hash # Compute file size and hash
fileSize = len(content) fileSize = len(content)
@ -530,7 +547,7 @@ class ComponentObjects:
# Create FileItem instance # Create FileItem instance
fileItem = FileItem( fileItem = FileItem(
mandateId=self.currentUser.mandateId, mandateId=self.currentUser.mandateId,
filename=uniqueName, fileName=uniqueName,
mimeType=mimeType, mimeType=mimeType,
fileSize=fileSize, fileSize=fileSize,
fileHash=fileHash fileHash=fileHash
@ -554,9 +571,9 @@ class ComponentObjects:
if not self._canModify("files", fileId): if not self._canModify("files", fileId):
raise PermissionError(f"No permission to update file {fileId}") raise PermissionError(f"No permission to update file {fileId}")
# If filename is being updated, ensure it's unique # If fileName is being updated, ensure it's unique
if "filename" in updateData: if "fileName" in updateData:
updateData["filename"] = self._generateUniqueFilename(updateData["filename"], fileId) updateData["fileName"] = self._generateUniquefileName(updateData["fileName"], fileId)
# Update file # Update file
success = self.db.recordModify("files", fileId, updateData) success = self.db.recordModify("files", fileId, updateData)
@ -764,7 +781,7 @@ class ComponentObjects:
return FilePreview( return FilePreview(
content=content, content=content,
mimeType=file.mimeType, mimeType=file.mimeType,
filename=file.filename, fileName=file.fileName,
isText=isText, isText=isText,
encoding=encoding, encoding=encoding,
size=file.fileSize size=file.fileSize
@ -774,97 +791,7 @@ class ComponentObjects:
logger.error(f"Error getting file content: {str(e)}") logger.error(f"Error getting file content: {str(e)}")
return None return None
def updateFileData(self, fileId: str, data: Union[bytes, str]) -> bool:
    """Store new content for a file the current user may modify.

    Content of text MIME types (per ``self.isTextMimeType``) is stored as
    text; everything else is stored base64-encoded, with the
    ``base64Encoded`` flag of the record set accordingly.

    Args:
        fileId: ID of the file whose data is replaced.
        data: New content as bytes or str.

    Returns:
        True when the data record was created or updated; False when the file
        is not accessible or modifiable, or any step raised.
    """
    import base64

    def encodeForStorage(payload, isTextFormat):
        """Return (value to store, base64Encoded flag) for the payload."""
        if isinstance(payload, bytes):
            if isTextFormat:
                try:
                    return payload.decode('utf-8'), False
                except UnicodeDecodeError:
                    # Not valid UTF-8: fall through and keep the bytes as base64.
                    pass
            return base64.b64encode(payload).decode('utf-8'), True

        text = payload if isinstance(payload, str) else str(payload)
        if isTextFormat:
            return text, False

        if isinstance(payload, str):
            # Keep the string as-is only if it really is base64. Validation is
            # strict (apart from whitespace) because the lenient decoder drops
            # foreign characters and would accept e.g. "ab!cd".
            try:
                base64.b64decode("".join(text.split()), validate=True)
                return text, True
            except ValueError:
                pass
        return base64.b64encode(text.encode('utf-8')).decode('utf-8'), True

    # Check file access
    file = self.getFile(fileId)
    if not file:
        logger.error(f"File with ID {fileId} not found when updating data")
        return False

    if not self._canModify("files", fileId):
        logger.error(f"No permission to update file data for {fileId}")
        return False

    try:
        fileData, base64Encoded = encodeForStorage(data, self.isTextMimeType(file.mimeType))

        dataUpdate = {
            "data": fileData,
            "base64Encoded": base64Encoded
        }

        # Check if a record already exists
        fileDataEntries = self.db.getRecordset("fileData", recordFilter={"id": fileId})
        if fileDataEntries:
            # Update the existing record
            self.db.recordModify("fileData", fileId, dataUpdate)
            logger.debug(f"Updated file data for file ID {fileId} (base64Encoded: {base64Encoded})")
        else:
            # Create a new record
            dataUpdate["id"] = fileId
            self.db.recordCreate("fileData", dataUpdate)
            logger.debug(f"Created new file data for file ID {fileId} (base64Encoded: {base64Encoded})")

        # Clear cache to ensure fresh data
        self._clearTableCache("fileData")
        return True

    except Exception as e:
        logger.error(f"Error updating data for file {fileId}: {str(e)}")
        return False
def saveUploadedFile(self, fileContent: bytes, fileName: str) -> FileItem:
"""Saves an uploaded file if user has permission.""" """Saves an uploaded file if user has permission."""
try: try:
# Check file creation permission # Check file creation permission
@ -877,6 +804,22 @@ class ComponentObjects:
logger.error(f"Invalid fileContent type: {type(fileContent)}") logger.error(f"Invalid fileContent type: {type(fileContent)}")
raise ValueError(f"fileContent must be bytes, got {type(fileContent)}") raise ValueError(f"fileContent must be bytes, got {type(fileContent)}")
# Compute file hash first to check for duplicates
import hashlib
fileHash = hashlib.sha256(fileContent).hexdigest()
# Check for exact name+hash match first (same name + same content)
existingFile = self.checkForDuplicateFile(fileHash, fileName)
if existingFile:
logger.info(f"Exact duplicate detected: {fileName} with same hash. Returning existing file reference.")
return existingFile, "exact_duplicate"
# Check for hash-only match (same content, different name)
existingFileWithSameHash = self.checkForDuplicateFile(fileHash)
if existingFileWithSameHash:
logger.info(f"Content duplicate detected: {fileName} has same content as {existingFileWithSameHash.fileName}")
# Continue with upload - filename will be made unique if needed
# Determine MIME type # Determine MIME type
mimeType = self.getMimeType(fileName) mimeType = self.getMimeType(fileName)
@ -893,40 +836,17 @@ class ComponentObjects:
self.createFileData(fileItem.id, fileContent) self.createFileData(fileItem.id, fileContent)
logger.debug(f"File upload process completed for: {fileName}") logger.debug(f"File upload process completed for: {fileName}")
return fileItem
# Check if filename was modified (indicating name conflict)
if fileItem.fileName != fileName:
return fileItem, "name_conflict"
else:
return fileItem, "new_file"
except Exception as e: except Exception as e:
logger.error(f"Error in saveUploadedFile for {fileName}: {str(e)}", exc_info=True) logger.error(f"Error in saveUploadedFile for {fileName}: {str(e)}", exc_info=True)
raise FileStorageError(f"Error saving file: {str(e)}") raise FileStorageError(f"Error saving file: {str(e)}")
def downloadFile(self, fileId: str) -> Optional[Dict[str, Any]]:
    """Return a file's metadata together with its content for download.

    Args:
        fileId: ID of the file to download.

    Returns:
        A dict with the keys ``id``, ``name``, ``contentType``, ``size`` and
        ``content``.

    Raises:
        FileNotFoundError: When ``self.getFile`` or ``self.getFileData``
            yields nothing for the ID.
        FileError: When anything else goes wrong; the original exception is
            attached as the cause.
    """
    try:
        # Check file access
        file = self.getFile(fileId)
        if not file:
            raise FileNotFoundError(f"File with ID {fileId} not found")

        # Get binary data
        fileContent = self.getFileData(fileId)
        if fileContent is None:
            raise FileNotFoundError(f"Binary data for file with ID {fileId} not found")

        return {
            "id": fileId,
            "name": file.filename,
            "contentType": file.mimeType,
            "size": file.fileSize,
            "content": fileContent
        }

    except FileNotFoundError:
        # "Not found" is passed on unchanged so callers can tell it apart.
        raise
    except Exception as e:
        logger.error(f"Error downloading file {fileId}: {str(e)}")
        raise FileError(f"Error downloading file: {str(e)}") from e
def getInterface(currentUser: Optional[User] = None) -> 'ComponentObjects': def getInterface(currentUser: Optional[User] = None) -> 'ComponentObjects':
""" """

View file

@ -83,10 +83,10 @@ class MethodAi(MethodBase):
3. Include key information, data, and insights that the AI needs 3. Include key information, data, and insights that the AI needs
4. Provide clean, readable text without formatting artifacts 4. Provide clean, readable text without formatting artifacts
Document: {doc.filename} Document: {doc.fileName}
""" """
logger.debug(f"Extracting content from {doc.filename} with task-specific prompt: {extraction_prompt[:100]}...") logger.debug(f"Extracting content from {doc.fileName} with task-specific prompt: {extraction_prompt[:100]}...")
extracted_content = await self.service.extractContentFromDocument( extracted_content = await self.service.extractContentFromDocument(
prompt=extraction_prompt.strip(), prompt=extraction_prompt.strip(),
@ -111,16 +111,16 @@ class MethodAi(MethodBase):
# For detailed mode, include more context # For detailed mode, include more context
if processingMode == "detailed": if processingMode == "detailed":
context_parts.append(f"Document: {doc.filename}{metadata_info}\nRelevance to AI Task: This document contains content directly related to '{aiPrompt[:100]}...'\nContent:\n{content[:base_length]}...") context_parts.append(f"Document: {doc.fileName}{metadata_info}\nRelevance to AI Task: This document contains content directly related to '{aiPrompt[:100]}...'\nContent:\n{content[:base_length]}...")
else: else:
context_parts.append(f"Document: {doc.filename}{metadata_info}\nContent:\n{content[:base_length]}...") context_parts.append(f"Document: {doc.fileName}{metadata_info}\nContent:\n{content[:base_length]}...")
else: else:
context_parts.append(f"Document: {doc.filename} [No readable text content - binary file]") context_parts.append(f"Document: {doc.fileName} [No readable text content - binary file]")
else: else:
context_parts.append(f"Document: {doc.filename} [No readable text content - binary file]") context_parts.append(f"Document: {doc.fileName} [No readable text content - binary file]")
except Exception as extract_error: except Exception as extract_error:
context_parts.append(f"Document: {doc.filename} [Could not extract content - binary file]") context_parts.append(f"Document: {doc.fileName} [Could not extract content - binary file]")
if context_parts: if context_parts:
# Add a summary header to help the AI understand the context # Add a summary header to help the AI understand the context
@ -175,17 +175,17 @@ class MethodAi(MethodBase):
# Create result document # Create result document
timestamp = int(get_utc_timestamp()) timestamp = int(get_utc_timestamp())
filename = f"ai_{processingMode}_{timestamp}{output_extension}" fileName = f"ai_{processingMode}_{timestamp}{output_extension}"
# Return result in the standard ActionResult format # Return result in the standard ActionResult format
return ActionResult.success( return ActionResult.success(
documents=[{ documents=[{
"documentName": filename, "documentName": fileName,
"documentData": { "documentData": {
"result": result, "result": result,
"filename": filename, "fileName": fileName,
"processedDocuments": len(documentList) if documentList else 0 "processedDocuments": len(documentList) if documentList else 0
}, },
"mimeType": output_mime_type "mimeType": output_mime_type

View file

@ -74,12 +74,12 @@ class MethodDocument(MethodBase):
all_extracted_content.append(extracted_content) all_extracted_content.append(extracted_content)
if includeMetadata: if includeMetadata:
file_infos.append(file_info) file_infos.append(file_info)
logger.info(f"Successfully extracted content from {chatDocument.filename}") logger.info(f"Successfully extracted content from {chatDocument.fileName}")
else: else:
logger.warning(f"No content extracted from {chatDocument.filename}") logger.warning(f"No content extracted from {chatDocument.fileName}")
except Exception as e: except Exception as e:
logger.error(f"Error extracting content from {chatDocument.filename}: {str(e)}") logger.error(f"Error extracting content from {chatDocument.fileName}: {str(e)}")
continue continue
if not all_extracted_content: if not all_extracted_content:
@ -113,7 +113,7 @@ class MethodDocument(MethodBase):
# If fewer formats than documents, use the last format for remaining documents # If fewer formats than documents, use the last format for remaining documents
target_format = expectedDocumentFormats[-1] target_format = expectedDocumentFormats[-1]
# Determine output format and filename # Determine output format and fileName
if target_format: if target_format:
target_extension = target_format.get("extension", ".txt") target_extension = target_format.get("extension", ".txt")
target_mime_type = target_format.get("mimeType", "text/plain") target_mime_type = target_format.get("mimeType", "text/plain")
@ -137,24 +137,24 @@ class MethodDocument(MethodBase):
final_mime_type = "text/plain" final_mime_type = "text/plain"
final_extension = ".txt" final_extension = ".txt"
# Create output filename based on original filename and target format # Create output fileName based on original fileName and target format
original_filename = chatDocument.filename original_fileName = chatDocument.fileName
base_name = original_filename.rsplit('.', 1)[0] if '.' in original_filename else original_filename base_name = original_fileName.rsplit('.', 1)[0] if '.' in original_fileName else original_fileName
output_filename = f"{base_name}_extracted_{get_utc_timestamp()}{final_extension}" output_fileName = f"{base_name}_extracted_{get_utc_timestamp()}{final_extension}"
# Create result data for this document # Create result data for this document
result_data = { result_data = {
"documentCount": 1, "documentCount": 1,
"content": final_content, "content": final_content,
"originalFilename": original_filename, "originalfileName": original_fileName,
"fileInfos": [file_infos[i]] if includeMetadata and i < len(file_infos) else None, "fileInfos": [file_infos[i]] if includeMetadata and i < len(file_infos) else None,
"timestamp": get_utc_timestamp() "timestamp": get_utc_timestamp()
} }
logger.info(f"Created output document: {output_filename} with {len(final_content)} characters") logger.info(f"Created output document: {output_fileName} with {len(final_content)} characters")
output_documents.append({ output_documents.append({
"documentName": output_filename, "documentName": output_fileName,
"documentData": result_data, "documentData": result_data,
"mimeType": final_mime_type "mimeType": final_mime_type
}) })
@ -206,7 +206,7 @@ class MethodDocument(MethodBase):
# Update original documents list if not provided # Update original documents list if not provided
if not original_documents: if not original_documents:
original_documents = [doc.filename if hasattr(doc, 'filename') else str(doc.id) for doc in chat_documents] original_documents = [doc.fileName if hasattr(doc, 'fileName') else str(doc.id) for doc in chat_documents]
# Process each document individually with its own format conversion # Process each document individually with its own format conversion
output_documents = [] output_documents = []
@ -225,24 +225,24 @@ class MethodDocument(MethodBase):
try: try:
# Try to decode as UTF-8 to check if it's text # Try to decode as UTF-8 to check if it's text
content = file_data.decode('utf-8') content = file_data.decode('utf-8')
logger.info(f"Document {i+1} ({chat_document.filename}): Successfully decoded as UTF-8 text") logger.info(f"Document {i+1} ({chat_document.fileName}): Successfully decoded as UTF-8 text")
except UnicodeDecodeError: except UnicodeDecodeError:
logger.info(f"Document {i+1} ({chat_document.filename}): Binary data, not text - skipping") logger.info(f"Document {i+1} ({chat_document.fileName}): Binary data, not text - skipping")
continue continue
else: else:
# Already a string # Already a string
content = str(file_data) content = str(file_data)
logger.info(f"Document {i+1} ({chat_document.filename}): Already text data") logger.info(f"Document {i+1} ({chat_document.fileName}): Already text data")
else: else:
logger.warning(f"Document {i+1} ({chat_document.filename}): No file data found") logger.warning(f"Document {i+1} ({chat_document.fileName}): No file data found")
continue continue
if not content.strip(): if not content.strip():
logger.info(f"Document {i+1} ({chat_document.filename}): Empty text content, skipping") logger.info(f"Document {i+1} ({chat_document.fileName}): Empty text content, skipping")
continue continue
except Exception as e: except Exception as e:
logger.warning(f"Error reading document {i+1} ({chat_document.filename}): {str(e)}") logger.warning(f"Error reading document {i+1} ({chat_document.fileName}): {str(e)}")
continue continue
else: else:
logger.warning(f"Document {i+1} has no fileId, skipping") logger.warning(f"Document {i+1} has no fileId, skipping")
@ -271,13 +271,13 @@ class MethodDocument(MethodBase):
target_extension = target_format.get("extension", ".txt") target_extension = target_format.get("extension", ".txt")
target_mime_type = target_format.get("mimeType", "text/plain") target_mime_type = target_format.get("mimeType", "text/plain")
# Create output filename # Create output fileName
timestamp = int(get_utc_timestamp()) timestamp = int(get_utc_timestamp())
if i < len(original_documents): if i < len(original_documents):
base_name = original_documents[i].rsplit('.', 1)[0] if '.' in original_documents[i] else original_documents[i] base_name = original_documents[i].rsplit('.', 1)[0] if '.' in original_documents[i] else original_documents[i]
else: else:
base_name = f"document_{i+1}" base_name = f"document_{i+1}"
output_filename = f"{base_name}_generated_{timestamp}{target_extension}" output_fileName = f"{base_name}_generated_{timestamp}{target_extension}"
# Create result data # Create result data
result_data = { result_data = {
@ -288,10 +288,10 @@ class MethodDocument(MethodBase):
"timestamp": get_utc_timestamp() "timestamp": get_utc_timestamp()
} }
logger.info(f"Generated document: {output_filename} with {len(formatted_content)} characters") logger.info(f"Generated document: {output_fileName} with {len(formatted_content)} characters")
output_documents.append({ output_documents.append({
"documentName": output_filename, "documentName": output_fileName,
"documentData": result_data, "documentData": result_data,
"mimeType": target_mime_type "mimeType": target_mime_type
}) })
@ -527,9 +527,9 @@ class MethodDocument(MethodBase):
# Generate HTML report # Generate HTML report
html_content = await self._generateHtmlReport(chatDocuments, title, includeMetadata, prompt) html_content = await self._generateHtmlReport(chatDocuments, title, includeMetadata, prompt)
# Create output filename # Create output fileName
timestamp = int(get_utc_timestamp()) timestamp = int(get_utc_timestamp())
output_filename = f"report_{timestamp}.html" output_fileName = f"report_{timestamp}.html"
result_data = { result_data = {
"documentCount": len(chatDocuments), "documentCount": len(chatDocuments),
@ -538,11 +538,11 @@ class MethodDocument(MethodBase):
"timestamp": get_utc_timestamp() "timestamp": get_utc_timestamp()
} }
logger.info(f"Generated HTML report: {output_filename} with {len(html_content)} characters") logger.info(f"Generated HTML report: {output_fileName} with {len(html_content)} characters")
return ActionResult.success( return ActionResult.success(
documents=[{ documents=[{
"documentName": output_filename, "documentName": output_fileName,
"documentData": result_data, "documentData": result_data,
"mimeType": "text/html" "mimeType": "text/html"
}] }]
@ -591,7 +591,7 @@ class MethodDocument(MethodBase):
# Skip empty documents # Skip empty documents
if content and content.strip(): if content and content.strip():
validDocuments.append(doc) validDocuments.append(doc)
allContent.append(f"Document: {doc.filename}\n{content}\n") allContent.append(f"Document: {doc.fileName}\n{content}\n")
logger.info(f" Added document to valid documents list") logger.info(f" Added document to valid documents list")
else: else:
logger.info(f" Skipping document with no readable text content") logger.info(f" Skipping document with no readable text content")

View file

@ -31,7 +31,7 @@ ACTION CONTRACT DEFINITION:
"body": "Email body content", "body": "Email body content",
"cc": [], "cc": [],
"bcc": [], "bcc": [],
"attachments": ["docItem:uuid:filename.pdf"] "attachments": ["docItem:uuid:fileName.pdf"]
} }
Key Points: Key Points:
@ -58,7 +58,7 @@ ACTION CONTRACT DEFINITION:
"body": "Email body content", "body": "Email body content",
"cc": [], "cc": [],
"bcc": [], "bcc": [],
"attachments": ["docItem:uuid:filename.pdf"] "attachments": ["docItem:uuid:fileName.pdf"]
} }
Key Points: Key Points:
@ -517,7 +517,7 @@ class MethodOutlook(MethodBase):
"body": "Email body content", "body": "Email body content",
"cc": [], "cc": [],
"bcc": [], "bcc": [],
"attachments": ["docItem:uuid:filename.pdf"] "attachments": ["docItem:uuid:fileName.pdf"]
} }
Note: Email fields must be at root level, not wrapped in a nested structure. Note: Email fields must be at root level, not wrapped in a nested structure.
@ -725,18 +725,18 @@ class MethodOutlook(MethodBase):
# Create attachment object for Graph API # Create attachment object for Graph API
attachment = { attachment = {
"@odata.type": "#microsoft.graph.fileAttachment", "@odata.type": "#microsoft.graph.fileAttachment",
"name": doc.filename, "name": doc.fileName,
"contentType": doc.mimeType or "application/octet-stream", "contentType": doc.mimeType or "application/octet-stream",
"contentBytes": base64_content "contentBytes": base64_content
} }
message["attachments"].append(attachment) message["attachments"].append(attachment)
else: else:
logger.warning(f"⚠️ No content found for attachment: {doc.filename}") logger.warning(f"⚠️ No content found for attachment: {doc.fileName}")
except Exception as e: except Exception as e:
logger.error(f"❌ Error reading attachment file {doc.filename}: {str(e)}") logger.error(f"❌ Error reading attachment file {doc.fileName}: {str(e)}")
else: else:
logger.warning(f"⚠️ Attachment document has no fileId: {doc.filename}") logger.warning(f"⚠️ Attachment document has no fileId: {doc.fileName}")
else: else:
logger.warning(f"⚠️ No attachment documents found for reference: {attachment_ref}") logger.warning(f"⚠️ No attachment documents found for reference: {attachment_ref}")
@ -1418,7 +1418,7 @@ class MethodOutlook(MethodBase):
"body": "Email body content with document content integrated", "body": "Email body content with document content integrated",
"cc": [], "cc": [],
"bcc": [], "bcc": [],
"attachments": ["docItem:uuid:filename.pdf", "docItem:uuid:filename.html"] "attachments": ["docItem:uuid:fileName.pdf", "docItem:uuid:fileName.html"]
} }
Note: Email fields (to, subject, body, cc, bcc, attachments) are at root level Note: Email fields (to, subject, body, cc, bcc, attachments) are at root level
@ -1470,16 +1470,16 @@ class MethodOutlook(MethodBase):
if content_text.strip(): if content_text.strip():
# Truncate content for AI context (avoid token limits) # Truncate content for AI context (avoid token limits)
content_preview = content_text[:1000] + "..." if len(content_text) > 1000 else content_text content_preview = content_text[:1000] + "..." if len(content_text) > 1000 else content_text
document_content_summary += f"\nDocument: {doc.filename}\nContent Preview: {content_preview}\n" document_content_summary += f"\nDocument: {doc.fileName}\nContent Preview: {content_preview}\n"
# No content to extract # No content to extract
except Exception as extract_error: except Exception as extract_error:
# Content extraction failed (normal for binary files) # Content extraction failed (normal for binary files)
pass pass
else: else:
logger.warning(f"Document {doc.filename} has no fileId") logger.warning(f"Document {doc.fileName} has no fileId")
except Exception as e: except Exception as e:
logger.warning(f"Error processing document {doc.filename}: {str(e)}") logger.warning(f"Error processing document {doc.fileName}: {str(e)}")
else: else:
logger.warning("No documents found from documentList") logger.warning("No documents found from documentList")
except Exception as e: except Exception as e:
@ -1496,7 +1496,7 @@ class MethodOutlook(MethodBase):
if attachment_docs: if attachment_docs:
for doc in attachment_docs: for doc in attachment_docs:
# Add to attachments list # Add to attachments list
all_attachments.append(f"docItem:{doc.id}:{doc.filename}") all_attachments.append(f"docItem:{doc.id}:{doc.fileName}")
else: else:
logger.warning("No attachment documents found from attachmentDocumentList") logger.warning("No attachment documents found from attachmentDocumentList")

View file

@ -326,7 +326,7 @@ class MethodSharepoint(MethodBase):
for i, path in enumerate(documentPaths): for i, path in enumerate(documentPaths):
mock_doc = type('MockChatDocument', (), { mock_doc = type('MockChatDocument', (), {
'fileId': f'mock_file_id_{i}', 'fileId': f'mock_file_id_{i}',
'filename': path.split('/')[-1] if '/' in path else path 'fileName': path.split('/')[-1] if '/' in path else path
})() })()
chatDocuments.append(mock_doc) chatDocuments.append(mock_doc)
logger.info(f"Created {len(chatDocuments)} mock documents for testing") logger.info(f"Created {len(chatDocuments)} mock documents for testing")

View file

@ -84,7 +84,15 @@ async def upload_file(
) )
# Save file via LucyDOM interface in the database # Save file via LucyDOM interface in the database
fileItem = managementInterface.saveUploadedFile(fileContent, file.filename) fileItem, duplicateType = managementInterface.saveUploadedFile(fileContent, file.fileName)
# Determine response message based on duplicate type
if duplicateType == "exact_duplicate":
message = f"File '{file.fileName}' already exists with identical content. Reusing existing file."
elif duplicateType == "name_conflict":
message = f"File '{file.fileName}' already exists with different content. Uploaded as '{fileItem.fileName}'."
else: # new_file
message = "File uploaded successfully"
# If workflowId is provided, update the file information # If workflowId is provided, update the file information
if workflowId: if workflowId:
@ -95,10 +103,14 @@ async def upload_file(
# Convert FileItem to dictionary for JSON response # Convert FileItem to dictionary for JSON response
fileMeta = fileItem.to_dict() fileMeta = fileItem.to_dict()
# Successful response # Response with duplicate information
return JSONResponse({ return JSONResponse({
"message": "File uploaded successfully", "message": message,
"file": fileMeta "file": fileMeta,
"duplicateType": duplicateType,
"originalFileName": file.fileName,
"storedFileName": fileItem.fileName,
"isDuplicate": duplicateType != "new_file"
}) })
except interfaceComponentObjects.FileStorageError as e: except interfaceComponentObjects.FileStorageError as e:
@ -305,7 +317,7 @@ async def download_file(
content=fileContent, content=fileContent,
media_type=fileData.mimeType, media_type=fileData.mimeType,
headers={ headers={
"Content-Disposition": f"attachment; filename={fileData.filename}" "Content-Disposition": f"attachment; fileName={fileData.fileName}"
} }
) )
except HTTPException: except HTTPException:

View file

@ -212,8 +212,8 @@ async def logout(request: Request, currentUser: User = Depends(getCurrentUser))
# Get user interface with current user context # Get user interface with current user context
appInterface = getInterface(currentUser) appInterface = getInterface(currentUser)
# Revoke all sessions for the user # Note: JWT tokens are stateless, so no server-side cleanup needed
appInterface.revokeAllUserSessions(currentUser.id) # The client should discard the JWT token on logout
return JSONResponse({ return JSONResponse({
"message": "Successfully logged out" "message": "Successfully logged out"

View file

@ -15,7 +15,7 @@ from slowapi.util import get_remote_address
from modules.shared.configuration import APP_CONFIG from modules.shared.configuration import APP_CONFIG
from modules.shared.timezoneUtils import get_utc_now, get_utc_timestamp from modules.shared.timezoneUtils import get_utc_now, get_utc_timestamp
from modules.interfaces.interfaceAppObjects import getRootInterface from modules.interfaces.interfaceAppObjects import getRootInterface
from modules.interfaces.interfaceAppModel import Session, AuthEvent, UserPrivilege, User from modules.interfaces.interfaceAppModel import User
# Get Config Data # Get Config Data
SECRET_KEY = APP_CONFIG.get("APP_JWT_SECRET_SECRET") SECRET_KEY = APP_CONFIG.get("APP_JWT_SECRET_SECRET")
@ -55,24 +55,6 @@ def createAccessToken(data: dict, expiresDelta: Optional[timedelta] = None) -> T
return encodedJwt, expire return encodedJwt, expire
def createRefreshToken(data: dict) -> Tuple[str, datetime]:
    """
    Build a signed JWT that is marked as a refresh token.

    The input mapping is not modified; its entries are copied into the
    claims and extended with "exp" and "type".

    Args:
        data: Claims to encode (usually user ID or username)

    Returns:
        Tuple of (JWT Token as string, expiration datetime)
    """
    expiresAt = get_utc_now() + timedelta(days=REFRESH_TOKEN_EXPIRE_DAYS)
    # "exp" and "type" are listed last so they win over same-named keys in data
    claims = {**data, "exp": expiresAt, "type": "refresh"}
    return jwt.encode(claims, SECRET_KEY, algorithm=ALGORITHM), expiresAt
def _getUserBase(token: str = Depends(oauth2Scheme)) -> User: def _getUserBase(token: str = Depends(oauth2Scheme)) -> User:
""" """
Extracts and validates the current user from the JWT token. Extracts and validates the current user from the JWT token.
@ -149,100 +131,4 @@ def getCurrentUser(currentUser: User = Depends(_getUserBase)) -> User:
) )
return currentUser return currentUser
def createUserSession(userId: str, tokenId: str, request: Request) -> Session:
    """
    Store a new session record and the accompanying "login" auth event.

    Args:
        userId: ID of the user the session is created for
        tokenId: Token ID stored on the session
        request: Incoming request; client host and user-agent header are recorded

    Returns:
        The Session object that was written to the "sessions" table
    """
    rootInterface = getRootInterface()

    newSession = Session(
        userId=userId,
        tokenId=tokenId,
        expiresAt=get_utc_now() + timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES),
        ipAddress=None if not request.client else request.client.host,
        userAgent=request.headers.get("user-agent"),
    )

    # Persist the session, then drop the table cache so readers see it
    rootInterface.db.recordCreate("sessions", newSession.to_dict())
    rootInterface.db.clearTableCache("sessions")

    loginEvent = AuthEvent(
        userId=userId,
        eventType="login",
        details={"method": "local"},
        ipAddress=None if not request.client else request.client.host,
        userAgent=request.headers.get("user-agent"),
    )

    # Persist the auth event, then drop the table cache so readers see it
    rootInterface.db.recordCreate("auth_events", loginEvent.to_dict())
    rootInterface.db.clearTableCache("auth_events")

    return newSession
def logAuthEvent(userId: str, eventType: str, details: Dict[str, Any], request: Request) -> None:
    """
    Write one authentication event to the "auth_events" table.

    Args:
        userId: ID of the user the event belongs to
        eventType: Event type stored on the record
        details: Additional event details stored on the record
        request: Incoming request; client host and user-agent header are recorded
    """
    rootInterface = getRootInterface()

    clientHost = None if not request.client else request.client.host
    authEvent = AuthEvent(
        userId=userId,
        eventType=eventType,
        details=details,
        ipAddress=clientHost,
        userAgent=request.headers.get("user-agent"),
    )

    # Persist the event, then drop the table cache so readers see it
    rootInterface.db.recordCreate("auth_events", authEvent.to_dict())
    rootInterface.db.clearTableCache("auth_events")
def validateSession(sessionId: str) -> bool:
    """
    Check that a session exists and has not passed its "expiresAt" value.

    A valid session gets its "lastActivity" field refreshed as a side effect.

    Args:
        sessionId: ID of the session record to check

    Returns:
        True if the session was found and is not expired, False otherwise
    """
    rootInterface = getRootInterface()

    matches = rootInterface.db.getRecordset("sessions", recordFilter={"id": sessionId})
    if not matches:
        return False

    record = matches[0]
    now = get_utc_timestamp()
    if now > record["expiresAt"]:
        return False

    # Session is still valid: record the activity and invalidate the cache
    activityUpdate = {"lastActivity": get_utc_timestamp()}
    rootInterface.db.recordModify("sessions", sessionId, activityUpdate)
    rootInterface.db.clearTableCache("sessions")

    return True
def revokeSession(sessionId: str) -> None:
    """
    Delete a single session record and invalidate the "sessions" table cache.

    Args:
        sessionId: ID of the session record to delete
    """
    rootInterface = getRootInterface()
    rootInterface.db.recordDelete("sessions", sessionId)
    # Drop cached rows so the deleted session is no longer served
    rootInterface.db.clearTableCache("sessions")
def revokeAllUserSessions(userId: str) -> None:
    """
    Delete every session record that belongs to the given user.

    Args:
        userId: ID of the user whose sessions are deleted
    """
    rootInterface = getRootInterface()

    userSessions = rootInterface.db.getRecordset("sessions", recordFilter={"userId": userId})

    # Remove the records one by one, in the order the lookup returned them
    for sessionId in (record["id"] for record in userSessions):
        rootInterface.db.recordDelete("sessions", sessionId)

    # Drop cached rows so the deleted sessions are no longer served
    rootInterface.db.clearTableCache("sessions")

View file

@ -10,7 +10,7 @@ from typing import Optional, Dict, Any
from modules.interfaces.interfaceAppModel import Token, AuthAuthority from modules.interfaces.interfaceAppModel import Token, AuthAuthority
from modules.shared.configuration import APP_CONFIG from modules.shared.configuration import APP_CONFIG
from modules.shared.timezoneUtils import get_utc_timestamp, create_expiration_timestamp, is_expired_utc, get_expires_in_seconds from modules.shared.timezoneUtils import get_utc_timestamp, create_expiration_timestamp
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -142,28 +142,4 @@ class TokenManager:
except Exception as e: except Exception as e:
logger.error(f"Error refreshing token: {str(e)}") logger.error(f"Error refreshing token: {str(e)}")
return None return None
def is_token_expired(self, token: Token) -> bool:
    """
    Report whether the token's expiration time has passed.

    Args:
        token: Token whose expiresAt field is inspected

    Returns:
        False when expiresAt is unset/falsy, otherwise the result of
        is_expired_utc() for that value
    """
    # A token without an expiration value is treated as never expiring
    return is_expired_utc(token.expiresAt) if token.expiresAt else False
def get_token_status(self, token: Token) -> Dict[str, Any]:
    """
    Get comprehensive token status information.

    Args:
        token: Token whose expiresAt field is inspected

    Returns:
        Dict with the keys:
            status: "expired" when the remaining time is <= 0, else "valid"
            expires_at: the token's expiresAt value (None when unset)
            expires_in_seconds: remaining seconds, or None when unknown
            expires_soon: True when the remaining time is at most one hour
    """
    if not token.expiresAt:
        # No expiration configured: the token is treated as always valid
        return {
            "status": "valid",
            "expires_at": None,
            "expires_in_seconds": None,
            "expires_soon": False
        }

    expires_in = get_expires_in_seconds(token.expiresAt)

    # Test against None explicitly. A remaining time of exactly 0 is falsy, so
    # the previous truthiness check ("expires_in and ...") reported such a token
    # as "valid" and produced the int 0 instead of a boolean for expires_soon.
    has_deadline = expires_in is not None

    return {
        "status": "expired" if has_deadline and expires_in <= 0 else "valid",
        "expires_at": token.expiresAt,
        "expires_in_seconds": expires_in,
        "expires_soon": has_deadline and expires_in <= 3600  # 1 hour
    }

View file

@ -274,10 +274,10 @@ def getModelClasses() -> Dict[str, Type[BaseModel]]:
interfaces_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'interfaces') interfaces_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'interfaces')
# Find all model files # Find all model files
for filename in os.listdir(interfaces_dir): for fileName in os.listdir(interfaces_dir):
if filename.endswith('Model.py'): if fileName.endswith('Model.py'):
# Convert filename to module name (e.g., gatewayModel.py -> gatewayModel) # Convert fileName to module name (e.g., gatewayModel.py -> gatewayModel)
module_name = filename[:-3] module_name = fileName[:-3]
# Import the module dynamically # Import the module dynamically
module = importlib.import_module(f'modules.interfaces.{module_name}') module = importlib.import_module(f'modules.interfaces.{module_name}')

View file

@ -39,118 +39,6 @@ def to_utc_timestamp(dt: datetime) -> float:
dt = dt.replace(tzinfo=timezone.utc) dt = dt.replace(tzinfo=timezone.utc)
return dt.timestamp() return dt.timestamp()
def from_utc_timestamp(timestamp: Union[int, float]) -> datetime:
    """
    Turn a POSIX timestamp into a timezone-aware UTC datetime.

    Args:
        timestamp (Union[int, float]): Seconds since the epoch, in UTC

    Returns:
        datetime: Aware datetime whose tzinfo is UTC
    """
    utc = timezone.utc
    return datetime.fromtimestamp(timestamp, utc)
def add_seconds_to_utc(seconds: int) -> datetime:
    """
    Return the current UTC time shifted by the given number of seconds.

    Args:
        seconds (int): Offset in seconds; a negative value moves into the past

    Returns:
        datetime: Result of get_utc_now() plus the offset
    """
    now = get_utc_now()
    offset = timedelta(seconds=seconds)
    return now + offset
def add_seconds_to_utc_timestamp(seconds: int) -> float:
    """
    Return the current UTC timestamp shifted by the given number of seconds.

    Args:
        seconds (int): Offset in seconds; a negative value moves into the past

    Returns:
        float: Result of get_utc_timestamp() plus the offset
    """
    now = get_utc_timestamp()
    return now + seconds
def format_utc_for_display(dt: datetime, format_str: str = "%Y-%m-%d %H:%M:%S UTC") -> str:
    """
    Format a datetime for display, expressed in UTC.

    Args:
        dt (datetime): Datetime to format. A naive value is assumed to be UTC
            already; an aware value is converted to UTC first.
        format_str (str): strftime format string (default: ISO-like with UTC indicator)

    Returns:
        str: Formatted datetime string
    """
    if dt.tzinfo is None:
        # If naive datetime, assume it's UTC
        dt = dt.replace(tzinfo=timezone.utc)
    else:
        # The default format hard-codes the "UTC" suffix, so an aware datetime
        # in another zone must be converted or the label would be wrong.
        dt = dt.astimezone(timezone.utc)
    return dt.strftime(format_str)
def is_expired_utc(expires_at: Union[datetime, float, str]) -> bool:
    """
    Tell whether an expiration time lies before the current UTC time.

    Args:
        expires_at (Union[datetime, float, str]): Expiration as a datetime,
            a numeric timestamp, or a string (ISO format or numeric)

    Returns:
        bool: True if the current UTC timestamp is past the expiration,
        False otherwise (including when expires_at is empty/falsy)
    """
    if not expires_at:
        return False

    now = get_utc_timestamp()

    if isinstance(expires_at, str):
        try:
            # ISO string; a trailing "Z" is rewritten so fromisoformat accepts it
            parsed = datetime.fromisoformat(expires_at.replace('Z', '+00:00'))
            deadline = to_utc_timestamp(parsed)
        except ValueError:
            # Not ISO formatted: fall back to a numeric string
            deadline = float(expires_at)
    elif isinstance(expires_at, datetime):
        deadline = to_utc_timestamp(expires_at)
    else:
        deadline = float(expires_at)

    return deadline < now
def get_expires_in_seconds(expires_at: Union[datetime, float, str]) -> Optional[int]:
    """
    Compute how many whole seconds remain until an expiration time.

    Args:
        expires_at (Union[datetime, float, str]): Expiration as a datetime,
            a numeric timestamp, or a string (ISO format or numeric)

    Returns:
        Optional[int]: Seconds until expiration (negative once expired),
        or None when expires_at is empty/falsy
    """
    if not expires_at:
        return None

    now = get_utc_timestamp()

    if isinstance(expires_at, str):
        try:
            # ISO string; a trailing "Z" is rewritten so fromisoformat accepts it
            parsed = datetime.fromisoformat(expires_at.replace('Z', '+00:00'))
            deadline = to_utc_timestamp(parsed)
        except ValueError:
            # Not ISO formatted: fall back to a numeric string
            deadline = float(expires_at)
    elif isinstance(expires_at, datetime):
        deadline = to_utc_timestamp(expires_at)
    else:
        deadline = float(expires_at)

    remaining = deadline - now
    return int(remaining)
def create_expiration_timestamp(expires_in_seconds: int) -> float: def create_expiration_timestamp(expires_in_seconds: int) -> float:
""" """
Create a new expiration timestamp from seconds until expiration. Create a new expiration timestamp from seconds until expiration.

View file

@ -1,17 +1,71 @@
TODO TODO
- ui: Besseres Rendering der Tasks, Actions, Files (hierarchisch eingerückt) und der Log Entries ohne Rahmen
- ui: Beim Laden des Workflows die Logs und Messages synchron laden chronologisch # Document Handling and Handovers
- documents: Sprechende Filenamen für user, ein Label für die interne Nutzung we need to improve document handling in the chat workflow and prompting:
- Chat: Pro Action und Task eine Message an den User in der UserLanguage
- document list (result list) labeling including round#, task#, action#
- files: a user friendly name and a timestamp
- prompt: Section of document lists with documents
- document object: to have reference to message, to have label for chat including round#, task#, action#, to have reference to file
- message object: to have reference to round, task, action
- each action and task to deliver a user friendly message or error message to deliver to the user in his language
- check history --> tasks? # General inputs:
- Look at the interface*model.py files, here are the current models.
- This is an automated chat, no user input request etc. analyse the code, to understand the logic and the data flow.
- we have no database, we have data interfaces. data objects in pydantic model.
#Inputs for the models:
Enhanced ChatDocument model
id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Primary key")
label: str = Field(description="User-friendly display name")
fileId: str = Field(description="Foreign key to file")
(fileName, fileSize, mimeType, creationDate) --> to remove as attributes; those attributes to get from referenced file object to always have valid attributes (if e.g. file name has been changed by user) - to include a function in model or to adapt calling references
# New labeling system --> optional attributes
roundNumber: int = Field(description="Round number in workflow")
taskNumber: int = Field(description="Task number within round")
actionNumber: int = Field(description="Action number within task")
Enhanced File Management: we do not need a new model here.
Enhanced Prompt System: we do not need a pydantic model for this. It is about enhancing the action generation prompt so that it presents the document lists with their label and the document references they contain; both include the round, task and action number as well as the file name and extension
Enhanced Message Object: we do not need a new model here. The current model is good.
Enhanced Action and Task System: Here we do not need a new model. What we need is the possibility to return "userMessage" in the user language. A userMessageType, for example, is not required, as we already have the "success" flag. No message templates, as the message is AI generated. For this, the enhanced AI prompts for task plan generation and action plan generation need an enhanced output JSON model that includes a user message instruction with an indication of the user language
Can you analyse again with my inputs and present a revised plan.
# UI Rendering of the workflow
- Besseres Rendering der Tasks, Actions, Files (hierarchisch eingerückt) und der Log Entries (ohne Rahmen)
- Beim Laden des Workflows die Logs und Messages synchron laden, saubere chronologische Reihenfolge aller Objekte
- documents: den file namen anzeigen, nicht den internen dokumentnamen
- Chat: Die Benutzermeldungen pro Action und Task rendern
- Über dem Rendering Fenster der Logs und Messages ein professionelles Dashboard, welches die Animation zeigt und graphisch die Blöcke der Tasks und immer animiert die aktive Action eines Tasks. Die Ansicht dynamisch aktualisiert immer nach Abschluss einer Action
# System
- model reference diagram for all models. who uses who? --> to see the basic building blocks - model reference diagram for all models. who uses who? --> to see the basic building blocks
- neutralizer to activate AND put back placeholders to the returned data
- automatischer token refresh msft und google integrieren # Tests
- add a prompt --> then shall be visible in the workflow to select
- msft connection bei 2 verschiedene users
- chat 3x ausführen mit verschiedenen mailempfängern, test ob round greift
- check method outlook: alles - check method outlook: alles
- check method sharepoint: alles - check method sharepoint: alles
- check method webcrawler: alles - check method webcrawler: alles
@ -19,10 +73,10 @@ TODO
- check zusammenfassung von 10 dokumenten >10 MB - check zusammenfassung von 10 dokumenten >10 MB
- test case bewerbung - test case bewerbung
- neutralizer to activate AND put back placeholders to the returned data
********************
INIT INIT
@ -34,70 +88,11 @@ python app.py
********************
Test paths:
- Admin
- User
- MSFT-Google
- Alle Management items
- Workflow
- Connections on/off
- Mail 2 Connectors
Walkthroughs:
- register
- login local
- login msft
- management pages
- workflow
----------------------- OPEN ----------------------- OPEN
TODO: DOCUMENT handling in the workflow !!!!!!!!!!!!!!!!!!
- the workflow in "workflowManager.py" to run with pure documents and no content extraction from documents. To use revised Document model everywhere
- Prompts for tasklist to revise accordingly and to make clear, that the prompting for data extraction will be a job for each agent, not to be topic of the taskplan.
- task to the agent to include the prompt for his job to do and also no data extraction. also here to make clear, that data extraction will be done by the agent.
Implemented agents: they use following tools depending on their job:
- extract content using global function getContent(document list) --> define prompt for each document of to extract data based on agent's task. it creates an ai call to specify the prompt per document to extract relevant data in the required format using the global function documentProcessor() and stores extracted data in the content object to use
- produce message object (feedback prompt, document list)
function documentProcessor():
- return one content per document using ai call
- if there are many content objects in a document it uses one ai call per content to be specified, that if no relevant content is in the content object, an empty string is returned, otherwise the text in the required format
- the extraction of each content object has a data size limit. If this limit is exceeded, the content is handled in batches depending on the file type (csv --> set of records, text in tokens, etc.)
Other topics:
- agentCoder only to do algorithmic jobs, no data extraction
- agentDocumentation to extract data per chapter
NEXT:
! function callAI() to ask with (userPrompt, systemPrompt optional), not with json
! in the taskplan to refer files always in context of user/mandate
! userinput to handle with object AgentQuery --> when received in frontend to enhance for full object
! user prompt to handle as directive AND file
! database to serialize list[] objects and replace by id-list -> already done in workflow?
! Prompts pro Agent mit prägnantem system prompt ergänzen. erfasse alle kontext-themen, regeln, anweisungen bei nichtwissen, format der antwort (generische stati)
agentDocumentation delivers a ".docx" file, but the content is a ".md" text markup file
Tools to transfer incl funds: Tools to transfer incl funds:
- Google SERPAPI (shelly) - Google SERPAPI (shelly)
@ -107,9 +102,10 @@ Tools to transfer incl funds:
- Github Pro - Github Pro
----------------------- DONE
----------------------- DONE
FRONTEND FRONTEND
- the application initiation gets userdata with the token over apiCall.js:/api/local/me --> object: - the application initiation gets userdata with the token over apiCall.js:/api/local/me --> object:
@ -677,8 +673,8 @@ the content provided to an agent will now be a document consisting of the conten
Like this we have not anymore the problem, that file content is not found by the agents. Like this we have not anymore the problem, that file content is not found by the agents.
For code implementation I see a big opportunity to massively reduce code. To build basic methods to be used everywhere: For code implementation I see a big opportunity to massively reduce code. To build basic methods to be used everywhere:
1. function "document_store_upload(message_id,filename,filepath...) --> function to store an uploaded or drag&drop document from the user and return the document object. This function does the steps for a) respectively b) like described above and identified the filetype 1. function "document_store_upload(message_id,fileName,filepath...) --> function to store an uploaded or drag&drop document from the user and return the document object. This function does the steps for a) respectively b) like described above and identified the filetype
2. function "document_store_agent(message_id,filename,document_content,document_type...) --> function to store the produced document from the agent and return the document object. This function does the steps like described in section a) above 2. function "document_store_agent(message_id,fileName,document_content,document_type...) --> function to store the produced document from the agent and return the document object. This function does the steps like described in section a) above
3. function "document_get_from_message() 3. function "document_get_from_message()
Based on these 3 functions all operations can be done much more comfortable in the workflow, but also in connection with the ui (download file, copy file, preview file), because all references to the files are always ensured. Based on these 3 functions all operations can be done much more comfortable in the workflow, but also in connection with the ui (download file, copy file, preview file), because all references to the files are always ensured.

View file

@ -396,7 +396,7 @@ class DocumentExtractionTester:
# Process the file with or without AI based on configuration # Process the file with or without AI based on configuration
extracted_content = await self.extractor.processFileData( extracted_content = await self.extractor.processFileData(
fileData=file_data, fileData=file_data,
filename=file_path.name, fileName=file_path.name,
mimeType=mime_type, mimeType=mime_type,
base64Encoded=False, base64Encoded=False,
prompt=self.prompt, prompt=self.prompt,
@ -423,7 +423,7 @@ class DocumentExtractionTester:
# Track processing result # Track processing result
result = { result = {
'filename': file_path.name, 'fileName': file_path.name,
'status': 'OK', 'status': 'OK',
'content_items': 0, 'content_items': 0,
'output_files': [], 'output_files': [],
@ -438,15 +438,15 @@ class DocumentExtractionTester:
result['total_content_size'] += content_size result['total_content_size'] += content_size
logger.debug(f"Content item {i+1}: {content_item.label}, size: {content_size} bytes") logger.debug(f"Content item {i+1}: {content_item.label}, size: {content_size} bytes")
# Generate filename with new naming convention # Generate fileName with new naming convention
if len(extracted_content.contents) == 1: if len(extracted_content.contents) == 1:
# Single content item # Single content item
output_filename = f"{file_path.stem} - {content_item.label} 1.txt" output_fileName = f"{file_path.stem} - {content_item.label} 1.txt"
else: else:
# Multiple content items - add sequence number # Multiple content items - add sequence number
output_filename = f"{file_path.stem} - {content_item.label} {i+1}.txt" output_fileName = f"{file_path.stem} - {content_item.label} {i+1}.txt"
output_file = self.output_dir / output_filename output_file = self.output_dir / output_fileName
# Write only the raw extracted content # Write only the raw extracted content
logger.debug(f"Attempting to write to: {output_file}") logger.debug(f"Attempting to write to: {output_file}")
@ -457,14 +457,14 @@ class DocumentExtractionTester:
# Verify file was created # Verify file was created
if output_file.exists(): if output_file.exists():
actual_size = output_file.stat().st_size actual_size = output_file.stat().st_size
logger.info(f"✓ File created successfully: {output_filename} (expected: {content_size} bytes, actual: {actual_size} bytes)") logger.info(f"✓ File created successfully: {output_fileName} (expected: {content_size} bytes, actual: {actual_size} bytes)")
else: else:
logger.error(f"✗ File was not created: {output_file}") logger.error(f"✗ File was not created: {output_file}")
result['output_files'].append(output_filename) result['output_files'].append(output_fileName)
result['content_items'] += 1 result['content_items'] += 1
except Exception as write_error: except Exception as write_error:
logger.error(f"✗ Error writing file {output_filename}: {write_error}") logger.error(f"✗ Error writing file {output_fileName}: {write_error}")
import traceback import traceback
traceback.print_exc() traceback.print_exc()
else: else:
@ -486,7 +486,7 @@ class DocumentExtractionTester:
# Track failed result # Track failed result
result = { result = {
'filename': file_path.name, 'fileName': file_path.name,
'status': 'FAIL', 'status': 'FAIL',
'content_items': 0, 'content_items': 0,
'output_files': [], 'output_files': [],
@ -696,7 +696,7 @@ class DocumentExtractionTester:
for result in self.processing_results: for result in self.processing_results:
status_icon = "" if result['status'] == 'OK' else "" status_icon = "" if result['status'] == 'OK' else ""
logger.info(f"{status_icon} {result['filename']} - {result['status']}") logger.info(f"{status_icon} {result['fileName']} - {result['status']}")
if result['status'] == 'OK': if result['status'] == 'OK':
if result['content_items'] == 1: if result['content_items'] == 1:

View file

@ -83,7 +83,7 @@ async def test_excel_processing():
logger.info("Processing Excel file...") logger.info("Processing Excel file...")
result = await extractor.processFileData( result = await extractor.processFileData(
fileData=file_data, fileData=file_data,
filename="test.xlsx", fileName="test.xlsx",
mimeType="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", mimeType="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
base64Encoded=False, base64Encoded=False,
prompt=None, prompt=None,
@ -151,7 +151,7 @@ async def test_excel_processing():
# Process with DocumentExtraction # Process with DocumentExtraction
result = await extractor.processFileData( result = await extractor.processFileData(
fileData=file_data, fileData=file_data,
filename="test_workbook.xlsx", fileName="test_workbook.xlsx",
mimeType="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", mimeType="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
base64Encoded=False, base64Encoded=False,
prompt=None, prompt=None,

100
test_pydantic_compat.py Normal file
View file

@ -0,0 +1,100 @@
#!/usr/bin/env python3
"""
Test script for Pydantic compatibility module.
This script tests the version-aware functionality for both Pydantic v1 and v2.
"""
import sys
import os
# Add the modules directory to the path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'modules'))
def test_compatibility_module():
    """Smoke-test the Pydantic compatibility module.

    Imports the public helpers from ``shared.pydanticCompat``, calls a few of
    them and prints what was found.

    Returns:
        True when the import and every call succeed, False when anything
        raises (the error is printed, not propagated).
    """
    succeeded = False
    try:
        # All names are imported on purpose: a missing helper must fail the check.
        from shared.pydanticCompat import (
            PYDANTIC_VERSION,
            create_private_field,
            create_model_config,
            model_to_dict,
            model_from_dict,
            get_version_info,
        )

        print("✅ Successfully imported Pydantic compatibility module")
        print(f"📊 Pydantic version detected: {PYDANTIC_VERSION}")

        # Version info
        print(f"🔍 Version info: {get_version_info()}")

        # Field creation
        field_obj = create_private_field(default="test")
        print(f"✅ Private field created: {type(field_obj)}")

        # Model config
        config_obj = create_model_config(validate_assignment=True)
        print(f"✅ Model config created: {type(config_obj)}")

        succeeded = True
    except Exception as exc:
        print(f"❌ Error testing compatibility module: {exc}")
    return succeeded
def test_chat_document_model():
    """Smoke-test the ChatDocument model through the compatibility layer.

    Creates a document, attaches a component interface, serializes it and
    runs its component-interface validation, printing each step.

    Returns:
        True when every step succeeds, False when anything raises (the error
        and its traceback are printed, not propagated).
    """
    try:
        from interfaces.interfaceChatModel import ChatDocument
        print("✅ Successfully imported ChatDocument model")

        # Create a document
        document = ChatDocument(fileId="test-file-123")
        print(f"✅ ChatDocument created: {document.id}")

        # Attach a component interface
        document.setComponentInterface("mock_interface")
        print("✅ Component interface set")

        # Serialize
        serialized = document.to_dict()
        print(f"✅ Document serialized: {serialized}")

        # Validate
        validation_result = document.validate_component_interface()
        print(f"✅ Component interface validation: {validation_result}")
    except Exception as exc:
        print(f"❌ Error testing ChatDocument model: {exc}")
        import traceback
        traceback.print_exc()
        return False
    return True
def main():
    """Run both compatibility checks and report the outcome.

    Both checks are always executed, even when the first one fails.

    Returns:
        0 when both checks pass, 1 otherwise (suitable for ``sys.exit``).
    """
    separator = "=" * 50

    print("🧪 Testing Pydantic Compatibility Module")
    print(separator)

    # Compatibility module check
    compat_ok = test_compatibility_module()
    print()

    # ChatDocument model check
    model_ok = test_chat_document_model()
    print()

    # Summary
    print(separator)
    if not (compat_ok and model_ok):
        print("💥 Some tests failed. Check the errors above.")
        return 1
    print("🎉 All tests passed! Pydantic compatibility is working correctly.")
    return 0
if __name__ == "__main__":
sys.exit(main())

View file

@ -1,218 +0,0 @@
#!/usr/bin/env python3
"""
Test runner for timestamp standardization tests.
Executes all unit tests and provides a summary report.
"""
import sys
import os
import subprocess
import time
from pathlib import Path
def _count_pytest_outcomes(output):
    """Count per-test outcomes in the text printed by ``pytest -v``.

    Only per-test result lines are counted, i.e. lines whose first token is a
    node id containing ``::`` and whose second token is an outcome word.
    Short-summary lines such as ``FAILED path::test - reason`` start with the
    outcome word and are therefore ignored, so a failure is not counted twice.

    Args:
        output: Captured standard output of a ``pytest -v`` run.

    Returns:
        Tuple ``(total, passed, failed)``. ``ERROR`` counts as failed;
        ``SKIPPED``/``XFAIL``/``XPASS`` only count towards the total.
    """
    total = passed = failed = 0
    for line in output.splitlines():
        tokens = line.split()
        if len(tokens) < 2 or '::' not in tokens[0]:
            continue
        outcome = tokens[1]
        if outcome == 'PASSED':
            passed += 1
        elif outcome in ('FAILED', 'ERROR'):
            failed += 1
        elif outcome not in ('SKIPPED', 'XFAIL', 'XPASS'):
            # Not a result line (e.g. a traceback line mentioning a node id).
            continue
        total += 1
    return total, passed, failed


def run_tests():
    """Run all timestamp standardization tests.

    Changes the working directory to the parent of this file's directory,
    runs pytest in a subprocess for each known test file that exists, prints
    a per-file and an overall summary.

    Returns:
        True only when at least one test was detected, no test failed and
        every pytest run finished cleanly (exit code 0, no timeout, no
        launch error); False otherwise.
    """
    print("🚀 Starting Timestamp Standardization Tests")
    print("=" * 50)

    # Get the gateway directory
    gateway_dir = Path(__file__).parent.parent
    os.chdir(gateway_dir)

    # Test files to run
    test_files = [
        "tests/test_timestamp_models.py",
        "tests/test_api_timestamps.py"
    ]

    results = {}
    total_tests = 0
    passed_tests = 0
    failed_tests = 0
    # Files whose pytest run did not end cleanly although no individual test
    # failure was counted (collection error, timeout, launch failure).
    broken_files = 0

    for test_file in test_files:
        if not os.path.exists(test_file):
            print(f"⚠️ Test file not found: {test_file}")
            continue

        print(f"\n📋 Running tests from: {test_file}")
        print("-" * 40)

        try:
            # Run pytest on the test file with better output format
            result = subprocess.run(
                [sys.executable, "-m", "pytest", test_file, "-v", "--tb=short", "--no-header"],
                capture_output=True,
                text=True,
                timeout=120
            )
        except subprocess.TimeoutExpired:
            print(f"{test_file}: Tests timed out after 120 seconds")
            results[test_file] = {
                'total': 0,
                'passed': 0,
                'failed': 0,
                'output': '',
                'error': 'Tests timed out',
                'return_code': -1
            }
            broken_files += 1
            continue
        except Exception as e:
            # Best-effort runner: report the problem and go on with the next file.
            print(f"💥 {test_file}: Error running tests: {e}")
            results[test_file] = {
                'total': 0,
                'passed': 0,
                'failed': 0,
                'output': '',
                'error': str(e),
                'return_code': -1
            }
            broken_files += 1
            continue

        output = result.stdout
        error_output = result.stderr
        test_count, passed, failed = _count_pytest_outcomes(output)

        total_tests += test_count
        passed_tests += passed
        failed_tests += failed
        if result.returncode != 0 and failed == 0:
            broken_files += 1

        results[test_file] = {
            'total': test_count,
            'passed': passed,
            'failed': failed,
            'output': output,
            'error': error_output,
            'return_code': result.returncode
        }

        # Print summary for this file
        if result.returncode == 0 and failed == 0:
            print(f"{test_file}: {passed}/{test_count} tests passed")
        else:
            print(f"{test_file}: {failed}/{test_count} tests failed")
            if error_output:
                print(f"Error output: {error_output}")
            # Show the actual test output for debugging
            print("\n📋 Test Output:")
            print("-" * 40)
            print(output)
            print("-" * 40)

    # Print overall summary
    print("\n" + "=" * 50)
    print("📊 TEST SUMMARY")
    print("=" * 50)

    for test_file, info in results.items():
        if info['total'] > 0:
            clean = info['failed'] == 0 and info['return_code'] == 0
            status = "✅ PASSED" if clean else "❌ FAILED"
            print(f"{test_file}: {status} ({info['passed']}/{info['total']} tests)")
        else:
            print(f"{test_file}: ⚠️ NO TESTS DETECTED")

    print(f"\nTotal Tests: {total_tests}")
    print(f"Passed: {passed_tests}")
    print(f"Failed: {failed_tests}")

    if total_tests == 0:
        print("\n⚠️ No tests were detected. Please check test file structure.")
        return False
    if failed_tests > 0:
        print(f"\n⚠️ {failed_tests} tests failed. Please review the output above.")
        return False
    if broken_files > 0:
        print(f"\n⚠️ {broken_files} test file(s) did not run cleanly. Please review the output above.")
        return False
    print("\n🎉 All tests passed! Timestamp standardization is working correctly.")
    return True
def run_frontend_tests():
    """Report whether the frontend timestamp tests could be run.

    No frontend test is actually executed: the function only checks that the
    test file exists (relative to the current working directory) and that
    ``node --version`` succeeds.

    Returns:
        True when the file exists and Node.js answers, False otherwise.
    """
    print("\n🌐 Frontend Tests")
    print("-" * 40)

    frontend_test_file = "../frontend_agents/tests/test_timestamp_utils.js"
    if not os.path.exists(frontend_test_file):
        print(f"⚠️ Frontend test file not found: {frontend_test_file}")
        return False

    # Check if Node.js is available
    try:
        node_probe = subprocess.run(['node', '--version'], capture_output=True, text=True)
    except FileNotFoundError:
        print("⚠️ Node.js not found. Skipping frontend tests.")
        return False

    if node_probe.returncode != 0:
        print("⚠️ Node.js not available. Skipping frontend tests.")
        return False

    print("✅ Node.js available. Frontend tests would run here.")
    print(" (Frontend tests require Jest or similar test runner)")
    return True
def main():
    """Run the backend and frontend checks and print a final summary.

    Returns:
        The backend result only; the frontend check is informational and does
        not influence the return value.
    """
    started = time.time()
    banner = "=" * 50

    print("Timestamp Standardization Test Suite")
    print("Testing Phase 5: Testing & Validation")
    print(f"Started at: {time.strftime('%Y-%m-%d %H:%M:%S')}")

    # Backend first, then frontend
    backend_success = run_tests()
    frontend_success = run_frontend_tests()

    # Final summary
    duration = time.time() - started
    backend_label = '✅ PASSED' if backend_success else '❌ FAILED'
    frontend_label = '✅ AVAILABLE' if frontend_success else '⚠️ NOT AVAILABLE'

    print("\n" + banner)
    print("🏁 FINAL SUMMARY")
    print(banner)
    print(f"Backend Tests: {backend_label}")
    print(f"Frontend Tests: {frontend_label}")
    print(f"Total Duration: {duration:.2f} seconds")

    if not backend_success:
        print("\n❌ Phase 5: Testing & Validation - FAILED")
        print("Some tests failed. Please review the output above.")
    else:
        print("\n🎯 Phase 5: Testing & Validation - COMPLETED")
        print("All timestamp standardization tests passed successfully!")

    return backend_success
if __name__ == "__main__":
success = main()
sys.exit(0 if success else 1)

View file

@ -1,155 +0,0 @@
"""
API endpoint tests for timestamp standardization.
Ensures all API endpoints return float UTC timestamps.
"""
import pytest
import json
import time
from modules.shared.timezoneUtils import get_utc_timestamp, create_expiration_timestamp
class TestAPITimestampFormat:
    """Placeholder checks for the timestamp format of API endpoints.

    None of these tests calls an endpoint (that would need a running FastAPI
    app or a real OAuth flow). Each one only exercises the timestamp helper
    functions the endpoints are expected to rely on.
    """

    @staticmethod
    def _check_now_and_expiry():
        """Current and expiration timestamps are floats, expiry lies later."""
        now = get_utc_timestamp()
        expiry = create_expiration_timestamp(3600)

        assert isinstance(now, float)
        assert isinstance(expiry, float)
        assert expiry > now

    @staticmethod
    def _check_now_is_plausible_float():
        """The current timestamp is a float above the 1600000000 lower bound."""
        now = get_utc_timestamp()
        assert isinstance(now, float)
        assert now > 1600000000

    def test_connection_endpoints_return_float_timestamps(self):
        """Connection endpoints: stand-in check on the timestamp helpers."""
        self._check_now_and_expiry()

    def test_oauth_endpoints_return_float_timestamps(self):
        """OAuth endpoints: stand-in check on the timestamp helpers."""
        self._check_now_and_expiry()

    def test_workflow_endpoints_return_float_timestamps(self):
        """Workflow endpoints: stand-in check on timestamp generation."""
        self._check_now_is_plausible_float()

    def test_chat_endpoints_return_float_timestamps(self):
        """Chat endpoints: stand-in check on timestamp generation."""
        self._check_now_is_plausible_float()

    def test_component_endpoints_return_float_timestamps(self):
        """Component endpoints: stand-in check on timestamp generation."""
        self._check_now_is_plausible_float()
class TestTimestampGenerationConsistency:
    """Test that timestamp generation is consistent across calls."""

    def test_utc_timestamp_consistency(self):
        """Two timestamps taken 0.1 s apart are floats and strictly increasing."""
        timestamp1 = get_utc_timestamp()
        time.sleep(0.1)  # Small delay so the second reading must be larger
        timestamp2 = get_utc_timestamp()

        # Both should be float
        assert isinstance(timestamp1, float)
        assert isinstance(timestamp2, float)

        # Second should be greater than first
        assert timestamp2 > timestamp1

        # Both should be above the shared lower bound
        assert timestamp1 > 1600000000
        assert timestamp2 > 1600000000

    def test_expiration_timestamp_consistency(self):
        """Expiration timestamps land at roughly ``now + expires_in``."""
        current_time = get_utc_timestamp()
        expires_in = 3600  # 1 hour

        expiration1 = create_expiration_timestamp(expires_in)
        expiration2 = create_expiration_timestamp(expires_in)

        # Both should be float
        assert isinstance(expiration1, float)
        assert isinstance(expiration2, float)

        # The three values come from separate calls, so they can differ by the
        # time elapsed in between (the helper's implementation is not visible
        # here). Exact float equality would be flaky; allow one second.
        expected = current_time + expires_in
        assert abs(expiration1 - expected) < 1
        assert abs(expiration2 - expected) < 1

        # Both should be greater than current time
        assert expiration1 > current_time
        assert expiration2 > current_time
class TestTimestampValidation:
    """Sanity checks on timestamp values and their plausible range."""

    def test_invalid_timestamp_handling(self):
        """Old and future timestamps are still ordinary float values."""
        # A very old timestamp (year 2001) is still a valid positive float.
        ancient = 1000000000.0
        assert isinstance(ancient, float)
        assert ancient > 0

        # A timestamp one day ahead is a float and lies after "now".
        one_day = 86400
        tomorrow = get_utc_timestamp() + one_day
        assert isinstance(tomorrow, float)
        assert tomorrow > get_utc_timestamp()

    def test_timestamp_range_validation(self):
        """Generated timestamps fall inside a reasonable window."""
        lower_bound = 1600000000  # mid-September 2020
        upper_bound = 4102444800  # 2100-01-01
        now = get_utc_timestamp()

        assert now > lower_bound
        assert now < upper_bound

        # A one-hour expiry lies after now but less than a day ahead.
        expiry = create_expiration_timestamp(3600)
        assert expiry > now
        assert expiry < now + 86400
if __name__ == "__main__":
pytest.main([__file__])

View file

@ -1,385 +0,0 @@
"""
Unit tests for timestamp standardization across all models.
Ensures all timestamp fields use float UTC timestamps consistently.
"""
import pytest
from datetime import datetime, timedelta
import time
from modules.interfaces.interfaceAppModel import UserConnection, Session, AuthEvent, Token
from modules.interfaces.interfaceChatModel import TaskAction, ChatLog, ChatMessage, ChatWorkflow, TaskItem, TaskHandover
from modules.interfaces.interfaceComponentModel import FileItem
from modules.shared.timezoneUtils import get_utc_timestamp, create_expiration_timestamp
class TestTimestampModelConsistency:
    """Check that every model stores its timestamp fields as float values.

    Where a model is serialized with ``to_dict()``, the tests only check that
    the timestamp entries come back as strings.
    """

    # Lower bound shared by all plausibility checks.
    _MIN_TS = 1600000000

    @staticmethod
    def _assert_floats(model, *field_names):
        """Assert that each named attribute of *model* is a float."""
        for field_name in field_names:
            assert isinstance(getattr(model, field_name), float)

    @staticmethod
    def _assert_strings(serialized, *keys):
        """Assert that each named entry of *serialized* is a string."""
        for key in keys:
            assert isinstance(serialized[key], str)

    def test_user_connection_timestamps(self):
        """UserConnection: connectedAt, lastChecked and expiresAt."""
        now = get_utc_timestamp()
        in_one_hour = create_expiration_timestamp(3600)

        connection = UserConnection(
            userId="user123",
            authority="msft",
            externalId="ext123",
            externalUsername="testuser",
            connectedAt=now,
            lastChecked=now,
            expiresAt=in_one_hour,
        )

        self._assert_floats(connection, "connectedAt", "lastChecked", "expiresAt")

        assert connection.connectedAt > self._MIN_TS
        assert connection.lastChecked > self._MIN_TS
        assert connection.expiresAt > connection.connectedAt

        # Serialized form carries the timestamps as strings.
        self._assert_strings(connection.to_dict(), "connectedAt", "lastChecked", "expiresAt")

    def test_session_timestamps(self):
        """Session: lastActivity and expiresAt."""
        now = get_utc_timestamp()
        in_two_hours = create_expiration_timestamp(7200)

        session = Session(
            id="session123",
            userId="user123",
            tokenId="token123",
            lastActivity=now,
            expiresAt=in_two_hours,
        )

        self._assert_floats(session, "lastActivity", "expiresAt")

        assert session.lastActivity > self._MIN_TS
        assert session.expiresAt > session.lastActivity

        self._assert_strings(session.to_dict(), "lastActivity", "expiresAt")

    def test_auth_event_timestamps(self):
        """AuthEvent: timestamp."""
        now = get_utc_timestamp()

        auth_event = AuthEvent(
            id="event123",
            userId="user123",
            eventType="login",
            details={"action": "login", "success": True},
            timestamp=now,
        )

        self._assert_floats(auth_event, "timestamp")
        assert auth_event.timestamp > self._MIN_TS

        self._assert_strings(auth_event.to_dict(), "timestamp")

    def test_token_timestamps(self):
        """Token: expiresAt and createdAt."""
        now = get_utc_timestamp()
        in_one_hour = create_expiration_timestamp(3600)

        token = Token(
            userId="user123",
            authority="msft",
            tokenAccess="access_token",
            expiresAt=in_one_hour,
            createdAt=now,
        )

        self._assert_floats(token, "expiresAt", "createdAt")

        assert token.expiresAt > self._MIN_TS
        assert token.createdAt > self._MIN_TS
        assert token.expiresAt > token.createdAt

        self._assert_strings(token.to_dict(), "expiresAt", "createdAt")

    def test_task_action_timestamps(self):
        """TaskAction: explicit timestamp and the default one."""
        now = get_utc_timestamp()

        explicit = TaskAction(
            id="action123",
            execMethod="test.method",
            execAction="test_action",
            timestamp=now,
        )
        self._assert_floats(explicit, "timestamp")
        assert explicit.timestamp > self._MIN_TS

        # Without an explicit value the model must fill in a timestamp itself.
        defaulted = TaskAction(
            id="action124",
            execMethod="test.method",
            execAction="test_action",
        )
        self._assert_floats(defaulted, "timestamp")
        assert defaulted.timestamp > self._MIN_TS

    def test_chat_log_timestamps(self):
        """ChatLog: timestamp."""
        now = get_utc_timestamp()

        chat_log = ChatLog(
            id="log123",
            workflowId="workflow123",
            message="Test message",
            type="info",
            timestamp=now,
        )

        self._assert_floats(chat_log, "timestamp")
        assert chat_log.timestamp > self._MIN_TS

    def test_chat_message_timestamps(self):
        """ChatMessage: publishedAt."""
        now = get_utc_timestamp()

        chat_message = ChatMessage(
            id="msg123",
            workflowId="workflow123",
            role="user",
            status="first",
            sequenceNr=1,
            message="Test message",
            publishedAt=now,
        )

        self._assert_floats(chat_message, "publishedAt")
        assert chat_message.publishedAt > self._MIN_TS

    def test_chat_workflow_timestamps(self):
        """ChatWorkflow: startedAt and lastActivity."""
        now = get_utc_timestamp()

        workflow = ChatWorkflow(
            id="workflow123",
            mandateId="mandate123",
            status="active",
            currentRound=1,
            startedAt=now,
            lastActivity=now,
        )

        self._assert_floats(workflow, "startedAt", "lastActivity")
        assert workflow.startedAt > self._MIN_TS
        assert workflow.lastActivity > self._MIN_TS

    def test_task_item_timestamps(self):
        """TaskItem: startedAt and finishedAt."""
        started = get_utc_timestamp()
        finished = started + 300  # 5 minutes later

        task_item = TaskItem(
            id="task123",
            workflowId="workflow123",
            userInput="Test user input",
            startedAt=started,
            finishedAt=finished,
        )

        self._assert_floats(task_item, "startedAt", "finishedAt")
        assert task_item.startedAt > self._MIN_TS
        assert task_item.finishedAt > task_item.startedAt

    def test_task_handover_timestamps(self):
        """TaskHandover: explicit timestamp and the default one."""
        now = get_utc_timestamp()

        explicit = TaskHandover(taskId="task123", timestamp=now)
        self._assert_floats(explicit, "timestamp")
        assert explicit.timestamp > self._MIN_TS

        # Without an explicit value the model must fill in a timestamp itself.
        defaulted = TaskHandover(taskId="task124")
        self._assert_floats(defaulted, "timestamp")
        assert defaulted.timestamp > self._MIN_TS

    def test_file_item_timestamps(self):
        """FileItem: explicit creationDate, the default one, and to_dict()."""
        now = get_utc_timestamp()

        file_item = FileItem(
            id="file123",
            mandateId="mandate123",
            filename="test.txt",
            mimeType="text/plain",
            fileHash="abc123hash",
            fileSize=1024,
            creationDate=now,
        )
        self._assert_floats(file_item, "creationDate")
        assert file_item.creationDate > self._MIN_TS

        # Without an explicit value the model must fill in a creation date itself.
        defaulted = FileItem(
            id="file124",
            mandateId="mandate123",
            filename="test.txt",
            mimeType="text/plain",
            fileHash="def456hash",
            fileSize=2048,
        )
        self._assert_floats(defaulted, "creationDate")
        assert defaulted.creationDate > self._MIN_TS

        # Serialized form carries the creation date as a string.
        self._assert_strings(file_item.to_dict(), "creationDate")
class TestTimestampGenerationFunctions:
    """Checks for the timestamp generation utility functions."""

    def test_get_utc_timestamp(self):
        """get_utc_timestamp returns a plausible float close to time.time()."""
        generated = get_utc_timestamp()

        assert isinstance(generated, float)

        # Inside the accepted window (lower bound ~2020, upper bound 2100).
        assert generated > 1600000000
        assert generated < 4102444800

        # Within 2 seconds of the system clock, allowing for execution time.
        wall_clock = time.time()
        assert abs(generated - wall_clock) < 2

    def test_create_expiration_timestamp(self):
        """create_expiration_timestamp returns roughly now + expires_in."""
        now = get_utc_timestamp()
        expires_in = 3600  # 1 hour

        expiry = create_expiration_timestamp(expires_in)

        assert isinstance(expiry, float)

        assert expiry > now
        # Within 1 second of the expected value, allowing for execution time.
        drift = abs(expiry - (now + expires_in))
        assert drift < 1

        # Inside the accepted window.
        assert expiry > 1600000000
        assert expiry < 4102444800
class TestModelValidation:
    """Checks on model field declarations and optional timestamp defaults."""

    def test_timestamp_field_descriptions(self):
        """UserConnection declares its three timestamp fields.

        Despite the name, only the existence of the fields is checked; the
        field descriptions themselves are not inspected.
        """
        connection = UserConnection(
            userId="user123",
            authority="msft",
            externalId="ext123",
            externalUsername="testuser",
        )

        # Pydantic v2 exposes ``model_fields``, v1 exposes ``__fields__``.
        declared = (
            connection.model_fields
            if hasattr(connection, 'model_fields')
            else connection.__fields__
        )

        for field_name in ("connectedAt", "lastChecked", "expiresAt"):
            assert field_name in declared

    def test_optional_timestamp_fields(self):
        """Optional timestamp fields default to None when omitted."""
        # Token without createdAt
        token = Token(
            userId="user123",
            authority="msft",
            tokenAccess="access_token",
            expiresAt=create_expiration_timestamp(3600),
        )
        assert token.createdAt is None

        # UserConnection without expiresAt
        connection = UserConnection(
            userId="user123",
            authority="msft",
            externalId="ext123",
            externalUsername="testuser",
        )
        assert connection.expiresAt is None
if __name__ == "__main__":
pytest.main([__file__])

210
tool_showUnusedFunctions.py Normal file
View file

@ -0,0 +1,210 @@
#!/usr/bin/env python3
"""
Script to find unused functions in Python files.
Analyzes all .py files in the codebase and reports functions that are never called.
"""
import os
import re
import ast
from pathlib import Path
from typing import Dict, List, Set, Tuple
import logging
# Configure logging
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
logger = logging.getLogger(__name__)
class FunctionUsageAnalyzer:
    """Report functions that are defined in a codebase but never called.

    Every ``*.py`` file below ``root_dir`` is parsed with :mod:`ast`. Function
    and method definitions are collected by bare name, and so are the names
    used in call expressions (``foo()`` and ``obj.foo()`` both count as a use
    of ``foo``). A definition whose name never shows up in a call is reported.

    Limitations: matching is done on the bare name only, so same-named
    functions in different modules or classes cannot be told apart, and a
    function that is only referenced without being called directly (passed as
    a callback, registered through a decorator, ...) is reported as unused.
    """

    # Directory names treated as virtual environments; files below them are
    # left out of the analysis.
    _EXCLUDED_DIR_NAMES = frozenset({"venv", ".venv", "env", ".env"})

    # Names that are never reported: private/dunder names and test functions
    # (by prefix) plus a few entry-point / fixture names (exact match).
    _SKIPPED_PREFIXES = ("_", "test_")
    _SKIPPED_NAMES = frozenset({"main", "setup", "teardown"})

    def __init__(self, root_dir: str):
        """Create an analyzer for the tree rooted at ``root_dir``.

        Args:
            root_dir: Directory to scan. It is wrapped in ``pathlib.Path``, so
                an existing ``Path`` object is accepted as well.
        """
        self.root_dir = Path(root_dir)
        # function name -> [(path relative to root_dir, line number)]
        self.all_functions: Dict[str, List[Tuple[str, int]]] = {}
        # bare names that appear as the target of a call expression
        self.function_calls: Set[str] = set()
        # path relative to root_dir -> imported module / symbol names
        self.imports: Dict[str, Set[str]] = {}

    def find_python_files(self) -> List[Path]:
        """Return all ``*.py`` files below ``root_dir``, sorted by path.

        A file is skipped when one of its parent directories (relative to
        ``root_dir``) is named like a virtual environment. Comparing whole
        directory names keeps files such as ``environment/x.py`` and does not
        depend on where the project itself is checked out.
        """
        python_files = []
        for py_file in self.root_dir.rglob("*.py"):
            parent_dirs = py_file.relative_to(self.root_dir).parts[:-1]
            if self._EXCLUDED_DIR_NAMES.isdisjoint(parent_dirs):
                python_files.append(py_file)
        return sorted(python_files)

    def _parse_file(self, file_path: Path) -> ast.AST:
        """Read and parse ``file_path``; return an empty module on any failure.

        Failures (unreadable file, bad encoding, syntax error) are logged as a
        warning so that one broken file does not abort the whole analysis.
        """
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                return ast.parse(f.read())
        except Exception as e:  # best effort by design: log and carry on
            logger.warning(f"Error parsing {file_path}: {e}")
            return ast.Module(body=[], type_ignores=[])

    @staticmethod
    def _functions_in(tree: ast.AST) -> List[Tuple[str, int]]:
        """Return ``(name, line)`` for every sync/async function definition in ``tree``."""
        return [
            (node.name, node.lineno)
            for node in ast.walk(tree)
            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef))
        ]

    @staticmethod
    def _calls_in(tree: ast.AST) -> Set[str]:
        """Return the bare names called in ``tree`` (``foo()`` and ``x.foo()`` -> ``foo``)."""
        calls = set()
        for node in ast.walk(tree):
            if not isinstance(node, ast.Call):
                continue
            if isinstance(node.func, ast.Name):
                calls.add(node.func.id)
            elif isinstance(node.func, ast.Attribute):
                calls.add(node.func.attr)
        return calls

    @staticmethod
    def _imports_in(tree: ast.AST) -> Set[str]:
        """Return imported module names and imported symbol names found in ``tree``."""
        imports = set()
        for node in ast.walk(tree):
            if isinstance(node, ast.Import):
                imports.update(alias.name for alias in node.names)
            elif isinstance(node, ast.ImportFrom):
                # ``from . import x`` has no module name.
                if node.module:
                    imports.add(node.module)
                imports.update(alias.name for alias in node.names if alias.name != "*")
        return imports

    def extract_functions_from_file(self, file_path: Path) -> List[Tuple[str, int]]:
        """Return ``(name, line)`` for each function defined in ``file_path``.

        Returns an empty list (and logs a warning) if the file cannot be parsed.
        """
        return self._functions_in(self._parse_file(file_path))

    def extract_function_calls_from_file(self, file_path: Path) -> Set[str]:
        """Return the bare names of everything called in ``file_path``.

        Returns an empty set (and logs a warning) if the file cannot be parsed.
        """
        return self._calls_in(self._parse_file(file_path))

    def extract_imports_from_file(self, file_path: Path) -> Set[str]:
        """Return the module and symbol names imported by ``file_path``.

        Returns an empty set (and logs a warning) if the file cannot be parsed.
        """
        return self._imports_in(self._parse_file(file_path))

    def analyze_codebase(self):
        """Scan all Python files and fill ``all_functions``, ``function_calls`` and ``imports``.

        Results of a previous run are discarded first, so calling this method
        again does not duplicate locations. Each file is read and parsed once.
        """
        python_files = self.find_python_files()
        logger.info(f"Found {len(python_files)} Python files")

        self.all_functions = {}
        self.function_calls = set()
        self.imports = {}

        for file_path in python_files:
            relative_path = str(file_path.relative_to(self.root_dir))
            tree = self._parse_file(file_path)
            for func_name, line_num in self._functions_in(tree):
                self.all_functions.setdefault(func_name, []).append((relative_path, line_num))
            self.function_calls.update(self._calls_in(tree))
            self.imports[relative_path] = self._imports_in(tree)

        logger.info(f"Found {len(self.all_functions)} unique function definitions")
        logger.info(f"Found {len(self.function_calls)} unique function calls")

    def find_unused_functions(self) -> Dict[str, List[Tuple[str, int]]]:
        """Return ``{name: locations}`` for defined functions that are never called.

        Names starting with ``_`` (private and dunder methods) or ``test_``
        and the names ``main``, ``setup`` and ``teardown`` are never reported.
        """
        unused_functions = {}
        for func_name, locations in self.all_functions.items():
            if func_name.startswith(self._SKIPPED_PREFIXES) or func_name in self._SKIPPED_NAMES:
                continue
            if func_name not in self.function_calls:
                unused_functions[func_name] = locations
        return unused_functions

    def generate_report(self) -> str:
        """Build the plain-text report of unused functions.

        The report has a summary header, then the unused functions grouped by
        file (sorted by path, then line), then an alphabetical summary by name.
        When no functions were collected the usage rate is shown as ``n/a``.
        """
        unused_functions = self.find_unused_functions()
        total = len(self.all_functions)
        if total:
            usage_rate = f"{((total - len(unused_functions)) / total * 100):.1f}%"
        else:
            # Nothing was analyzed; avoid dividing by zero.
            usage_rate = "n/a"

        report = []
        report.append("=" * 80)
        report.append("UNUSED FUNCTIONS REPORT")
        report.append("=" * 80)
        report.append(f"Total functions found: {total}")
        report.append(f"Unused functions: {len(unused_functions)}")
        report.append(f"Usage rate: {usage_rate}")
        report.append("")

        if not unused_functions:
            report.append("🎉 All functions are being used!")
            return "\n".join(report)

        # Group by file
        by_file = {}
        for func_name, locations in unused_functions.items():
            for file_path, line_num in locations:
                by_file.setdefault(file_path, []).append((func_name, line_num))

        for file_path in sorted(by_file):
            report.append(f"📁 {file_path}")
            report.append("-" * 60)
            for func_name, line_num in sorted(by_file[file_path], key=lambda item: item[1]):
                report.append(f" Line {line_num:3d}: {func_name}")
            report.append("")

        # Summary by function name
        report.append("SUMMARY BY FUNCTION NAME:")
        report.append("-" * 60)
        for func_name in sorted(unused_functions):
            locations = unused_functions[func_name]
            report.append(f"{func_name:<30} ({len(locations)} location{'s' if len(locations) > 1 else ''})")

        return "\n".join(report)
def main():
    """Run the unused-function analysis on the directory containing this script.

    The report is printed to stdout and also written to
    ``unused_functions_report.txt`` in that same directory.
    """
    base_dir = Path(__file__).parent
    logger.info(f"Analyzing codebase in: {base_dir}")

    usage_analyzer = FunctionUsageAnalyzer(base_dir)
    usage_analyzer.analyze_codebase()

    report_text = usage_analyzer.generate_report()
    print(report_text)

    # Keep a copy of the report next to the script.
    output_path = base_dir / "unused_functions_report.txt"
    output_path.write_text(report_text, encoding='utf-8')
    logger.info(f"Report saved to: {output_path}")
# Run the analysis only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()