Hot fixes: SharePoint folders and stats

This commit is contained in:
ValueOn AG 2025-12-07 08:48:49 +01:00
parent 4418dfb604
commit 13b7c4fdbe
8 changed files with 1229 additions and 968 deletions

View file

@ -15,6 +15,7 @@ from modules.security.auth import getCurrentUser, limiter
from modules.datamodels.datamodelChat import AutomationDefinition, ChatWorkflow from modules.datamodels.datamodelChat import AutomationDefinition, ChatWorkflow
from modules.datamodels.datamodelPagination import PaginationParams, PaginatedResponse, PaginationMetadata from modules.datamodels.datamodelPagination import PaginationParams, PaginatedResponse, PaginationMetadata
from modules.shared.attributeUtils import getModelAttributeDefinitions from modules.shared.attributeUtils import getModelAttributeDefinitions
from modules.features.automation import executeAutomation
# Configure logger # Configure logger
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -217,7 +218,7 @@ async def execute_automation(
"""Execute an automation immediately (test mode)""" """Execute an automation immediately (test mode)"""
try: try:
chatInterface = getChatInterface(currentUser) chatInterface = getChatInterface(currentUser)
workflow = await chatInterface.executeAutomation(automationId) workflow = await executeAutomation(automationId, chatInterface)
return workflow return workflow
except HTTPException: except HTTPException:
raise raise

View file

@ -1013,7 +1013,8 @@ class ChatService:
return self._progressLogger return self._progressLogger
def createProgressLogger(self) -> ProgressLogger: def createProgressLogger(self) -> ProgressLogger:
return ProgressLogger(self.services) """Get or create the progress logger instance (singleton)"""
return self._getProgressLogger()
def progressLogStart(self, operationId: str, serviceName: str, actionName: str, context: str = "", parentOperationId: Optional[str] = None): def progressLogStart(self, operationId: str, serviceName: str, actionName: str, context: str = "", parentOperationId: Optional[str] = None):
"""Wrapper for ProgressLogger.startOperation """Wrapper for ProgressLogger.startOperation

View file

@ -287,7 +287,12 @@ class SharepointService:
try: try:
# Clean the path # Clean the path
cleanPath = folderPath.lstrip('/') cleanPath = folderPath.lstrip('/')
endpoint = f"sites/{siteId}/drive/root:/{cleanPath}"
# If path is empty, get root directly
if not cleanPath:
endpoint = f"sites/{siteId}/drive/root"
else:
endpoint = f"sites/{siteId}/drive/root:/{cleanPath}"
result = await self._makeGraphApiCall(endpoint) result = await self._makeGraphApiCall(endpoint)
@ -499,4 +504,407 @@ class SharepointService:
except Exception as e: except Exception as e:
logger.error(f"Error downloading file by path: {str(e)}") logger.error(f"Error downloading file by path: {str(e)}")
return None return None
async def _getItemById(self, siteId: str, driveId: str, itemId: str) -> Optional[Dict[str, Any]]:
    """Look up a drive item by its ID to confirm it exists.

    Args:
        siteId: SharePoint site ID
        driveId: Drive (document library) ID
        itemId: ID of the item to verify

    Returns:
        The item dictionary on success, None when the item is missing
        or the Graph call fails.
    """
    try:
        endpoint = f"sites/{siteId}/drives/{driveId}/items/{itemId}"
        response = await self._makeGraphApiCall(endpoint)
        # A Graph error payload means the item could not be resolved.
        if "error" not in response:
            return response
        logger.warning(f"Item {itemId} not found: {response['error']}")
        return None
    except Exception as e:
        logger.warning(f"Error verifying item {itemId}: {str(e)}")
        return None
async def _findDriveForItem(self, siteId: str, itemId: str) -> Optional[str]:
    """Locate the drive (document library) that contains a given item.

    Enumerates every drive of the site and probes each one for the item
    ID, returning the first drive in which the lookup succeeds.

    Args:
        siteId: SharePoint site ID
        itemId: Item ID to locate

    Returns:
        The containing drive's ID, or None when no drive holds the item.
    """
    try:
        drivesResult = await self._makeGraphApiCall(f"sites/{siteId}/drives")
        if "error" in drivesResult:
            logger.warning(f"Could not get drives for site {siteId}: {drivesResult['error']}")
            return None
        drives = drivesResult.get("value", [])
        if not drives:
            logger.warning(f"No drives found for site {siteId}")
            return None
        # Probe each drive in turn until the item resolves.
        for drive in drives:
            driveId = drive.get("id")
            if not driveId:
                continue
            if await self._getItemById(siteId, driveId, itemId):
                logger.info(f"Found item {itemId} in drive {drive.get('name', driveId)}")
                return driveId
        logger.warning(f"Item {itemId} not found in any drive for site {siteId}")
        return None
    except Exception as e:
        logger.warning(f"Error finding drive for item {itemId}: {str(e)}")
        return None
async def getFolderUsageAnalytics(self, siteId: str, driveId: str, itemId: str, startDateTime: Optional[str] = None, endDateTime: Optional[str] = None, interval: str = "day") -> Dict[str, Any]:
    """Get usage analytics for a folder or file.

    Calls the Graph `getActivitiesByInterval` endpoint. On a 404, it
    distinguishes "item exists but has no analytics" (returns an empty
    analytics structure) from "item not found" (returns the error), and
    additionally retries once against the drive that actually contains
    the item when the caller passed the wrong driveId.

    Args:
        siteId: SharePoint site ID
        driveId: Drive ID (document library)
        itemId: Folder or file item ID
        startDateTime: Start date/time in ISO format (e.g., "2025-11-01T00:00:00Z"). If None, uses 30 days ago.
        endDateTime: End date/time in ISO format (e.g., "2025-11-30T23:59:59Z"). If None, uses current time.
        interval: Time interval for grouping activities. Options: "day", "week", "month". Default: "day"

    Returns:
        Dictionary containing analytics data with activities grouped by interval.
        If analytics are not available (404), returns empty analytics structure instead of error.
    """
    try:
        # Local import keeps the module import list untouched.
        from datetime import datetime, timedelta, timezone
        # Set default time range if not provided (last 30 days).
        # Graph expects a trailing 'Z'; isoformat() emits '+00:00'.
        if not endDateTime:
            endDateTime = datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z')
        if not startDateTime:
            startDate = datetime.now(timezone.utc) - timedelta(days=30)
            startDateTime = startDate.isoformat().replace('+00:00', 'Z')
        # Build endpoint with query parameters
        endpoint = f"sites/{siteId}/drives/{driveId}/items/{itemId}/getActivitiesByInterval"
        endpoint += f"?startDateTime={startDateTime}&endDateTime={endDateTime}&interval={interval}"
        result = await self._makeGraphApiCall(endpoint)
        if "error" in result:
            errorMsg = result.get('error', '')
            # Check if it's a 404 error (detected by substring in the
            # error string — assumes _makeGraphApiCall embeds the HTTP
            # status code in its error text; TODO confirm).
            if isinstance(errorMsg, str) and '404' in errorMsg:
                # Verify if the item exists - first try with current driveId
                itemInfo = await self._getItemById(siteId, driveId, itemId)
                # If not found, try to find the correct drive for this item
                if not itemInfo:
                    logger.info(f"Item {itemId} not found in drive {driveId}, searching for correct drive")
                    correctDriveId = await self._findDriveForItem(siteId, itemId)
                    if correctDriveId and correctDriveId != driveId:
                        logger.info(f"Found item in different drive {correctDriveId}, retrying analytics call")
                        # Retry with correct drive
                        endpoint = f"sites/{siteId}/drives/{correctDriveId}/items/{itemId}/getActivitiesByInterval"
                        endpoint += f"?startDateTime={startDateTime}&endDateTime={endDateTime}&interval={interval}"
                        result = await self._makeGraphApiCall(endpoint)
                        if "error" not in result:
                            logger.info(f"Successfully retrieved analytics using correct drive {correctDriveId}")
                            return result
                        # If still error, continue with original error handling.
                        # Re-resolve itemInfo against the correct drive so the
                        # shared exists/not-exists handling below applies.
                        itemInfo = await self._getItemById(siteId, correctDriveId, itemId)
                if itemInfo:
                    # Item exists but analytics are not available - return empty analytics
                    logger.warning(f"Usage analytics not available for item {itemId} (item exists but has no activity data or analytics not supported)")
                    return {
                        "value": [],
                        "note": "No analytics data available for this item. The item exists but may not have activity data or analytics may not be supported for this item type."
                    }
                else:
                    # Item doesn't exist — propagate the original error payload.
                    logger.error(f"Item {itemId} not found when trying to get usage analytics")
                    return result
            else:
                # Other error
                logger.error(f"Error getting usage analytics: {result['error']}")
                return result
        logger.info(f"Retrieved usage analytics for item {itemId} with interval {interval}")
        return result
    except Exception as e:
        logger.error(f"Error getting folder usage analytics: {str(e)}")
        return {"error": f"Error getting folder usage analytics: {str(e)}"}
async def getDriveId(self, siteId: str, driveName: Optional[str] = None) -> Optional[str]:
    """Resolve a drive ID for a site.

    When driveName is provided, the drive is matched by name
    (case-insensitively). Otherwise the site's default document
    library is preferred, falling back to the first drive listed.

    Args:
        siteId: SharePoint site ID
        driveName: Optional drive name (document library name). If None, returns default drive.

    Returns:
        Drive ID string or None if not found
    """
    try:
        result = await self._makeGraphApiCall(f"sites/{siteId}/drives")
        if "error" in result:
            logger.error(f"Error getting drives: {result['error']}")
            return None
        drives = result.get("value", [])
        if driveName:
            # Find specific drive by case-insensitive name match.
            wanted = driveName.lower()
            for drive in drives:
                if drive.get("name", "").lower() == wanted:
                    logger.info(f"Found drive '{driveName}': {drive.get('id')}")
                    return drive.get("id")
            logger.warning(f"Drive '{driveName}' not found")
            return None
        # No name given: prefer the standard documents library.
        for drive in drives:
            if drive.get("name") in ("Documents", "Shared Documents"):
                logger.info(f"Found default drive: {drive.get('name')} (ID: {drive.get('id')})")
                return drive.get("id")
        # Otherwise fall back to the first drive the site exposes, if any.
        if drives:
            logger.info(f"Using first drive: {drives[0].get('name')} (ID: {drives[0].get('id')})")
            return drives[0].get("id")
        return None
    except Exception as e:
        logger.error(f"Error getting drive ID: {str(e)}")
        return None
def extractSiteFromStandardPath(self, pathQuery: str) -> Optional[Dict[str, str]]:
    """
    Extract the site name from a Microsoft-standard server-relative path:
    /sites/company-share/Freigegebene Dokumente/...

    Returns a dict with keys siteName and innerPath (no leading slash)
    on success, else None.
    """
    try:
        # Only Microsoft-standard paths are handled here.
        if not (pathQuery and pathQuery.startswith('/sites/')):
            return None
        remainder = pathQuery[len('/sites/'):]
        if '/' not in remainder:
            # Bare site name, no inner path.
            return {"siteName": remainder, "innerPath": ""}
        siteName, _, innerPath = remainder.partition('/')
        siteName = siteName.strip()
        if not siteName:
            return None
        return {"siteName": siteName, "innerPath": innerPath.strip()}
    except Exception as e:
        logger.error(f"Error extracting site from standard path '{pathQuery}': {str(e)}")
        return None
async def getSiteByStandardPath(self, sitePath: str, allSites: Optional[List[Dict[str, Any]]] = None) -> Optional[Dict[str, Any]]:
    """
    Get SharePoint site directly by Microsoft-standard path (/sites/SiteName)
    without loading all sites. Uses hostname from first available site.

    Parameters:
        sitePath (str): Site path like 'company-share' (without /sites/ prefix)
        allSites (Optional[List[Dict]]): Pre-discovered sites list (optional, for optimization)

    Returns:
        Optional[Dict[str, Any]]: Site information if found, None otherwise
    """
    try:
        # Imported once for both branches below (was duplicated per branch).
        from urllib.parse import urlparse

        # Determine the tenant hostname from any known site's webUrl.
        if allSites:
            webUrl = allSites[0].get("webUrl", "")
            hostname = urlparse(webUrl).hostname if webUrl else None
        else:
            # Discover minimal sites to get hostname
            minimalSites = await self.discoverSites()
            if not minimalSites:
                logger.warning("No sites available to extract hostname")
                return None
            hostname = urlparse(minimalSites[0].get("webUrl", "")).hostname
        if not hostname:
            logger.warning("Could not extract hostname from site")
            return None
        logger.info(f"Extracted hostname '{hostname}' from first site, now getting site by path: {sitePath}")
        # Get site directly using hostname + path (Graph path addressing).
        endpoint = f"sites/{hostname}:/sites/{sitePath}"
        result = await self._makeGraphApiCall(endpoint)
        if "error" in result:
            logger.warning(f"Could not get site directly by path '{sitePath}': {result['error']}")
            return None
        # Normalize the Graph response to the fields callers rely on.
        siteInfo = {
            "id": result.get("id"),
            "displayName": result.get("displayName"),
            "name": result.get("name"),
            "webUrl": result.get("webUrl"),
            "description": result.get("description"),
            "createdDateTime": result.get("createdDateTime"),
            "lastModifiedDateTime": result.get("lastModifiedDateTime")
        }
        logger.info(f"Successfully got site by standard path: {siteInfo['displayName']} (ID: {siteInfo['id']})")
        return siteInfo
    except Exception as e:
        logger.error(f"Error getting site by standard path '{sitePath}': {str(e)}")
        return None
def filterSitesByHint(self, sites: List[Dict[str, Any]], siteHint: str) -> List[Dict[str, Any]]:
    """Filter discovered sites by a human-entered site hint (case-insensitive substring)."""
    try:
        if not siteHint:
            return sites
        hint = siteHint.strip().lower()
        # Match the hint against display name or web URL.
        matches = [
            site for site in sites
            if hint in (site.get("displayName") or "").lower()
            or hint in (site.get("webUrl") or "").lower()
        ]
        # Fall back to the full list when nothing matched.
        return matches or sites
    except Exception as e:
        logger.error(f"Error filtering sites by hint '{siteHint}': {str(e)}")
        return sites
async def resolveSitesFromPathQuery(self, pathQuery: str, allSites: Optional[List[Dict[str, Any]]] = None) -> List[Dict[str, Any]]:
    """
    Resolve sites from pathQuery. Handles both Microsoft-standard paths
    (/sites/SiteName/...) and regular paths. Returns list of matching sites.

    Parameters:
        pathQuery (str): Path query string (e.g., /sites/SiteName/FolderPath)
        allSites (Optional[List[Dict]]): Pre-discovered sites list (optional, for optimization)

    Returns:
        List[Dict[str, Any]]: List of matching sites
    """
    try:
        isStandardPath = pathQuery.startswith('/sites/')
        # Fast path: a Microsoft-standard /sites/<name> path can often be
        # resolved with one direct Graph call, skipping full discovery.
        if isStandardPath:
            parsed = self.extractSiteFromStandardPath(pathQuery)
            if parsed:
                directSite = await self.getSiteByStandardPath(parsed.get("siteName"), allSites)
                if directSite:
                    logger.info(f"Got site directly by standard path - no need to discover all sites")
                    return [directSite]
                logger.warning(f"Could not get site directly, falling back to site discovery")
        # Fallback: discover all sites, then filter if a site name is known.
        if not allSites:
            allSites = await self.discoverSites()
        if not allSites:
            logger.warning("No SharePoint sites found or accessible")
            return []
        if not isStandardPath:
            return allSites
        parsed = self.extractSiteFromStandardPath(pathQuery)
        if not parsed:
            return allSites
        siteName = parsed.get("siteName")
        matching = self.filterSitesByHint(allSites, siteName)
        if not matching:
            logger.warning(f"No SharePoint site found matching '{siteName}'")
            return []
        logger.info(f"Filtered to site(s) matching '{siteName}': {[s['displayName'] for s in matching]}")
        return matching
    except Exception as e:
        logger.error(f"Error resolving sites from pathQuery '{pathQuery}': {str(e)}")
        return []
def validatePathQuery(self, pathQuery: str) -> tuple[bool, Optional[str]]:
    """
    Validate pathQuery format. Returns (isValid, errorMessage).

    Parameters:
        pathQuery (str): Path query to validate

    Returns:
        tuple[bool, Optional[str]]: (True, None) if valid, (False, errorMessage) if invalid
    """
    try:
        trimmed = (pathQuery or "").strip()
        if trimmed in ("", "*"):
            return False, "pathQuery cannot be empty or '*'"
        if not pathQuery.startswith('/'):
            return False, "pathQuery must start with '/' and include site name with Microsoft-standard syntax /sites/<SiteName>/... e.g. /sites/company-share/Freigegebene Dokumente/Work"
        # Reject strings that look like free-text search terms rather
        # than one of the recognized SharePoint path roots.
        validPathPrefixes = ('/sites/', '/Documents', '/documents', '/Shared Documents', '/shared documents')
        if not pathQuery.startswith(validPathPrefixes):
            return False, f"Invalid pathQuery '{pathQuery}'. This appears to be search terms, not a valid SharePoint path. Use findDocumentPath action first to search for folders, then use the returned folder path as pathQuery."
        return True, None
    except Exception as e:
        logger.error(f"Error validating pathQuery '{pathQuery}': {str(e)}")
        return False, f"Error validating pathQuery: {str(e)}"
def detectFolderType(self, item: Dict[str, Any]) -> bool:
    """
    Detect whether an item is a folder.

    Parameters:
        item (Dict[str, Any]): Item from SharePoint API response

    Returns:
        bool: True if item is a folder, False otherwise
    """
    try:
        # Graph folder items carry a 'folder' facet — that is authoritative.
        if 'folder' in item:
            return True
        # Heuristic fallback: an extension-less name together with a
        # path-like webUrl suggests a folder.
        name = item.get('name', '')
        webUrl = item.get('webUrl', '')
        looksPathLike = '/' in webUrl or '\\' in webUrl
        return '.' not in name and looksPathLike
    except Exception as e:
        logger.error(f"Error detecting folder type: {str(e)}")
        return False

View file

@ -49,11 +49,13 @@ class MethodAi(MethodBase):
operationId = f"ai_process_{workflowId}_{int(time.time())}" operationId = f"ai_process_{workflowId}_{int(time.time())}"
# Start progress tracking # Start progress tracking
parentOperationId = parameters.get('parentOperationId')
self.services.chat.progressLogStart( self.services.chat.progressLogStart(
operationId, operationId,
"Generate", "Generate",
"AI Processing", "AI Processing",
f"Format: {parameters.get('resultType', 'txt')}" f"Format: {parameters.get('resultType', 'txt')}",
parentOperationId=parentOperationId
) )
aiPrompt = parameters.get("aiPrompt") aiPrompt = parameters.get("aiPrompt")
@ -256,11 +258,13 @@ class MethodAi(MethodBase):
operationId = f"web_research_{workflowId}_{int(time.time())}" operationId = f"web_research_{workflowId}_{int(time.time())}"
# Start progress tracking # Start progress tracking
parentOperationId = parameters.get('parentOperationId')
self.services.chat.progressLogStart( self.services.chat.progressLogStart(
operationId, operationId,
"Web Research", "Web Research",
"Searching and Crawling", "Searching and Crawling",
"Extracting URLs and Content" "Extracting URLs and Content",
parentOperationId=parentOperationId
) )
# Call webcrawl service - service handles all AI intention analysis and processing # Call webcrawl service - service handles all AI intention analysis and processing

View file

@ -250,11 +250,13 @@ class MethodContext(MethodBase):
return ActionResult.isFailure(error=f"Invalid documentList type: {type(documentListParam)}") return ActionResult.isFailure(error=f"Invalid documentList type: {type(documentListParam)}")
# Start progress tracking # Start progress tracking
parentOperationId = parameters.get('parentOperationId')
self.services.chat.progressLogStart( self.services.chat.progressLogStart(
operationId, operationId,
"Extracting content from documents", "Extracting content from documents",
"Content Extraction", "Content Extraction",
f"Documents: {len(documentList.references)}" f"Documents: {len(documentList.references)}",
parentOperationId=parentOperationId
) )
# Get ChatDocuments from documentList # Get ChatDocuments from documentList

View file

@ -334,11 +334,13 @@ class MethodOutlook(MethodBase):
operationId = f"outlook_read_{workflowId}_{int(time.time())}" operationId = f"outlook_read_{workflowId}_{int(time.time())}"
# Start progress tracking # Start progress tracking
parentOperationId = parameters.get('parentOperationId')
self.services.chat.progressLogStart( self.services.chat.progressLogStart(
operationId, operationId,
"Read Emails", "Read Emails",
"Outlook Email Reading", "Outlook Email Reading",
f"Folder: {parameters.get('folder', 'Inbox')}" f"Folder: {parameters.get('folder', 'Inbox')}",
parentOperationId=parentOperationId
) )
connectionReference = parameters.get("connectionReference") connectionReference = parameters.get("connectionReference")
@ -1546,11 +1548,13 @@ Return JSON:
operationId = f"outlook_send_{workflowId}_{int(time.time())}" operationId = f"outlook_send_{workflowId}_{int(time.time())}"
# Start progress tracking # Start progress tracking
parentOperationId = parameters.get('parentOperationId')
self.services.chat.progressLogStart( self.services.chat.progressLogStart(
operationId, operationId,
"Send Draft Email", "Send Draft Email",
"Outlook Email Sending", "Outlook Email Sending",
f"Processing {len(parameters.get('documentList', []))} draft(s)" f"Processing {len(parameters.get('documentList', []))} draft(s)",
parentOperationId=parentOperationId
) )
connectionReference = parameters.get("connectionReference") connectionReference = parameters.get("connectionReference")

File diff suppressed because it is too large Load diff

View file

@ -82,6 +82,35 @@ class ActionExecutor:
enhancedParameters['expectedDocumentFormats'] = action.expectedDocumentFormats enhancedParameters['expectedDocumentFormats'] = action.expectedDocumentFormats
logger.info(f"Expected formats: {action.expectedDocumentFormats}") logger.info(f"Expected formats: {action.expectedDocumentFormats}")
# Get current task execution operationId to pass as parent to action methods
# This MUST be the "Service Workflow Execution" operation ID (taskExec_*)
parentOperationId = None
try:
progressLogger = self.services.chat.createProgressLogger()
activeOperations = progressLogger.getActiveOperations()
logger.debug(f"Looking for parent operation ID. Active operations: {list(activeOperations.keys())}")
# Look for task execution operation (starts with "taskExec_")
# This is the "Service Workflow Execution" level that should be parent of ALL actions
for opId in activeOperations.keys():
if opId.startswith("taskExec_"):
parentOperationId = opId
logger.info(f"Found parent operation ID: {parentOperationId} for action {action.execMethod}.{action.execAction}")
break
if not parentOperationId:
logger.warning(f"No taskExec_ operation found in active operations. Active operations: {list(activeOperations.keys())}")
except Exception as e:
logger.error(f"Error getting parent operation ID: {str(e)}")
# Add parentOperationId to parameters so action methods can use it
# This is critical for UI dashboard hierarchical display
if parentOperationId:
enhancedParameters['parentOperationId'] = parentOperationId
logger.info(f"Passing parentOperationId '{parentOperationId}' to action {action.execMethod}.{action.execAction}")
else:
logger.warning(f"WARNING: No parentOperationId found for action {action.execMethod}.{action.execAction}. Action logs will appear at root level!")
# Check workflow status before executing the action # Check workflow status before executing the action
checkWorkflowStopped(self.services) checkWorkflowStopped(self.services)