Hot fixes: SharePoint folders and stats

This commit is contained in:
ValueOn AG 2025-12-07 08:48:49 +01:00
parent 4418dfb604
commit 13b7c4fdbe
8 changed files with 1229 additions and 968 deletions

View file

@ -15,6 +15,7 @@ from modules.security.auth import getCurrentUser, limiter
from modules.datamodels.datamodelChat import AutomationDefinition, ChatWorkflow from modules.datamodels.datamodelChat import AutomationDefinition, ChatWorkflow
from modules.datamodels.datamodelPagination import PaginationParams, PaginatedResponse, PaginationMetadata from modules.datamodels.datamodelPagination import PaginationParams, PaginatedResponse, PaginationMetadata
from modules.shared.attributeUtils import getModelAttributeDefinitions from modules.shared.attributeUtils import getModelAttributeDefinitions
from modules.features.automation import executeAutomation
# Configure logger # Configure logger
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -217,7 +218,7 @@ async def execute_automation(
"""Execute an automation immediately (test mode)""" """Execute an automation immediately (test mode)"""
try: try:
chatInterface = getChatInterface(currentUser) chatInterface = getChatInterface(currentUser)
workflow = await chatInterface.executeAutomation(automationId) workflow = await executeAutomation(automationId, chatInterface)
return workflow return workflow
except HTTPException: except HTTPException:
raise raise

View file

@ -1013,7 +1013,8 @@ class ChatService:
return self._progressLogger return self._progressLogger
def createProgressLogger(self) -> ProgressLogger: def createProgressLogger(self) -> ProgressLogger:
return ProgressLogger(self.services) """Get or create the progress logger instance (singleton)"""
return self._getProgressLogger()
def progressLogStart(self, operationId: str, serviceName: str, actionName: str, context: str = "", parentOperationId: Optional[str] = None): def progressLogStart(self, operationId: str, serviceName: str, actionName: str, context: str = "", parentOperationId: Optional[str] = None):
"""Wrapper for ProgressLogger.startOperation """Wrapper for ProgressLogger.startOperation

View file

@ -287,7 +287,12 @@ class SharepointService:
try: try:
# Clean the path # Clean the path
cleanPath = folderPath.lstrip('/') cleanPath = folderPath.lstrip('/')
endpoint = f"sites/{siteId}/drive/root:/{cleanPath}"
# If path is empty, get root directly
if not cleanPath:
endpoint = f"sites/{siteId}/drive/root"
else:
endpoint = f"sites/{siteId}/drive/root:/{cleanPath}"
result = await self._makeGraphApiCall(endpoint) result = await self._makeGraphApiCall(endpoint)
@ -499,4 +504,407 @@ class SharepointService:
except Exception as e: except Exception as e:
logger.error(f"Error downloading file by path: {str(e)}") logger.error(f"Error downloading file by path: {str(e)}")
return None return None
async def _getItemById(self, siteId: str, driveId: str, itemId: str) -> Optional[Dict[str, Any]]:
    """Look up a drive item by its ID to confirm it exists.

    Args:
        siteId: SharePoint site ID
        driveId: Drive (document library) ID
        itemId: ID of the item to verify

    Returns:
        The item dictionary on success, None when the item is missing
        or the Graph call fails.
    """
    try:
        endpoint = f"sites/{siteId}/drives/{driveId}/items/{itemId}"
        response = await self._makeGraphApiCall(endpoint)
        # A Graph error payload means the item could not be resolved.
        if "error" not in response:
            return response
        logger.warning(f"Item {itemId} not found: {response['error']}")
        return None
    except Exception as e:
        logger.warning(f"Error verifying item {itemId}: {str(e)}")
        return None
async def _findDriveForItem(self, siteId: str, itemId: str) -> Optional[str]:
    """Locate the drive (document library) that contains a given item.

    Enumerates every drive of the site and probes each one for the item
    ID, returning the first drive in which the lookup succeeds.

    Args:
        siteId: SharePoint site ID
        itemId: Item ID to locate

    Returns:
        The containing drive's ID, or None when no drive holds the item.
    """
    try:
        drivesResult = await self._makeGraphApiCall(f"sites/{siteId}/drives")
        if "error" in drivesResult:
            logger.warning(f"Could not get drives for site {siteId}: {drivesResult['error']}")
            return None
        drives = drivesResult.get("value", [])
        if not drives:
            logger.warning(f"No drives found for site {siteId}")
            return None
        # Probe each drive in turn until the item resolves.
        for drive in drives:
            driveId = drive.get("id")
            if not driveId:
                continue
            if await self._getItemById(siteId, driveId, itemId):
                logger.info(f"Found item {itemId} in drive {drive.get('name', driveId)}")
                return driveId
        logger.warning(f"Item {itemId} not found in any drive for site {siteId}")
        return None
    except Exception as e:
        logger.warning(f"Error finding drive for item {itemId}: {str(e)}")
        return None
async def getFolderUsageAnalytics(self, siteId: str, driveId: str, itemId: str, startDateTime: Optional[str] = None, endDateTime: Optional[str] = None, interval: str = "day") -> Dict[str, Any]:
    """Get usage analytics for a folder or file.

    Calls the Graph `getActivitiesByInterval` endpoint. On a 404, it
    distinguishes "item exists but has no analytics" (returns an empty
    analytics structure) from "item not found" (returns the error), and
    additionally retries once against the drive that actually contains
    the item when the caller passed the wrong driveId.

    Args:
        siteId: SharePoint site ID
        driveId: Drive ID (document library)
        itemId: Folder or file item ID
        startDateTime: Start date/time in ISO format (e.g., "2025-11-01T00:00:00Z"). If None, uses 30 days ago.
        endDateTime: End date/time in ISO format (e.g., "2025-11-30T23:59:59Z"). If None, uses current time.
        interval: Time interval for grouping activities. Options: "day", "week", "month". Default: "day"

    Returns:
        Dictionary containing analytics data with activities grouped by interval.
        If analytics are not available (404), returns empty analytics structure instead of error.
    """
    try:
        # Local import keeps the module import list untouched.
        from datetime import datetime, timedelta, timezone
        # Set default time range if not provided (last 30 days).
        # Graph expects a trailing 'Z'; isoformat() emits '+00:00'.
        if not endDateTime:
            endDateTime = datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z')
        if not startDateTime:
            startDate = datetime.now(timezone.utc) - timedelta(days=30)
            startDateTime = startDate.isoformat().replace('+00:00', 'Z')
        # Build endpoint with query parameters
        endpoint = f"sites/{siteId}/drives/{driveId}/items/{itemId}/getActivitiesByInterval"
        endpoint += f"?startDateTime={startDateTime}&endDateTime={endDateTime}&interval={interval}"
        result = await self._makeGraphApiCall(endpoint)
        if "error" in result:
            errorMsg = result.get('error', '')
            # Check if it's a 404 error (detected by substring in the
            # error string — assumes _makeGraphApiCall embeds the HTTP
            # status code in its error text; TODO confirm).
            if isinstance(errorMsg, str) and '404' in errorMsg:
                # Verify if the item exists - first try with current driveId
                itemInfo = await self._getItemById(siteId, driveId, itemId)
                # If not found, try to find the correct drive for this item
                if not itemInfo:
                    logger.info(f"Item {itemId} not found in drive {driveId}, searching for correct drive")
                    correctDriveId = await self._findDriveForItem(siteId, itemId)
                    if correctDriveId and correctDriveId != driveId:
                        logger.info(f"Found item in different drive {correctDriveId}, retrying analytics call")
                        # Retry with correct drive
                        endpoint = f"sites/{siteId}/drives/{correctDriveId}/items/{itemId}/getActivitiesByInterval"
                        endpoint += f"?startDateTime={startDateTime}&endDateTime={endDateTime}&interval={interval}"
                        result = await self._makeGraphApiCall(endpoint)
                        if "error" not in result:
                            logger.info(f"Successfully retrieved analytics using correct drive {correctDriveId}")
                            return result
                        # If still error, continue with original error handling.
                        # Re-resolve itemInfo against the correct drive so the
                        # shared exists/not-exists handling below applies.
                        itemInfo = await self._getItemById(siteId, correctDriveId, itemId)
                if itemInfo:
                    # Item exists but analytics are not available - return empty analytics
                    logger.warning(f"Usage analytics not available for item {itemId} (item exists but has no activity data or analytics not supported)")
                    return {
                        "value": [],
                        "note": "No analytics data available for this item. The item exists but may not have activity data or analytics may not be supported for this item type."
                    }
                else:
                    # Item doesn't exist — propagate the original error payload.
                    logger.error(f"Item {itemId} not found when trying to get usage analytics")
                    return result
            else:
                # Other error
                logger.error(f"Error getting usage analytics: {result['error']}")
                return result
        logger.info(f"Retrieved usage analytics for item {itemId} with interval {interval}")
        return result
    except Exception as e:
        logger.error(f"Error getting folder usage analytics: {str(e)}")
        return {"error": f"Error getting folder usage analytics: {str(e)}"}
async def getDriveId(self, siteId: str, driveName: Optional[str] = None) -> Optional[str]:
    """Resolve a drive ID for a site.

    When driveName is provided, the drive is matched by name
    (case-insensitively). Otherwise the site's default document
    library is preferred, falling back to the first drive listed.

    Args:
        siteId: SharePoint site ID
        driveName: Optional drive name (document library name). If None, returns default drive.

    Returns:
        Drive ID string or None if not found
    """
    try:
        result = await self._makeGraphApiCall(f"sites/{siteId}/drives")
        if "error" in result:
            logger.error(f"Error getting drives: {result['error']}")
            return None
        drives = result.get("value", [])
        if driveName:
            # Find specific drive by case-insensitive name match.
            wanted = driveName.lower()
            for drive in drives:
                if drive.get("name", "").lower() == wanted:
                    logger.info(f"Found drive '{driveName}': {drive.get('id')}")
                    return drive.get("id")
            logger.warning(f"Drive '{driveName}' not found")
            return None
        # No name given: prefer the standard documents library.
        for drive in drives:
            if drive.get("name") in ("Documents", "Shared Documents"):
                logger.info(f"Found default drive: {drive.get('name')} (ID: {drive.get('id')})")
                return drive.get("id")
        # Otherwise fall back to the first drive the site exposes, if any.
        if drives:
            logger.info(f"Using first drive: {drives[0].get('name')} (ID: {drives[0].get('id')})")
            return drives[0].get("id")
        return None
    except Exception as e:
        logger.error(f"Error getting drive ID: {str(e)}")
        return None
def extractSiteFromStandardPath(self, pathQuery: str) -> Optional[Dict[str, str]]:
    """
    Extract the site name from a Microsoft-standard server-relative path:
    /sites/company-share/Freigegebene Dokumente/...

    Returns a dict with keys siteName and innerPath (no leading slash)
    on success, else None.
    """
    try:
        # Only Microsoft-standard paths are handled here.
        if not (pathQuery and pathQuery.startswith('/sites/')):
            return None
        remainder = pathQuery[len('/sites/'):]
        if '/' not in remainder:
            # Bare site name, no inner path.
            return {"siteName": remainder, "innerPath": ""}
        siteName, _, innerPath = remainder.partition('/')
        siteName = siteName.strip()
        if not siteName:
            return None
        return {"siteName": siteName, "innerPath": innerPath.strip()}
    except Exception as e:
        logger.error(f"Error extracting site from standard path '{pathQuery}': {str(e)}")
        return None
async def getSiteByStandardPath(self, sitePath: str, allSites: Optional[List[Dict[str, Any]]] = None) -> Optional[Dict[str, Any]]:
    """
    Get SharePoint site directly by Microsoft-standard path (/sites/SiteName)
    without loading all sites. Uses hostname from first available site.

    Parameters:
        sitePath (str): Site path like 'company-share' (without /sites/ prefix)
        allSites (Optional[List[Dict]]): Pre-discovered sites list (optional, for optimization)

    Returns:
        Optional[Dict[str, Any]]: Site information if found, None otherwise
    """
    try:
        # Imported once for both branches below (was duplicated per branch).
        from urllib.parse import urlparse

        # Determine the tenant hostname from any known site's webUrl.
        if allSites:
            webUrl = allSites[0].get("webUrl", "")
            hostname = urlparse(webUrl).hostname if webUrl else None
        else:
            # Discover minimal sites to get hostname
            minimalSites = await self.discoverSites()
            if not minimalSites:
                logger.warning("No sites available to extract hostname")
                return None
            hostname = urlparse(minimalSites[0].get("webUrl", "")).hostname
        if not hostname:
            logger.warning("Could not extract hostname from site")
            return None
        logger.info(f"Extracted hostname '{hostname}' from first site, now getting site by path: {sitePath}")
        # Get site directly using hostname + path (Graph path addressing).
        endpoint = f"sites/{hostname}:/sites/{sitePath}"
        result = await self._makeGraphApiCall(endpoint)
        if "error" in result:
            logger.warning(f"Could not get site directly by path '{sitePath}': {result['error']}")
            return None
        # Normalize the Graph response to the fields callers rely on.
        siteInfo = {
            "id": result.get("id"),
            "displayName": result.get("displayName"),
            "name": result.get("name"),
            "webUrl": result.get("webUrl"),
            "description": result.get("description"),
            "createdDateTime": result.get("createdDateTime"),
            "lastModifiedDateTime": result.get("lastModifiedDateTime")
        }
        logger.info(f"Successfully got site by standard path: {siteInfo['displayName']} (ID: {siteInfo['id']})")
        return siteInfo
    except Exception as e:
        logger.error(f"Error getting site by standard path '{sitePath}': {str(e)}")
        return None
def filterSitesByHint(self, sites: List[Dict[str, Any]], siteHint: str) -> List[Dict[str, Any]]:
    """Filter discovered sites by a human-entered site hint (case-insensitive substring)."""
    try:
        if not siteHint:
            return sites
        hint = siteHint.strip().lower()
        # Match the hint against display name or web URL.
        matches = [
            site for site in sites
            if hint in (site.get("displayName") or "").lower()
            or hint in (site.get("webUrl") or "").lower()
        ]
        # Fall back to the full list when nothing matched.
        return matches or sites
    except Exception as e:
        logger.error(f"Error filtering sites by hint '{siteHint}': {str(e)}")
        return sites
async def resolveSitesFromPathQuery(self, pathQuery: str, allSites: Optional[List[Dict[str, Any]]] = None) -> List[Dict[str, Any]]:
    """
    Resolve sites from pathQuery. Handles both Microsoft-standard paths
    (/sites/SiteName/...) and regular paths. Returns list of matching sites.

    Parameters:
        pathQuery (str): Path query string (e.g., /sites/SiteName/FolderPath)
        allSites (Optional[List[Dict]]): Pre-discovered sites list (optional, for optimization)

    Returns:
        List[Dict[str, Any]]: List of matching sites
    """
    try:
        isStandardPath = pathQuery.startswith('/sites/')
        # Fast path: a Microsoft-standard /sites/<name> path can often be
        # resolved with one direct Graph call, skipping full discovery.
        if isStandardPath:
            parsed = self.extractSiteFromStandardPath(pathQuery)
            if parsed:
                directSite = await self.getSiteByStandardPath(parsed.get("siteName"), allSites)
                if directSite:
                    logger.info(f"Got site directly by standard path - no need to discover all sites")
                    return [directSite]
                logger.warning(f"Could not get site directly, falling back to site discovery")
        # Fallback: discover all sites, then filter if a site name is known.
        if not allSites:
            allSites = await self.discoverSites()
        if not allSites:
            logger.warning("No SharePoint sites found or accessible")
            return []
        if not isStandardPath:
            return allSites
        parsed = self.extractSiteFromStandardPath(pathQuery)
        if not parsed:
            return allSites
        siteName = parsed.get("siteName")
        matching = self.filterSitesByHint(allSites, siteName)
        if not matching:
            logger.warning(f"No SharePoint site found matching '{siteName}'")
            return []
        logger.info(f"Filtered to site(s) matching '{siteName}': {[s['displayName'] for s in matching]}")
        return matching
    except Exception as e:
        logger.error(f"Error resolving sites from pathQuery '{pathQuery}': {str(e)}")
        return []
def validatePathQuery(self, pathQuery: str) -> tuple[bool, Optional[str]]:
    """
    Validate pathQuery format. Returns (isValid, errorMessage).

    Parameters:
        pathQuery (str): Path query to validate

    Returns:
        tuple[bool, Optional[str]]: (True, None) if valid, (False, errorMessage) if invalid
    """
    try:
        trimmed = (pathQuery or "").strip()
        if trimmed in ("", "*"):
            return False, "pathQuery cannot be empty or '*'"
        if not pathQuery.startswith('/'):
            return False, "pathQuery must start with '/' and include site name with Microsoft-standard syntax /sites/<SiteName>/... e.g. /sites/company-share/Freigegebene Dokumente/Work"
        # Reject strings that look like free-text search terms rather
        # than one of the recognized SharePoint path roots.
        validPathPrefixes = ('/sites/', '/Documents', '/documents', '/Shared Documents', '/shared documents')
        if not pathQuery.startswith(validPathPrefixes):
            return False, f"Invalid pathQuery '{pathQuery}'. This appears to be search terms, not a valid SharePoint path. Use findDocumentPath action first to search for folders, then use the returned folder path as pathQuery."
        return True, None
    except Exception as e:
        logger.error(f"Error validating pathQuery '{pathQuery}': {str(e)}")
        return False, f"Error validating pathQuery: {str(e)}"
def detectFolderType(self, item: Dict[str, Any]) -> bool:
    """
    Detect whether an item is a folder.

    Parameters:
        item (Dict[str, Any]): Item from SharePoint API response

    Returns:
        bool: True if item is a folder, False otherwise
    """
    try:
        # Graph folder items carry a 'folder' facet — that is authoritative.
        if 'folder' in item:
            return True
        # Heuristic fallback: an extension-less name together with a
        # path-like webUrl suggests a folder.
        name = item.get('name', '')
        webUrl = item.get('webUrl', '')
        looksPathLike = '/' in webUrl or '\\' in webUrl
        return '.' not in name and looksPathLike
    except Exception as e:
        logger.error(f"Error detecting folder type: {str(e)}")
        return False

View file

@ -49,11 +49,13 @@ class MethodAi(MethodBase):
operationId = f"ai_process_{workflowId}_{int(time.time())}" operationId = f"ai_process_{workflowId}_{int(time.time())}"
# Start progress tracking # Start progress tracking
parentOperationId = parameters.get('parentOperationId')
self.services.chat.progressLogStart( self.services.chat.progressLogStart(
operationId, operationId,
"Generate", "Generate",
"AI Processing", "AI Processing",
f"Format: {parameters.get('resultType', 'txt')}" f"Format: {parameters.get('resultType', 'txt')}",
parentOperationId=parentOperationId
) )
aiPrompt = parameters.get("aiPrompt") aiPrompt = parameters.get("aiPrompt")
@ -256,11 +258,13 @@ class MethodAi(MethodBase):
operationId = f"web_research_{workflowId}_{int(time.time())}" operationId = f"web_research_{workflowId}_{int(time.time())}"
# Start progress tracking # Start progress tracking
parentOperationId = parameters.get('parentOperationId')
self.services.chat.progressLogStart( self.services.chat.progressLogStart(
operationId, operationId,
"Web Research", "Web Research",
"Searching and Crawling", "Searching and Crawling",
"Extracting URLs and Content" "Extracting URLs and Content",
parentOperationId=parentOperationId
) )
# Call webcrawl service - service handles all AI intention analysis and processing # Call webcrawl service - service handles all AI intention analysis and processing

View file

@ -250,11 +250,13 @@ class MethodContext(MethodBase):
return ActionResult.isFailure(error=f"Invalid documentList type: {type(documentListParam)}") return ActionResult.isFailure(error=f"Invalid documentList type: {type(documentListParam)}")
# Start progress tracking # Start progress tracking
parentOperationId = parameters.get('parentOperationId')
self.services.chat.progressLogStart( self.services.chat.progressLogStart(
operationId, operationId,
"Extracting content from documents", "Extracting content from documents",
"Content Extraction", "Content Extraction",
f"Documents: {len(documentList.references)}" f"Documents: {len(documentList.references)}",
parentOperationId=parentOperationId
) )
# Get ChatDocuments from documentList # Get ChatDocuments from documentList

View file

@ -334,11 +334,13 @@ class MethodOutlook(MethodBase):
operationId = f"outlook_read_{workflowId}_{int(time.time())}" operationId = f"outlook_read_{workflowId}_{int(time.time())}"
# Start progress tracking # Start progress tracking
parentOperationId = parameters.get('parentOperationId')
self.services.chat.progressLogStart( self.services.chat.progressLogStart(
operationId, operationId,
"Read Emails", "Read Emails",
"Outlook Email Reading", "Outlook Email Reading",
f"Folder: {parameters.get('folder', 'Inbox')}" f"Folder: {parameters.get('folder', 'Inbox')}",
parentOperationId=parentOperationId
) )
connectionReference = parameters.get("connectionReference") connectionReference = parameters.get("connectionReference")
@ -1546,11 +1548,13 @@ Return JSON:
operationId = f"outlook_send_{workflowId}_{int(time.time())}" operationId = f"outlook_send_{workflowId}_{int(time.time())}"
# Start progress tracking # Start progress tracking
parentOperationId = parameters.get('parentOperationId')
self.services.chat.progressLogStart( self.services.chat.progressLogStart(
operationId, operationId,
"Send Draft Email", "Send Draft Email",
"Outlook Email Sending", "Outlook Email Sending",
f"Processing {len(parameters.get('documentList', []))} draft(s)" f"Processing {len(parameters.get('documentList', []))} draft(s)",
parentOperationId=parentOperationId
) )
connectionReference = parameters.get("connectionReference") connectionReference = parameters.get("connectionReference")

File diff suppressed because it is too large Load diff

View file

@ -82,6 +82,35 @@ class ActionExecutor:
enhancedParameters['expectedDocumentFormats'] = action.expectedDocumentFormats enhancedParameters['expectedDocumentFormats'] = action.expectedDocumentFormats
logger.info(f"Expected formats: {action.expectedDocumentFormats}") logger.info(f"Expected formats: {action.expectedDocumentFormats}")
# Get current task execution operationId to pass as parent to action methods
# This MUST be the "Service Workflow Execution" operation ID (taskExec_*)
parentOperationId = None
try:
progressLogger = self.services.chat.createProgressLogger()
activeOperations = progressLogger.getActiveOperations()
logger.debug(f"Looking for parent operation ID. Active operations: {list(activeOperations.keys())}")
# Look for task execution operation (starts with "taskExec_")
# This is the "Service Workflow Execution" level that should be parent of ALL actions
for opId in activeOperations.keys():
if opId.startswith("taskExec_"):
parentOperationId = opId
logger.info(f"Found parent operation ID: {parentOperationId} for action {action.execMethod}.{action.execAction}")
break
if not parentOperationId:
logger.warning(f"No taskExec_ operation found in active operations. Active operations: {list(activeOperations.keys())}")
except Exception as e:
logger.error(f"Error getting parent operation ID: {str(e)}")
# Add parentOperationId to parameters so action methods can use it
# This is critical for UI dashboard hierarchical display
if parentOperationId:
enhancedParameters['parentOperationId'] = parentOperationId
logger.info(f"Passing parentOperationId '{parentOperationId}' to action {action.execMethod}.{action.execAction}")
else:
logger.warning(f"WARNING: No parentOperationId found for action {action.execMethod}.{action.execAction}. Action logs will appear at root level!")
# Check workflow status before executing the action # Check workflow status before executing the action
checkWorkflowStopped(self.services) checkWorkflowStopped(self.services)