Hotfix: dynamic and automation workflows after PR integrations

This commit is contained in:
ValueOn AG 2026-01-11 22:42:02 +01:00
parent 5380e30f0d
commit cda0cb1093
10 changed files with 183 additions and 66 deletions

View file

@ -69,6 +69,9 @@ class ChatLog(BaseModel):
operationId: Optional[str] = Field(
None, description="Operation ID to group related log entries"
)
roundNumber: Optional[int] = Field(None, description="Round number in workflow")
taskNumber: Optional[int] = Field(None, description="Task number within round")
actionNumber: Optional[int] = Field(None, description="Action number within task")
registerModelLabels(

View file

@ -1411,26 +1411,28 @@ class ChatObjects:
# Create log in normalized table
createdLog = self.db.recordCreate(ChatLog, log_model)
# Emit log event for streaming (if event manager is available)
try:
from modules.features.chatbot.eventManager import get_event_manager
event_manager = get_event_manager()
log_timestamp = parseTimestamp(createdLog.get("timestamp"), default=getUtcTimestamp())
# Emit log event in exact chatData format: {type, createdAt, item}
asyncio.create_task(event_manager.emit_event(
workflowId,
"chatdata",
"New log",
"log",
{
"type": "log",
"createdAt": log_timestamp,
"item": ChatLog(**createdLog).dict()
}
))
except Exception as e:
# Event manager not available or error - continue without emitting
logger.debug(f"Could not emit log event: {e}")
# Emit log event for streaming (only for chatbot workflows)
# Only emit events for chatbot workflows, not for automation or dynamic workflows
if workflow.workflowMode == WorkflowModeEnum.WORKFLOW_CHATBOT:
try:
from modules.features.chatbot.eventManager import get_event_manager
event_manager = get_event_manager()
log_timestamp = parseTimestamp(createdLog.get("timestamp"), default=getUtcTimestamp())
# Emit log event in exact chatData format: {type, createdAt, item}
asyncio.create_task(event_manager.emit_event(
workflowId,
"chatdata",
"New log",
"log",
{
"type": "log",
"createdAt": log_timestamp,
"item": ChatLog(**createdLog).model_dump()
}
))
except Exception as e:
# Event manager not available or error - continue without emitting
logger.debug(f"Could not emit log event: {e}")
# Return validated ChatLog instance
return ChatLog(**createdLog)
@ -1541,7 +1543,7 @@ class ChatObjects:
"item": chatMessage
})
# Get logs
# Get logs - return all logs with roundNumber if available
logs = getRecordsetWithRBAC(self.db, ChatLog, self.currentUser, recordFilter={"workflowId": workflowId})
for log in logs:
# Apply timestamp filtering in Python

View file

@ -138,6 +138,13 @@ class ContentExtractor:
logger.debug(f"Part {part.id}: reference={hasReferenceIntent}, render={hasRenderIntent}, extract={hasExtractIntent}, hasData={hasPartData}")
# SAFETY: For images with any intent, always ensure render is included
# This ensures the image object part is always available for later rendering
isImage = part.typeGroup == "image" or (part.mimeType and part.mimeType.startswith("image/"))
if isImage and hasPartData and not hasRenderIntent:
logger.info(f"🖼️ Auto-adding render intent for image {part.id} (original intents: {partIntent})")
hasRenderIntent = True
# Track ob der originale Part bereits hinzugefügt wurde
originalPartAdded = False
@ -332,7 +339,13 @@ class ContentExtractor:
# WICHTIG: "render" und "extract" können beide vorhanden sein!
# In diesem Fall erzeugen wir BEIDE ContentParts
if "render" in intent.intents:
# SAFETY: For images with any intent, always create object part for later rendering
isImageDocument = document.mimeType and document.mimeType.startswith("image/")
shouldAutoRender = isImageDocument and "render" not in intent.intents and ("extract" in intent.intents or "reference" in intent.intents)
if shouldAutoRender:
logger.info(f"🖼️ Auto-adding render for image document {document.id} (original intents: {intent.intents})")
if "render" in intent.intents or shouldAutoRender:
# Für Images/Binary: extrahiere als Object
if document.mimeType.startswith("image/") or self._isBinary(document.mimeType):
try:
@ -391,6 +404,9 @@ class ContentExtractor:
)
# Konvertiere extrahierte Ergebnisse zu ContentParts mit Metadaten
# Check if object part exists (either explicit render or auto-render for images)
hasObjectPart = "render" in intent.intents or shouldAutoRender
for extracted in extractedResults:
for part in extracted.parts:
# Markiere als extracted Format
@ -400,8 +416,8 @@ class ContentExtractor:
"extractionPrompt": extractionPrompt,
"intent": "extract",
"usageHint": f"Use extracted content from {document.fileName}",
# Verknüpfung zu object Part (falls vorhanden)
"relatedObjectPartId": f"obj_{document.id}" if "render" in intent.intents else None
# Verknüpfung zu object Part (falls vorhanden - including auto-render for images)
"relatedObjectPartId": f"obj_{document.id}" if hasObjectPart else None
})
# For images: Mark that Vision AI extraction is needed during section generation
@ -410,7 +426,7 @@ class ContentExtractor:
logger.info(f"📷 Image part {part.id} marked for Vision AI extraction during section generation")
# Stelle sicher, dass ID eindeutig ist (falls object Part existiert)
if "render" in intent.intents:
if hasObjectPart:
part.id = f"ext_{document.id}_{part.id}"
allContentParts.append(part)

View file

@ -647,6 +647,17 @@ class ChatService:
"""Persist ChatLog and map it into the in-memory workflow logs list."""
logData = dict(logData or {})
logData["workflowId"] = workflow.id
# Auto-populate roundNumber from workflow's currentRound if not explicitly set
if "roundNumber" not in logData or logData["roundNumber"] is None:
currentRound = getattr(workflow, 'currentRound', None)
# Default to 1 if workflow doesn't have currentRound set
if currentRound is None:
currentRound = 1
logger.warning(f"storeLog: workflow.currentRound is None, defaulting to 1")
logData["roundNumber"] = currentRound
logger.debug(f"storeLog: Set roundNumber={currentRound} for log: {logData.get('message', '')[:50]}")
chatInterface = self.interfaceDbChat
chatLog = chatInterface.createLog(logData)
if not chatLog:

View file

@ -731,7 +731,7 @@ class SharepointService:
async def getSiteByStandardPath(self, sitePath: str, allSites: Optional[List[Dict[str, Any]]] = None) -> Optional[Dict[str, Any]]:
"""
Get SharePoint site directly by Microsoft-standard path (/sites/SiteName)
without loading all sites. Uses hostname from first available site.
without loading all sites. Uses hostname from root site (single API call).
Parameters:
sitePath (str): Site path like 'company-share' (without /sites/ prefix)
@ -741,18 +741,28 @@ class SharepointService:
Optional[Dict[str, Any]]: Site information if found, None otherwise
"""
try:
# Get hostname from first available site (minimal load - only 1 site)
from urllib.parse import urlparse
hostname = None
# Get hostname - priority order: allSites > root site API > full discovery (fallback)
if allSites and len(allSites) > 0:
from urllib.parse import urlparse
webUrl = allSites[0].get("webUrl", "")
hostname = urlparse(webUrl).hostname if webUrl else None
else:
# Discover minimal sites to get hostname
if not hostname:
# Get hostname from root site (single efficient API call instead of discovering all sites)
rootSite = await self._makeGraphApiCall("sites/root")
if rootSite and "webUrl" in rootSite and "error" not in rootSite:
hostname = urlparse(rootSite.get("webUrl", "")).hostname
logger.debug(f"Got hostname '{hostname}' from root site (efficient)")
if not hostname:
# Fallback: discover all sites (expensive, avoid if possible)
logger.warning("Could not get hostname from root site, falling back to full site discovery")
minimalSites = await self.discoverSites()
if not minimalSites:
logger.warning("No sites available to extract hostname")
return None
from urllib.parse import urlparse
hostname = urlparse(minimalSites[0].get("webUrl", "")).hostname
if not hostname:

View file

@ -149,6 +149,13 @@ class ProgressLogger:
# Parent operation never existed - log warning
logger.debug(f"WARNING: Parent operation '{parentOperationId}' not found in activeOperations when creating log for '{operationId}'. Available operations: {list(self.activeOperations.keys())}. Child operation may appear at root level.")
# Get round number from workflow - include in operationId for unique per-round operations
roundNumber = getattr(workflow, 'currentRound', None) or 1
# Create round-specific operationId and parentId for the log
roundOperationId = f"{operationId}_r{roundNumber}"
roundParentId = f"{parentOperationId}_r{roundNumber}" if parentOperationId else None
# parentId in ChatLog should be the operationId of the parent operation, not the log entry ID
logData = {
"workflowId": workflow.id,
@ -156,8 +163,8 @@ class ProgressLogger:
"type": "info",
"status": status,
"progress": progress,
"operationId": operationId,
"parentId": parentOperationId # Set to parent's operationId, not log entry ID
"operationId": roundOperationId,
"parentId": roundParentId # Set to parent's operationId, not log entry ID
}
try:

View file

@ -192,8 +192,14 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
# automatically in _extractAndPrepareContent() (Phase 5B)
logger.info(f"ai.process: Calling callAiContent with {len(documentList.references)} document references")
if documentList.references:
from modules.datamodels.datamodelDocref import DocumentListReference, DocumentItemReference
for idx, ref in enumerate(documentList.references):
logger.info(f" Passing reference {idx + 1}: documentId={ref.documentId}")
if isinstance(ref, DocumentItemReference):
logger.info(f" Passing reference {idx + 1}: documentId={ref.documentId}")
elif isinstance(ref, DocumentListReference):
logger.info(f" Passing reference {idx + 1}: label={ref.label}")
else:
logger.info(f" Passing reference {idx + 1}: {ref}")
aiResponse = await self.services.ai.callAiContent(
prompt=aiPrompt,

View file

@ -142,8 +142,17 @@ async def uploadDocument(self, parameters: Dict[str, Any]) -> ActionResult:
logger.info(f"Using folder ID upload endpoint: {uploadEndpoint}")
else:
# It's a regular path - use the root-based upload endpoint
uploadPath = uploadPath.rstrip('/') + '/' + fileName
uploadPathClean = uploadPath.lstrip('/')
# Strip /sites/{siteName}/ prefix if present (Graph API path is relative to site's drive)
uploadPathForApi = uploadPath
if uploadPathForApi.startswith('/sites/'):
# Extract path after /sites/{siteName}/
parts = uploadPathForApi.split('/', 3) # ['', 'sites', 'sitename', 'rest/of/path']
if len(parts) >= 4:
uploadPathForApi = '/' + parts[3] # Keep the rest after /sites/sitename/
logger.info(f"Stripped site prefix from upload path: '{uploadPath}' -> '{uploadPathForApi}'")
uploadPathForApi = uploadPathForApi.rstrip('/') + '/' + fileName
uploadPathClean = uploadPathForApi.lstrip('/')
uploadEndpoint = f"sites/{siteId}/drive/root:/{uploadPathClean}:/content"
logger.info(f"Using path-based upload endpoint: {uploadEndpoint}")

View file

@ -62,6 +62,18 @@ class DocumentParsingHelper:
try:
resultData = json.loads(fileData)
# Handle nested structure: documentData.data may contain the actual JSON as a string
if "documentData" in resultData and isinstance(resultData.get("documentData"), dict):
innerData = resultData["documentData"].get("data")
if innerData and isinstance(innerData, str):
try:
# Parse the inner JSON string
resultData = json.loads(innerData)
logger.debug(f"Parsed nested documentData.data structure")
except json.JSONDecodeError:
logger.debug(f"documentData.data is not valid JSON, using as-is")
foundDocuments = resultData.get("foundDocuments", [])
# If no foundDocuments, check if it's a listDocuments result (has listResults)
@ -202,40 +214,67 @@ class DocumentParsingHelper:
if fileData:
try:
resultData = json.loads(fileData)
foundDocuments = resultData.get("foundDocuments", [])
# Check if fileData is binary (not text/JSON)
# Binary files (xlsx, pdf, etc.) can't be parsed as JSON
isBinaryFile = False
if isinstance(fileData, bytes):
try:
fileData = fileData.decode('utf-8')
except UnicodeDecodeError:
# Binary file - cannot parse as JSON
isBinaryFile = True
logger.debug(f"File is binary (not UTF-8 decodable), treating as regular file to upload")
if foundDocuments:
# Extract folder path from first found document
firstDoc = foundDocuments[0]
parentPath = firstDoc.get("parentPath", "")
if parentPath:
folderPath = parentPath
if isBinaryFile:
# Binary file - treat as regular file to upload
filesToUpload = chatDocuments
else:
resultData = json.loads(fileData)
# Extract site information
siteName = firstDoc.get("siteName")
siteId = firstDoc.get("siteId")
# Handle nested structure: documentData.data may contain the actual JSON as a string
if "documentData" in resultData and isinstance(resultData.get("documentData"), dict):
innerData = resultData["documentData"].get("data")
if innerData and isinstance(innerData, str):
try:
# Parse the inner JSON string
resultData = json.loads(innerData)
logger.debug(f"Parsed nested documentData.data structure for folder parsing")
except json.JSONDecodeError:
logger.debug(f"documentData.data is not valid JSON, using as-is")
if siteName and siteId:
sites = [{
"id": siteId,
"displayName": siteName,
"webUrl": firstDoc.get("webUrl", "")
}]
elif siteName:
# Discover sites to find siteId
allSites = await self.method.siteDiscovery.discoverSharePointSites()
matchingSites = self.method.siteDiscovery.filterSitesByHint(allSites, siteName)
if matchingSites:
foundDocuments = resultData.get("foundDocuments", [])
if foundDocuments:
# Extract folder path from first found document
firstDoc = foundDocuments[0]
parentPath = firstDoc.get("parentPath", "")
if parentPath:
folderPath = parentPath
# Extract site information
siteName = firstDoc.get("siteName")
siteId = firstDoc.get("siteId")
if siteName and siteId:
sites = [{
"id": matchingSites[0].get("id"),
"id": siteId,
"displayName": siteName,
"webUrl": matchingSites[0].get("webUrl", "")
"webUrl": firstDoc.get("webUrl", "")
}]
elif siteName:
# Discover sites to find siteId
allSites = await self.method.siteDiscovery.discoverSharePointSites()
matchingSites = self.method.siteDiscovery.filterSitesByHint(allSites, siteName)
if matchingSites:
sites = [{
"id": matchingSites[0].get("id"),
"displayName": siteName,
"webUrl": matchingSites[0].get("webUrl", "")
}]
# For uploadDocument: filesToUpload are the chatDocuments themselves
# (they contain the files to upload)
filesToUpload = chatDocuments
# For uploadDocument: filesToUpload are the chatDocuments themselves
# (they contain the files to upload)
filesToUpload = chatDocuments
except json.JSONDecodeError:
# Not a findDocumentPath result - treat as regular files to upload

View file

@ -209,6 +209,17 @@ class WorkflowManager:
complexity = analysisResult.get('complexity', 'moderate')
needsWorkflowHistory = analysisResult.get('needsWorkflowHistory', False)
fastTrack = analysisResult.get('fastTrack', False)
workflowName = analysisResult.get('workflowName')
# Update workflow name if provided by analysis
if workflowName and workflowName.strip():
try:
workflow = self.services.workflow
if workflow:
self.services.chat.updateWorkflow(workflow.id, {"name": workflowName.strip()})
logger.debug(f"Updated workflow {workflow.id} name to: {workflowName.strip()}")
except Exception as e:
logger.warning(f"Failed to update workflow name: {e}")
# Extract intent analysis fields and store as workflowIntent
workflowIntent = {
@ -324,6 +335,7 @@ class WorkflowManager:
9. expectedFormats: What file format(s) they expect - provide matching file format extensions list (e.g., ["xlsx", "pdf"]). If format is unclear or not specified, use empty list []
10. qualityRequirements: Quality requirements they have (accuracy, completeness) as {{accuracyThreshold: 0.0-1.0, completenessThreshold: 0.0-1.0}}
11. successCriteria: Specific success criteria that define completion (array of strings)
12. workflowName: Create a concise, descriptive name for this workflow in the detected language. The name should summarize the main task or goal (e.g., "Service Report January 2026", "Email Analysis", "Document Generation"). Keep it short (max 60 characters) and meaningful.
Rules:
- If total content (intent + data) is < 10% of model max tokens, do not extract; return empty contextItems and keep intent compact and self-contained
@ -357,7 +369,8 @@ Return ONLY JSON (no markdown) with this exact structure:
"accuracyThreshold": 0.0-1.0,
"completenessThreshold": 0.0-1.0
}},
"successCriteria": ["specific criterion 1", "specific criterion 2"]
"successCriteria": ["specific criterion 1", "specific criterion 2"],
"workflowName": "Concise workflow name in detected language (max 40 characters)"
}}
## User Message
@ -406,7 +419,8 @@ The following is the user's original input message. Analyze intent, normalize th
"accuracyThreshold": 0.8,
"completenessThreshold": 0.8
},
"successCriteria": []
"successCriteria": [],
"workflowName": "New Workflow"
}
async def _executeFastPath(self, userInput: UserInputRequest, documents: List[ChatDocument]) -> None: