Hotfix: dynamic and automation workflows after PR integrations

2026-01-11 22:42:02 +01:00 · 2026-01-11 22:42:02 +01:00 · cda0cb1093
commit cda0cb1093
parent 5380e30f0d
10 changed files with 183 additions and 66 deletions
--- a/modules/datamodels/datamodelChat.py
+++ b/modules/datamodels/datamodelChat.py
@ -69,6 +69,9 @@ class ChatLog(BaseModel):
    operationId: Optional[str] = Field(
        None, description="Operation ID to group related log entries"
    )
    roundNumber: Optional[int] = Field(None, description="Round number in workflow")
    taskNumber: Optional[int] = Field(None, description="Task number within round")
    actionNumber: Optional[int] = Field(None, description="Action number within task")
 registerModelLabels(
--- a/modules/interfaces/interfaceDbChatObjects.py
+++ b/modules/interfaces/interfaceDbChatObjects.py
@ -1411,7 +1411,9 @@ class ChatObjects:
        # Create log in normalized table
        createdLog = self.db.recordCreate(ChatLog, log_model)
-        # Emit log event for streaming (if event manager is available)
+        # Emit log event for streaming (only for chatbot workflows)
        # Only emit events for chatbot workflows, not for automation or dynamic workflows
        if workflow.workflowMode == WorkflowModeEnum.WORKFLOW_CHATBOT:
            try:
                from modules.features.chatbot.eventManager import get_event_manager
                event_manager = get_event_manager()
@ -1425,7 +1427,7 @@ class ChatObjects:
                    {
                        "type": "log",
                        "createdAt": log_timestamp,
-                    "item": ChatLog(**createdLog).dict()
+                        "item": ChatLog(**createdLog).model_dump()
                    }
                ))
            except Exception as e:
@ -1541,7 +1543,7 @@ class ChatObjects:
                "item": chatMessage
            })
-        # Get logs
+        # Get logs - return all logs with roundNumber if available
        logs = getRecordsetWithRBAC(self.db, ChatLog, self.currentUser, recordFilter={"workflowId": workflowId})
        for log in logs:
            # Apply timestamp filtering in Python
--- a/modules/services/serviceAi/subContentExtraction.py
+++ b/modules/services/serviceAi/subContentExtraction.py
@ -138,6 +138,13 @@ class ContentExtractor:
                            logger.debug(f"Part {part.id}: reference={hasReferenceIntent}, render={hasRenderIntent}, extract={hasExtractIntent}, hasData={hasPartData}")
                            # SAFETY: For images with any intent, always ensure render is included
                            # This ensures the image object part is always available for later rendering
                            isImage = part.typeGroup == "image" or (part.mimeType and part.mimeType.startswith("image/"))
                            if isImage and hasPartData and not hasRenderIntent:
                                logger.info(f"🖼️ Auto-adding render intent for image {part.id} (original intents: {partIntent})")
                                hasRenderIntent = True
                            # Track whether the original part has already been added
                            originalPartAdded = False
@ -332,7 +339,13 @@ class ContentExtractor:
                # IMPORTANT: "render" and "extract" can both be present!
                # In that case we create BOTH ContentParts
-                if "render" in intent.intents:
+                # SAFETY: For images with any intent, always create object part for later rendering
                isImageDocument = document.mimeType and document.mimeType.startswith("image/")
                shouldAutoRender = isImageDocument and "render" not in intent.intents and ("extract" in intent.intents or "reference" in intent.intents)
                if shouldAutoRender:
                    logger.info(f"🖼️ Auto-adding render for image document {document.id} (original intents: {intent.intents})")
                if "render" in intent.intents or shouldAutoRender:
                    # Für Images/Binary: extrahiere als Object
                    if document.mimeType.startswith("image/") or self._isBinary(document.mimeType):
                        try:
@ -391,6 +404,9 @@ class ContentExtractor:
                    )
                    # Konvertiere extrahierte Ergebnisse zu ContentParts mit Metadaten
                    # Check if object part exists (either explicit render or auto-render for images)
                    hasObjectPart = "render" in intent.intents or shouldAutoRender
                    for extracted in extractedResults:
                        for part in extracted.parts:
                            # Markiere als extracted Format
@ -400,8 +416,8 @@ class ContentExtractor:
                                "extractionPrompt": extractionPrompt,
                                "intent": "extract",
                                "usageHint": f"Use extracted content from {document.fileName}",
-                                # Verknüpfung zu object Part (falls vorhanden)
+                                # Verknüpfung zu object Part (falls vorhanden - including auto-render for images)
-                                "relatedObjectPartId": f"obj_{document.id}" if "render" in intent.intents else None
+                                "relatedObjectPartId": f"obj_{document.id}" if hasObjectPart else None
                            })
                            # For images: Mark that Vision AI extraction is needed during section generation
@ -410,7 +426,7 @@ class ContentExtractor:
                                logger.info(f"📷 Image part {part.id} marked for Vision AI extraction during section generation")
                            # Stelle sicher, dass ID eindeutig ist (falls object Part existiert)
-                            if "render" in intent.intents:
+                            if hasObjectPart:
                                part.id = f"ext_{document.id}_{part.id}"
                            allContentParts.append(part)
--- a/modules/services/serviceChat/mainServiceChat.py
+++ b/modules/services/serviceChat/mainServiceChat.py
@ -647,6 +647,17 @@ class ChatService:
        """Persist ChatLog and map it into the in-memory workflow logs list."""
        logData = dict(logData or {})
        logData["workflowId"] = workflow.id
        # Auto-populate roundNumber from workflow's currentRound if not explicitly set
        if "roundNumber" not in logData or logData["roundNumber"] is None:
            currentRound = getattr(workflow, 'currentRound', None)
            # Default to 1 if workflow doesn't have currentRound set
            if currentRound is None:
                currentRound = 1
                logger.warning(f"storeLog: workflow.currentRound is None, defaulting to 1")
            logData["roundNumber"] = currentRound
            logger.debug(f"storeLog: Set roundNumber={currentRound} for log: {logData.get('message', '')[:50]}")
        chatInterface = self.interfaceDbChat
        chatLog = chatInterface.createLog(logData)
        if not chatLog:
--- a/modules/services/serviceSharepoint/mainServiceSharepoint.py
+++ b/modules/services/serviceSharepoint/mainServiceSharepoint.py
@ -731,7 +731,7 @@ class SharepointService:
    async def getSiteByStandardPath(self, sitePath: str, allSites: Optional[List[Dict[str, Any]]] = None) -> Optional[Dict[str, Any]]:
        """
        Get SharePoint site directly by Microsoft-standard path (/sites/SiteName)
-        without loading all sites. Uses hostname from first available site.
+        without loading all sites. Uses hostname from root site (single API call).
        Parameters:
            sitePath (str): Site path like 'company-share' (without /sites/ prefix)
@ -741,18 +741,28 @@ class SharepointService:
            Optional[Dict[str, Any]]: Site information if found, None otherwise
        """
        try:
            # Get hostname from first available site (minimal load - only 1 site)
            if allSites and len(allSites) > 0:
            from urllib.parse import urlparse
            hostname = None
            # Get hostname - priority order: allSites > root site API > full discovery (fallback)
            if allSites and len(allSites) > 0:
                webUrl = allSites[0].get("webUrl", "")
                hostname = urlparse(webUrl).hostname if webUrl else None
-            else:
+            
-                # Discover minimal sites to get hostname
+            if not hostname:
                # Get hostname from root site (single efficient API call instead of discovering all sites)
                rootSite = await self._makeGraphApiCall("sites/root")
                if rootSite and "webUrl" in rootSite and "error" not in rootSite:
                    hostname = urlparse(rootSite.get("webUrl", "")).hostname
                    logger.debug(f"Got hostname '{hostname}' from root site (efficient)")
            if not hostname:
                # Fallback: discover all sites (expensive, avoid if possible)
                logger.warning("Could not get hostname from root site, falling back to full site discovery")
                minimalSites = await self.discoverSites()
                if not minimalSites:
                    logger.warning("No sites available to extract hostname")
                    return None
                from urllib.parse import urlparse
                hostname = urlparse(minimalSites[0].get("webUrl", "")).hostname
            if not hostname:
--- a/modules/shared/progressLogger.py
+++ b/modules/shared/progressLogger.py
@ -149,6 +149,13 @@ class ProgressLogger:
                    # Parent operation never existed - log warning
                    logger.debug(f"WARNING: Parent operation '{parentOperationId}' not found in activeOperations when creating log for '{operationId}'. Available operations: {list(self.activeOperations.keys())}. Child operation may appear at root level.")
        # Get round number from workflow - include in operationId for unique per-round operations
        roundNumber = getattr(workflow, 'currentRound', None) or 1
        # Create round-specific operationId and parentId for the log
        roundOperationId = f"{operationId}_r{roundNumber}"
        roundParentId = f"{parentOperationId}_r{roundNumber}" if parentOperationId else None
        # parentId in ChatLog should be the operationId of the parent operation, not the log entry ID
        logData = {
            "workflowId": workflow.id,
@ -156,8 +163,8 @@ class ProgressLogger:
            "type": "info",
            "status": status,
            "progress": progress,
-            "operationId": operationId,
+            "operationId": roundOperationId,
-            "parentId": parentOperationId  # Set to parent's operationId, not log entry ID
+            "parentId": roundParentId  # Set to parent's operationId, not log entry ID
        }
        try:
--- a/modules/workflows/methods/methodAi/actions/process.py
+++ b/modules/workflows/methods/methodAi/actions/process.py
@ -192,8 +192,14 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult:
            # automatically in _extractAndPrepareContent() (Phase 5B)
            logger.info(f"ai.process: Calling callAiContent with {len(documentList.references)} document references")
            if documentList.references:
                from modules.datamodels.datamodelDocref import DocumentListReference, DocumentItemReference
                for idx, ref in enumerate(documentList.references):
                    if isinstance(ref, DocumentItemReference):
                        logger.info(f"  Passing reference {idx + 1}: documentId={ref.documentId}")
                    elif isinstance(ref, DocumentListReference):
                        logger.info(f"  Passing reference {idx + 1}: label={ref.label}")
                    else:
                        logger.info(f"  Passing reference {idx + 1}: {ref}")
            aiResponse = await self.services.ai.callAiContent(
                prompt=aiPrompt,
--- a/modules/workflows/methods/methodSharepoint/actions/uploadDocument.py
+++ b/modules/workflows/methods/methodSharepoint/actions/uploadDocument.py
@ -142,8 +142,17 @@ async def uploadDocument(self, parameters: Dict[str, Any]) -> ActionResult:
                        logger.info(f"Using folder ID upload endpoint: {uploadEndpoint}")
                    else:
                        # It's a regular path - use the root-based upload endpoint
-                        uploadPath = uploadPath.rstrip('/') + '/' + fileName
+                        # Strip /sites/{siteName}/ prefix if present (Graph API path is relative to site's drive)
-                        uploadPathClean = uploadPath.lstrip('/')
+                        uploadPathForApi = uploadPath
                        if uploadPathForApi.startswith('/sites/'):
                            # Extract path after /sites/{siteName}/
                            parts = uploadPathForApi.split('/', 3)  # ['', 'sites', 'sitename', 'rest/of/path']
                            if len(parts) >= 4:
                                uploadPathForApi = '/' + parts[3]  # Keep the rest after /sites/sitename/
                                logger.info(f"Stripped site prefix from upload path: '{uploadPath}' -> '{uploadPathForApi}'")
                        uploadPathForApi = uploadPathForApi.rstrip('/') + '/' + fileName
                        uploadPathClean = uploadPathForApi.lstrip('/')
                        uploadEndpoint = f"sites/{siteId}/drive/root:/{uploadPathClean}:/content"
                        logger.info(f"Using path-based upload endpoint: {uploadEndpoint}")
--- a/modules/workflows/methods/methodSharepoint/helpers/documentParsing.py
+++ b/modules/workflows/methods/methodSharepoint/helpers/documentParsing.py
@ -62,6 +62,18 @@ class DocumentParsingHelper:
            try:
                resultData = json.loads(fileData)
                # Handle nested structure: documentData.data may contain the actual JSON as a string
                if "documentData" in resultData and isinstance(resultData.get("documentData"), dict):
                    innerData = resultData["documentData"].get("data")
                    if innerData and isinstance(innerData, str):
                        try:
                            # Parse the inner JSON string
                            resultData = json.loads(innerData)
                            logger.debug(f"Parsed nested documentData.data structure")
                        except json.JSONDecodeError:
                            logger.debug(f"documentData.data is not valid JSON, using as-is")
                foundDocuments = resultData.get("foundDocuments", [])
                # If no foundDocuments, check if it's a listDocuments result (has listResults)
@ -202,7 +214,34 @@ class DocumentParsingHelper:
            if fileData:
                try:
                    # Check if fileData is binary (not text/JSON)
                    # Binary files (xlsx, pdf, etc.) can't be parsed as JSON
                    isBinaryFile = False
                    if isinstance(fileData, bytes):
                        try:
                            fileData = fileData.decode('utf-8')
                        except UnicodeDecodeError:
                            # Binary file - cannot parse as JSON
                            isBinaryFile = True
                            logger.debug(f"File is binary (not UTF-8 decodable), treating as regular file to upload")
                    if isBinaryFile:
                        # Binary file - treat as regular file to upload
                        filesToUpload = chatDocuments
                    else:
                        resultData = json.loads(fileData)
                        # Handle nested structure: documentData.data may contain the actual JSON as a string
                        if "documentData" in resultData and isinstance(resultData.get("documentData"), dict):
                            innerData = resultData["documentData"].get("data")
                            if innerData and isinstance(innerData, str):
                                try:
                                    # Parse the inner JSON string
                                    resultData = json.loads(innerData)
                                    logger.debug(f"Parsed nested documentData.data structure for folder parsing")
                                except json.JSONDecodeError:
                                    logger.debug(f"documentData.data is not valid JSON, using as-is")
                        foundDocuments = resultData.get("foundDocuments", [])
                        if foundDocuments:
--- a/modules/workflows/workflowManager.py
+++ b/modules/workflows/workflowManager.py
@ -209,6 +209,17 @@ class WorkflowManager:
                complexity = analysisResult.get('complexity', 'moderate')
                needsWorkflowHistory = analysisResult.get('needsWorkflowHistory', False)
                fastTrack = analysisResult.get('fastTrack', False)
                workflowName = analysisResult.get('workflowName')
                # Update workflow name if provided by analysis
                if workflowName and workflowName.strip():
                    try:
                        workflow = self.services.workflow
                        if workflow:
                            self.services.chat.updateWorkflow(workflow.id, {"name": workflowName.strip()})
                            logger.debug(f"Updated workflow {workflow.id} name to: {workflowName.strip()}")
                    except Exception as e:
                        logger.warning(f"Failed to update workflow name: {e}")
                # Extract intent analysis fields and store as workflowIntent
                workflowIntent = {
@ -324,6 +335,7 @@ class WorkflowManager:
 9. expectedFormats: What file format(s) they expect - provide matching file format extensions list (e.g., ["xlsx", "pdf"]). If format is unclear or not specified, use empty list []
 10. qualityRequirements: Quality requirements they have (accuracy, completeness) as {{accuracyThreshold: 0.0-1.0, completenessThreshold: 0.0-1.0}}
 11. successCriteria: Specific success criteria that define completion (array of strings)
 12. workflowName: Create a concise, descriptive name for this workflow in the detected language. The name should summarize the main task or goal (e.g., "Service Report January 2026", "Email Analysis", "Document Generation"). Keep it short (max 60 characters) and meaningful.
 Rules:
 - If total content (intent + data) is < 10% of model max tokens, do not extract; return empty contextItems and keep intent compact and self-contained
@ -357,7 +369,8 @@ Return ONLY JSON (no markdown) with this exact structure:
    "accuracyThreshold": 0.0-1.0,
    "completenessThreshold": 0.0-1.0
  }},
-  "successCriteria": ["specific criterion 1", "specific criterion 2"]
+  "successCriteria": ["specific criterion 1", "specific criterion 2"],
  "workflowName": "Concise workflow name in detected language (max 40 characters)"
 }}
 ## User Message
@ -406,7 +419,8 @@ The following is the user's original input message. Analyze intent, normalize th
                "accuracyThreshold": 0.8,
                "completenessThreshold": 0.8
            },
-            "successCriteria": []
+            "successCriteria": [],
            "workflowName": "New Workflow"
        }
    async def _executeFastPath(self, userInput: UserInputRequest, documents: List[ChatDocument]) -> None: